{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 40365, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.432181345224824e-05, "grad_norm": 262.8539293718085, "learning_rate": 1.6515276630883567e-08, "loss": 1.1271, "step": 1 }, { "epoch": 0.00014864362690449647, "grad_norm": 62.93137500062459, "learning_rate": 3.3030553261767134e-08, "loss": 1.2008, "step": 2 }, { "epoch": 0.0002229654403567447, "grad_norm": 68.8617022660782, "learning_rate": 4.95458298926507e-08, "loss": 1.1702, "step": 3 }, { "epoch": 0.00029728725380899295, "grad_norm": 78.62592028093474, "learning_rate": 6.606110652353427e-08, "loss": 1.3431, "step": 4 }, { "epoch": 0.0003716090672612412, "grad_norm": 66.423614684566, "learning_rate": 8.257638315441784e-08, "loss": 1.344, "step": 5 }, { "epoch": 0.0004459308807134894, "grad_norm": 87.52352855942003, "learning_rate": 9.90916597853014e-08, "loss": 1.5252, "step": 6 }, { "epoch": 0.0005202526941657376, "grad_norm": 32.63692483672798, "learning_rate": 1.1560693641618497e-07, "loss": 0.9939, "step": 7 }, { "epoch": 0.0005945745076179859, "grad_norm": 40.38411474278398, "learning_rate": 1.3212221304706854e-07, "loss": 1.1417, "step": 8 }, { "epoch": 0.0006688963210702341, "grad_norm": 43.147319627453264, "learning_rate": 1.486374896779521e-07, "loss": 1.168, "step": 9 }, { "epoch": 0.0007432181345224824, "grad_norm": 95.72172606148666, "learning_rate": 1.6515276630883567e-07, "loss": 1.4735, "step": 10 }, { "epoch": 0.0008175399479747306, "grad_norm": 134.98361441375005, "learning_rate": 1.8166804293971927e-07, "loss": 1.3477, "step": 11 }, { "epoch": 0.0008918617614269788, "grad_norm": 1182.6860095564014, "learning_rate": 1.981833195706028e-07, "loss": 1.3051, "step": 12 }, { "epoch": 0.000966183574879227, "grad_norm": 70.7513025589656, "learning_rate": 2.146985962014864e-07, "loss": 1.1491, "step": 13 }, { "epoch": 0.0010405053883314752, "grad_norm": 21.790434375413664, "learning_rate": 2.3121387283236994e-07, "loss": 1.0899, "step": 14 }, { "epoch": 0.0011148272017837235, "grad_norm": 55.064874501761864, "learning_rate": 2.4772914946325353e-07, "loss": 1.2768, "step": 15 }, { "epoch": 0.0011891490152359718, "grad_norm": 76.45554666456012, "learning_rate": 2.642444260941371e-07, "loss": 1.022, "step": 16 }, { "epoch": 0.00126347082868822, "grad_norm": 536.655120617148, "learning_rate": 2.8075970272502067e-07, "loss": 1.2097, "step": 17 }, { "epoch": 0.0013377926421404682, "grad_norm": 59.82273932463312, "learning_rate": 2.972749793559042e-07, "loss": 1.0085, "step": 18 }, { "epoch": 0.0014121144555927165, "grad_norm": 117.22040364008618, "learning_rate": 3.137902559867878e-07, "loss": 1.1932, "step": 19 }, { "epoch": 0.0014864362690449647, "grad_norm": 64.855656990526, "learning_rate": 3.3030553261767134e-07, "loss": 1.2533, "step": 20 }, { "epoch": 0.001560758082497213, "grad_norm": 36.57733737390025, "learning_rate": 3.468208092485549e-07, "loss": 0.7926, "step": 21 }, { "epoch": 0.0016350798959494611, "grad_norm": 102.93898846301865, "learning_rate": 3.6333608587943853e-07, "loss": 1.0585, "step": 22 }, { "epoch": 0.0017094017094017094, "grad_norm": 48.770399213008965, "learning_rate": 3.7985136251032207e-07, "loss": 1.0917, "step": 23 }, { "epoch": 0.0017837235228539577, "grad_norm": 55.28482981507771, "learning_rate": 3.963666391412056e-07, "loss": 1.1529, "step": 24 }, { "epoch": 0.0018580453363062058, "grad_norm": 43.30409151490718, "learning_rate": 4.1288191577208926e-07, "loss": 1.3661, "step": 25 }, { "epoch": 0.001932367149758454, "grad_norm": 194.15009522528223, "learning_rate": 4.293971924029728e-07, "loss": 1.2042, "step": 26 }, { "epoch": 0.002006688963210702, "grad_norm": 173.97819972102954, "learning_rate": 4.4591246903385634e-07, "loss": 1.1473, "step": 27 }, { "epoch": 0.0020810107766629504, "grad_norm": 122.53813857126143, "learning_rate": 4.624277456647399e-07, "loss": 1.1746, "step": 28 }, { "epoch": 0.0021553325901151987, "grad_norm": 24.417650521699645, "learning_rate": 4.789430222956235e-07, "loss": 1.1091, "step": 29 }, { "epoch": 0.002229654403567447, "grad_norm": 132.3174552498263, "learning_rate": 4.954582989265071e-07, "loss": 0.7994, "step": 30 }, { "epoch": 0.0023039762170196953, "grad_norm": 25.3715438081504, "learning_rate": 5.119735755573907e-07, "loss": 1.0921, "step": 31 }, { "epoch": 0.0023782980304719436, "grad_norm": 18.16203681573205, "learning_rate": 5.284888521882741e-07, "loss": 1.0385, "step": 32 }, { "epoch": 0.002452619843924192, "grad_norm": 66.94177671677245, "learning_rate": 5.450041288191577e-07, "loss": 1.2373, "step": 33 }, { "epoch": 0.00252694165737644, "grad_norm": 35.02239203870258, "learning_rate": 5.615194054500413e-07, "loss": 1.0515, "step": 34 }, { "epoch": 0.002601263470828688, "grad_norm": 35.89298169175025, "learning_rate": 5.780346820809249e-07, "loss": 0.9778, "step": 35 }, { "epoch": 0.0026755852842809363, "grad_norm": 95.68520363927958, "learning_rate": 5.945499587118084e-07, "loss": 1.1996, "step": 36 }, { "epoch": 0.0027499070977331846, "grad_norm": 42.08771175198993, "learning_rate": 6.11065235342692e-07, "loss": 1.0285, "step": 37 }, { "epoch": 0.002824228911185433, "grad_norm": 26.088603896660963, "learning_rate": 6.275805119735756e-07, "loss": 1.1515, "step": 38 }, { "epoch": 0.002898550724637681, "grad_norm": 32.64409739154178, "learning_rate": 6.440957886044592e-07, "loss": 1.3507, "step": 39 }, { "epoch": 0.0029728725380899295, "grad_norm": 45.93462644339662, "learning_rate": 6.606110652353427e-07, "loss": 1.0047, "step": 40 }, { "epoch": 0.0030471943515421778, "grad_norm": 64.29959559864189, "learning_rate": 6.771263418662263e-07, "loss": 0.9798, "step": 41 }, { "epoch": 0.003121516164994426, "grad_norm": 231.0161815414332, "learning_rate": 6.936416184971098e-07, "loss": 1.147, "step": 42 }, { "epoch": 0.003195837978446674, "grad_norm": 42.28638329246225, "learning_rate": 7.101568951279935e-07, "loss": 1.1519, "step": 43 }, { "epoch": 0.0032701597918989222, "grad_norm": 39.488161200061676, "learning_rate": 7.266721717588771e-07, "loss": 1.0641, "step": 44 }, { "epoch": 0.0033444816053511705, "grad_norm": 52.83858977841949, "learning_rate": 7.431874483897605e-07, "loss": 1.0194, "step": 45 }, { "epoch": 0.003418803418803419, "grad_norm": 15.877451968151586, "learning_rate": 7.597027250206441e-07, "loss": 1.0599, "step": 46 }, { "epoch": 0.003493125232255667, "grad_norm": 56.53847005277177, "learning_rate": 7.762180016515277e-07, "loss": 0.7802, "step": 47 }, { "epoch": 0.0035674470457079154, "grad_norm": 18.941715401039634, "learning_rate": 7.927332782824112e-07, "loss": 0.8889, "step": 48 }, { "epoch": 0.0036417688591601637, "grad_norm": 35.91556452593514, "learning_rate": 8.092485549132949e-07, "loss": 1.1948, "step": 49 }, { "epoch": 0.0037160906726124115, "grad_norm": 27.146533578573642, "learning_rate": 8.257638315441785e-07, "loss": 1.1862, "step": 50 }, { "epoch": 0.00379041248606466, "grad_norm": 16.4890846886779, "learning_rate": 8.42279108175062e-07, "loss": 0.9232, "step": 51 }, { "epoch": 0.003864734299516908, "grad_norm": 203.0891458899361, "learning_rate": 8.587943848059456e-07, "loss": 1.1714, "step": 52 }, { "epoch": 0.003939056112969156, "grad_norm": 63.48817318886278, "learning_rate": 8.753096614368291e-07, "loss": 1.172, "step": 53 }, { "epoch": 0.004013377926421404, "grad_norm": 19.526556291510296, "learning_rate": 8.918249380677127e-07, "loss": 1.0436, "step": 54 }, { "epoch": 0.004087699739873653, "grad_norm": 103.82961514747407, "learning_rate": 9.083402146985963e-07, "loss": 1.1586, "step": 55 }, { "epoch": 0.004162021553325901, "grad_norm": 981.2584187689912, "learning_rate": 9.248554913294798e-07, "loss": 1.2366, "step": 56 }, { "epoch": 0.00423634336677815, "grad_norm": 60.06984698644029, "learning_rate": 9.413707679603635e-07, "loss": 0.824, "step": 57 }, { "epoch": 0.004310665180230397, "grad_norm": 32.90467301669933, "learning_rate": 9.57886044591247e-07, "loss": 1.0025, "step": 58 }, { "epoch": 0.004384986993682646, "grad_norm": 30.88196409704957, "learning_rate": 9.744013212221305e-07, "loss": 0.9283, "step": 59 }, { "epoch": 0.004459308807134894, "grad_norm": 77.25544380202223, "learning_rate": 9.909165978530141e-07, "loss": 1.095, "step": 60 }, { "epoch": 0.004533630620587143, "grad_norm": 93.66265071446723, "learning_rate": 1.0074318744838975e-06, "loss": 0.9748, "step": 61 }, { "epoch": 0.004607952434039391, "grad_norm": 212.35749101325243, "learning_rate": 1.0239471511147813e-06, "loss": 1.2503, "step": 62 }, { "epoch": 0.0046822742474916385, "grad_norm": 14.71531921315116, "learning_rate": 1.040462427745665e-06, "loss": 1.0108, "step": 63 }, { "epoch": 0.004756596060943887, "grad_norm": 65.67328492282145, "learning_rate": 1.0569777043765483e-06, "loss": 1.1466, "step": 64 }, { "epoch": 0.004830917874396135, "grad_norm": 47.36323506232601, "learning_rate": 1.0734929810074319e-06, "loss": 1.1064, "step": 65 }, { "epoch": 0.004905239687848384, "grad_norm": 18.58951583485136, "learning_rate": 1.0900082576383155e-06, "loss": 1.2069, "step": 66 }, { "epoch": 0.004979561501300632, "grad_norm": 22.070634278799737, "learning_rate": 1.106523534269199e-06, "loss": 0.9572, "step": 67 }, { "epoch": 0.00505388331475288, "grad_norm": 30.76119218443657, "learning_rate": 1.1230388109000827e-06, "loss": 0.8161, "step": 68 }, { "epoch": 0.005128205128205128, "grad_norm": 314.63017329374964, "learning_rate": 1.1395540875309663e-06, "loss": 0.9565, "step": 69 }, { "epoch": 0.005202526941657376, "grad_norm": 25.159528902197714, "learning_rate": 1.1560693641618499e-06, "loss": 0.9072, "step": 70 }, { "epoch": 0.005276848755109625, "grad_norm": 316.97807001182395, "learning_rate": 1.1725846407927335e-06, "loss": 0.9738, "step": 71 }, { "epoch": 0.005351170568561873, "grad_norm": 29.276242618103105, "learning_rate": 1.1890999174236168e-06, "loss": 0.9628, "step": 72 }, { "epoch": 0.005425492382014121, "grad_norm": 25.29306507200243, "learning_rate": 1.2056151940545004e-06, "loss": 1.0181, "step": 73 }, { "epoch": 0.005499814195466369, "grad_norm": 21.752264627376764, "learning_rate": 1.222130470685384e-06, "loss": 1.2792, "step": 74 }, { "epoch": 0.005574136008918618, "grad_norm": 54.40140152108396, "learning_rate": 1.2386457473162676e-06, "loss": 0.9551, "step": 75 }, { "epoch": 0.005648457822370866, "grad_norm": 175.7369844324256, "learning_rate": 1.2551610239471512e-06, "loss": 1.0669, "step": 76 }, { "epoch": 0.005722779635823114, "grad_norm": 692.5617233673141, "learning_rate": 1.2716763005780348e-06, "loss": 0.9744, "step": 77 }, { "epoch": 0.005797101449275362, "grad_norm": 43.76779706914383, "learning_rate": 1.2881915772089184e-06, "loss": 1.0152, "step": 78 }, { "epoch": 0.00587142326272761, "grad_norm": 78.02770794534558, "learning_rate": 1.304706853839802e-06, "loss": 0.9785, "step": 79 }, { "epoch": 0.005945745076179859, "grad_norm": 73.95328278789955, "learning_rate": 1.3212221304706854e-06, "loss": 0.894, "step": 80 }, { "epoch": 0.006020066889632107, "grad_norm": 19.732728798286438, "learning_rate": 1.3377374071015692e-06, "loss": 0.8368, "step": 81 }, { "epoch": 0.0060943887030843556, "grad_norm": 22.870262680688118, "learning_rate": 1.3542526837324526e-06, "loss": 0.8135, "step": 82 }, { "epoch": 0.006168710516536603, "grad_norm": 33.13032950617681, "learning_rate": 1.3707679603633362e-06, "loss": 1.0765, "step": 83 }, { "epoch": 0.006243032329988852, "grad_norm": 57.0828102175208, "learning_rate": 1.3872832369942195e-06, "loss": 0.8129, "step": 84 }, { "epoch": 0.0063173541434411, "grad_norm": 19.713997150966637, "learning_rate": 1.4037985136251033e-06, "loss": 1.1119, "step": 85 }, { "epoch": 0.006391675956893348, "grad_norm": 38.81078008700569, "learning_rate": 1.420313790255987e-06, "loss": 1.0406, "step": 86 }, { "epoch": 0.006465997770345597, "grad_norm": 150.66638220823276, "learning_rate": 1.4368290668868703e-06, "loss": 0.8253, "step": 87 }, { "epoch": 0.0065403195837978444, "grad_norm": 20.126245509467402, "learning_rate": 1.4533443435177541e-06, "loss": 0.9583, "step": 88 }, { "epoch": 0.006614641397250093, "grad_norm": 18.552292917357615, "learning_rate": 1.4698596201486375e-06, "loss": 0.8346, "step": 89 }, { "epoch": 0.006688963210702341, "grad_norm": 12.817400454697621, "learning_rate": 1.486374896779521e-06, "loss": 0.929, "step": 90 }, { "epoch": 0.00676328502415459, "grad_norm": 61.099521891184374, "learning_rate": 1.502890173410405e-06, "loss": 0.8709, "step": 91 }, { "epoch": 0.006837606837606838, "grad_norm": 32.1898814982294, "learning_rate": 1.5194054500412883e-06, "loss": 1.0205, "step": 92 }, { "epoch": 0.0069119286510590855, "grad_norm": 58.034672388125934, "learning_rate": 1.5359207266721719e-06, "loss": 0.8305, "step": 93 }, { "epoch": 0.006986250464511334, "grad_norm": 12.027987564508026, "learning_rate": 1.5524360033030555e-06, "loss": 0.902, "step": 94 }, { "epoch": 0.007060572277963582, "grad_norm": 30.021278582909712, "learning_rate": 1.568951279933939e-06, "loss": 1.0393, "step": 95 }, { "epoch": 0.007134894091415831, "grad_norm": 61.21811714244554, "learning_rate": 1.5854665565648224e-06, "loss": 1.0035, "step": 96 }, { "epoch": 0.007209215904868079, "grad_norm": 23.327172676151214, "learning_rate": 1.6019818331957063e-06, "loss": 0.8949, "step": 97 }, { "epoch": 0.007283537718320327, "grad_norm": 16.283274458627197, "learning_rate": 1.6184971098265898e-06, "loss": 0.8861, "step": 98 }, { "epoch": 0.007357859531772575, "grad_norm": 12.025285684378602, "learning_rate": 1.6350123864574732e-06, "loss": 0.9439, "step": 99 }, { "epoch": 0.007432181345224823, "grad_norm": 49.65426385167436, "learning_rate": 1.651527663088357e-06, "loss": 1.1412, "step": 100 }, { "epoch": 0.007506503158677072, "grad_norm": 16.814208863954036, "learning_rate": 1.6680429397192404e-06, "loss": 0.8494, "step": 101 }, { "epoch": 0.00758082497212932, "grad_norm": 79.96248380978372, "learning_rate": 1.684558216350124e-06, "loss": 1.0192, "step": 102 }, { "epoch": 0.007655146785581568, "grad_norm": 32.72761355230525, "learning_rate": 1.7010734929810074e-06, "loss": 1.2501, "step": 103 }, { "epoch": 0.007729468599033816, "grad_norm": 28.896903452282157, "learning_rate": 1.7175887696118912e-06, "loss": 1.0409, "step": 104 }, { "epoch": 0.007803790412486065, "grad_norm": 75.39169196633263, "learning_rate": 1.7341040462427746e-06, "loss": 0.8297, "step": 105 }, { "epoch": 0.007878112225938313, "grad_norm": 82.73057733859116, "learning_rate": 1.7506193228736582e-06, "loss": 0.8309, "step": 106 }, { "epoch": 0.007952434039390562, "grad_norm": 26.905956329773815, "learning_rate": 1.767134599504542e-06, "loss": 0.9587, "step": 107 }, { "epoch": 0.008026755852842809, "grad_norm": 53.14529740201828, "learning_rate": 1.7836498761354254e-06, "loss": 0.8762, "step": 108 }, { "epoch": 0.008101077666295057, "grad_norm": 25.2938833501351, "learning_rate": 1.800165152766309e-06, "loss": 1.0975, "step": 109 }, { "epoch": 0.008175399479747306, "grad_norm": 22.46577040791282, "learning_rate": 1.8166804293971925e-06, "loss": 0.8145, "step": 110 }, { "epoch": 0.008249721293199555, "grad_norm": 34.77736541080027, "learning_rate": 1.8331957060280761e-06, "loss": 0.9101, "step": 111 }, { "epoch": 0.008324043106651802, "grad_norm": 24.40500317065218, "learning_rate": 1.8497109826589595e-06, "loss": 0.7392, "step": 112 }, { "epoch": 0.00839836492010405, "grad_norm": 66.09831365979154, "learning_rate": 1.8662262592898433e-06, "loss": 0.8492, "step": 113 }, { "epoch": 0.0084726867335563, "grad_norm": 109.85524703528024, "learning_rate": 1.882741535920727e-06, "loss": 0.983, "step": 114 }, { "epoch": 0.008547008547008548, "grad_norm": 76.33063381357077, "learning_rate": 1.8992568125516103e-06, "loss": 0.9855, "step": 115 }, { "epoch": 0.008621330360460795, "grad_norm": 12.03443177071438, "learning_rate": 1.915772089182494e-06, "loss": 0.8985, "step": 116 }, { "epoch": 0.008695652173913044, "grad_norm": 20.87867281670042, "learning_rate": 1.9322873658133777e-06, "loss": 0.9533, "step": 117 }, { "epoch": 0.008769973987365292, "grad_norm": 120.60485779945705, "learning_rate": 1.948802642444261e-06, "loss": 0.8308, "step": 118 }, { "epoch": 0.00884429580081754, "grad_norm": 27.774278959420933, "learning_rate": 1.965317919075145e-06, "loss": 0.8791, "step": 119 }, { "epoch": 0.008918617614269788, "grad_norm": 9.768805649833762, "learning_rate": 1.9818331957060283e-06, "loss": 0.8365, "step": 120 }, { "epoch": 0.008992939427722037, "grad_norm": 13.159480479609341, "learning_rate": 1.9983484723369117e-06, "loss": 0.9658, "step": 121 }, { "epoch": 0.009067261241174285, "grad_norm": 9.711976508938664, "learning_rate": 2.014863748967795e-06, "loss": 0.9772, "step": 122 }, { "epoch": 0.009141583054626532, "grad_norm": 39.35381730704443, "learning_rate": 2.031379025598679e-06, "loss": 0.7842, "step": 123 }, { "epoch": 0.009215904868078781, "grad_norm": 41.18222563192021, "learning_rate": 2.0478943022295626e-06, "loss": 0.8353, "step": 124 }, { "epoch": 0.00929022668153103, "grad_norm": 24.059658432400692, "learning_rate": 2.064409578860446e-06, "loss": 0.7836, "step": 125 }, { "epoch": 0.009364548494983277, "grad_norm": 88.96767068878358, "learning_rate": 2.08092485549133e-06, "loss": 0.7786, "step": 126 }, { "epoch": 0.009438870308435526, "grad_norm": 48.402798984025154, "learning_rate": 2.0974401321222132e-06, "loss": 0.8155, "step": 127 }, { "epoch": 0.009513192121887774, "grad_norm": 11.737467090977793, "learning_rate": 2.1139554087530966e-06, "loss": 0.8076, "step": 128 }, { "epoch": 0.009587513935340023, "grad_norm": 49.05666127605545, "learning_rate": 2.1304706853839804e-06, "loss": 0.8323, "step": 129 }, { "epoch": 0.00966183574879227, "grad_norm": 55.32523513559019, "learning_rate": 2.1469859620148638e-06, "loss": 1.0133, "step": 130 }, { "epoch": 0.009736157562244519, "grad_norm": 238.4601583033988, "learning_rate": 2.1635012386457476e-06, "loss": 1.0181, "step": 131 }, { "epoch": 0.009810479375696768, "grad_norm": 32.93960688542608, "learning_rate": 2.180016515276631e-06, "loss": 0.8863, "step": 132 }, { "epoch": 0.009884801189149015, "grad_norm": 30.703636565980165, "learning_rate": 2.1965317919075148e-06, "loss": 1.0052, "step": 133 }, { "epoch": 0.009959123002601263, "grad_norm": 51.235854821016254, "learning_rate": 2.213047068538398e-06, "loss": 0.8281, "step": 134 }, { "epoch": 0.010033444816053512, "grad_norm": 83.67010945084742, "learning_rate": 2.229562345169282e-06, "loss": 0.9268, "step": 135 }, { "epoch": 0.01010776662950576, "grad_norm": 52.479506258565834, "learning_rate": 2.2460776218001653e-06, "loss": 1.1931, "step": 136 }, { "epoch": 0.010182088442958008, "grad_norm": 22.900223466677307, "learning_rate": 2.2625928984310487e-06, "loss": 0.8741, "step": 137 }, { "epoch": 0.010256410256410256, "grad_norm": 14.633807718407889, "learning_rate": 2.2791081750619325e-06, "loss": 0.8979, "step": 138 }, { "epoch": 0.010330732069862505, "grad_norm": 73.89924359167682, "learning_rate": 2.295623451692816e-06, "loss": 0.7812, "step": 139 }, { "epoch": 0.010405053883314752, "grad_norm": 21.575738730109588, "learning_rate": 2.3121387283236997e-06, "loss": 0.9561, "step": 140 }, { "epoch": 0.010479375696767, "grad_norm": 21.931827908176015, "learning_rate": 2.328654004954583e-06, "loss": 0.9766, "step": 141 }, { "epoch": 0.01055369751021925, "grad_norm": 26.92897838663781, "learning_rate": 2.345169281585467e-06, "loss": 0.8047, "step": 142 }, { "epoch": 0.010628019323671498, "grad_norm": 21.84919204188917, "learning_rate": 2.3616845582163503e-06, "loss": 0.7857, "step": 143 }, { "epoch": 0.010702341137123745, "grad_norm": 32.70642385896187, "learning_rate": 2.3781998348472337e-06, "loss": 1.037, "step": 144 }, { "epoch": 0.010776662950575994, "grad_norm": 119.20529895217695, "learning_rate": 2.3947151114781175e-06, "loss": 0.9787, "step": 145 }, { "epoch": 0.010850984764028243, "grad_norm": 18.434393905369024, "learning_rate": 2.411230388109001e-06, "loss": 0.8741, "step": 146 }, { "epoch": 0.01092530657748049, "grad_norm": 14.470855142276026, "learning_rate": 2.4277456647398847e-06, "loss": 0.8538, "step": 147 }, { "epoch": 0.010999628390932738, "grad_norm": 14.571396093898198, "learning_rate": 2.444260941370768e-06, "loss": 0.7867, "step": 148 }, { "epoch": 0.011073950204384987, "grad_norm": 26.179827222885084, "learning_rate": 2.460776218001652e-06, "loss": 0.9915, "step": 149 }, { "epoch": 0.011148272017837236, "grad_norm": 11.059770713303706, "learning_rate": 2.4772914946325352e-06, "loss": 0.8326, "step": 150 }, { "epoch": 0.011222593831289483, "grad_norm": 80.12194569283957, "learning_rate": 2.493806771263419e-06, "loss": 1.0846, "step": 151 }, { "epoch": 0.011296915644741732, "grad_norm": 15.75087708884106, "learning_rate": 2.5103220478943024e-06, "loss": 0.8892, "step": 152 }, { "epoch": 0.01137123745819398, "grad_norm": 17.093482343146732, "learning_rate": 2.526837324525186e-06, "loss": 0.7831, "step": 153 }, { "epoch": 0.011445559271646227, "grad_norm": 56.000368611406024, "learning_rate": 2.5433526011560696e-06, "loss": 0.9514, "step": 154 }, { "epoch": 0.011519881085098476, "grad_norm": 80.25791910253395, "learning_rate": 2.559867877786953e-06, "loss": 0.6969, "step": 155 }, { "epoch": 0.011594202898550725, "grad_norm": 77.6202985810577, "learning_rate": 2.576383154417837e-06, "loss": 1.0407, "step": 156 }, { "epoch": 0.011668524712002974, "grad_norm": 27.02655847297387, "learning_rate": 2.5928984310487206e-06, "loss": 1.0586, "step": 157 }, { "epoch": 0.01174284652545522, "grad_norm": 93.04754704919816, "learning_rate": 2.609413707679604e-06, "loss": 0.894, "step": 158 }, { "epoch": 0.01181716833890747, "grad_norm": 20.661150982565918, "learning_rate": 2.6259289843104874e-06, "loss": 1.1092, "step": 159 }, { "epoch": 0.011891490152359718, "grad_norm": 15.035005627950676, "learning_rate": 2.6424442609413707e-06, "loss": 1.0666, "step": 160 }, { "epoch": 0.011965811965811967, "grad_norm": 19.11576943489642, "learning_rate": 2.658959537572254e-06, "loss": 1.0719, "step": 161 }, { "epoch": 0.012040133779264214, "grad_norm": 49.954658064556796, "learning_rate": 2.6754748142031384e-06, "loss": 0.7136, "step": 162 }, { "epoch": 0.012114455592716462, "grad_norm": 241.13208671516043, "learning_rate": 2.6919900908340217e-06, "loss": 0.7679, "step": 163 }, { "epoch": 0.012188777406168711, "grad_norm": 118.02168057371051, "learning_rate": 2.708505367464905e-06, "loss": 0.849, "step": 164 }, { "epoch": 0.012263099219620958, "grad_norm": 32.07062391259436, "learning_rate": 2.725020644095789e-06, "loss": 0.9513, "step": 165 }, { "epoch": 0.012337421033073207, "grad_norm": 21.88471431128742, "learning_rate": 2.7415359207266723e-06, "loss": 0.8032, "step": 166 }, { "epoch": 0.012411742846525456, "grad_norm": 32.23014635676175, "learning_rate": 2.7580511973575557e-06, "loss": 0.9431, "step": 167 }, { "epoch": 0.012486064659977704, "grad_norm": 38.12767574771901, "learning_rate": 2.774566473988439e-06, "loss": 0.9948, "step": 168 }, { "epoch": 0.012560386473429951, "grad_norm": 19.100426759955592, "learning_rate": 2.7910817506193233e-06, "loss": 1.0099, "step": 169 }, { "epoch": 0.0126347082868822, "grad_norm": 18.18689057036626, "learning_rate": 2.8075970272502067e-06, "loss": 0.8247, "step": 170 }, { "epoch": 0.012709030100334449, "grad_norm": 41.63436050885159, "learning_rate": 2.82411230388109e-06, "loss": 1.1158, "step": 171 }, { "epoch": 0.012783351913786696, "grad_norm": 9.808068993514963, "learning_rate": 2.840627580511974e-06, "loss": 0.9881, "step": 172 }, { "epoch": 0.012857673727238944, "grad_norm": 85.72629562304452, "learning_rate": 2.8571428571428573e-06, "loss": 0.8752, "step": 173 }, { "epoch": 0.012931995540691193, "grad_norm": 44.84733504447003, "learning_rate": 2.8736581337737406e-06, "loss": 0.9022, "step": 174 }, { "epoch": 0.013006317354143442, "grad_norm": 149.18160713544114, "learning_rate": 2.890173410404625e-06, "loss": 0.8455, "step": 175 }, { "epoch": 0.013080639167595689, "grad_norm": 16.63302713492347, "learning_rate": 2.9066886870355082e-06, "loss": 1.1032, "step": 176 }, { "epoch": 0.013154960981047938, "grad_norm": 19.588683403137612, "learning_rate": 2.9232039636663916e-06, "loss": 0.9279, "step": 177 }, { "epoch": 0.013229282794500186, "grad_norm": 18.7922046300255, "learning_rate": 2.939719240297275e-06, "loss": 0.9401, "step": 178 }, { "epoch": 0.013303604607952433, "grad_norm": 23.48858192957818, "learning_rate": 2.956234516928159e-06, "loss": 1.0999, "step": 179 }, { "epoch": 0.013377926421404682, "grad_norm": 31.88392488989054, "learning_rate": 2.972749793559042e-06, "loss": 0.7707, "step": 180 }, { "epoch": 0.01345224823485693, "grad_norm": 23.360904718076917, "learning_rate": 2.989265070189926e-06, "loss": 0.8308, "step": 181 }, { "epoch": 0.01352657004830918, "grad_norm": 13.829054559071798, "learning_rate": 3.00578034682081e-06, "loss": 0.865, "step": 182 }, { "epoch": 0.013600891861761426, "grad_norm": 50.33295139019381, "learning_rate": 3.022295623451693e-06, "loss": 0.8021, "step": 183 }, { "epoch": 0.013675213675213675, "grad_norm": 32.91053382545361, "learning_rate": 3.0388109000825766e-06, "loss": 0.7555, "step": 184 }, { "epoch": 0.013749535488665924, "grad_norm": 45.962120090408504, "learning_rate": 3.05532617671346e-06, "loss": 0.9699, "step": 185 }, { "epoch": 0.013823857302118171, "grad_norm": 67.20136584207995, "learning_rate": 3.0718414533443438e-06, "loss": 0.8886, "step": 186 }, { "epoch": 0.01389817911557042, "grad_norm": 42.23957148921901, "learning_rate": 3.088356729975227e-06, "loss": 0.8859, "step": 187 }, { "epoch": 0.013972500929022668, "grad_norm": 66.5387982185986, "learning_rate": 3.104872006606111e-06, "loss": 0.8313, "step": 188 }, { "epoch": 0.014046822742474917, "grad_norm": 62.5047527937561, "learning_rate": 3.1213872832369948e-06, "loss": 0.7023, "step": 189 }, { "epoch": 0.014121144555927164, "grad_norm": 17.91153052575759, "learning_rate": 3.137902559867878e-06, "loss": 0.8419, "step": 190 }, { "epoch": 0.014195466369379413, "grad_norm": 14.787856282266967, "learning_rate": 3.1544178364987615e-06, "loss": 0.6374, "step": 191 }, { "epoch": 0.014269788182831662, "grad_norm": 21.892355372399145, "learning_rate": 3.170933113129645e-06, "loss": 0.7618, "step": 192 }, { "epoch": 0.014344109996283909, "grad_norm": 361.0636195309694, "learning_rate": 3.1874483897605287e-06, "loss": 0.7016, "step": 193 }, { "epoch": 0.014418431809736157, "grad_norm": 393.3470921298757, "learning_rate": 3.2039636663914125e-06, "loss": 0.9761, "step": 194 }, { "epoch": 0.014492753623188406, "grad_norm": 38.69690006915274, "learning_rate": 3.220478943022296e-06, "loss": 0.9736, "step": 195 }, { "epoch": 0.014567075436640655, "grad_norm": 214.59534754870396, "learning_rate": 3.2369942196531797e-06, "loss": 0.7926, "step": 196 }, { "epoch": 0.014641397250092902, "grad_norm": 29.93811979866582, "learning_rate": 3.253509496284063e-06, "loss": 0.7655, "step": 197 }, { "epoch": 0.01471571906354515, "grad_norm": 22.870825732177643, "learning_rate": 3.2700247729149465e-06, "loss": 0.721, "step": 198 }, { "epoch": 0.0147900408769974, "grad_norm": 15.563011324562266, "learning_rate": 3.28654004954583e-06, "loss": 0.7332, "step": 199 }, { "epoch": 0.014864362690449646, "grad_norm": 43.808896317457275, "learning_rate": 3.303055326176714e-06, "loss": 0.8304, "step": 200 }, { "epoch": 0.014938684503901895, "grad_norm": 95.76201367836865, "learning_rate": 3.3195706028075975e-06, "loss": 0.893, "step": 201 }, { "epoch": 0.015013006317354144, "grad_norm": 38.26257954310363, "learning_rate": 3.336085879438481e-06, "loss": 0.6643, "step": 202 }, { "epoch": 0.015087328130806392, "grad_norm": 89.42834279982775, "learning_rate": 3.3526011560693642e-06, "loss": 1.0018, "step": 203 }, { "epoch": 0.01516164994425864, "grad_norm": 15.594834512241528, "learning_rate": 3.369116432700248e-06, "loss": 0.9398, "step": 204 }, { "epoch": 0.015235971757710888, "grad_norm": 13.999041789502375, "learning_rate": 3.3856317093311314e-06, "loss": 0.9158, "step": 205 }, { "epoch": 0.015310293571163137, "grad_norm": 12.598168793140847, "learning_rate": 3.4021469859620148e-06, "loss": 0.9354, "step": 206 }, { "epoch": 0.015384615384615385, "grad_norm": 22.112257647406846, "learning_rate": 3.418662262592899e-06, "loss": 0.8243, "step": 207 }, { "epoch": 0.015458937198067632, "grad_norm": 42.12625583204373, "learning_rate": 3.4351775392237824e-06, "loss": 0.9887, "step": 208 }, { "epoch": 0.015533259011519881, "grad_norm": 52.69823720509405, "learning_rate": 3.4516928158546658e-06, "loss": 1.0239, "step": 209 }, { "epoch": 0.01560758082497213, "grad_norm": 10.429987914346194, "learning_rate": 3.468208092485549e-06, "loss": 0.9342, "step": 210 }, { "epoch": 0.015681902638424377, "grad_norm": 26.273976779677998, "learning_rate": 3.484723369116433e-06, "loss": 0.9147, "step": 211 }, { "epoch": 0.015756224451876626, "grad_norm": 23.354751340909395, "learning_rate": 3.5012386457473163e-06, "loss": 0.6131, "step": 212 }, { "epoch": 0.015830546265328874, "grad_norm": 50.16024347269646, "learning_rate": 3.5177539223782e-06, "loss": 0.9861, "step": 213 }, { "epoch": 0.015904868078781123, "grad_norm": 14.445802731720457, "learning_rate": 3.534269199009084e-06, "loss": 0.8048, "step": 214 }, { "epoch": 0.015979189892233372, "grad_norm": 26.54277250220753, "learning_rate": 3.5507844756399673e-06, "loss": 0.8463, "step": 215 }, { "epoch": 0.016053511705685617, "grad_norm": 88.0289687504708, "learning_rate": 3.5672997522708507e-06, "loss": 0.8744, "step": 216 }, { "epoch": 0.016127833519137866, "grad_norm": 42.15650878144847, "learning_rate": 3.583815028901734e-06, "loss": 0.8588, "step": 217 }, { "epoch": 0.016202155332590115, "grad_norm": 30.573837834375365, "learning_rate": 3.600330305532618e-06, "loss": 1.0422, "step": 218 }, { "epoch": 0.016276477146042363, "grad_norm": 76.97512484558374, "learning_rate": 3.6168455821635017e-06, "loss": 0.7997, "step": 219 }, { "epoch": 0.016350798959494612, "grad_norm": 25.947511416566766, "learning_rate": 3.633360858794385e-06, "loss": 0.79, "step": 220 }, { "epoch": 0.01642512077294686, "grad_norm": 41.77089554915694, "learning_rate": 3.649876135425269e-06, "loss": 0.774, "step": 221 }, { "epoch": 0.01649944258639911, "grad_norm": 16.735569869364184, "learning_rate": 3.6663914120561523e-06, "loss": 0.8142, "step": 222 }, { "epoch": 0.016573764399851355, "grad_norm": 15.496087927336413, "learning_rate": 3.6829066886870357e-06, "loss": 0.8508, "step": 223 }, { "epoch": 0.016648086213303603, "grad_norm": 34.09049148887268, "learning_rate": 3.699421965317919e-06, "loss": 0.9862, "step": 224 }, { "epoch": 0.016722408026755852, "grad_norm": 16.27068637704025, "learning_rate": 3.715937241948803e-06, "loss": 0.796, "step": 225 }, { "epoch": 0.0167967298402081, "grad_norm": 22.479006045142853, "learning_rate": 3.7324525185796867e-06, "loss": 0.7776, "step": 226 }, { "epoch": 0.01687105165366035, "grad_norm": 37.22277646802463, "learning_rate": 3.74896779521057e-06, "loss": 0.7051, "step": 227 }, { "epoch": 0.0169453734671126, "grad_norm": 1750.9479331935308, "learning_rate": 3.765483071841454e-06, "loss": 0.7907, "step": 228 }, { "epoch": 0.017019695280564847, "grad_norm": 82.18591428963938, "learning_rate": 3.7819983484723372e-06, "loss": 0.7285, "step": 229 }, { "epoch": 0.017094017094017096, "grad_norm": 13.957365545179217, "learning_rate": 3.7985136251032206e-06, "loss": 0.7794, "step": 230 }, { "epoch": 0.01716833890746934, "grad_norm": 23.148133427488926, "learning_rate": 3.815028901734104e-06, "loss": 0.9382, "step": 231 }, { "epoch": 0.01724266072092159, "grad_norm": 18.369692329730636, "learning_rate": 3.831544178364988e-06, "loss": 0.9481, "step": 232 }, { "epoch": 0.01731698253437384, "grad_norm": 21.29828958498331, "learning_rate": 3.848059454995872e-06, "loss": 0.8757, "step": 233 }, { "epoch": 0.017391304347826087, "grad_norm": 24.562477016810245, "learning_rate": 3.864574731626755e-06, "loss": 0.7114, "step": 234 }, { "epoch": 0.017465626161278336, "grad_norm": 43.874243318025385, "learning_rate": 3.881090008257639e-06, "loss": 0.7436, "step": 235 }, { "epoch": 0.017539947974730585, "grad_norm": 13.94997500145113, "learning_rate": 3.897605284888522e-06, "loss": 0.8198, "step": 236 }, { "epoch": 0.017614269788182833, "grad_norm": 59.08307880507661, "learning_rate": 3.9141205615194056e-06, "loss": 0.8745, "step": 237 }, { "epoch": 0.01768859160163508, "grad_norm": 40.31187929457818, "learning_rate": 3.93063583815029e-06, "loss": 0.9293, "step": 238 }, { "epoch": 0.017762913415087327, "grad_norm": 20.07012889635926, "learning_rate": 3.947151114781173e-06, "loss": 0.7873, "step": 239 }, { "epoch": 0.017837235228539576, "grad_norm": 95.07265825008919, "learning_rate": 3.9636663914120565e-06, "loss": 0.9205, "step": 240 }, { "epoch": 0.017911557041991825, "grad_norm": 17.41760625091382, "learning_rate": 3.98018166804294e-06, "loss": 0.9371, "step": 241 }, { "epoch": 0.017985878855444073, "grad_norm": 27.597655398475865, "learning_rate": 3.996696944673823e-06, "loss": 0.8825, "step": 242 }, { "epoch": 0.018060200668896322, "grad_norm": 11.265866509208827, "learning_rate": 4.013212221304707e-06, "loss": 0.8822, "step": 243 }, { "epoch": 0.01813452248234857, "grad_norm": 19.38237822129454, "learning_rate": 4.02972749793559e-06, "loss": 1.0107, "step": 244 }, { "epoch": 0.018208844295800816, "grad_norm": 27.33893614191149, "learning_rate": 4.046242774566474e-06, "loss": 0.7856, "step": 245 }, { "epoch": 0.018283166109253065, "grad_norm": 32.43515365239075, "learning_rate": 4.062758051197358e-06, "loss": 0.9028, "step": 246 }, { "epoch": 0.018357487922705314, "grad_norm": 41.639566481880365, "learning_rate": 4.079273327828241e-06, "loss": 0.7612, "step": 247 }, { "epoch": 0.018431809736157562, "grad_norm": 70.02322388052586, "learning_rate": 4.095788604459125e-06, "loss": 0.8878, "step": 248 }, { "epoch": 0.01850613154960981, "grad_norm": 204.60334264456068, "learning_rate": 4.112303881090009e-06, "loss": 0.9607, "step": 249 }, { "epoch": 0.01858045336306206, "grad_norm": 14.145972567751825, "learning_rate": 4.128819157720892e-06, "loss": 0.9369, "step": 250 }, { "epoch": 0.01865477517651431, "grad_norm": 13.838534382866358, "learning_rate": 4.145334434351776e-06, "loss": 0.8503, "step": 251 }, { "epoch": 0.018729096989966554, "grad_norm": 27.026086347250065, "learning_rate": 4.16184971098266e-06, "loss": 0.9077, "step": 252 }, { "epoch": 0.018803418803418803, "grad_norm": 28.9923073135923, "learning_rate": 4.178364987613543e-06, "loss": 0.9252, "step": 253 }, { "epoch": 0.01887774061687105, "grad_norm": 15.422446111005208, "learning_rate": 4.1948802642444264e-06, "loss": 0.7922, "step": 254 }, { "epoch": 0.0189520624303233, "grad_norm": 16.51667807977307, "learning_rate": 4.21139554087531e-06, "loss": 0.7242, "step": 255 }, { "epoch": 0.01902638424377555, "grad_norm": 105.38543759366729, "learning_rate": 4.227910817506193e-06, "loss": 0.8475, "step": 256 }, { "epoch": 0.019100706057227797, "grad_norm": 27.99306108129524, "learning_rate": 4.2444260941370774e-06, "loss": 0.8223, "step": 257 }, { "epoch": 0.019175027870680046, "grad_norm": 29.37253494897107, "learning_rate": 4.260941370767961e-06, "loss": 0.7749, "step": 258 }, { "epoch": 0.01924934968413229, "grad_norm": 17.215271105010366, "learning_rate": 4.277456647398844e-06, "loss": 0.8641, "step": 259 }, { "epoch": 0.01932367149758454, "grad_norm": 26.263916327473375, "learning_rate": 4.2939719240297276e-06, "loss": 0.8149, "step": 260 }, { "epoch": 0.01939799331103679, "grad_norm": 13.741130432909346, "learning_rate": 4.310487200660611e-06, "loss": 0.9961, "step": 261 }, { "epoch": 0.019472315124489038, "grad_norm": 73.3155866974601, "learning_rate": 4.327002477291495e-06, "loss": 0.9196, "step": 262 }, { "epoch": 0.019546636937941286, "grad_norm": 77.97047455649118, "learning_rate": 4.3435177539223786e-06, "loss": 0.8136, "step": 263 }, { "epoch": 0.019620958751393535, "grad_norm": 45.041667403092454, "learning_rate": 4.360033030553262e-06, "loss": 0.8232, "step": 264 }, { "epoch": 0.019695280564845784, "grad_norm": 109.52753335325447, "learning_rate": 4.376548307184146e-06, "loss": 0.949, "step": 265 }, { "epoch": 0.01976960237829803, "grad_norm": 29.517905237612517, "learning_rate": 4.3930635838150296e-06, "loss": 0.876, "step": 266 }, { "epoch": 0.019843924191750278, "grad_norm": 139.54752131474302, "learning_rate": 4.409578860445913e-06, "loss": 0.8553, "step": 267 }, { "epoch": 0.019918246005202526, "grad_norm": 11.890455904298705, "learning_rate": 4.426094137076796e-06, "loss": 1.0071, "step": 268 }, { "epoch": 0.019992567818654775, "grad_norm": 30.78033266648289, "learning_rate": 4.44260941370768e-06, "loss": 1.0782, "step": 269 }, { "epoch": 0.020066889632107024, "grad_norm": 45.18602813798451, "learning_rate": 4.459124690338564e-06, "loss": 0.8511, "step": 270 }, { "epoch": 0.020141211445559273, "grad_norm": 21.46440452882634, "learning_rate": 4.475639966969447e-06, "loss": 0.987, "step": 271 }, { "epoch": 0.02021553325901152, "grad_norm": 21.777309532076316, "learning_rate": 4.492155243600331e-06, "loss": 0.9576, "step": 272 }, { "epoch": 0.020289855072463767, "grad_norm": 37.80662446760743, "learning_rate": 4.508670520231214e-06, "loss": 0.8061, "step": 273 }, { "epoch": 0.020364176885916015, "grad_norm": 8.55210760421792, "learning_rate": 4.5251857968620975e-06, "loss": 0.8657, "step": 274 }, { "epoch": 0.020438498699368264, "grad_norm": 13.925384276925707, "learning_rate": 4.541701073492981e-06, "loss": 0.8702, "step": 275 }, { "epoch": 0.020512820512820513, "grad_norm": 35.18509543437223, "learning_rate": 4.558216350123865e-06, "loss": 0.8222, "step": 276 }, { "epoch": 0.02058714232627276, "grad_norm": 21.164150108653825, "learning_rate": 4.5747316267547485e-06, "loss": 0.9332, "step": 277 }, { "epoch": 0.02066146413972501, "grad_norm": 18.21151109272459, "learning_rate": 4.591246903385632e-06, "loss": 0.8255, "step": 278 }, { "epoch": 0.02073578595317726, "grad_norm": 72.12963673018999, "learning_rate": 4.607762180016516e-06, "loss": 0.8491, "step": 279 }, { "epoch": 0.020810107766629504, "grad_norm": 40.99512498833735, "learning_rate": 4.6242774566473994e-06, "loss": 0.8501, "step": 280 }, { "epoch": 0.020884429580081753, "grad_norm": 29.9138128681257, "learning_rate": 4.640792733278283e-06, "loss": 0.7335, "step": 281 }, { "epoch": 0.020958751393534, "grad_norm": 8.864898350467206, "learning_rate": 4.657308009909166e-06, "loss": 0.9049, "step": 282 }, { "epoch": 0.02103307320698625, "grad_norm": 25.449493238071838, "learning_rate": 4.6738232865400504e-06, "loss": 0.9702, "step": 283 }, { "epoch": 0.0211073950204385, "grad_norm": 34.74183192139651, "learning_rate": 4.690338563170934e-06, "loss": 0.9063, "step": 284 }, { "epoch": 0.021181716833890748, "grad_norm": 11.933747028288892, "learning_rate": 4.706853839801817e-06, "loss": 0.9463, "step": 285 }, { "epoch": 0.021256038647342997, "grad_norm": 25.340259976494774, "learning_rate": 4.723369116432701e-06, "loss": 0.8192, "step": 286 }, { "epoch": 0.021330360460795242, "grad_norm": 20.209836711547904, "learning_rate": 4.739884393063584e-06, "loss": 0.9398, "step": 287 }, { "epoch": 0.02140468227424749, "grad_norm": 19.640952131786086, "learning_rate": 4.756399669694467e-06, "loss": 0.9194, "step": 288 }, { "epoch": 0.02147900408769974, "grad_norm": 23.03978661177964, "learning_rate": 4.772914946325352e-06, "loss": 0.9651, "step": 289 }, { "epoch": 0.021553325901151988, "grad_norm": 36.85458127258869, "learning_rate": 4.789430222956235e-06, "loss": 0.8079, "step": 290 }, { "epoch": 0.021627647714604237, "grad_norm": 16.61099680343905, "learning_rate": 4.805945499587118e-06, "loss": 0.8648, "step": 291 }, { "epoch": 0.021701969528056485, "grad_norm": 20.752157884203605, "learning_rate": 4.822460776218002e-06, "loss": 0.841, "step": 292 }, { "epoch": 0.021776291341508734, "grad_norm": 11.964559780856101, "learning_rate": 4.838976052848885e-06, "loss": 0.8733, "step": 293 }, { "epoch": 0.02185061315496098, "grad_norm": 12.625111566769096, "learning_rate": 4.855491329479769e-06, "loss": 0.8283, "step": 294 }, { "epoch": 0.021924934968413228, "grad_norm": 12.093050149123625, "learning_rate": 4.872006606110653e-06, "loss": 0.9868, "step": 295 }, { "epoch": 0.021999256781865477, "grad_norm": 97.02176375571284, "learning_rate": 4.888521882741536e-06, "loss": 0.7668, "step": 296 }, { "epoch": 0.022073578595317726, "grad_norm": 59.13818786197017, "learning_rate": 4.90503715937242e-06, "loss": 0.8534, "step": 297 }, { "epoch": 0.022147900408769974, "grad_norm": 145.46316258786584, "learning_rate": 4.921552436003304e-06, "loss": 1.1054, "step": 298 }, { "epoch": 0.022222222222222223, "grad_norm": 184.94182310154153, "learning_rate": 4.938067712634187e-06, "loss": 0.8266, "step": 299 }, { "epoch": 0.022296544035674472, "grad_norm": 17.367687704530287, "learning_rate": 4.9545829892650705e-06, "loss": 0.8707, "step": 300 }, { "epoch": 0.022370865849126717, "grad_norm": 13.193838848270763, "learning_rate": 4.971098265895954e-06, "loss": 0.8572, "step": 301 }, { "epoch": 0.022445187662578966, "grad_norm": 57.94851306489819, "learning_rate": 4.987613542526838e-06, "loss": 0.8261, "step": 302 }, { "epoch": 0.022519509476031215, "grad_norm": 84.05695469858085, "learning_rate": 5.004128819157721e-06, "loss": 0.7427, "step": 303 }, { "epoch": 0.022593831289483463, "grad_norm": 72.1531338242187, "learning_rate": 5.020644095788605e-06, "loss": 0.7948, "step": 304 }, { "epoch": 0.022668153102935712, "grad_norm": 27.66604417443273, "learning_rate": 5.037159372419489e-06, "loss": 0.8422, "step": 305 }, { "epoch": 0.02274247491638796, "grad_norm": 146.29951793981343, "learning_rate": 5.053674649050372e-06, "loss": 0.8641, "step": 306 }, { "epoch": 0.02281679672984021, "grad_norm": 22.97982908662766, "learning_rate": 5.070189925681256e-06, "loss": 0.9002, "step": 307 }, { "epoch": 0.022891118543292455, "grad_norm": 22.0432127834928, "learning_rate": 5.086705202312139e-06, "loss": 0.9779, "step": 308 }, { "epoch": 0.022965440356744703, "grad_norm": 35.58176907182218, "learning_rate": 5.103220478943023e-06, "loss": 0.8295, "step": 309 }, { "epoch": 0.023039762170196952, "grad_norm": 36.886791093566345, "learning_rate": 5.119735755573906e-06, "loss": 0.9401, "step": 310 }, { "epoch": 0.0231140839836492, "grad_norm": 19.699464799172393, "learning_rate": 5.13625103220479e-06, "loss": 0.801, "step": 311 }, { "epoch": 0.02318840579710145, "grad_norm": 31.066699615779754, "learning_rate": 5.152766308835674e-06, "loss": 0.8552, "step": 312 }, { "epoch": 0.0232627276105537, "grad_norm": 7.446365262938509, "learning_rate": 5.169281585466557e-06, "loss": 0.9604, "step": 313 }, { "epoch": 0.023337049424005947, "grad_norm": 14.570371025444107, "learning_rate": 5.185796862097441e-06, "loss": 0.7892, "step": 314 }, { "epoch": 0.023411371237458192, "grad_norm": 19.64903043506884, "learning_rate": 5.202312138728324e-06, "loss": 0.9812, "step": 315 }, { "epoch": 0.02348569305091044, "grad_norm": 15.086669781430507, "learning_rate": 5.218827415359208e-06, "loss": 0.9505, "step": 316 }, { "epoch": 0.02356001486436269, "grad_norm": 46.75104961165545, "learning_rate": 5.2353426919900905e-06, "loss": 0.904, "step": 317 }, { "epoch": 0.02363433667781494, "grad_norm": 12.44966462870736, "learning_rate": 5.251857968620975e-06, "loss": 0.8742, "step": 318 }, { "epoch": 0.023708658491267187, "grad_norm": 100.44613781655266, "learning_rate": 5.268373245251859e-06, "loss": 0.8829, "step": 319 }, { "epoch": 0.023782980304719436, "grad_norm": 21.461574354997122, "learning_rate": 5.2848885218827415e-06, "loss": 0.7456, "step": 320 }, { "epoch": 0.023857302118171685, "grad_norm": 20.601030463150803, "learning_rate": 5.301403798513626e-06, "loss": 0.8961, "step": 321 }, { "epoch": 0.023931623931623933, "grad_norm": 52.93717264924342, "learning_rate": 5.317919075144508e-06, "loss": 0.8663, "step": 322 }, { "epoch": 0.02400594574507618, "grad_norm": 51.84686379701702, "learning_rate": 5.3344343517753925e-06, "loss": 0.7219, "step": 323 }, { "epoch": 0.024080267558528427, "grad_norm": 47.4056524408585, "learning_rate": 5.350949628406277e-06, "loss": 1.0453, "step": 324 }, { "epoch": 0.024154589371980676, "grad_norm": 10.485348436560157, "learning_rate": 5.367464905037159e-06, "loss": 0.8159, "step": 325 }, { "epoch": 0.024228911185432925, "grad_norm": 11.073252869778305, "learning_rate": 5.3839801816680435e-06, "loss": 0.6483, "step": 326 }, { "epoch": 0.024303232998885173, "grad_norm": 128.59490733437124, "learning_rate": 5.400495458298927e-06, "loss": 1.1016, "step": 327 }, { "epoch": 0.024377554812337422, "grad_norm": 77.35581691502124, "learning_rate": 5.41701073492981e-06, "loss": 0.9141, "step": 328 }, { "epoch": 0.02445187662578967, "grad_norm": 11.912133467278602, "learning_rate": 5.433526011560694e-06, "loss": 0.7756, "step": 329 }, { "epoch": 0.024526198439241916, "grad_norm": 17.486252476765564, "learning_rate": 5.450041288191578e-06, "loss": 0.744, "step": 330 }, { "epoch": 0.024600520252694165, "grad_norm": 17.752139069574785, "learning_rate": 5.466556564822461e-06, "loss": 0.8657, "step": 331 }, { "epoch": 0.024674842066146414, "grad_norm": 21.031126041042647, "learning_rate": 5.483071841453345e-06, "loss": 0.8628, "step": 332 }, { "epoch": 0.024749163879598662, "grad_norm": 31.698271578178574, "learning_rate": 5.499587118084229e-06, "loss": 0.7704, "step": 333 }, { "epoch": 0.02482348569305091, "grad_norm": 16.351032950393325, "learning_rate": 5.516102394715111e-06, "loss": 0.8405, "step": 334 }, { "epoch": 0.02489780750650316, "grad_norm": 20.689330733189117, "learning_rate": 5.532617671345996e-06, "loss": 1.0644, "step": 335 }, { "epoch": 0.02497212931995541, "grad_norm": 7.988414365043635, "learning_rate": 5.549132947976878e-06, "loss": 0.8506, "step": 336 }, { "epoch": 0.025046451133407654, "grad_norm": 21.572443139884616, "learning_rate": 5.565648224607762e-06, "loss": 0.7821, "step": 337 }, { "epoch": 0.025120772946859903, "grad_norm": 19.543694445441112, "learning_rate": 5.582163501238647e-06, "loss": 0.8003, "step": 338 }, { "epoch": 0.02519509476031215, "grad_norm": 15.095372015817194, "learning_rate": 5.598678777869529e-06, "loss": 0.9859, "step": 339 }, { "epoch": 0.0252694165737644, "grad_norm": 21.182269799848775, "learning_rate": 5.615194054500413e-06, "loss": 0.8277, "step": 340 }, { "epoch": 0.02534373838721665, "grad_norm": 16.074230202187923, "learning_rate": 5.631709331131297e-06, "loss": 0.8101, "step": 341 }, { "epoch": 0.025418060200668897, "grad_norm": 12.989306932408862, "learning_rate": 5.64822460776218e-06, "loss": 0.8638, "step": 342 }, { "epoch": 0.025492382014121146, "grad_norm": 15.612859000530676, "learning_rate": 5.664739884393064e-06, "loss": 0.5606, "step": 343 }, { "epoch": 0.02556670382757339, "grad_norm": 22.59515254101064, "learning_rate": 5.681255161023948e-06, "loss": 0.6426, "step": 344 }, { "epoch": 0.02564102564102564, "grad_norm": 37.00121715349491, "learning_rate": 5.697770437654831e-06, "loss": 0.8218, "step": 345 }, { "epoch": 0.02571534745447789, "grad_norm": 12.100807925109804, "learning_rate": 5.7142857142857145e-06, "loss": 0.9639, "step": 346 }, { "epoch": 0.025789669267930138, "grad_norm": 21.947901834152386, "learning_rate": 5.730800990916599e-06, "loss": 0.8097, "step": 347 }, { "epoch": 0.025863991081382386, "grad_norm": 9.851222276543739, "learning_rate": 5.747316267547481e-06, "loss": 0.9576, "step": 348 }, { "epoch": 0.025938312894834635, "grad_norm": 17.850689287605814, "learning_rate": 5.7638315441783655e-06, "loss": 0.9385, "step": 349 }, { "epoch": 0.026012634708286884, "grad_norm": 56.03719622023012, "learning_rate": 5.78034682080925e-06, "loss": 0.7092, "step": 350 }, { "epoch": 0.02608695652173913, "grad_norm": 12.382125670099324, "learning_rate": 5.796862097440132e-06, "loss": 0.8077, "step": 351 }, { "epoch": 0.026161278335191378, "grad_norm": 68.83325581825848, "learning_rate": 5.8133773740710165e-06, "loss": 0.7836, "step": 352 }, { "epoch": 0.026235600148643626, "grad_norm": 16.69639661542422, "learning_rate": 5.829892650701899e-06, "loss": 0.8658, "step": 353 }, { "epoch": 0.026309921962095875, "grad_norm": 11.567483857677205, "learning_rate": 5.846407927332783e-06, "loss": 1.0318, "step": 354 }, { "epoch": 0.026384243775548124, "grad_norm": 12.19887531741006, "learning_rate": 5.862923203963667e-06, "loss": 0.8878, "step": 355 }, { "epoch": 0.026458565589000373, "grad_norm": 23.624773171090204, "learning_rate": 5.87943848059455e-06, "loss": 1.2083, "step": 356 }, { "epoch": 0.02653288740245262, "grad_norm": 9.800847917556109, "learning_rate": 5.895953757225434e-06, "loss": 0.8671, "step": 357 }, { "epoch": 0.026607209215904867, "grad_norm": 8.526063789638313, "learning_rate": 5.912469033856318e-06, "loss": 0.7663, "step": 358 }, { "epoch": 0.026681531029357115, "grad_norm": 36.36766166591483, "learning_rate": 5.928984310487201e-06, "loss": 0.9519, "step": 359 }, { "epoch": 0.026755852842809364, "grad_norm": 47.047493960185875, "learning_rate": 5.945499587118084e-06, "loss": 0.8138, "step": 360 }, { "epoch": 0.026830174656261613, "grad_norm": 8.91765139286314, "learning_rate": 5.962014863748969e-06, "loss": 0.9117, "step": 361 }, { "epoch": 0.02690449646971386, "grad_norm": 15.962803171686918, "learning_rate": 5.978530140379852e-06, "loss": 1.0255, "step": 362 }, { "epoch": 0.02697881828316611, "grad_norm": 96.62025881577907, "learning_rate": 5.995045417010735e-06, "loss": 0.8286, "step": 363 }, { "epoch": 0.02705314009661836, "grad_norm": 29.09777337536442, "learning_rate": 6.01156069364162e-06, "loss": 0.7868, "step": 364 }, { "epoch": 0.027127461910070604, "grad_norm": 17.85650144473671, "learning_rate": 6.028075970272502e-06, "loss": 0.8309, "step": 365 }, { "epoch": 0.027201783723522853, "grad_norm": 30.024839930500626, "learning_rate": 6.044591246903386e-06, "loss": 0.9089, "step": 366 }, { "epoch": 0.0272761055369751, "grad_norm": 73.55838538346664, "learning_rate": 6.061106523534269e-06, "loss": 0.8422, "step": 367 }, { "epoch": 0.02735042735042735, "grad_norm": 11.028902734464179, "learning_rate": 6.077621800165153e-06, "loss": 0.9114, "step": 368 }, { "epoch": 0.0274247491638796, "grad_norm": 7.856103407841303, "learning_rate": 6.094137076796037e-06, "loss": 0.8269, "step": 369 }, { "epoch": 0.027499070977331848, "grad_norm": 26.098542834460737, "learning_rate": 6.11065235342692e-06, "loss": 0.7541, "step": 370 }, { "epoch": 0.027573392790784097, "grad_norm": 17.829714394325226, "learning_rate": 6.127167630057804e-06, "loss": 0.9832, "step": 371 }, { "epoch": 0.027647714604236342, "grad_norm": 17.288165304037815, "learning_rate": 6.1436829066886875e-06, "loss": 0.8082, "step": 372 }, { "epoch": 0.02772203641768859, "grad_norm": 8.693745552426497, "learning_rate": 6.160198183319571e-06, "loss": 0.8125, "step": 373 }, { "epoch": 0.02779635823114084, "grad_norm": 20.326092597500534, "learning_rate": 6.176713459950454e-06, "loss": 0.7655, "step": 374 }, { "epoch": 0.027870680044593088, "grad_norm": 10.487913161578527, "learning_rate": 6.1932287365813385e-06, "loss": 0.7328, "step": 375 }, { "epoch": 0.027945001858045337, "grad_norm": 74.937900719971, "learning_rate": 6.209744013212222e-06, "loss": 0.9361, "step": 376 }, { "epoch": 0.028019323671497585, "grad_norm": 18.40078628711329, "learning_rate": 6.226259289843105e-06, "loss": 0.919, "step": 377 }, { "epoch": 0.028093645484949834, "grad_norm": 18.725812347031404, "learning_rate": 6.2427745664739895e-06, "loss": 0.7911, "step": 378 }, { "epoch": 0.02816796729840208, "grad_norm": 16.841209496946266, "learning_rate": 6.259289843104872e-06, "loss": 0.7902, "step": 379 }, { "epoch": 0.028242289111854328, "grad_norm": 15.805700023012758, "learning_rate": 6.275805119735756e-06, "loss": 0.8953, "step": 380 }, { "epoch": 0.028316610925306577, "grad_norm": 190.12034468687008, "learning_rate": 6.2923203963666405e-06, "loss": 0.7692, "step": 381 }, { "epoch": 0.028390932738758826, "grad_norm": 56.91303336039801, "learning_rate": 6.308835672997523e-06, "loss": 0.8882, "step": 382 }, { "epoch": 0.028465254552211074, "grad_norm": 13.08687449934129, "learning_rate": 6.325350949628407e-06, "loss": 0.8679, "step": 383 }, { "epoch": 0.028539576365663323, "grad_norm": 27.958301929687774, "learning_rate": 6.34186622625929e-06, "loss": 0.7953, "step": 384 }, { "epoch": 0.028613898179115572, "grad_norm": 36.90096551909633, "learning_rate": 6.358381502890174e-06, "loss": 0.8581, "step": 385 }, { "epoch": 0.028688219992567817, "grad_norm": 56.8330926002631, "learning_rate": 6.374896779521057e-06, "loss": 0.8615, "step": 386 }, { "epoch": 0.028762541806020066, "grad_norm": 54.45442697085576, "learning_rate": 6.391412056151941e-06, "loss": 0.9116, "step": 387 }, { "epoch": 0.028836863619472314, "grad_norm": 21.029451379677283, "learning_rate": 6.407927332782825e-06, "loss": 0.7159, "step": 388 }, { "epoch": 0.028911185432924563, "grad_norm": 32.8787941740936, "learning_rate": 6.424442609413708e-06, "loss": 0.8661, "step": 389 }, { "epoch": 0.028985507246376812, "grad_norm": 146.27914248928855, "learning_rate": 6.440957886044592e-06, "loss": 0.938, "step": 390 }, { "epoch": 0.02905982905982906, "grad_norm": 27.882759884857037, "learning_rate": 6.457473162675475e-06, "loss": 0.8796, "step": 391 }, { "epoch": 0.02913415087328131, "grad_norm": 10.580594429279108, "learning_rate": 6.473988439306359e-06, "loss": 1.0114, "step": 392 }, { "epoch": 0.029208472686733555, "grad_norm": 27.951913214755525, "learning_rate": 6.490503715937242e-06, "loss": 0.8969, "step": 393 }, { "epoch": 0.029282794500185803, "grad_norm": 18.883874192672188, "learning_rate": 6.507018992568126e-06, "loss": 0.8467, "step": 394 }, { "epoch": 0.029357116313638052, "grad_norm": 14.95222993544682, "learning_rate": 6.52353426919901e-06, "loss": 0.7732, "step": 395 }, { "epoch": 0.0294314381270903, "grad_norm": 10.052038603544823, "learning_rate": 6.540049545829893e-06, "loss": 0.9035, "step": 396 }, { "epoch": 0.02950575994054255, "grad_norm": 36.98496083412028, "learning_rate": 6.556564822460777e-06, "loss": 1.0237, "step": 397 }, { "epoch": 0.0295800817539948, "grad_norm": 20.02927273489494, "learning_rate": 6.57308009909166e-06, "loss": 1.1031, "step": 398 }, { "epoch": 0.029654403567447047, "grad_norm": 18.939779189487123, "learning_rate": 6.589595375722544e-06, "loss": 0.9628, "step": 399 }, { "epoch": 0.029728725380899292, "grad_norm": 10.730656728594951, "learning_rate": 6.606110652353428e-06, "loss": 0.9866, "step": 400 }, { "epoch": 0.02980304719435154, "grad_norm": 221.5703651823673, "learning_rate": 6.622625928984311e-06, "loss": 0.7914, "step": 401 }, { "epoch": 0.02987736900780379, "grad_norm": 18.63031873375911, "learning_rate": 6.639141205615195e-06, "loss": 0.79, "step": 402 }, { "epoch": 0.02995169082125604, "grad_norm": 25.589633222460524, "learning_rate": 6.6556564822460774e-06, "loss": 0.8085, "step": 403 }, { "epoch": 0.030026012634708287, "grad_norm": 14.65629374946967, "learning_rate": 6.672171758876962e-06, "loss": 0.6806, "step": 404 }, { "epoch": 0.030100334448160536, "grad_norm": 15.713230958989651, "learning_rate": 6.688687035507845e-06, "loss": 0.8018, "step": 405 }, { "epoch": 0.030174656261612785, "grad_norm": 93.14068076136486, "learning_rate": 6.7052023121387284e-06, "loss": 0.7456, "step": 406 }, { "epoch": 0.03024897807506503, "grad_norm": 37.776752477249154, "learning_rate": 6.721717588769613e-06, "loss": 1.0056, "step": 407 }, { "epoch": 0.03032329988851728, "grad_norm": 20.819223908946068, "learning_rate": 6.738232865400496e-06, "loss": 0.8839, "step": 408 }, { "epoch": 0.030397621701969527, "grad_norm": 21.62933460869254, "learning_rate": 6.7547481420313794e-06, "loss": 0.9055, "step": 409 }, { "epoch": 0.030471943515421776, "grad_norm": 29.93258267215044, "learning_rate": 6.771263418662263e-06, "loss": 0.6865, "step": 410 }, { "epoch": 0.030546265328874025, "grad_norm": 27.688137746263063, "learning_rate": 6.787778695293147e-06, "loss": 1.0211, "step": 411 }, { "epoch": 0.030620587142326273, "grad_norm": 10.292392896507446, "learning_rate": 6.8042939719240296e-06, "loss": 0.9257, "step": 412 }, { "epoch": 0.030694908955778522, "grad_norm": 21.013378727230684, "learning_rate": 6.820809248554914e-06, "loss": 0.9644, "step": 413 }, { "epoch": 0.03076923076923077, "grad_norm": 12.14059403668328, "learning_rate": 6.837324525185798e-06, "loss": 0.9772, "step": 414 }, { "epoch": 0.030843552582683016, "grad_norm": 86.1571618615527, "learning_rate": 6.8538398018166806e-06, "loss": 1.0615, "step": 415 }, { "epoch": 0.030917874396135265, "grad_norm": 75.74562663191307, "learning_rate": 6.870355078447565e-06, "loss": 0.8441, "step": 416 }, { "epoch": 0.030992196209587514, "grad_norm": 63.37567287464785, "learning_rate": 6.886870355078447e-06, "loss": 0.8127, "step": 417 }, { "epoch": 0.031066518023039762, "grad_norm": 21.39823624695339, "learning_rate": 6.9033856317093316e-06, "loss": 0.703, "step": 418 }, { "epoch": 0.03114083983649201, "grad_norm": 77.04225601963132, "learning_rate": 6.919900908340216e-06, "loss": 0.8184, "step": 419 }, { "epoch": 0.03121516164994426, "grad_norm": 133.3491663970741, "learning_rate": 6.936416184971098e-06, "loss": 0.8704, "step": 420 }, { "epoch": 0.03128948346339651, "grad_norm": 12.602008427203556, "learning_rate": 6.9529314616019826e-06, "loss": 0.7082, "step": 421 }, { "epoch": 0.031363805276848754, "grad_norm": 219.98125799639817, "learning_rate": 6.969446738232866e-06, "loss": 0.896, "step": 422 }, { "epoch": 0.031438127090301006, "grad_norm": 21.110756217651208, "learning_rate": 6.985962014863749e-06, "loss": 0.8919, "step": 423 }, { "epoch": 0.03151244890375325, "grad_norm": 17.361735651716742, "learning_rate": 7.002477291494633e-06, "loss": 0.7147, "step": 424 }, { "epoch": 0.031586770717205497, "grad_norm": 14.283704364549859, "learning_rate": 7.018992568125517e-06, "loss": 0.8818, "step": 425 }, { "epoch": 0.03166109253065775, "grad_norm": 14.450572698497108, "learning_rate": 7.0355078447564e-06, "loss": 0.8371, "step": 426 }, { "epoch": 0.031735414344109994, "grad_norm": 39.60781783964532, "learning_rate": 7.052023121387284e-06, "loss": 0.9047, "step": 427 }, { "epoch": 0.031809736157562246, "grad_norm": 14.294510382733227, "learning_rate": 7.068538398018168e-06, "loss": 0.7715, "step": 428 }, { "epoch": 0.03188405797101449, "grad_norm": 12.963059346660232, "learning_rate": 7.0850536746490505e-06, "loss": 0.9901, "step": 429 }, { "epoch": 0.031958379784466744, "grad_norm": 19.616574471360668, "learning_rate": 7.101568951279935e-06, "loss": 0.9969, "step": 430 }, { "epoch": 0.03203270159791899, "grad_norm": 9.23183540310868, "learning_rate": 7.118084227910817e-06, "loss": 0.7229, "step": 431 }, { "epoch": 0.032107023411371234, "grad_norm": 7.594228500699823, "learning_rate": 7.1345995045417014e-06, "loss": 0.8911, "step": 432 }, { "epoch": 0.032181345224823486, "grad_norm": 20.268774510585615, "learning_rate": 7.151114781172586e-06, "loss": 0.8803, "step": 433 }, { "epoch": 0.03225566703827573, "grad_norm": 44.98279324957546, "learning_rate": 7.167630057803468e-06, "loss": 0.7569, "step": 434 }, { "epoch": 0.032329988851727984, "grad_norm": 7.274050493536042, "learning_rate": 7.1841453344343524e-06, "loss": 1.0016, "step": 435 }, { "epoch": 0.03240431066518023, "grad_norm": 24.522243694527365, "learning_rate": 7.200660611065236e-06, "loss": 0.73, "step": 436 }, { "epoch": 0.03247863247863248, "grad_norm": 6.775967480602027, "learning_rate": 7.217175887696119e-06, "loss": 0.8265, "step": 437 }, { "epoch": 0.032552954292084726, "grad_norm": 19.85820636416158, "learning_rate": 7.2336911643270034e-06, "loss": 0.799, "step": 438 }, { "epoch": 0.03262727610553697, "grad_norm": 35.56379623153537, "learning_rate": 7.250206440957887e-06, "loss": 0.6472, "step": 439 }, { "epoch": 0.032701597918989224, "grad_norm": 10.950182108075982, "learning_rate": 7.26672171758877e-06, "loss": 0.9805, "step": 440 }, { "epoch": 0.03277591973244147, "grad_norm": 8.449351859171024, "learning_rate": 7.283236994219654e-06, "loss": 0.5645, "step": 441 }, { "epoch": 0.03285024154589372, "grad_norm": 27.16107541127751, "learning_rate": 7.299752270850538e-06, "loss": 0.796, "step": 442 }, { "epoch": 0.03292456335934597, "grad_norm": 11.153344328895644, "learning_rate": 7.31626754748142e-06, "loss": 0.9053, "step": 443 }, { "epoch": 0.03299888517279822, "grad_norm": 11.324957495387876, "learning_rate": 7.3327828241123046e-06, "loss": 0.8478, "step": 444 }, { "epoch": 0.033073206986250464, "grad_norm": 9.795178222590295, "learning_rate": 7.349298100743189e-06, "loss": 0.8041, "step": 445 }, { "epoch": 0.03314752879970271, "grad_norm": 15.601420071046103, "learning_rate": 7.365813377374071e-06, "loss": 0.8993, "step": 446 }, { "epoch": 0.03322185061315496, "grad_norm": 15.714221900404482, "learning_rate": 7.3823286540049556e-06, "loss": 0.8013, "step": 447 }, { "epoch": 0.03329617242660721, "grad_norm": 13.517620112952148, "learning_rate": 7.398843930635838e-06, "loss": 0.819, "step": 448 }, { "epoch": 0.03337049424005946, "grad_norm": 14.08256544145929, "learning_rate": 7.415359207266722e-06, "loss": 0.7999, "step": 449 }, { "epoch": 0.033444816053511704, "grad_norm": 31.465243395527374, "learning_rate": 7.431874483897606e-06, "loss": 0.8458, "step": 450 }, { "epoch": 0.033519137866963956, "grad_norm": 19.933816745307283, "learning_rate": 7.448389760528489e-06, "loss": 0.5881, "step": 451 }, { "epoch": 0.0335934596804162, "grad_norm": 23.97153358739213, "learning_rate": 7.464905037159373e-06, "loss": 0.8876, "step": 452 }, { "epoch": 0.03366778149386845, "grad_norm": 19.20587953941202, "learning_rate": 7.481420313790257e-06, "loss": 0.8334, "step": 453 }, { "epoch": 0.0337421033073207, "grad_norm": 96.06211209309703, "learning_rate": 7.49793559042114e-06, "loss": 0.8057, "step": 454 }, { "epoch": 0.033816425120772944, "grad_norm": 69.17393187981325, "learning_rate": 7.5144508670520235e-06, "loss": 1.0484, "step": 455 }, { "epoch": 0.0338907469342252, "grad_norm": 11.051356319717268, "learning_rate": 7.530966143682908e-06, "loss": 0.7705, "step": 456 }, { "epoch": 0.03396506874767744, "grad_norm": 26.38771741602207, "learning_rate": 7.547481420313791e-06, "loss": 0.9334, "step": 457 }, { "epoch": 0.034039390561129694, "grad_norm": 29.003216304350637, "learning_rate": 7.5639966969446745e-06, "loss": 1.059, "step": 458 }, { "epoch": 0.03411371237458194, "grad_norm": 78.2169964078363, "learning_rate": 7.580511973575559e-06, "loss": 0.7502, "step": 459 }, { "epoch": 0.03418803418803419, "grad_norm": 11.014866317911387, "learning_rate": 7.597027250206441e-06, "loss": 0.7131, "step": 460 }, { "epoch": 0.03426235600148644, "grad_norm": 9.845704076430563, "learning_rate": 7.6135425268373255e-06, "loss": 0.7374, "step": 461 }, { "epoch": 0.03433667781493868, "grad_norm": 16.847430695674788, "learning_rate": 7.630057803468209e-06, "loss": 0.9057, "step": 462 }, { "epoch": 0.034410999628390934, "grad_norm": 10.624625687795893, "learning_rate": 7.646573080099093e-06, "loss": 0.7997, "step": 463 }, { "epoch": 0.03448532144184318, "grad_norm": 14.064057087511374, "learning_rate": 7.663088356729976e-06, "loss": 0.7768, "step": 464 }, { "epoch": 0.03455964325529543, "grad_norm": 84.79945889000632, "learning_rate": 7.67960363336086e-06, "loss": 0.9074, "step": 465 }, { "epoch": 0.03463396506874768, "grad_norm": 38.34309178872219, "learning_rate": 7.696118909991744e-06, "loss": 0.8965, "step": 466 }, { "epoch": 0.03470828688219993, "grad_norm": 29.830510283629067, "learning_rate": 7.712634186622627e-06, "loss": 0.7798, "step": 467 }, { "epoch": 0.034782608695652174, "grad_norm": 225.14589846398525, "learning_rate": 7.72914946325351e-06, "loss": 0.8172, "step": 468 }, { "epoch": 0.03485693050910442, "grad_norm": 12.880608530041266, "learning_rate": 7.745664739884393e-06, "loss": 0.937, "step": 469 }, { "epoch": 0.03493125232255667, "grad_norm": 143.71270416752313, "learning_rate": 7.762180016515278e-06, "loss": 0.9609, "step": 470 }, { "epoch": 0.03500557413600892, "grad_norm": 25.474593150398807, "learning_rate": 7.778695293146162e-06, "loss": 0.6622, "step": 471 }, { "epoch": 0.03507989594946117, "grad_norm": 22.386826716237806, "learning_rate": 7.795210569777044e-06, "loss": 0.9643, "step": 472 }, { "epoch": 0.035154217762913414, "grad_norm": 23.097522066654566, "learning_rate": 7.811725846407929e-06, "loss": 1.0359, "step": 473 }, { "epoch": 0.03522853957636567, "grad_norm": 19.009382726575875, "learning_rate": 7.828241123038811e-06, "loss": 0.8665, "step": 474 }, { "epoch": 0.03530286138981791, "grad_norm": 34.87023640887603, "learning_rate": 7.844756399669695e-06, "loss": 0.9037, "step": 475 }, { "epoch": 0.03537718320327016, "grad_norm": 14.18401210191608, "learning_rate": 7.86127167630058e-06, "loss": 0.9313, "step": 476 }, { "epoch": 0.03545150501672241, "grad_norm": 9.441452733824676, "learning_rate": 7.877786952931462e-06, "loss": 0.9508, "step": 477 }, { "epoch": 0.035525826830174655, "grad_norm": 11.877529816144126, "learning_rate": 7.894302229562346e-06, "loss": 0.6832, "step": 478 }, { "epoch": 0.03560014864362691, "grad_norm": 11.437599048538043, "learning_rate": 7.910817506193229e-06, "loss": 0.8016, "step": 479 }, { "epoch": 0.03567447045707915, "grad_norm": 25.96315644704054, "learning_rate": 7.927332782824113e-06, "loss": 0.8574, "step": 480 }, { "epoch": 0.035748792270531404, "grad_norm": 6.239713874112656, "learning_rate": 7.943848059454996e-06, "loss": 0.992, "step": 481 }, { "epoch": 0.03582311408398365, "grad_norm": 60.11069264339122, "learning_rate": 7.96036333608588e-06, "loss": 0.831, "step": 482 }, { "epoch": 0.035897435897435895, "grad_norm": 15.448388675218897, "learning_rate": 7.976878612716764e-06, "loss": 0.6662, "step": 483 }, { "epoch": 0.03597175771088815, "grad_norm": 12.688445155850445, "learning_rate": 7.993393889347647e-06, "loss": 0.8144, "step": 484 }, { "epoch": 0.03604607952434039, "grad_norm": 10.999451699754237, "learning_rate": 8.009909165978531e-06, "loss": 0.7993, "step": 485 }, { "epoch": 0.036120401337792644, "grad_norm": 14.180620081542456, "learning_rate": 8.026424442609413e-06, "loss": 0.8459, "step": 486 }, { "epoch": 0.03619472315124489, "grad_norm": 9.68479325424554, "learning_rate": 8.042939719240298e-06, "loss": 0.8129, "step": 487 }, { "epoch": 0.03626904496469714, "grad_norm": 13.654776143317127, "learning_rate": 8.05945499587118e-06, "loss": 0.797, "step": 488 }, { "epoch": 0.03634336677814939, "grad_norm": 24.48577040572616, "learning_rate": 8.075970272502064e-06, "loss": 1.1091, "step": 489 }, { "epoch": 0.03641768859160163, "grad_norm": 27.17303454067573, "learning_rate": 8.092485549132949e-06, "loss": 1.0514, "step": 490 }, { "epoch": 0.036492010405053885, "grad_norm": 10.033130554537022, "learning_rate": 8.109000825763831e-06, "loss": 0.9649, "step": 491 }, { "epoch": 0.03656633221850613, "grad_norm": 12.485368467921996, "learning_rate": 8.125516102394715e-06, "loss": 0.8587, "step": 492 }, { "epoch": 0.03664065403195838, "grad_norm": 9.288502444073684, "learning_rate": 8.1420313790256e-06, "loss": 0.8187, "step": 493 }, { "epoch": 0.03671497584541063, "grad_norm": 20.475933499267907, "learning_rate": 8.158546655656482e-06, "loss": 1.0333, "step": 494 }, { "epoch": 0.03678929765886288, "grad_norm": 13.885719258436007, "learning_rate": 8.175061932287366e-06, "loss": 1.0371, "step": 495 }, { "epoch": 0.036863619472315125, "grad_norm": 61.547873079892824, "learning_rate": 8.19157720891825e-06, "loss": 0.9717, "step": 496 }, { "epoch": 0.03693794128576737, "grad_norm": 124.69505155583741, "learning_rate": 8.208092485549133e-06, "loss": 0.9313, "step": 497 }, { "epoch": 0.03701226309921962, "grad_norm": 17.04713779491477, "learning_rate": 8.224607762180017e-06, "loss": 0.8698, "step": 498 }, { "epoch": 0.03708658491267187, "grad_norm": 8.30125193126919, "learning_rate": 8.241123038810902e-06, "loss": 0.838, "step": 499 }, { "epoch": 0.03716090672612412, "grad_norm": 8.155108392811279, "learning_rate": 8.257638315441784e-06, "loss": 0.9826, "step": 500 }, { "epoch": 0.037235228539576365, "grad_norm": 8.060165690252315, "learning_rate": 8.274153592072668e-06, "loss": 0.8597, "step": 501 }, { "epoch": 0.03730955035302862, "grad_norm": 34.982453575297264, "learning_rate": 8.290668868703553e-06, "loss": 0.7497, "step": 502 }, { "epoch": 0.03738387216648086, "grad_norm": 6.061475538230716, "learning_rate": 8.307184145334435e-06, "loss": 0.8568, "step": 503 }, { "epoch": 0.03745819397993311, "grad_norm": 16.44550148306124, "learning_rate": 8.32369942196532e-06, "loss": 0.8612, "step": 504 }, { "epoch": 0.03753251579338536, "grad_norm": 11.070952887813485, "learning_rate": 8.340214698596202e-06, "loss": 0.7897, "step": 505 }, { "epoch": 0.037606837606837605, "grad_norm": 11.389409558821033, "learning_rate": 8.356729975227086e-06, "loss": 0.8626, "step": 506 }, { "epoch": 0.03768115942028986, "grad_norm": 6.785259497010099, "learning_rate": 8.373245251857969e-06, "loss": 0.8366, "step": 507 }, { "epoch": 0.0377554812337421, "grad_norm": 19.11797072728217, "learning_rate": 8.389760528488853e-06, "loss": 0.9077, "step": 508 }, { "epoch": 0.037829803047194355, "grad_norm": 22.395256963740312, "learning_rate": 8.406275805119737e-06, "loss": 0.8886, "step": 509 }, { "epoch": 0.0379041248606466, "grad_norm": 29.596810415498336, "learning_rate": 8.42279108175062e-06, "loss": 0.8376, "step": 510 }, { "epoch": 0.037978446674098845, "grad_norm": 50.47793275635638, "learning_rate": 8.439306358381504e-06, "loss": 1.0106, "step": 511 }, { "epoch": 0.0380527684875511, "grad_norm": 15.984430244985415, "learning_rate": 8.455821635012386e-06, "loss": 0.9932, "step": 512 }, { "epoch": 0.03812709030100334, "grad_norm": 9.96862324214178, "learning_rate": 8.47233691164327e-06, "loss": 0.9905, "step": 513 }, { "epoch": 0.038201412114455595, "grad_norm": 7.108494809648106, "learning_rate": 8.488852188274155e-06, "loss": 0.9423, "step": 514 }, { "epoch": 0.03827573392790784, "grad_norm": 29.608707827599503, "learning_rate": 8.505367464905037e-06, "loss": 0.7884, "step": 515 }, { "epoch": 0.03835005574136009, "grad_norm": 9.044498580032093, "learning_rate": 8.521882741535922e-06, "loss": 0.7981, "step": 516 }, { "epoch": 0.03842437755481234, "grad_norm": 10.008818802371392, "learning_rate": 8.538398018166804e-06, "loss": 0.6532, "step": 517 }, { "epoch": 0.03849869936826458, "grad_norm": 11.059224408061798, "learning_rate": 8.554913294797688e-06, "loss": 1.008, "step": 518 }, { "epoch": 0.038573021181716835, "grad_norm": 8.195194875315902, "learning_rate": 8.571428571428571e-06, "loss": 0.8674, "step": 519 }, { "epoch": 0.03864734299516908, "grad_norm": 111.09843076956459, "learning_rate": 8.587943848059455e-06, "loss": 0.7696, "step": 520 }, { "epoch": 0.03872166480862133, "grad_norm": 8.335990554688674, "learning_rate": 8.60445912469034e-06, "loss": 0.8962, "step": 521 }, { "epoch": 0.03879598662207358, "grad_norm": 22.734959712564923, "learning_rate": 8.620974401321222e-06, "loss": 0.8299, "step": 522 }, { "epoch": 0.03887030843552583, "grad_norm": 16.631644648923647, "learning_rate": 8.637489677952106e-06, "loss": 0.8163, "step": 523 }, { "epoch": 0.038944630248978075, "grad_norm": 25.22176837467493, "learning_rate": 8.65400495458299e-06, "loss": 0.9575, "step": 524 }, { "epoch": 0.03901895206243032, "grad_norm": 8.196056437828139, "learning_rate": 8.670520231213873e-06, "loss": 1.0533, "step": 525 }, { "epoch": 0.03909327387588257, "grad_norm": 19.379550840756618, "learning_rate": 8.687035507844757e-06, "loss": 0.846, "step": 526 }, { "epoch": 0.03916759568933482, "grad_norm": 18.53083527643487, "learning_rate": 8.703550784475641e-06, "loss": 1.0351, "step": 527 }, { "epoch": 0.03924191750278707, "grad_norm": 10.723827482552029, "learning_rate": 8.720066061106524e-06, "loss": 0.875, "step": 528 }, { "epoch": 0.039316239316239315, "grad_norm": 10.699637906673265, "learning_rate": 8.736581337737408e-06, "loss": 0.8476, "step": 529 }, { "epoch": 0.03939056112969157, "grad_norm": 6.67679318914261, "learning_rate": 8.753096614368292e-06, "loss": 0.7549, "step": 530 }, { "epoch": 0.03946488294314381, "grad_norm": 10.97186723166918, "learning_rate": 8.769611890999175e-06, "loss": 0.952, "step": 531 }, { "epoch": 0.03953920475659606, "grad_norm": 11.085568460969608, "learning_rate": 8.786127167630059e-06, "loss": 0.7178, "step": 532 }, { "epoch": 0.03961352657004831, "grad_norm": 15.838975963632528, "learning_rate": 8.802642444260943e-06, "loss": 0.6931, "step": 533 }, { "epoch": 0.039687848383500555, "grad_norm": 16.77074133193813, "learning_rate": 8.819157720891826e-06, "loss": 0.9069, "step": 534 }, { "epoch": 0.03976217019695281, "grad_norm": 18.908810408225282, "learning_rate": 8.83567299752271e-06, "loss": 0.7247, "step": 535 }, { "epoch": 0.03983649201040505, "grad_norm": 5.762637312114609, "learning_rate": 8.852188274153593e-06, "loss": 0.9441, "step": 536 }, { "epoch": 0.039910813823857305, "grad_norm": 12.088851801410106, "learning_rate": 8.868703550784477e-06, "loss": 0.8345, "step": 537 }, { "epoch": 0.03998513563730955, "grad_norm": 11.927344906661942, "learning_rate": 8.88521882741536e-06, "loss": 0.809, "step": 538 }, { "epoch": 0.040059457450761796, "grad_norm": 12.309917951270666, "learning_rate": 8.901734104046244e-06, "loss": 0.8703, "step": 539 }, { "epoch": 0.04013377926421405, "grad_norm": 49.74641615175079, "learning_rate": 8.918249380677128e-06, "loss": 0.9428, "step": 540 }, { "epoch": 0.04020810107766629, "grad_norm": 15.78013688184459, "learning_rate": 8.93476465730801e-06, "loss": 0.7964, "step": 541 }, { "epoch": 0.040282422891118545, "grad_norm": 17.17613489601685, "learning_rate": 8.951279933938895e-06, "loss": 1.067, "step": 542 }, { "epoch": 0.04035674470457079, "grad_norm": 12.517173447778955, "learning_rate": 8.967795210569777e-06, "loss": 0.8118, "step": 543 }, { "epoch": 0.04043106651802304, "grad_norm": 16.136614309576366, "learning_rate": 8.984310487200661e-06, "loss": 0.8977, "step": 544 }, { "epoch": 0.04050538833147529, "grad_norm": 21.456403006559356, "learning_rate": 9.000825763831544e-06, "loss": 0.639, "step": 545 }, { "epoch": 0.04057971014492753, "grad_norm": 11.073034121292935, "learning_rate": 9.017341040462428e-06, "loss": 0.8602, "step": 546 }, { "epoch": 0.040654031958379785, "grad_norm": 20.969635834351788, "learning_rate": 9.033856317093312e-06, "loss": 0.922, "step": 547 }, { "epoch": 0.04072835377183203, "grad_norm": 11.39824707706513, "learning_rate": 9.050371593724195e-06, "loss": 0.7628, "step": 548 }, { "epoch": 0.04080267558528428, "grad_norm": 18.66745290773401, "learning_rate": 9.066886870355079e-06, "loss": 0.8728, "step": 549 }, { "epoch": 0.04087699739873653, "grad_norm": 11.072294562557465, "learning_rate": 9.083402146985962e-06, "loss": 0.8357, "step": 550 }, { "epoch": 0.04095131921218878, "grad_norm": 13.360791297517716, "learning_rate": 9.099917423616846e-06, "loss": 1.0121, "step": 551 }, { "epoch": 0.041025641025641026, "grad_norm": 43.38387493146034, "learning_rate": 9.11643270024773e-06, "loss": 1.0923, "step": 552 }, { "epoch": 0.04109996283909327, "grad_norm": 14.205090758336684, "learning_rate": 9.132947976878613e-06, "loss": 0.8251, "step": 553 }, { "epoch": 0.04117428465254552, "grad_norm": 31.73604186597157, "learning_rate": 9.149463253509497e-06, "loss": 0.577, "step": 554 }, { "epoch": 0.04124860646599777, "grad_norm": 7.149778694082696, "learning_rate": 9.165978530140381e-06, "loss": 0.9527, "step": 555 }, { "epoch": 0.04132292827945002, "grad_norm": 8.953268234183946, "learning_rate": 9.182493806771264e-06, "loss": 0.9342, "step": 556 }, { "epoch": 0.041397250092902266, "grad_norm": 15.348324173243364, "learning_rate": 9.199009083402148e-06, "loss": 1.0256, "step": 557 }, { "epoch": 0.04147157190635452, "grad_norm": 146.6076082011086, "learning_rate": 9.215524360033032e-06, "loss": 0.8981, "step": 558 }, { "epoch": 0.04154589371980676, "grad_norm": 8.34568734742224, "learning_rate": 9.232039636663915e-06, "loss": 0.9653, "step": 559 }, { "epoch": 0.04162021553325901, "grad_norm": 15.598169502458394, "learning_rate": 9.248554913294799e-06, "loss": 0.6332, "step": 560 }, { "epoch": 0.04169453734671126, "grad_norm": 18.554778463829937, "learning_rate": 9.265070189925683e-06, "loss": 1.058, "step": 561 }, { "epoch": 0.041768859160163506, "grad_norm": 8.834618915270514, "learning_rate": 9.281585466556566e-06, "loss": 0.84, "step": 562 }, { "epoch": 0.04184318097361576, "grad_norm": 9.344454499142389, "learning_rate": 9.29810074318745e-06, "loss": 0.9421, "step": 563 }, { "epoch": 0.041917502787068, "grad_norm": 9.167772790160294, "learning_rate": 9.314616019818332e-06, "loss": 0.7455, "step": 564 }, { "epoch": 0.041991824600520256, "grad_norm": 13.382715133171658, "learning_rate": 9.331131296449217e-06, "loss": 0.7761, "step": 565 }, { "epoch": 0.0420661464139725, "grad_norm": 22.27095095870756, "learning_rate": 9.347646573080101e-06, "loss": 0.7997, "step": 566 }, { "epoch": 0.042140468227424746, "grad_norm": 17.945166911065456, "learning_rate": 9.364161849710983e-06, "loss": 0.9447, "step": 567 }, { "epoch": 0.042214790040877, "grad_norm": 12.656261166165951, "learning_rate": 9.380677126341868e-06, "loss": 0.6923, "step": 568 }, { "epoch": 0.042289111854329244, "grad_norm": 12.11873281514985, "learning_rate": 9.39719240297275e-06, "loss": 1.0332, "step": 569 }, { "epoch": 0.042363433667781496, "grad_norm": 16.53503349135637, "learning_rate": 9.413707679603634e-06, "loss": 0.8257, "step": 570 }, { "epoch": 0.04243775548123374, "grad_norm": 49.37170753825014, "learning_rate": 9.430222956234519e-06, "loss": 0.8027, "step": 571 }, { "epoch": 0.04251207729468599, "grad_norm": 5.591763484257745, "learning_rate": 9.446738232865401e-06, "loss": 0.8536, "step": 572 }, { "epoch": 0.04258639910813824, "grad_norm": 16.15801301626146, "learning_rate": 9.463253509496285e-06, "loss": 1.0478, "step": 573 }, { "epoch": 0.042660720921590484, "grad_norm": 17.35805139346463, "learning_rate": 9.479768786127168e-06, "loss": 0.9952, "step": 574 }, { "epoch": 0.042735042735042736, "grad_norm": 22.66090912155767, "learning_rate": 9.496284062758052e-06, "loss": 0.8216, "step": 575 }, { "epoch": 0.04280936454849498, "grad_norm": 8.65061065373123, "learning_rate": 9.512799339388935e-06, "loss": 0.6865, "step": 576 }, { "epoch": 0.04288368636194723, "grad_norm": 11.820303125169005, "learning_rate": 9.529314616019819e-06, "loss": 0.7382, "step": 577 }, { "epoch": 0.04295800817539948, "grad_norm": 6.3152626899339195, "learning_rate": 9.545829892650703e-06, "loss": 0.9891, "step": 578 }, { "epoch": 0.04303232998885173, "grad_norm": 10.468902826818113, "learning_rate": 9.562345169281586e-06, "loss": 0.84, "step": 579 }, { "epoch": 0.043106651802303976, "grad_norm": 10.556934307871508, "learning_rate": 9.57886044591247e-06, "loss": 1.0401, "step": 580 }, { "epoch": 0.04318097361575622, "grad_norm": 19.08974101258741, "learning_rate": 9.595375722543352e-06, "loss": 1.0728, "step": 581 }, { "epoch": 0.043255295429208473, "grad_norm": 5.732431924988558, "learning_rate": 9.611890999174237e-06, "loss": 0.9665, "step": 582 }, { "epoch": 0.04332961724266072, "grad_norm": 12.427496733532548, "learning_rate": 9.62840627580512e-06, "loss": 0.902, "step": 583 }, { "epoch": 0.04340393905611297, "grad_norm": 70.35506749636762, "learning_rate": 9.644921552436003e-06, "loss": 0.9209, "step": 584 }, { "epoch": 0.043478260869565216, "grad_norm": 82.07997866106223, "learning_rate": 9.661436829066888e-06, "loss": 0.9757, "step": 585 }, { "epoch": 0.04355258268301747, "grad_norm": 18.307358297234817, "learning_rate": 9.67795210569777e-06, "loss": 1.0353, "step": 586 }, { "epoch": 0.043626904496469714, "grad_norm": 77.9464202709845, "learning_rate": 9.694467382328654e-06, "loss": 0.8884, "step": 587 }, { "epoch": 0.04370122630992196, "grad_norm": 9.129097745288847, "learning_rate": 9.710982658959539e-06, "loss": 0.8731, "step": 588 }, { "epoch": 0.04377554812337421, "grad_norm": 57.06791269606767, "learning_rate": 9.727497935590421e-06, "loss": 0.9107, "step": 589 }, { "epoch": 0.043849869936826456, "grad_norm": 28.699515875180136, "learning_rate": 9.744013212221305e-06, "loss": 0.8926, "step": 590 }, { "epoch": 0.04392419175027871, "grad_norm": 18.22189843357053, "learning_rate": 9.76052848885219e-06, "loss": 1.0391, "step": 591 }, { "epoch": 0.043998513563730954, "grad_norm": 6.465435924933423, "learning_rate": 9.777043765483072e-06, "loss": 0.9383, "step": 592 }, { "epoch": 0.044072835377183206, "grad_norm": 13.779536180313471, "learning_rate": 9.793559042113956e-06, "loss": 0.8693, "step": 593 }, { "epoch": 0.04414715719063545, "grad_norm": 19.508030658847545, "learning_rate": 9.81007431874484e-06, "loss": 0.9718, "step": 594 }, { "epoch": 0.044221479004087696, "grad_norm": 18.287654337486277, "learning_rate": 9.826589595375723e-06, "loss": 0.9064, "step": 595 }, { "epoch": 0.04429580081753995, "grad_norm": 14.429015111777916, "learning_rate": 9.843104872006607e-06, "loss": 0.8698, "step": 596 }, { "epoch": 0.044370122630992194, "grad_norm": 7.269294529554696, "learning_rate": 9.859620148637492e-06, "loss": 0.9572, "step": 597 }, { "epoch": 0.044444444444444446, "grad_norm": 19.427051316730232, "learning_rate": 9.876135425268374e-06, "loss": 0.9029, "step": 598 }, { "epoch": 0.04451876625789669, "grad_norm": 33.06849979106973, "learning_rate": 9.892650701899258e-06, "loss": 0.9499, "step": 599 }, { "epoch": 0.044593088071348944, "grad_norm": 10.177291846782062, "learning_rate": 9.909165978530141e-06, "loss": 0.7806, "step": 600 }, { "epoch": 0.04466740988480119, "grad_norm": 10.206218523447175, "learning_rate": 9.925681255161025e-06, "loss": 0.9039, "step": 601 }, { "epoch": 0.044741731698253434, "grad_norm": 10.374688299301203, "learning_rate": 9.942196531791908e-06, "loss": 0.901, "step": 602 }, { "epoch": 0.044816053511705686, "grad_norm": 8.083131429854044, "learning_rate": 9.958711808422792e-06, "loss": 0.7599, "step": 603 }, { "epoch": 0.04489037532515793, "grad_norm": 15.156281292489165, "learning_rate": 9.975227085053676e-06, "loss": 0.9624, "step": 604 }, { "epoch": 0.044964697138610184, "grad_norm": 11.091989190862042, "learning_rate": 9.991742361684559e-06, "loss": 0.9816, "step": 605 }, { "epoch": 0.04503901895206243, "grad_norm": 10.49635809151885, "learning_rate": 1.0008257638315441e-05, "loss": 0.9874, "step": 606 }, { "epoch": 0.04511334076551468, "grad_norm": 24.3224597137882, "learning_rate": 1.0024772914946325e-05, "loss": 0.9652, "step": 607 }, { "epoch": 0.045187662578966926, "grad_norm": 59.673374641586776, "learning_rate": 1.004128819157721e-05, "loss": 0.9528, "step": 608 }, { "epoch": 0.04526198439241917, "grad_norm": 10.51491234098336, "learning_rate": 1.0057803468208094e-05, "loss": 0.8536, "step": 609 }, { "epoch": 0.045336306205871424, "grad_norm": 18.014107856176466, "learning_rate": 1.0074318744838978e-05, "loss": 0.8709, "step": 610 }, { "epoch": 0.04541062801932367, "grad_norm": 13.558289092558113, "learning_rate": 1.0090834021469859e-05, "loss": 0.7469, "step": 611 }, { "epoch": 0.04548494983277592, "grad_norm": 16.365713701414276, "learning_rate": 1.0107349298100743e-05, "loss": 0.9447, "step": 612 }, { "epoch": 0.04555927164622817, "grad_norm": 7.5831688629974145, "learning_rate": 1.0123864574731627e-05, "loss": 0.8679, "step": 613 }, { "epoch": 0.04563359345968042, "grad_norm": 15.31720982615247, "learning_rate": 1.0140379851362512e-05, "loss": 0.8934, "step": 614 }, { "epoch": 0.045707915273132664, "grad_norm": 6.886330946833057, "learning_rate": 1.0156895127993394e-05, "loss": 0.8148, "step": 615 }, { "epoch": 0.04578223708658491, "grad_norm": 7.65417557682706, "learning_rate": 1.0173410404624278e-05, "loss": 0.9741, "step": 616 }, { "epoch": 0.04585655890003716, "grad_norm": 8.593669478413384, "learning_rate": 1.0189925681255161e-05, "loss": 1.0067, "step": 617 }, { "epoch": 0.04593088071348941, "grad_norm": 10.478808968520255, "learning_rate": 1.0206440957886045e-05, "loss": 1.0079, "step": 618 }, { "epoch": 0.04600520252694166, "grad_norm": 14.727896502863798, "learning_rate": 1.022295623451693e-05, "loss": 0.8805, "step": 619 }, { "epoch": 0.046079524340393904, "grad_norm": 14.625890979466712, "learning_rate": 1.0239471511147812e-05, "loss": 0.8801, "step": 620 }, { "epoch": 0.046153846153846156, "grad_norm": 16.196791952176728, "learning_rate": 1.0255986787778696e-05, "loss": 0.9723, "step": 621 }, { "epoch": 0.0462281679672984, "grad_norm": 52.85510915017196, "learning_rate": 1.027250206440958e-05, "loss": 0.8614, "step": 622 }, { "epoch": 0.04630248978075065, "grad_norm": 17.791024175357922, "learning_rate": 1.0289017341040463e-05, "loss": 0.9035, "step": 623 }, { "epoch": 0.0463768115942029, "grad_norm": 7.956048768908875, "learning_rate": 1.0305532617671347e-05, "loss": 0.9915, "step": 624 }, { "epoch": 0.046451133407655144, "grad_norm": 9.311599384654476, "learning_rate": 1.032204789430223e-05, "loss": 0.9757, "step": 625 }, { "epoch": 0.0465254552211074, "grad_norm": 21.36132936606889, "learning_rate": 1.0338563170933114e-05, "loss": 0.9873, "step": 626 }, { "epoch": 0.04659977703455964, "grad_norm": 10.67108637321637, "learning_rate": 1.0355078447563998e-05, "loss": 0.815, "step": 627 }, { "epoch": 0.046674098848011894, "grad_norm": 19.070522000566385, "learning_rate": 1.0371593724194882e-05, "loss": 0.8448, "step": 628 }, { "epoch": 0.04674842066146414, "grad_norm": 10.44012448636743, "learning_rate": 1.0388109000825765e-05, "loss": 0.9505, "step": 629 }, { "epoch": 0.046822742474916385, "grad_norm": 8.779705792361922, "learning_rate": 1.0404624277456647e-05, "loss": 0.8631, "step": 630 }, { "epoch": 0.04689706428836864, "grad_norm": 22.002219326989824, "learning_rate": 1.0421139554087532e-05, "loss": 0.881, "step": 631 }, { "epoch": 0.04697138610182088, "grad_norm": 19.134318810516742, "learning_rate": 1.0437654830718416e-05, "loss": 0.8849, "step": 632 }, { "epoch": 0.047045707915273134, "grad_norm": 19.60003503306113, "learning_rate": 1.04541701073493e-05, "loss": 1.007, "step": 633 }, { "epoch": 0.04712002972872538, "grad_norm": 8.230884349943478, "learning_rate": 1.0470685383980181e-05, "loss": 0.8307, "step": 634 }, { "epoch": 0.04719435154217763, "grad_norm": 32.14130429369902, "learning_rate": 1.0487200660611065e-05, "loss": 0.9904, "step": 635 }, { "epoch": 0.04726867335562988, "grad_norm": 13.458956376476678, "learning_rate": 1.050371593724195e-05, "loss": 0.8679, "step": 636 }, { "epoch": 0.04734299516908213, "grad_norm": 5.956626289769337, "learning_rate": 1.0520231213872834e-05, "loss": 0.9203, "step": 637 }, { "epoch": 0.047417316982534374, "grad_norm": 25.52811456955531, "learning_rate": 1.0536746490503718e-05, "loss": 0.8531, "step": 638 }, { "epoch": 0.04749163879598662, "grad_norm": 7.243257194788287, "learning_rate": 1.0553261767134599e-05, "loss": 0.8898, "step": 639 }, { "epoch": 0.04756596060943887, "grad_norm": 35.33874564107978, "learning_rate": 1.0569777043765483e-05, "loss": 0.951, "step": 640 }, { "epoch": 0.04764028242289112, "grad_norm": 7.709429379070777, "learning_rate": 1.0586292320396367e-05, "loss": 1.0281, "step": 641 }, { "epoch": 0.04771460423634337, "grad_norm": 17.300805899630728, "learning_rate": 1.0602807597027251e-05, "loss": 0.904, "step": 642 }, { "epoch": 0.047788926049795614, "grad_norm": 7.7278122705418095, "learning_rate": 1.0619322873658136e-05, "loss": 0.7616, "step": 643 }, { "epoch": 0.04786324786324787, "grad_norm": 74.55741555232886, "learning_rate": 1.0635838150289017e-05, "loss": 0.8357, "step": 644 }, { "epoch": 0.04793756967670011, "grad_norm": 12.587548286155496, "learning_rate": 1.06523534269199e-05, "loss": 0.8847, "step": 645 }, { "epoch": 0.04801189149015236, "grad_norm": 21.79760972844678, "learning_rate": 1.0668868703550785e-05, "loss": 0.7475, "step": 646 }, { "epoch": 0.04808621330360461, "grad_norm": 12.342577945530842, "learning_rate": 1.068538398018167e-05, "loss": 0.8937, "step": 647 }, { "epoch": 0.048160535117056855, "grad_norm": 15.729087862219448, "learning_rate": 1.0701899256812553e-05, "loss": 0.7951, "step": 648 }, { "epoch": 0.04823485693050911, "grad_norm": 9.827629751649463, "learning_rate": 1.0718414533443436e-05, "loss": 1.0531, "step": 649 }, { "epoch": 0.04830917874396135, "grad_norm": 102.31957820103541, "learning_rate": 1.0734929810074319e-05, "loss": 0.7411, "step": 650 }, { "epoch": 0.048383500557413604, "grad_norm": 16.002997321102516, "learning_rate": 1.0751445086705203e-05, "loss": 0.9757, "step": 651 }, { "epoch": 0.04845782237086585, "grad_norm": 7.35216865926898, "learning_rate": 1.0767960363336087e-05, "loss": 0.7963, "step": 652 }, { "epoch": 0.048532144184318095, "grad_norm": 21.41333784303101, "learning_rate": 1.078447563996697e-05, "loss": 0.7195, "step": 653 }, { "epoch": 0.04860646599777035, "grad_norm": 11.025750957320101, "learning_rate": 1.0800990916597854e-05, "loss": 0.7954, "step": 654 }, { "epoch": 0.04868078781122259, "grad_norm": 11.983369284605235, "learning_rate": 1.0817506193228738e-05, "loss": 0.8395, "step": 655 }, { "epoch": 0.048755109624674844, "grad_norm": 9.564150173208608, "learning_rate": 1.083402146985962e-05, "loss": 0.8052, "step": 656 }, { "epoch": 0.04882943143812709, "grad_norm": 7.823687609896382, "learning_rate": 1.0850536746490505e-05, "loss": 0.911, "step": 657 }, { "epoch": 0.04890375325157934, "grad_norm": 65.2289044015902, "learning_rate": 1.0867052023121387e-05, "loss": 0.9045, "step": 658 }, { "epoch": 0.04897807506503159, "grad_norm": 22.050729408333854, "learning_rate": 1.0883567299752271e-05, "loss": 0.7861, "step": 659 }, { "epoch": 0.04905239687848383, "grad_norm": 5.41144651655195, "learning_rate": 1.0900082576383156e-05, "loss": 1.0086, "step": 660 }, { "epoch": 0.049126718691936085, "grad_norm": 18.604210808872413, "learning_rate": 1.091659785301404e-05, "loss": 0.8879, "step": 661 }, { "epoch": 0.04920104050538833, "grad_norm": 18.005381533227997, "learning_rate": 1.0933113129644922e-05, "loss": 0.7265, "step": 662 }, { "epoch": 0.04927536231884058, "grad_norm": 21.133845604243575, "learning_rate": 1.0949628406275805e-05, "loss": 0.9894, "step": 663 }, { "epoch": 0.04934968413229283, "grad_norm": 16.08308931412282, "learning_rate": 1.096614368290669e-05, "loss": 1.0473, "step": 664 }, { "epoch": 0.04942400594574508, "grad_norm": 22.56753617928217, "learning_rate": 1.0982658959537573e-05, "loss": 1.0051, "step": 665 }, { "epoch": 0.049498327759197325, "grad_norm": 15.370168690976069, "learning_rate": 1.0999174236168458e-05, "loss": 1.0846, "step": 666 }, { "epoch": 0.04957264957264957, "grad_norm": 33.11599024533992, "learning_rate": 1.1015689512799342e-05, "loss": 0.9081, "step": 667 }, { "epoch": 0.04964697138610182, "grad_norm": 26.108153650382242, "learning_rate": 1.1032204789430223e-05, "loss": 0.8917, "step": 668 }, { "epoch": 0.04972129319955407, "grad_norm": 5.978566841364713, "learning_rate": 1.1048720066061107e-05, "loss": 0.7943, "step": 669 }, { "epoch": 0.04979561501300632, "grad_norm": 13.592014187378632, "learning_rate": 1.1065235342691991e-05, "loss": 1.1279, "step": 670 }, { "epoch": 0.049869936826458565, "grad_norm": 6.552583539161017, "learning_rate": 1.1081750619322875e-05, "loss": 0.8885, "step": 671 }, { "epoch": 0.04994425863991082, "grad_norm": 37.91514609419018, "learning_rate": 1.1098265895953756e-05, "loss": 0.8826, "step": 672 }, { "epoch": 0.05001858045336306, "grad_norm": 7.188474547404735, "learning_rate": 1.111478117258464e-05, "loss": 1.0118, "step": 673 }, { "epoch": 0.05009290226681531, "grad_norm": 30.86387214954666, "learning_rate": 1.1131296449215525e-05, "loss": 0.8105, "step": 674 }, { "epoch": 0.05016722408026756, "grad_norm": 17.2319580668477, "learning_rate": 1.1147811725846409e-05, "loss": 0.9186, "step": 675 }, { "epoch": 0.050241545893719805, "grad_norm": 5.848395579108825, "learning_rate": 1.1164327002477293e-05, "loss": 1.1132, "step": 676 }, { "epoch": 0.05031586770717206, "grad_norm": 55.24654497458918, "learning_rate": 1.1180842279108176e-05, "loss": 0.9584, "step": 677 }, { "epoch": 0.0503901895206243, "grad_norm": 9.799024822966658, "learning_rate": 1.1197357555739058e-05, "loss": 0.9478, "step": 678 }, { "epoch": 0.050464511334076555, "grad_norm": 24.12650440606459, "learning_rate": 1.1213872832369943e-05, "loss": 1.0696, "step": 679 }, { "epoch": 0.0505388331475288, "grad_norm": 33.07193521600053, "learning_rate": 1.1230388109000827e-05, "loss": 0.9426, "step": 680 }, { "epoch": 0.050613154960981045, "grad_norm": 17.140868202117474, "learning_rate": 1.1246903385631711e-05, "loss": 0.6577, "step": 681 }, { "epoch": 0.0506874767744333, "grad_norm": 24.34563726645322, "learning_rate": 1.1263418662262594e-05, "loss": 0.8694, "step": 682 }, { "epoch": 0.05076179858788554, "grad_norm": 14.501359474972777, "learning_rate": 1.1279933938893478e-05, "loss": 0.806, "step": 683 }, { "epoch": 0.050836120401337795, "grad_norm": 13.703670626239811, "learning_rate": 1.129644921552436e-05, "loss": 1.1593, "step": 684 }, { "epoch": 0.05091044221479004, "grad_norm": 71.7695979105598, "learning_rate": 1.1312964492155245e-05, "loss": 0.717, "step": 685 }, { "epoch": 0.05098476402824229, "grad_norm": 8.760103489475306, "learning_rate": 1.1329479768786129e-05, "loss": 0.737, "step": 686 }, { "epoch": 0.05105908584169454, "grad_norm": 11.28436964562038, "learning_rate": 1.1345995045417011e-05, "loss": 1.0012, "step": 687 }, { "epoch": 0.05113340765514678, "grad_norm": 12.100295605693388, "learning_rate": 1.1362510322047895e-05, "loss": 0.901, "step": 688 }, { "epoch": 0.051207729468599035, "grad_norm": 11.734999043318764, "learning_rate": 1.137902559867878e-05, "loss": 1.086, "step": 689 }, { "epoch": 0.05128205128205128, "grad_norm": 25.779936584201586, "learning_rate": 1.1395540875309662e-05, "loss": 0.9296, "step": 690 }, { "epoch": 0.05135637309550353, "grad_norm": 23.101550336836645, "learning_rate": 1.1412056151940545e-05, "loss": 0.8754, "step": 691 }, { "epoch": 0.05143069490895578, "grad_norm": 9.363594155660683, "learning_rate": 1.1428571428571429e-05, "loss": 0.7508, "step": 692 }, { "epoch": 0.05150501672240803, "grad_norm": 21.530760216782966, "learning_rate": 1.1445086705202313e-05, "loss": 1.0349, "step": 693 }, { "epoch": 0.051579338535860275, "grad_norm": 26.640607672772905, "learning_rate": 1.1461601981833197e-05, "loss": 0.8119, "step": 694 }, { "epoch": 0.05165366034931252, "grad_norm": 37.20820285370154, "learning_rate": 1.1478117258464082e-05, "loss": 0.6482, "step": 695 }, { "epoch": 0.05172798216276477, "grad_norm": 22.571624753930774, "learning_rate": 1.1494632535094963e-05, "loss": 0.9938, "step": 696 }, { "epoch": 0.05180230397621702, "grad_norm": 19.02809048033469, "learning_rate": 1.1511147811725847e-05, "loss": 0.7025, "step": 697 }, { "epoch": 0.05187662578966927, "grad_norm": 5.928890779665518, "learning_rate": 1.1527663088356731e-05, "loss": 0.8741, "step": 698 }, { "epoch": 0.051950947603121515, "grad_norm": 22.437540212283153, "learning_rate": 1.1544178364987615e-05, "loss": 0.7678, "step": 699 }, { "epoch": 0.05202526941657377, "grad_norm": 6.96195049184466, "learning_rate": 1.15606936416185e-05, "loss": 1.1797, "step": 700 }, { "epoch": 0.05209959123002601, "grad_norm": 37.44979863749393, "learning_rate": 1.157720891824938e-05, "loss": 0.9492, "step": 701 }, { "epoch": 0.05217391304347826, "grad_norm": 11.580123042260084, "learning_rate": 1.1593724194880265e-05, "loss": 0.7243, "step": 702 }, { "epoch": 0.05224823485693051, "grad_norm": 19.141455966934174, "learning_rate": 1.1610239471511149e-05, "loss": 0.8994, "step": 703 }, { "epoch": 0.052322556670382755, "grad_norm": 23.397771066961635, "learning_rate": 1.1626754748142033e-05, "loss": 0.7501, "step": 704 }, { "epoch": 0.05239687848383501, "grad_norm": 50.72593115581215, "learning_rate": 1.1643270024772917e-05, "loss": 0.9701, "step": 705 }, { "epoch": 0.05247120029728725, "grad_norm": 114.10918688216765, "learning_rate": 1.1659785301403798e-05, "loss": 0.7568, "step": 706 }, { "epoch": 0.052545522110739505, "grad_norm": 15.12552095819129, "learning_rate": 1.1676300578034682e-05, "loss": 0.848, "step": 707 }, { "epoch": 0.05261984392419175, "grad_norm": 9.69885590856677, "learning_rate": 1.1692815854665567e-05, "loss": 0.9522, "step": 708 }, { "epoch": 0.052694165737643996, "grad_norm": 9.405588559599991, "learning_rate": 1.170933113129645e-05, "loss": 0.871, "step": 709 }, { "epoch": 0.05276848755109625, "grad_norm": 8.65949471681155, "learning_rate": 1.1725846407927333e-05, "loss": 0.8864, "step": 710 }, { "epoch": 0.05284280936454849, "grad_norm": 28.43302251348596, "learning_rate": 1.1742361684558218e-05, "loss": 0.6793, "step": 711 }, { "epoch": 0.052917131178000745, "grad_norm": 16.609499761697347, "learning_rate": 1.17588769611891e-05, "loss": 0.9491, "step": 712 }, { "epoch": 0.05299145299145299, "grad_norm": 57.0918135892598, "learning_rate": 1.1775392237819984e-05, "loss": 0.9017, "step": 713 }, { "epoch": 0.05306577480490524, "grad_norm": 15.877215128878081, "learning_rate": 1.1791907514450869e-05, "loss": 1.1169, "step": 714 }, { "epoch": 0.05314009661835749, "grad_norm": 12.765637530578912, "learning_rate": 1.1808422791081751e-05, "loss": 0.828, "step": 715 }, { "epoch": 0.05321441843180973, "grad_norm": 19.84452210075621, "learning_rate": 1.1824938067712635e-05, "loss": 0.9897, "step": 716 }, { "epoch": 0.053288740245261985, "grad_norm": 37.9763118119668, "learning_rate": 1.184145334434352e-05, "loss": 1.0356, "step": 717 }, { "epoch": 0.05336306205871423, "grad_norm": 18.959438840776787, "learning_rate": 1.1857968620974402e-05, "loss": 0.9397, "step": 718 }, { "epoch": 0.05343738387216648, "grad_norm": 10.192161059546466, "learning_rate": 1.1874483897605286e-05, "loss": 0.7784, "step": 719 }, { "epoch": 0.05351170568561873, "grad_norm": 22.02605968931141, "learning_rate": 1.1890999174236169e-05, "loss": 0.7083, "step": 720 }, { "epoch": 0.05358602749907098, "grad_norm": 5.8432165282579, "learning_rate": 1.1907514450867053e-05, "loss": 0.874, "step": 721 }, { "epoch": 0.053660349312523226, "grad_norm": 58.97667744220424, "learning_rate": 1.1924029727497937e-05, "loss": 0.8093, "step": 722 }, { "epoch": 0.05373467112597547, "grad_norm": 14.430959706297935, "learning_rate": 1.1940545004128821e-05, "loss": 0.8377, "step": 723 }, { "epoch": 0.05380899293942772, "grad_norm": 7.404001382252545, "learning_rate": 1.1957060280759704e-05, "loss": 0.7631, "step": 724 }, { "epoch": 0.05388331475287997, "grad_norm": 10.46875950994586, "learning_rate": 1.1973575557390587e-05, "loss": 0.9911, "step": 725 }, { "epoch": 0.05395763656633222, "grad_norm": 12.93694700062899, "learning_rate": 1.199009083402147e-05, "loss": 0.733, "step": 726 }, { "epoch": 0.054031958379784466, "grad_norm": 6.5998108940572315, "learning_rate": 1.2006606110652355e-05, "loss": 0.9772, "step": 727 }, { "epoch": 0.05410628019323672, "grad_norm": 20.905880710358343, "learning_rate": 1.202312138728324e-05, "loss": 0.9602, "step": 728 }, { "epoch": 0.05418060200668896, "grad_norm": 7.578748015258747, "learning_rate": 1.203963666391412e-05, "loss": 1.1289, "step": 729 }, { "epoch": 0.05425492382014121, "grad_norm": 132.30725972785567, "learning_rate": 1.2056151940545004e-05, "loss": 1.0521, "step": 730 }, { "epoch": 0.05432924563359346, "grad_norm": 9.583328106597964, "learning_rate": 1.2072667217175889e-05, "loss": 0.9187, "step": 731 }, { "epoch": 0.054403567447045706, "grad_norm": 13.701837451491288, "learning_rate": 1.2089182493806773e-05, "loss": 0.9613, "step": 732 }, { "epoch": 0.05447788926049796, "grad_norm": 6.244104973905111, "learning_rate": 1.2105697770437657e-05, "loss": 1.1203, "step": 733 }, { "epoch": 0.0545522110739502, "grad_norm": 27.695978769042267, "learning_rate": 1.2122213047068538e-05, "loss": 1.0974, "step": 734 }, { "epoch": 0.054626532887402456, "grad_norm": 7.372260552017998, "learning_rate": 1.2138728323699422e-05, "loss": 0.9246, "step": 735 }, { "epoch": 0.0547008547008547, "grad_norm": 24.476198597660126, "learning_rate": 1.2155243600330306e-05, "loss": 0.752, "step": 736 }, { "epoch": 0.054775176514306946, "grad_norm": 14.614532609315289, "learning_rate": 1.217175887696119e-05, "loss": 0.9484, "step": 737 }, { "epoch": 0.0548494983277592, "grad_norm": 9.126152282149961, "learning_rate": 1.2188274153592075e-05, "loss": 0.8057, "step": 738 }, { "epoch": 0.054923820141211444, "grad_norm": 6.7409406452199425, "learning_rate": 1.2204789430222956e-05, "loss": 0.96, "step": 739 }, { "epoch": 0.054998141954663696, "grad_norm": 18.01696761815133, "learning_rate": 1.222130470685384e-05, "loss": 0.9141, "step": 740 }, { "epoch": 0.05507246376811594, "grad_norm": 15.476600199628317, "learning_rate": 1.2237819983484724e-05, "loss": 0.7739, "step": 741 }, { "epoch": 0.05514678558156819, "grad_norm": 5.814641007260757, "learning_rate": 1.2254335260115608e-05, "loss": 0.7637, "step": 742 }, { "epoch": 0.05522110739502044, "grad_norm": 11.393124009006986, "learning_rate": 1.2270850536746493e-05, "loss": 0.754, "step": 743 }, { "epoch": 0.055295429208472684, "grad_norm": 7.790709857654731, "learning_rate": 1.2287365813377375e-05, "loss": 0.8304, "step": 744 }, { "epoch": 0.055369751021924936, "grad_norm": 14.179521369878069, "learning_rate": 1.2303881090008258e-05, "loss": 0.9351, "step": 745 }, { "epoch": 0.05544407283537718, "grad_norm": 329.2200174569497, "learning_rate": 1.2320396366639142e-05, "loss": 0.8947, "step": 746 }, { "epoch": 0.05551839464882943, "grad_norm": 9.610759295455493, "learning_rate": 1.2336911643270026e-05, "loss": 0.9027, "step": 747 }, { "epoch": 0.05559271646228168, "grad_norm": 12.668123031278558, "learning_rate": 1.2353426919900909e-05, "loss": 0.8854, "step": 748 }, { "epoch": 0.05566703827573393, "grad_norm": 18.69263385692476, "learning_rate": 1.2369942196531793e-05, "loss": 1.1685, "step": 749 }, { "epoch": 0.055741360089186176, "grad_norm": 15.526614308525357, "learning_rate": 1.2386457473162677e-05, "loss": 0.7916, "step": 750 }, { "epoch": 0.05581568190263842, "grad_norm": 18.558312925628623, "learning_rate": 1.240297274979356e-05, "loss": 0.6928, "step": 751 }, { "epoch": 0.05589000371609067, "grad_norm": 36.66194872741858, "learning_rate": 1.2419488026424444e-05, "loss": 0.9294, "step": 752 }, { "epoch": 0.05596432552954292, "grad_norm": 13.447194171635704, "learning_rate": 1.2436003303055326e-05, "loss": 0.7185, "step": 753 }, { "epoch": 0.05603864734299517, "grad_norm": 32.01072628886554, "learning_rate": 1.245251857968621e-05, "loss": 0.9526, "step": 754 }, { "epoch": 0.056112969156447416, "grad_norm": 144.1670410565116, "learning_rate": 1.2469033856317095e-05, "loss": 0.9225, "step": 755 }, { "epoch": 0.05618729096989967, "grad_norm": 8.393407370054836, "learning_rate": 1.2485549132947979e-05, "loss": 0.9468, "step": 756 }, { "epoch": 0.056261612783351914, "grad_norm": 36.92090294003004, "learning_rate": 1.2502064409578862e-05, "loss": 0.8341, "step": 757 }, { "epoch": 0.05633593459680416, "grad_norm": 31.097928290483882, "learning_rate": 1.2518579686209744e-05, "loss": 0.88, "step": 758 }, { "epoch": 0.05641025641025641, "grad_norm": 45.03107861384347, "learning_rate": 1.2535094962840628e-05, "loss": 1.1453, "step": 759 }, { "epoch": 0.056484578223708656, "grad_norm": 14.660453496456737, "learning_rate": 1.2551610239471513e-05, "loss": 0.7785, "step": 760 }, { "epoch": 0.05655890003716091, "grad_norm": 13.983891961346053, "learning_rate": 1.2568125516102397e-05, "loss": 0.9849, "step": 761 }, { "epoch": 0.056633221850613154, "grad_norm": 23.449384641676282, "learning_rate": 1.2584640792733281e-05, "loss": 0.9944, "step": 762 }, { "epoch": 0.056707543664065406, "grad_norm": 12.138190058317479, "learning_rate": 1.2601156069364162e-05, "loss": 0.8188, "step": 763 }, { "epoch": 0.05678186547751765, "grad_norm": 56.43532543038422, "learning_rate": 1.2617671345995046e-05, "loss": 0.8055, "step": 764 }, { "epoch": 0.056856187290969896, "grad_norm": 30.69706185653195, "learning_rate": 1.263418662262593e-05, "loss": 0.9229, "step": 765 }, { "epoch": 0.05693050910442215, "grad_norm": 9.886678911960836, "learning_rate": 1.2650701899256815e-05, "loss": 1.141, "step": 766 }, { "epoch": 0.057004830917874394, "grad_norm": 9.930499333348477, "learning_rate": 1.2667217175887695e-05, "loss": 0.7892, "step": 767 }, { "epoch": 0.057079152731326646, "grad_norm": 29.320341376775065, "learning_rate": 1.268373245251858e-05, "loss": 0.985, "step": 768 }, { "epoch": 0.05715347454477889, "grad_norm": 25.26308520262069, "learning_rate": 1.2700247729149464e-05, "loss": 1.1405, "step": 769 }, { "epoch": 0.057227796358231144, "grad_norm": 22.23376355549486, "learning_rate": 1.2716763005780348e-05, "loss": 0.9556, "step": 770 }, { "epoch": 0.05730211817168339, "grad_norm": 8.147252938566913, "learning_rate": 1.2733278282411232e-05, "loss": 0.9974, "step": 771 }, { "epoch": 0.057376439985135634, "grad_norm": 4.680758054647874, "learning_rate": 1.2749793559042115e-05, "loss": 0.8526, "step": 772 }, { "epoch": 0.057450761798587886, "grad_norm": 5.948021084851789, "learning_rate": 1.2766308835672997e-05, "loss": 1.0094, "step": 773 }, { "epoch": 0.05752508361204013, "grad_norm": 19.8497278994749, "learning_rate": 1.2782824112303882e-05, "loss": 0.7905, "step": 774 }, { "epoch": 0.057599405425492384, "grad_norm": 10.221094338809104, "learning_rate": 1.2799339388934766e-05, "loss": 0.7755, "step": 775 }, { "epoch": 0.05767372723894463, "grad_norm": 10.96559211281836, "learning_rate": 1.281585466556565e-05, "loss": 0.9555, "step": 776 }, { "epoch": 0.05774804905239688, "grad_norm": 13.44290240289814, "learning_rate": 1.2832369942196533e-05, "loss": 0.8203, "step": 777 }, { "epoch": 0.057822370865849126, "grad_norm": 3.8556540358421625, "learning_rate": 1.2848885218827417e-05, "loss": 1.0354, "step": 778 }, { "epoch": 0.05789669267930137, "grad_norm": 18.1558635689097, "learning_rate": 1.28654004954583e-05, "loss": 0.9512, "step": 779 }, { "epoch": 0.057971014492753624, "grad_norm": 36.71155727485411, "learning_rate": 1.2881915772089184e-05, "loss": 0.9424, "step": 780 }, { "epoch": 0.05804533630620587, "grad_norm": 4.920475834961964, "learning_rate": 1.2898431048720068e-05, "loss": 1.0576, "step": 781 }, { "epoch": 0.05811965811965812, "grad_norm": 16.198152604265957, "learning_rate": 1.291494632535095e-05, "loss": 0.9107, "step": 782 }, { "epoch": 0.05819397993311037, "grad_norm": 10.073654635119802, "learning_rate": 1.2931461601981835e-05, "loss": 0.8588, "step": 783 }, { "epoch": 0.05826830174656262, "grad_norm": 8.891373379869727, "learning_rate": 1.2947976878612719e-05, "loss": 1.014, "step": 784 }, { "epoch": 0.058342623560014864, "grad_norm": 11.93703945300879, "learning_rate": 1.2964492155243601e-05, "loss": 0.9648, "step": 785 }, { "epoch": 0.05841694537346711, "grad_norm": 25.154461902776987, "learning_rate": 1.2981007431874484e-05, "loss": 0.8239, "step": 786 }, { "epoch": 0.05849126718691936, "grad_norm": 8.217679416679895, "learning_rate": 1.2997522708505368e-05, "loss": 0.9985, "step": 787 }, { "epoch": 0.05856558900037161, "grad_norm": 34.376775108569625, "learning_rate": 1.3014037985136252e-05, "loss": 1.1101, "step": 788 }, { "epoch": 0.05863991081382386, "grad_norm": 44.49900423984137, "learning_rate": 1.3030553261767137e-05, "loss": 0.8225, "step": 789 }, { "epoch": 0.058714232627276104, "grad_norm": 27.355318530550274, "learning_rate": 1.304706853839802e-05, "loss": 0.7399, "step": 790 }, { "epoch": 0.058788554440728356, "grad_norm": 21.127500520627592, "learning_rate": 1.3063583815028902e-05, "loss": 0.9237, "step": 791 }, { "epoch": 0.0588628762541806, "grad_norm": 19.912290694616498, "learning_rate": 1.3080099091659786e-05, "loss": 0.8841, "step": 792 }, { "epoch": 0.05893719806763285, "grad_norm": 8.207747425372837, "learning_rate": 1.309661436829067e-05, "loss": 0.7045, "step": 793 }, { "epoch": 0.0590115198810851, "grad_norm": 6.466542961536017, "learning_rate": 1.3113129644921554e-05, "loss": 0.9676, "step": 794 }, { "epoch": 0.059085841694537344, "grad_norm": 39.85173864061716, "learning_rate": 1.3129644921552439e-05, "loss": 0.7441, "step": 795 }, { "epoch": 0.0591601635079896, "grad_norm": 210.4447871196351, "learning_rate": 1.314616019818332e-05, "loss": 0.986, "step": 796 }, { "epoch": 0.05923448532144184, "grad_norm": 14.439875454168469, "learning_rate": 1.3162675474814204e-05, "loss": 0.9677, "step": 797 }, { "epoch": 0.059308807134894094, "grad_norm": 23.31969412707722, "learning_rate": 1.3179190751445088e-05, "loss": 1.0685, "step": 798 }, { "epoch": 0.05938312894834634, "grad_norm": 7.676109298170099, "learning_rate": 1.3195706028075972e-05, "loss": 1.0458, "step": 799 }, { "epoch": 0.059457450761798585, "grad_norm": 15.919163902035944, "learning_rate": 1.3212221304706856e-05, "loss": 1.2281, "step": 800 }, { "epoch": 0.05953177257525084, "grad_norm": 28.491304057619875, "learning_rate": 1.3228736581337737e-05, "loss": 0.8495, "step": 801 }, { "epoch": 0.05960609438870308, "grad_norm": 10.233694827503564, "learning_rate": 1.3245251857968621e-05, "loss": 0.7578, "step": 802 }, { "epoch": 0.059680416202155334, "grad_norm": 18.997746732559076, "learning_rate": 1.3261767134599506e-05, "loss": 0.9473, "step": 803 }, { "epoch": 0.05975473801560758, "grad_norm": 13.083977512884479, "learning_rate": 1.327828241123039e-05, "loss": 0.7196, "step": 804 }, { "epoch": 0.05982905982905983, "grad_norm": 19.705512915283453, "learning_rate": 1.3294797687861272e-05, "loss": 0.8622, "step": 805 }, { "epoch": 0.05990338164251208, "grad_norm": 6.215679258050832, "learning_rate": 1.3311312964492155e-05, "loss": 1.2207, "step": 806 }, { "epoch": 0.05997770345596432, "grad_norm": 23.50602759012699, "learning_rate": 1.3327828241123039e-05, "loss": 1.0138, "step": 807 }, { "epoch": 0.060052025269416574, "grad_norm": 31.34756043579468, "learning_rate": 1.3344343517753923e-05, "loss": 1.1003, "step": 808 }, { "epoch": 0.06012634708286882, "grad_norm": 11.530788157753754, "learning_rate": 1.3360858794384808e-05, "loss": 0.8032, "step": 809 }, { "epoch": 0.06020066889632107, "grad_norm": 7.208959781671996, "learning_rate": 1.337737407101569e-05, "loss": 0.8445, "step": 810 }, { "epoch": 0.06027499070977332, "grad_norm": 14.453716178722228, "learning_rate": 1.3393889347646574e-05, "loss": 1.1751, "step": 811 }, { "epoch": 0.06034931252322557, "grad_norm": 7.2550736842877, "learning_rate": 1.3410404624277457e-05, "loss": 0.8571, "step": 812 }, { "epoch": 0.060423634336677814, "grad_norm": 8.823336640665223, "learning_rate": 1.3426919900908341e-05, "loss": 0.9692, "step": 813 }, { "epoch": 0.06049795615013006, "grad_norm": 31.893322638994984, "learning_rate": 1.3443435177539225e-05, "loss": 0.7936, "step": 814 }, { "epoch": 0.06057227796358231, "grad_norm": 15.82703009066614, "learning_rate": 1.3459950454170108e-05, "loss": 0.9314, "step": 815 }, { "epoch": 0.06064659977703456, "grad_norm": 6.1617807966478155, "learning_rate": 1.3476465730800992e-05, "loss": 0.8968, "step": 816 }, { "epoch": 0.06072092159048681, "grad_norm": 8.184527982971268, "learning_rate": 1.3492981007431876e-05, "loss": 0.9252, "step": 817 }, { "epoch": 0.060795243403939055, "grad_norm": 49.70723647843905, "learning_rate": 1.3509496284062759e-05, "loss": 0.942, "step": 818 }, { "epoch": 0.06086956521739131, "grad_norm": 15.315578796791286, "learning_rate": 1.3526011560693643e-05, "loss": 1.0537, "step": 819 }, { "epoch": 0.06094388703084355, "grad_norm": 14.794717007294032, "learning_rate": 1.3542526837324526e-05, "loss": 1.2131, "step": 820 }, { "epoch": 0.061018208844295804, "grad_norm": 11.079483210926114, "learning_rate": 1.355904211395541e-05, "loss": 1.0735, "step": 821 }, { "epoch": 0.06109253065774805, "grad_norm": 40.36942704164068, "learning_rate": 1.3575557390586294e-05, "loss": 1.0397, "step": 822 }, { "epoch": 0.061166852471200295, "grad_norm": 6.103367824016503, "learning_rate": 1.3592072667217178e-05, "loss": 0.9123, "step": 823 }, { "epoch": 0.06124117428465255, "grad_norm": 5.882681053723556, "learning_rate": 1.3608587943848059e-05, "loss": 0.7697, "step": 824 }, { "epoch": 0.06131549609810479, "grad_norm": 8.283902015687572, "learning_rate": 1.3625103220478943e-05, "loss": 1.0092, "step": 825 }, { "epoch": 0.061389817911557044, "grad_norm": 6.255571234699694, "learning_rate": 1.3641618497109828e-05, "loss": 0.95, "step": 826 }, { "epoch": 0.06146413972500929, "grad_norm": 7.150470136587667, "learning_rate": 1.3658133773740712e-05, "loss": 0.9255, "step": 827 }, { "epoch": 0.06153846153846154, "grad_norm": 6.562116295043511, "learning_rate": 1.3674649050371596e-05, "loss": 0.9455, "step": 828 }, { "epoch": 0.06161278335191379, "grad_norm": 7.349005792369009, "learning_rate": 1.3691164327002477e-05, "loss": 0.7925, "step": 829 }, { "epoch": 0.06168710516536603, "grad_norm": 8.135793668635134, "learning_rate": 1.3707679603633361e-05, "loss": 0.8634, "step": 830 }, { "epoch": 0.061761426978818285, "grad_norm": 9.275289137256026, "learning_rate": 1.3724194880264245e-05, "loss": 1.0456, "step": 831 }, { "epoch": 0.06183574879227053, "grad_norm": 3.8705992271859273, "learning_rate": 1.374071015689513e-05, "loss": 0.6902, "step": 832 }, { "epoch": 0.06191007060572278, "grad_norm": 13.763602436055262, "learning_rate": 1.3757225433526014e-05, "loss": 0.8334, "step": 833 }, { "epoch": 0.06198439241917503, "grad_norm": 31.189511604972427, "learning_rate": 1.3773740710156895e-05, "loss": 0.8479, "step": 834 }, { "epoch": 0.06205871423262728, "grad_norm": 8.352230250503428, "learning_rate": 1.3790255986787779e-05, "loss": 0.9243, "step": 835 }, { "epoch": 0.062133036046079525, "grad_norm": 5.040546202687885, "learning_rate": 1.3806771263418663e-05, "loss": 0.8748, "step": 836 }, { "epoch": 0.06220735785953177, "grad_norm": 21.57101878543733, "learning_rate": 1.3823286540049547e-05, "loss": 0.7128, "step": 837 }, { "epoch": 0.06228167967298402, "grad_norm": 5.110602917379835, "learning_rate": 1.3839801816680432e-05, "loss": 1.0053, "step": 838 }, { "epoch": 0.06235600148643627, "grad_norm": 99.66180975243289, "learning_rate": 1.3856317093311314e-05, "loss": 0.8155, "step": 839 }, { "epoch": 0.06243032329988852, "grad_norm": 11.292784869458183, "learning_rate": 1.3872832369942197e-05, "loss": 0.9654, "step": 840 }, { "epoch": 0.06250464511334076, "grad_norm": 4.086968021983747, "learning_rate": 1.3889347646573081e-05, "loss": 0.8833, "step": 841 }, { "epoch": 0.06257896692679302, "grad_norm": 6.002455897396817, "learning_rate": 1.3905862923203965e-05, "loss": 0.8349, "step": 842 }, { "epoch": 0.06265328874024526, "grad_norm": 6.845159168981061, "learning_rate": 1.3922378199834848e-05, "loss": 0.9412, "step": 843 }, { "epoch": 0.06272761055369751, "grad_norm": 16.56131645084519, "learning_rate": 1.3938893476465732e-05, "loss": 0.9901, "step": 844 }, { "epoch": 0.06280193236714976, "grad_norm": 8.197880519319776, "learning_rate": 1.3955408753096616e-05, "loss": 1.0208, "step": 845 }, { "epoch": 0.06287625418060201, "grad_norm": 5.128001551038357, "learning_rate": 1.3971924029727499e-05, "loss": 0.9061, "step": 846 }, { "epoch": 0.06295057599405425, "grad_norm": 5.582279799324808, "learning_rate": 1.3988439306358383e-05, "loss": 1.0703, "step": 847 }, { "epoch": 0.0630248978075065, "grad_norm": 11.116667466939989, "learning_rate": 1.4004954582989265e-05, "loss": 0.9215, "step": 848 }, { "epoch": 0.06309921962095875, "grad_norm": 16.9873747496574, "learning_rate": 1.402146985962015e-05, "loss": 0.9272, "step": 849 }, { "epoch": 0.06317354143441099, "grad_norm": 6.327901522561529, "learning_rate": 1.4037985136251034e-05, "loss": 0.9486, "step": 850 }, { "epoch": 0.06324786324786325, "grad_norm": 13.074681053638825, "learning_rate": 1.4054500412881918e-05, "loss": 0.9734, "step": 851 }, { "epoch": 0.0633221850613155, "grad_norm": 10.455830432711641, "learning_rate": 1.40710156895128e-05, "loss": 0.9374, "step": 852 }, { "epoch": 0.06339650687476775, "grad_norm": 72.0762519746415, "learning_rate": 1.4087530966143683e-05, "loss": 0.9309, "step": 853 }, { "epoch": 0.06347082868821999, "grad_norm": 34.97969357312386, "learning_rate": 1.4104046242774567e-05, "loss": 1.0013, "step": 854 }, { "epoch": 0.06354515050167224, "grad_norm": 11.591156358896372, "learning_rate": 1.4120561519405452e-05, "loss": 0.9194, "step": 855 }, { "epoch": 0.06361947231512449, "grad_norm": 16.361873110725128, "learning_rate": 1.4137076796036336e-05, "loss": 0.9114, "step": 856 }, { "epoch": 0.06369379412857673, "grad_norm": 25.803653467429204, "learning_rate": 1.415359207266722e-05, "loss": 0.7246, "step": 857 }, { "epoch": 0.06376811594202898, "grad_norm": 7.417504061995801, "learning_rate": 1.4170107349298101e-05, "loss": 0.762, "step": 858 }, { "epoch": 0.06384243775548124, "grad_norm": 119.13091154141539, "learning_rate": 1.4186622625928985e-05, "loss": 0.7782, "step": 859 }, { "epoch": 0.06391675956893349, "grad_norm": 6.6619707866523985, "learning_rate": 1.420313790255987e-05, "loss": 0.8105, "step": 860 }, { "epoch": 0.06399108138238573, "grad_norm": 15.53123117993089, "learning_rate": 1.4219653179190754e-05, "loss": 1.0061, "step": 861 }, { "epoch": 0.06406540319583798, "grad_norm": 29.606743374323344, "learning_rate": 1.4236168455821634e-05, "loss": 0.8231, "step": 862 }, { "epoch": 0.06413972500929023, "grad_norm": 106.10812047390678, "learning_rate": 1.4252683732452519e-05, "loss": 0.9923, "step": 863 }, { "epoch": 0.06421404682274247, "grad_norm": 19.98746378843955, "learning_rate": 1.4269199009083403e-05, "loss": 0.8496, "step": 864 }, { "epoch": 0.06428836863619472, "grad_norm": 8.397237925534423, "learning_rate": 1.4285714285714287e-05, "loss": 1.0599, "step": 865 }, { "epoch": 0.06436269044964697, "grad_norm": 20.090666175658463, "learning_rate": 1.4302229562345171e-05, "loss": 1.1556, "step": 866 }, { "epoch": 0.06443701226309922, "grad_norm": 7.908051904654295, "learning_rate": 1.4318744838976054e-05, "loss": 0.7489, "step": 867 }, { "epoch": 0.06451133407655146, "grad_norm": 11.034524224522855, "learning_rate": 1.4335260115606936e-05, "loss": 0.8948, "step": 868 }, { "epoch": 0.06458565589000372, "grad_norm": 7.718420738281377, "learning_rate": 1.435177539223782e-05, "loss": 0.9213, "step": 869 }, { "epoch": 0.06465997770345597, "grad_norm": 35.05549737258707, "learning_rate": 1.4368290668868705e-05, "loss": 0.9651, "step": 870 }, { "epoch": 0.0647342995169082, "grad_norm": 12.775474475015733, "learning_rate": 1.4384805945499589e-05, "loss": 0.8842, "step": 871 }, { "epoch": 0.06480862133036046, "grad_norm": 8.187296178013588, "learning_rate": 1.4401321222130472e-05, "loss": 1.2034, "step": 872 }, { "epoch": 0.06488294314381271, "grad_norm": 40.45524374861177, "learning_rate": 1.4417836498761356e-05, "loss": 0.9432, "step": 873 }, { "epoch": 0.06495726495726496, "grad_norm": 5.476513110825434, "learning_rate": 1.4434351775392238e-05, "loss": 0.7195, "step": 874 }, { "epoch": 0.0650315867707172, "grad_norm": 59.28590586906804, "learning_rate": 1.4450867052023123e-05, "loss": 0.8893, "step": 875 }, { "epoch": 0.06510590858416945, "grad_norm": 9.32652504340218, "learning_rate": 1.4467382328654007e-05, "loss": 0.7026, "step": 876 }, { "epoch": 0.0651802303976217, "grad_norm": 23.524489674426654, "learning_rate": 1.448389760528489e-05, "loss": 0.9373, "step": 877 }, { "epoch": 0.06525455221107394, "grad_norm": 16.415050339050303, "learning_rate": 1.4500412881915774e-05, "loss": 0.8639, "step": 878 }, { "epoch": 0.0653288740245262, "grad_norm": 20.52462240932433, "learning_rate": 1.4516928158546658e-05, "loss": 0.7127, "step": 879 }, { "epoch": 0.06540319583797845, "grad_norm": 7.420059026765666, "learning_rate": 1.453344343517754e-05, "loss": 0.7368, "step": 880 }, { "epoch": 0.0654775176514307, "grad_norm": 5.340111578002602, "learning_rate": 1.4549958711808423e-05, "loss": 0.8205, "step": 881 }, { "epoch": 0.06555183946488294, "grad_norm": 4.231587006875324, "learning_rate": 1.4566473988439307e-05, "loss": 0.8966, "step": 882 }, { "epoch": 0.06562616127833519, "grad_norm": 8.459453786908501, "learning_rate": 1.4582989265070191e-05, "loss": 0.8417, "step": 883 }, { "epoch": 0.06570048309178744, "grad_norm": 22.44134575723104, "learning_rate": 1.4599504541701076e-05, "loss": 0.8734, "step": 884 }, { "epoch": 0.06577480490523968, "grad_norm": 3.879914511088393, "learning_rate": 1.461601981833196e-05, "loss": 0.7176, "step": 885 }, { "epoch": 0.06584912671869193, "grad_norm": 36.024293046882605, "learning_rate": 1.463253509496284e-05, "loss": 0.9453, "step": 886 }, { "epoch": 0.06592344853214419, "grad_norm": 11.654327632627211, "learning_rate": 1.4649050371593725e-05, "loss": 0.7141, "step": 887 }, { "epoch": 0.06599777034559644, "grad_norm": 19.168141225633924, "learning_rate": 1.4665565648224609e-05, "loss": 1.0594, "step": 888 }, { "epoch": 0.06607209215904868, "grad_norm": 11.259460077385315, "learning_rate": 1.4682080924855493e-05, "loss": 0.8247, "step": 889 }, { "epoch": 0.06614641397250093, "grad_norm": 10.67738680949065, "learning_rate": 1.4698596201486378e-05, "loss": 0.8253, "step": 890 }, { "epoch": 0.06622073578595318, "grad_norm": 5.229269136630886, "learning_rate": 1.4715111478117258e-05, "loss": 0.8735, "step": 891 }, { "epoch": 0.06629505759940542, "grad_norm": 25.728016560751684, "learning_rate": 1.4731626754748143e-05, "loss": 0.7932, "step": 892 }, { "epoch": 0.06636937941285767, "grad_norm": 18.713958708302968, "learning_rate": 1.4748142031379027e-05, "loss": 0.9759, "step": 893 }, { "epoch": 0.06644370122630992, "grad_norm": 21.460800477746464, "learning_rate": 1.4764657308009911e-05, "loss": 0.9257, "step": 894 }, { "epoch": 0.06651802303976218, "grad_norm": 21.1372909385787, "learning_rate": 1.4781172584640795e-05, "loss": 0.8579, "step": 895 }, { "epoch": 0.06659234485321441, "grad_norm": 10.772678959453128, "learning_rate": 1.4797687861271676e-05, "loss": 0.8747, "step": 896 }, { "epoch": 0.06666666666666667, "grad_norm": 15.651999922507994, "learning_rate": 1.481420313790256e-05, "loss": 0.8944, "step": 897 }, { "epoch": 0.06674098848011892, "grad_norm": 8.746093889173679, "learning_rate": 1.4830718414533445e-05, "loss": 0.817, "step": 898 }, { "epoch": 0.06681531029357116, "grad_norm": 13.481033439471402, "learning_rate": 1.4847233691164329e-05, "loss": 1.0278, "step": 899 }, { "epoch": 0.06688963210702341, "grad_norm": 11.62382807214302, "learning_rate": 1.4863748967795211e-05, "loss": 0.7511, "step": 900 }, { "epoch": 0.06696395392047566, "grad_norm": 6.895620593579477, "learning_rate": 1.4880264244426094e-05, "loss": 0.8997, "step": 901 }, { "epoch": 0.06703827573392791, "grad_norm": 9.297740463032987, "learning_rate": 1.4896779521056978e-05, "loss": 0.9448, "step": 902 }, { "epoch": 0.06711259754738015, "grad_norm": 8.882645290307932, "learning_rate": 1.4913294797687862e-05, "loss": 0.8864, "step": 903 }, { "epoch": 0.0671869193608324, "grad_norm": 17.668431556490855, "learning_rate": 1.4929810074318747e-05, "loss": 1.0454, "step": 904 }, { "epoch": 0.06726124117428466, "grad_norm": 7.7975888342732915, "learning_rate": 1.494632535094963e-05, "loss": 0.9498, "step": 905 }, { "epoch": 0.0673355629877369, "grad_norm": 5.2397234472969645, "learning_rate": 1.4962840627580513e-05, "loss": 0.7852, "step": 906 }, { "epoch": 0.06740988480118915, "grad_norm": 120.24000566482343, "learning_rate": 1.4979355904211396e-05, "loss": 0.8844, "step": 907 }, { "epoch": 0.0674842066146414, "grad_norm": 9.7073457477338, "learning_rate": 1.499587118084228e-05, "loss": 0.932, "step": 908 }, { "epoch": 0.06755852842809365, "grad_norm": 12.10365145092314, "learning_rate": 1.5012386457473164e-05, "loss": 0.9065, "step": 909 }, { "epoch": 0.06763285024154589, "grad_norm": 42.90428291824797, "learning_rate": 1.5028901734104047e-05, "loss": 0.962, "step": 910 }, { "epoch": 0.06770717205499814, "grad_norm": 2440.9194977953357, "learning_rate": 1.5045417010734931e-05, "loss": 0.7982, "step": 911 }, { "epoch": 0.0677814938684504, "grad_norm": 320.34772523445776, "learning_rate": 1.5061932287365815e-05, "loss": 0.9756, "step": 912 }, { "epoch": 0.06785581568190265, "grad_norm": 13.407831386501098, "learning_rate": 1.5078447563996698e-05, "loss": 0.834, "step": 913 }, { "epoch": 0.06793013749535488, "grad_norm": 4.145063633629845, "learning_rate": 1.5094962840627582e-05, "loss": 0.7739, "step": 914 }, { "epoch": 0.06800445930880714, "grad_norm": 20.982606246032358, "learning_rate": 1.5111478117258465e-05, "loss": 1.0206, "step": 915 }, { "epoch": 0.06807878112225939, "grad_norm": 6.886377471339822, "learning_rate": 1.5127993393889349e-05, "loss": 1.0374, "step": 916 }, { "epoch": 0.06815310293571163, "grad_norm": 22.193752164672752, "learning_rate": 1.5144508670520233e-05, "loss": 0.8705, "step": 917 }, { "epoch": 0.06822742474916388, "grad_norm": 9.4060614820947, "learning_rate": 1.5161023947151117e-05, "loss": 0.8902, "step": 918 }, { "epoch": 0.06830174656261613, "grad_norm": 7.604643890851094, "learning_rate": 1.5177539223781998e-05, "loss": 0.84, "step": 919 }, { "epoch": 0.06837606837606838, "grad_norm": 29.571392649507448, "learning_rate": 1.5194054500412882e-05, "loss": 1.2472, "step": 920 }, { "epoch": 0.06845039018952062, "grad_norm": 18.241614806259058, "learning_rate": 1.5210569777043767e-05, "loss": 0.782, "step": 921 }, { "epoch": 0.06852471200297287, "grad_norm": 6.999169520622949, "learning_rate": 1.5227085053674651e-05, "loss": 1.0231, "step": 922 }, { "epoch": 0.06859903381642513, "grad_norm": 7.670165571228617, "learning_rate": 1.5243600330305535e-05, "loss": 0.9275, "step": 923 }, { "epoch": 0.06867335562987736, "grad_norm": 14.292006424224066, "learning_rate": 1.5260115606936418e-05, "loss": 0.9155, "step": 924 }, { "epoch": 0.06874767744332962, "grad_norm": 4.082841343171692, "learning_rate": 1.5276630883567302e-05, "loss": 0.9043, "step": 925 }, { "epoch": 0.06882199925678187, "grad_norm": 4.4011479704220795, "learning_rate": 1.5293146160198186e-05, "loss": 0.7016, "step": 926 }, { "epoch": 0.06889632107023412, "grad_norm": 11.966434551005273, "learning_rate": 1.5309661436829067e-05, "loss": 0.8256, "step": 927 }, { "epoch": 0.06897064288368636, "grad_norm": 5.837570399027989, "learning_rate": 1.532617671345995e-05, "loss": 0.7299, "step": 928 }, { "epoch": 0.06904496469713861, "grad_norm": 6.311859829653688, "learning_rate": 1.5342691990090835e-05, "loss": 0.8603, "step": 929 }, { "epoch": 0.06911928651059086, "grad_norm": 24.130844504034965, "learning_rate": 1.535920726672172e-05, "loss": 1.1628, "step": 930 }, { "epoch": 0.0691936083240431, "grad_norm": 9.401963414428343, "learning_rate": 1.5375722543352604e-05, "loss": 0.8387, "step": 931 }, { "epoch": 0.06926793013749535, "grad_norm": 5.535858225212391, "learning_rate": 1.5392237819983488e-05, "loss": 1.0707, "step": 932 }, { "epoch": 0.0693422519509476, "grad_norm": 8.894187333876989, "learning_rate": 1.540875309661437e-05, "loss": 0.8574, "step": 933 }, { "epoch": 0.06941657376439986, "grad_norm": 4.448865242150379, "learning_rate": 1.5425268373245253e-05, "loss": 0.8403, "step": 934 }, { "epoch": 0.0694908955778521, "grad_norm": 7.052033317671387, "learning_rate": 1.5441783649876137e-05, "loss": 0.9592, "step": 935 }, { "epoch": 0.06956521739130435, "grad_norm": 5.605669108096229, "learning_rate": 1.545829892650702e-05, "loss": 0.9994, "step": 936 }, { "epoch": 0.0696395392047566, "grad_norm": 4.846940861095094, "learning_rate": 1.5474814203137906e-05, "loss": 0.9719, "step": 937 }, { "epoch": 0.06971386101820884, "grad_norm": 8.440725785977438, "learning_rate": 1.5491329479768787e-05, "loss": 1.0157, "step": 938 }, { "epoch": 0.06978818283166109, "grad_norm": 7.184907863005458, "learning_rate": 1.550784475639967e-05, "loss": 0.9431, "step": 939 }, { "epoch": 0.06986250464511334, "grad_norm": 5.311136152667086, "learning_rate": 1.5524360033030555e-05, "loss": 0.983, "step": 940 }, { "epoch": 0.0699368264585656, "grad_norm": 3.688251884738903, "learning_rate": 1.554087530966144e-05, "loss": 0.8648, "step": 941 }, { "epoch": 0.07001114827201783, "grad_norm": 6.87852134462123, "learning_rate": 1.5557390586292324e-05, "loss": 0.8415, "step": 942 }, { "epoch": 0.07008547008547009, "grad_norm": 14.170097347088044, "learning_rate": 1.5573905862923204e-05, "loss": 1.0515, "step": 943 }, { "epoch": 0.07015979189892234, "grad_norm": 13.468903044091721, "learning_rate": 1.559042113955409e-05, "loss": 1.0326, "step": 944 }, { "epoch": 0.07023411371237458, "grad_norm": 4.117699862513538, "learning_rate": 1.5606936416184973e-05, "loss": 0.8915, "step": 945 }, { "epoch": 0.07030843552582683, "grad_norm": 5.631317012533656, "learning_rate": 1.5623451692815857e-05, "loss": 0.9019, "step": 946 }, { "epoch": 0.07038275733927908, "grad_norm": 4.212800362338361, "learning_rate": 1.563996696944674e-05, "loss": 0.8916, "step": 947 }, { "epoch": 0.07045707915273133, "grad_norm": 6.161876954747647, "learning_rate": 1.5656482246077622e-05, "loss": 0.9285, "step": 948 }, { "epoch": 0.07053140096618357, "grad_norm": 9.025644403797031, "learning_rate": 1.5672997522708506e-05, "loss": 1.0328, "step": 949 }, { "epoch": 0.07060572277963582, "grad_norm": 5.240490170611306, "learning_rate": 1.568951279933939e-05, "loss": 0.8553, "step": 950 }, { "epoch": 0.07068004459308808, "grad_norm": 7.729193069252934, "learning_rate": 1.5706028075970275e-05, "loss": 1.1531, "step": 951 }, { "epoch": 0.07075436640654031, "grad_norm": 7.328236017645548, "learning_rate": 1.572254335260116e-05, "loss": 0.9737, "step": 952 }, { "epoch": 0.07082868821999257, "grad_norm": 23.081458765251483, "learning_rate": 1.573905862923204e-05, "loss": 1.0781, "step": 953 }, { "epoch": 0.07090301003344482, "grad_norm": 6.951443437072861, "learning_rate": 1.5755573905862924e-05, "loss": 0.9667, "step": 954 }, { "epoch": 0.07097733184689707, "grad_norm": 17.25031962193238, "learning_rate": 1.577208918249381e-05, "loss": 0.8513, "step": 955 }, { "epoch": 0.07105165366034931, "grad_norm": 9.188052002025923, "learning_rate": 1.5788604459124693e-05, "loss": 1.1161, "step": 956 }, { "epoch": 0.07112597547380156, "grad_norm": 8.226190007562673, "learning_rate": 1.5805119735755574e-05, "loss": 0.9835, "step": 957 }, { "epoch": 0.07120029728725381, "grad_norm": 7.826069572873885, "learning_rate": 1.5821635012386458e-05, "loss": 0.9527, "step": 958 }, { "epoch": 0.07127461910070605, "grad_norm": 7.133183986712873, "learning_rate": 1.5838150289017342e-05, "loss": 0.9098, "step": 959 }, { "epoch": 0.0713489409141583, "grad_norm": 4.722473885528865, "learning_rate": 1.5854665565648226e-05, "loss": 0.8873, "step": 960 }, { "epoch": 0.07142326272761056, "grad_norm": 36.86221382860443, "learning_rate": 1.587118084227911e-05, "loss": 0.9669, "step": 961 }, { "epoch": 0.07149758454106281, "grad_norm": 4.57909138140835, "learning_rate": 1.588769611890999e-05, "loss": 0.8387, "step": 962 }, { "epoch": 0.07157190635451505, "grad_norm": 6.317822498558855, "learning_rate": 1.5904211395540875e-05, "loss": 0.9841, "step": 963 }, { "epoch": 0.0716462281679673, "grad_norm": 6.4148251387016195, "learning_rate": 1.592072667217176e-05, "loss": 0.8196, "step": 964 }, { "epoch": 0.07172054998141955, "grad_norm": 4.0830453235609685, "learning_rate": 1.5937241948802644e-05, "loss": 0.8668, "step": 965 }, { "epoch": 0.07179487179487179, "grad_norm": 3.8480360189752694, "learning_rate": 1.5953757225433528e-05, "loss": 0.953, "step": 966 }, { "epoch": 0.07186919360832404, "grad_norm": 3.9826823799110596, "learning_rate": 1.597027250206441e-05, "loss": 0.7536, "step": 967 }, { "epoch": 0.0719435154217763, "grad_norm": 4.39643194493955, "learning_rate": 1.5986787778695293e-05, "loss": 0.9774, "step": 968 }, { "epoch": 0.07201783723522855, "grad_norm": 3.838084422102202, "learning_rate": 1.6003303055326177e-05, "loss": 0.8946, "step": 969 }, { "epoch": 0.07209215904868078, "grad_norm": 18.595942767819146, "learning_rate": 1.6019818331957062e-05, "loss": 0.9592, "step": 970 }, { "epoch": 0.07216648086213304, "grad_norm": 4.3609971791098925, "learning_rate": 1.6036333608587946e-05, "loss": 0.9271, "step": 971 }, { "epoch": 0.07224080267558529, "grad_norm": 11.603953566642316, "learning_rate": 1.6052848885218827e-05, "loss": 1.2172, "step": 972 }, { "epoch": 0.07231512448903753, "grad_norm": 3.901190551806528, "learning_rate": 1.606936416184971e-05, "loss": 0.7902, "step": 973 }, { "epoch": 0.07238944630248978, "grad_norm": 13.224671014129852, "learning_rate": 1.6085879438480595e-05, "loss": 0.93, "step": 974 }, { "epoch": 0.07246376811594203, "grad_norm": 6.72594455009977, "learning_rate": 1.610239471511148e-05, "loss": 0.9815, "step": 975 }, { "epoch": 0.07253808992939428, "grad_norm": 6.220122606916275, "learning_rate": 1.611890999174236e-05, "loss": 1.0066, "step": 976 }, { "epoch": 0.07261241174284652, "grad_norm": 5.743317760331608, "learning_rate": 1.6135425268373245e-05, "loss": 0.912, "step": 977 }, { "epoch": 0.07268673355629877, "grad_norm": 4.070576444406518, "learning_rate": 1.615194054500413e-05, "loss": 0.898, "step": 978 }, { "epoch": 0.07276105536975103, "grad_norm": 8.980386686900053, "learning_rate": 1.6168455821635013e-05, "loss": 1.0143, "step": 979 }, { "epoch": 0.07283537718320326, "grad_norm": 4.526172019355136, "learning_rate": 1.6184971098265897e-05, "loss": 0.8284, "step": 980 }, { "epoch": 0.07290969899665552, "grad_norm": 3.4657124951916876, "learning_rate": 1.620148637489678e-05, "loss": 0.8797, "step": 981 }, { "epoch": 0.07298402081010777, "grad_norm": 7.884792266996125, "learning_rate": 1.6218001651527662e-05, "loss": 0.6019, "step": 982 }, { "epoch": 0.07305834262356002, "grad_norm": 4.971428021476153, "learning_rate": 1.6234516928158547e-05, "loss": 0.9976, "step": 983 }, { "epoch": 0.07313266443701226, "grad_norm": 4.802231666994836, "learning_rate": 1.625103220478943e-05, "loss": 1.0469, "step": 984 }, { "epoch": 0.07320698625046451, "grad_norm": 3.070244478421154, "learning_rate": 1.6267547481420315e-05, "loss": 0.8415, "step": 985 }, { "epoch": 0.07328130806391676, "grad_norm": 11.692334518588204, "learning_rate": 1.62840627580512e-05, "loss": 0.7622, "step": 986 }, { "epoch": 0.073355629877369, "grad_norm": 7.191358502181783, "learning_rate": 1.6300578034682083e-05, "loss": 0.8291, "step": 987 }, { "epoch": 0.07342995169082125, "grad_norm": 4.661185326662034, "learning_rate": 1.6317093311312964e-05, "loss": 0.9443, "step": 988 }, { "epoch": 0.0735042735042735, "grad_norm": 4.151104449060036, "learning_rate": 1.633360858794385e-05, "loss": 0.8874, "step": 989 }, { "epoch": 0.07357859531772576, "grad_norm": 5.442597324615902, "learning_rate": 1.6350123864574733e-05, "loss": 0.8035, "step": 990 }, { "epoch": 0.073652917131178, "grad_norm": 3.4792435242373196, "learning_rate": 1.6366639141205617e-05, "loss": 0.9322, "step": 991 }, { "epoch": 0.07372723894463025, "grad_norm": 6.766880127003532, "learning_rate": 1.63831544178365e-05, "loss": 1.0247, "step": 992 }, { "epoch": 0.0738015607580825, "grad_norm": 18.647722560384285, "learning_rate": 1.6399669694467385e-05, "loss": 0.7824, "step": 993 }, { "epoch": 0.07387588257153474, "grad_norm": 5.69885610245266, "learning_rate": 1.6416184971098266e-05, "loss": 1.2063, "step": 994 }, { "epoch": 0.07395020438498699, "grad_norm": 5.71279054433423, "learning_rate": 1.643270024772915e-05, "loss": 0.9655, "step": 995 }, { "epoch": 0.07402452619843924, "grad_norm": 5.315565852123177, "learning_rate": 1.6449215524360035e-05, "loss": 1.0595, "step": 996 }, { "epoch": 0.0740988480118915, "grad_norm": 4.075670566619999, "learning_rate": 1.646573080099092e-05, "loss": 0.8589, "step": 997 }, { "epoch": 0.07417316982534373, "grad_norm": 5.400945465508197, "learning_rate": 1.6482246077621803e-05, "loss": 1.0261, "step": 998 }, { "epoch": 0.07424749163879599, "grad_norm": 11.152084832891038, "learning_rate": 1.6498761354252687e-05, "loss": 0.9873, "step": 999 }, { "epoch": 0.07432181345224824, "grad_norm": 6.809696697209522, "learning_rate": 1.6515276630883568e-05, "loss": 0.9153, "step": 1000 }, { "epoch": 0.07439613526570048, "grad_norm": 6.229539171894625, "learning_rate": 1.6531791907514452e-05, "loss": 0.8398, "step": 1001 }, { "epoch": 0.07447045707915273, "grad_norm": 5.781899438739705, "learning_rate": 1.6548307184145337e-05, "loss": 0.8349, "step": 1002 }, { "epoch": 0.07454477889260498, "grad_norm": 9.332356619931808, "learning_rate": 1.656482246077622e-05, "loss": 0.9716, "step": 1003 }, { "epoch": 0.07461910070605723, "grad_norm": 5.597526255205049, "learning_rate": 1.6581337737407105e-05, "loss": 0.766, "step": 1004 }, { "epoch": 0.07469342251950947, "grad_norm": 7.887242701117761, "learning_rate": 1.6597853014037986e-05, "loss": 0.76, "step": 1005 }, { "epoch": 0.07476774433296172, "grad_norm": 11.998040140871078, "learning_rate": 1.661436829066887e-05, "loss": 0.8238, "step": 1006 }, { "epoch": 0.07484206614641398, "grad_norm": 15.452158456447002, "learning_rate": 1.6630883567299754e-05, "loss": 0.8527, "step": 1007 }, { "epoch": 0.07491638795986622, "grad_norm": 4.8449778259643645, "learning_rate": 1.664739884393064e-05, "loss": 0.7573, "step": 1008 }, { "epoch": 0.07499070977331847, "grad_norm": 6.8811020364642985, "learning_rate": 1.6663914120561523e-05, "loss": 1.0104, "step": 1009 }, { "epoch": 0.07506503158677072, "grad_norm": 14.926753849771835, "learning_rate": 1.6680429397192404e-05, "loss": 0.8161, "step": 1010 }, { "epoch": 0.07513935340022297, "grad_norm": 4.4166297552484215, "learning_rate": 1.6696944673823288e-05, "loss": 0.9379, "step": 1011 }, { "epoch": 0.07521367521367521, "grad_norm": 8.40680060666227, "learning_rate": 1.6713459950454172e-05, "loss": 0.9456, "step": 1012 }, { "epoch": 0.07528799702712746, "grad_norm": 5.0499071190275195, "learning_rate": 1.6729975227085056e-05, "loss": 0.9502, "step": 1013 }, { "epoch": 0.07536231884057971, "grad_norm": 3.607574410532465, "learning_rate": 1.6746490503715937e-05, "loss": 1.0292, "step": 1014 }, { "epoch": 0.07543664065403195, "grad_norm": 26.238968815294733, "learning_rate": 1.676300578034682e-05, "loss": 1.0456, "step": 1015 }, { "epoch": 0.0755109624674842, "grad_norm": 5.198661822985127, "learning_rate": 1.6779521056977706e-05, "loss": 0.9491, "step": 1016 }, { "epoch": 0.07558528428093646, "grad_norm": 10.7635492106792, "learning_rate": 1.679603633360859e-05, "loss": 0.9634, "step": 1017 }, { "epoch": 0.07565960609438871, "grad_norm": 4.446662321044092, "learning_rate": 1.6812551610239474e-05, "loss": 1.0757, "step": 1018 }, { "epoch": 0.07573392790784095, "grad_norm": 14.965934852614767, "learning_rate": 1.6829066886870355e-05, "loss": 0.9274, "step": 1019 }, { "epoch": 0.0758082497212932, "grad_norm": 8.05804689897131, "learning_rate": 1.684558216350124e-05, "loss": 0.9776, "step": 1020 }, { "epoch": 0.07588257153474545, "grad_norm": 9.450356071807823, "learning_rate": 1.6862097440132124e-05, "loss": 0.9161, "step": 1021 }, { "epoch": 0.07595689334819769, "grad_norm": 4.904881444138228, "learning_rate": 1.6878612716763008e-05, "loss": 1.1483, "step": 1022 }, { "epoch": 0.07603121516164994, "grad_norm": 3.3307778158097263, "learning_rate": 1.6895127993393892e-05, "loss": 0.817, "step": 1023 }, { "epoch": 0.0761055369751022, "grad_norm": 5.543107225452925, "learning_rate": 1.6911643270024773e-05, "loss": 0.9871, "step": 1024 }, { "epoch": 0.07617985878855445, "grad_norm": 3.4488191507760764, "learning_rate": 1.6928158546655657e-05, "loss": 0.8702, "step": 1025 }, { "epoch": 0.07625418060200669, "grad_norm": 6.725424096331201, "learning_rate": 1.694467382328654e-05, "loss": 0.7791, "step": 1026 }, { "epoch": 0.07632850241545894, "grad_norm": 19.38221576080939, "learning_rate": 1.6961189099917425e-05, "loss": 0.8509, "step": 1027 }, { "epoch": 0.07640282422891119, "grad_norm": 5.622939317850253, "learning_rate": 1.697770437654831e-05, "loss": 0.8909, "step": 1028 }, { "epoch": 0.07647714604236343, "grad_norm": 3.1021489482136264, "learning_rate": 1.699421965317919e-05, "loss": 0.8564, "step": 1029 }, { "epoch": 0.07655146785581568, "grad_norm": 6.04182021279553, "learning_rate": 1.7010734929810075e-05, "loss": 0.9783, "step": 1030 }, { "epoch": 0.07662578966926793, "grad_norm": 12.94462997934049, "learning_rate": 1.702725020644096e-05, "loss": 0.8306, "step": 1031 }, { "epoch": 0.07670011148272018, "grad_norm": 5.6144555604481825, "learning_rate": 1.7043765483071843e-05, "loss": 0.9801, "step": 1032 }, { "epoch": 0.07677443329617242, "grad_norm": 10.057827286119654, "learning_rate": 1.7060280759702724e-05, "loss": 1.0001, "step": 1033 }, { "epoch": 0.07684875510962468, "grad_norm": 5.559260691597159, "learning_rate": 1.707679603633361e-05, "loss": 0.8305, "step": 1034 }, { "epoch": 0.07692307692307693, "grad_norm": 6.601755357175879, "learning_rate": 1.7093311312964493e-05, "loss": 0.9391, "step": 1035 }, { "epoch": 0.07699739873652917, "grad_norm": 4.128906451990414, "learning_rate": 1.7109826589595377e-05, "loss": 1.0703, "step": 1036 }, { "epoch": 0.07707172054998142, "grad_norm": 4.536103003863094, "learning_rate": 1.712634186622626e-05, "loss": 0.8279, "step": 1037 }, { "epoch": 0.07714604236343367, "grad_norm": 37.771114809684605, "learning_rate": 1.7142857142857142e-05, "loss": 0.9299, "step": 1038 }, { "epoch": 0.07722036417688592, "grad_norm": 6.464593430513636, "learning_rate": 1.7159372419488026e-05, "loss": 0.818, "step": 1039 }, { "epoch": 0.07729468599033816, "grad_norm": 28.153206095215076, "learning_rate": 1.717588769611891e-05, "loss": 1.099, "step": 1040 }, { "epoch": 0.07736900780379041, "grad_norm": 10.484410802799346, "learning_rate": 1.7192402972749795e-05, "loss": 1.0838, "step": 1041 }, { "epoch": 0.07744332961724266, "grad_norm": 11.913958859018862, "learning_rate": 1.720891824938068e-05, "loss": 0.986, "step": 1042 }, { "epoch": 0.0775176514306949, "grad_norm": 4.207209027209101, "learning_rate": 1.722543352601156e-05, "loss": 0.9321, "step": 1043 }, { "epoch": 0.07759197324414716, "grad_norm": 12.567870968606975, "learning_rate": 1.7241948802642444e-05, "loss": 0.8726, "step": 1044 }, { "epoch": 0.07766629505759941, "grad_norm": 5.661163724565283, "learning_rate": 1.7258464079273328e-05, "loss": 0.9587, "step": 1045 }, { "epoch": 0.07774061687105166, "grad_norm": 4.010188557395575, "learning_rate": 1.7274979355904212e-05, "loss": 0.8725, "step": 1046 }, { "epoch": 0.0778149386845039, "grad_norm": 7.929406635015021, "learning_rate": 1.7291494632535097e-05, "loss": 0.8745, "step": 1047 }, { "epoch": 0.07788926049795615, "grad_norm": 4.756818195760536, "learning_rate": 1.730800990916598e-05, "loss": 1.0224, "step": 1048 }, { "epoch": 0.0779635823114084, "grad_norm": 8.484681016320419, "learning_rate": 1.732452518579686e-05, "loss": 0.6843, "step": 1049 }, { "epoch": 0.07803790412486064, "grad_norm": 7.548478081028107, "learning_rate": 1.7341040462427746e-05, "loss": 1.0601, "step": 1050 }, { "epoch": 0.0781122259383129, "grad_norm": 7.749106312034147, "learning_rate": 1.735755573905863e-05, "loss": 0.7987, "step": 1051 }, { "epoch": 0.07818654775176515, "grad_norm": 6.470850401116202, "learning_rate": 1.7374071015689514e-05, "loss": 0.9555, "step": 1052 }, { "epoch": 0.0782608695652174, "grad_norm": 5.756789496312123, "learning_rate": 1.73905862923204e-05, "loss": 1.0003, "step": 1053 }, { "epoch": 0.07833519137866964, "grad_norm": 17.384685416247155, "learning_rate": 1.7407101568951283e-05, "loss": 1.006, "step": 1054 }, { "epoch": 0.07840951319212189, "grad_norm": 9.777475499511239, "learning_rate": 1.7423616845582167e-05, "loss": 0.9868, "step": 1055 }, { "epoch": 0.07848383500557414, "grad_norm": 8.117524039234164, "learning_rate": 1.7440132122213048e-05, "loss": 0.8861, "step": 1056 }, { "epoch": 0.07855815681902638, "grad_norm": 3.787403766583329, "learning_rate": 1.7456647398843932e-05, "loss": 0.9042, "step": 1057 }, { "epoch": 0.07863247863247863, "grad_norm": 3.507577752622767, "learning_rate": 1.7473162675474816e-05, "loss": 0.6828, "step": 1058 }, { "epoch": 0.07870680044593088, "grad_norm": 2.856682200355101, "learning_rate": 1.74896779521057e-05, "loss": 0.7645, "step": 1059 }, { "epoch": 0.07878112225938314, "grad_norm": 6.67516816605483, "learning_rate": 1.7506193228736585e-05, "loss": 0.9673, "step": 1060 }, { "epoch": 0.07885544407283537, "grad_norm": 4.938816938051439, "learning_rate": 1.752270850536747e-05, "loss": 0.8143, "step": 1061 }, { "epoch": 0.07892976588628763, "grad_norm": 9.779336656448347, "learning_rate": 1.753922378199835e-05, "loss": 0.7738, "step": 1062 }, { "epoch": 0.07900408769973988, "grad_norm": 5.024144298270168, "learning_rate": 1.7555739058629234e-05, "loss": 1.1012, "step": 1063 }, { "epoch": 0.07907840951319212, "grad_norm": 4.302466771723048, "learning_rate": 1.7572254335260118e-05, "loss": 0.9541, "step": 1064 }, { "epoch": 0.07915273132664437, "grad_norm": 3.7649180621482023, "learning_rate": 1.7588769611891002e-05, "loss": 0.73, "step": 1065 }, { "epoch": 0.07922705314009662, "grad_norm": 4.677563815097714, "learning_rate": 1.7605284888521887e-05, "loss": 0.6689, "step": 1066 }, { "epoch": 0.07930137495354887, "grad_norm": 7.597432774180935, "learning_rate": 1.7621800165152768e-05, "loss": 0.8083, "step": 1067 }, { "epoch": 0.07937569676700111, "grad_norm": 5.4665696111385875, "learning_rate": 1.7638315441783652e-05, "loss": 1.0779, "step": 1068 }, { "epoch": 0.07945001858045336, "grad_norm": 13.209152779015426, "learning_rate": 1.7654830718414536e-05, "loss": 0.981, "step": 1069 }, { "epoch": 0.07952434039390562, "grad_norm": 4.449092848521529, "learning_rate": 1.767134599504542e-05, "loss": 0.837, "step": 1070 }, { "epoch": 0.07959866220735785, "grad_norm": 4.767098194487908, "learning_rate": 1.76878612716763e-05, "loss": 0.7594, "step": 1071 }, { "epoch": 0.0796729840208101, "grad_norm": 4.743657278633281, "learning_rate": 1.7704376548307185e-05, "loss": 0.9859, "step": 1072 }, { "epoch": 0.07974730583426236, "grad_norm": 10.964250222398668, "learning_rate": 1.772089182493807e-05, "loss": 1.1268, "step": 1073 }, { "epoch": 0.07982162764771461, "grad_norm": 4.21821958893946, "learning_rate": 1.7737407101568954e-05, "loss": 1.0312, "step": 1074 }, { "epoch": 0.07989594946116685, "grad_norm": 10.74574726960332, "learning_rate": 1.7753922378199838e-05, "loss": 1.0004, "step": 1075 }, { "epoch": 0.0799702712746191, "grad_norm": 3.4232119647365655, "learning_rate": 1.777043765483072e-05, "loss": 0.7413, "step": 1076 }, { "epoch": 0.08004459308807135, "grad_norm": 6.597729572036802, "learning_rate": 1.7786952931461603e-05, "loss": 1.0651, "step": 1077 }, { "epoch": 0.08011891490152359, "grad_norm": 4.8638749976355635, "learning_rate": 1.7803468208092487e-05, "loss": 1.0161, "step": 1078 }, { "epoch": 0.08019323671497584, "grad_norm": 2.923706528737039, "learning_rate": 1.781998348472337e-05, "loss": 0.6969, "step": 1079 }, { "epoch": 0.0802675585284281, "grad_norm": 4.758142634025539, "learning_rate": 1.7836498761354256e-05, "loss": 0.6609, "step": 1080 }, { "epoch": 0.08034188034188035, "grad_norm": 54.50968288771929, "learning_rate": 1.7853014037985137e-05, "loss": 0.8956, "step": 1081 }, { "epoch": 0.08041620215533259, "grad_norm": 6.516573159126154, "learning_rate": 1.786952931461602e-05, "loss": 1.14, "step": 1082 }, { "epoch": 0.08049052396878484, "grad_norm": 6.381182289163388, "learning_rate": 1.7886044591246905e-05, "loss": 1.1715, "step": 1083 }, { "epoch": 0.08056484578223709, "grad_norm": 7.386186381098367, "learning_rate": 1.790255986787779e-05, "loss": 0.9884, "step": 1084 }, { "epoch": 0.08063916759568933, "grad_norm": 9.62697330821128, "learning_rate": 1.7919075144508673e-05, "loss": 0.8781, "step": 1085 }, { "epoch": 0.08071348940914158, "grad_norm": 8.143832007124969, "learning_rate": 1.7935590421139554e-05, "loss": 0.878, "step": 1086 }, { "epoch": 0.08078781122259383, "grad_norm": 21.290535842555126, "learning_rate": 1.795210569777044e-05, "loss": 0.8138, "step": 1087 }, { "epoch": 0.08086213303604609, "grad_norm": 8.669217710854548, "learning_rate": 1.7968620974401323e-05, "loss": 0.9878, "step": 1088 }, { "epoch": 0.08093645484949832, "grad_norm": 5.510263996464113, "learning_rate": 1.7985136251032207e-05, "loss": 1.2015, "step": 1089 }, { "epoch": 0.08101077666295058, "grad_norm": 5.625873486215272, "learning_rate": 1.8001651527663088e-05, "loss": 0.7284, "step": 1090 }, { "epoch": 0.08108509847640283, "grad_norm": 4.698418322621202, "learning_rate": 1.8018166804293972e-05, "loss": 1.108, "step": 1091 }, { "epoch": 0.08115942028985507, "grad_norm": 3.5514945178614843, "learning_rate": 1.8034682080924856e-05, "loss": 0.9461, "step": 1092 }, { "epoch": 0.08123374210330732, "grad_norm": 3.874282314787771, "learning_rate": 1.805119735755574e-05, "loss": 0.961, "step": 1093 }, { "epoch": 0.08130806391675957, "grad_norm": 3.973043291214081, "learning_rate": 1.8067712634186625e-05, "loss": 0.9237, "step": 1094 }, { "epoch": 0.08138238573021182, "grad_norm": 42.382689286222444, "learning_rate": 1.8084227910817506e-05, "loss": 1.0656, "step": 1095 }, { "epoch": 0.08145670754366406, "grad_norm": 5.393633824199575, "learning_rate": 1.810074318744839e-05, "loss": 1.1168, "step": 1096 }, { "epoch": 0.08153102935711631, "grad_norm": 3.9913546396086654, "learning_rate": 1.8117258464079274e-05, "loss": 1.0237, "step": 1097 }, { "epoch": 0.08160535117056857, "grad_norm": 9.950414388983813, "learning_rate": 1.8133773740710158e-05, "loss": 0.9736, "step": 1098 }, { "epoch": 0.0816796729840208, "grad_norm": 7.901192943451989, "learning_rate": 1.8150289017341043e-05, "loss": 1.2199, "step": 1099 }, { "epoch": 0.08175399479747306, "grad_norm": 4.434491413034505, "learning_rate": 1.8166804293971923e-05, "loss": 0.7844, "step": 1100 }, { "epoch": 0.08182831661092531, "grad_norm": 9.245977695906971, "learning_rate": 1.8183319570602808e-05, "loss": 1.027, "step": 1101 }, { "epoch": 0.08190263842437756, "grad_norm": 3.033010487048222, "learning_rate": 1.8199834847233692e-05, "loss": 0.9549, "step": 1102 }, { "epoch": 0.0819769602378298, "grad_norm": 4.197837606992814, "learning_rate": 1.8216350123864576e-05, "loss": 0.78, "step": 1103 }, { "epoch": 0.08205128205128205, "grad_norm": 3.3396902878173025, "learning_rate": 1.823286540049546e-05, "loss": 0.9172, "step": 1104 }, { "epoch": 0.0821256038647343, "grad_norm": 10.415787168194301, "learning_rate": 1.824938067712634e-05, "loss": 1.0793, "step": 1105 }, { "epoch": 0.08219992567818654, "grad_norm": 5.306950866273019, "learning_rate": 1.8265895953757225e-05, "loss": 0.8786, "step": 1106 }, { "epoch": 0.0822742474916388, "grad_norm": 5.118435432277182, "learning_rate": 1.828241123038811e-05, "loss": 1.1214, "step": 1107 }, { "epoch": 0.08234856930509105, "grad_norm": 4.827902931928903, "learning_rate": 1.8298926507018994e-05, "loss": 0.9996, "step": 1108 }, { "epoch": 0.0824228911185433, "grad_norm": 3.805595049021857, "learning_rate": 1.8315441783649878e-05, "loss": 0.958, "step": 1109 }, { "epoch": 0.08249721293199554, "grad_norm": 4.8243307037486405, "learning_rate": 1.8331957060280762e-05, "loss": 1.0857, "step": 1110 }, { "epoch": 0.08257153474544779, "grad_norm": 2.8001331400734806, "learning_rate": 1.8348472336911643e-05, "loss": 0.8261, "step": 1111 }, { "epoch": 0.08264585655890004, "grad_norm": 6.968237654589087, "learning_rate": 1.8364987613542527e-05, "loss": 0.9484, "step": 1112 }, { "epoch": 0.08272017837235228, "grad_norm": 4.312114461444166, "learning_rate": 1.838150289017341e-05, "loss": 0.9627, "step": 1113 }, { "epoch": 0.08279450018580453, "grad_norm": 3.5350078775179945, "learning_rate": 1.8398018166804296e-05, "loss": 0.8205, "step": 1114 }, { "epoch": 0.08286882199925678, "grad_norm": 6.6381933471120185, "learning_rate": 1.841453344343518e-05, "loss": 1.1514, "step": 1115 }, { "epoch": 0.08294314381270904, "grad_norm": 4.1599818305694125, "learning_rate": 1.8431048720066064e-05, "loss": 0.9797, "step": 1116 }, { "epoch": 0.08301746562616127, "grad_norm": 3.2062075744314056, "learning_rate": 1.8447563996696945e-05, "loss": 1.0583, "step": 1117 }, { "epoch": 0.08309178743961353, "grad_norm": 8.982564537743464, "learning_rate": 1.846407927332783e-05, "loss": 1.2063, "step": 1118 }, { "epoch": 0.08316610925306578, "grad_norm": 3.100082476653639, "learning_rate": 1.8480594549958714e-05, "loss": 0.9767, "step": 1119 }, { "epoch": 0.08324043106651802, "grad_norm": 4.581737158219452, "learning_rate": 1.8497109826589598e-05, "loss": 1.2254, "step": 1120 }, { "epoch": 0.08331475287997027, "grad_norm": 3.06614380681405, "learning_rate": 1.8513625103220482e-05, "loss": 0.9282, "step": 1121 }, { "epoch": 0.08338907469342252, "grad_norm": 15.99311403683675, "learning_rate": 1.8530140379851366e-05, "loss": 1.2384, "step": 1122 }, { "epoch": 0.08346339650687477, "grad_norm": 3.247232805748779, "learning_rate": 1.8546655656482247e-05, "loss": 0.8736, "step": 1123 }, { "epoch": 0.08353771832032701, "grad_norm": 5.1096733522924715, "learning_rate": 1.856317093311313e-05, "loss": 0.9274, "step": 1124 }, { "epoch": 0.08361204013377926, "grad_norm": 3.2109377622663646, "learning_rate": 1.8579686209744016e-05, "loss": 0.939, "step": 1125 }, { "epoch": 0.08368636194723152, "grad_norm": 3.594502026590353, "learning_rate": 1.85962014863749e-05, "loss": 1.0498, "step": 1126 }, { "epoch": 0.08376068376068375, "grad_norm": 2.5587964895071553, "learning_rate": 1.8612716763005784e-05, "loss": 0.7743, "step": 1127 }, { "epoch": 0.083835005574136, "grad_norm": 4.133241382478264, "learning_rate": 1.8629232039636665e-05, "loss": 0.9525, "step": 1128 }, { "epoch": 0.08390932738758826, "grad_norm": 3.9784458007642005, "learning_rate": 1.864574731626755e-05, "loss": 0.9814, "step": 1129 }, { "epoch": 0.08398364920104051, "grad_norm": 3.5509245381969765, "learning_rate": 1.8662262592898433e-05, "loss": 1.1203, "step": 1130 }, { "epoch": 0.08405797101449275, "grad_norm": 5.522750673073655, "learning_rate": 1.8678777869529318e-05, "loss": 1.0438, "step": 1131 }, { "epoch": 0.084132292827945, "grad_norm": 3.5543739623496347, "learning_rate": 1.8695293146160202e-05, "loss": 1.0904, "step": 1132 }, { "epoch": 0.08420661464139725, "grad_norm": 4.362119556067401, "learning_rate": 1.8711808422791083e-05, "loss": 0.9595, "step": 1133 }, { "epoch": 0.08428093645484949, "grad_norm": 3.831462253229433, "learning_rate": 1.8728323699421967e-05, "loss": 0.8884, "step": 1134 }, { "epoch": 0.08435525826830174, "grad_norm": 8.290459060240108, "learning_rate": 1.874483897605285e-05, "loss": 0.9943, "step": 1135 }, { "epoch": 0.084429580081754, "grad_norm": 22.951382779787316, "learning_rate": 1.8761354252683735e-05, "loss": 1.1084, "step": 1136 }, { "epoch": 0.08450390189520625, "grad_norm": 5.823998991668392, "learning_rate": 1.877786952931462e-05, "loss": 0.7881, "step": 1137 }, { "epoch": 0.08457822370865849, "grad_norm": 3.9564896163535246, "learning_rate": 1.87943848059455e-05, "loss": 0.8852, "step": 1138 }, { "epoch": 0.08465254552211074, "grad_norm": 4.859350636317579, "learning_rate": 1.8810900082576385e-05, "loss": 0.7806, "step": 1139 }, { "epoch": 0.08472686733556299, "grad_norm": 4.105423866690643, "learning_rate": 1.882741535920727e-05, "loss": 0.8755, "step": 1140 }, { "epoch": 0.08480118914901523, "grad_norm": 4.3846949696793285, "learning_rate": 1.8843930635838153e-05, "loss": 1.1278, "step": 1141 }, { "epoch": 0.08487551096246748, "grad_norm": 3.711001291567701, "learning_rate": 1.8860445912469037e-05, "loss": 0.8544, "step": 1142 }, { "epoch": 0.08494983277591973, "grad_norm": 4.103060266134039, "learning_rate": 1.8876961189099918e-05, "loss": 0.8765, "step": 1143 }, { "epoch": 0.08502415458937199, "grad_norm": 5.576100974733643, "learning_rate": 1.8893476465730802e-05, "loss": 1.0528, "step": 1144 }, { "epoch": 0.08509847640282422, "grad_norm": 9.056071312719233, "learning_rate": 1.8909991742361687e-05, "loss": 1.0957, "step": 1145 }, { "epoch": 0.08517279821627648, "grad_norm": 4.6782869428235445, "learning_rate": 1.892650701899257e-05, "loss": 0.8974, "step": 1146 }, { "epoch": 0.08524712002972873, "grad_norm": 7.701834227834206, "learning_rate": 1.894302229562345e-05, "loss": 1.058, "step": 1147 }, { "epoch": 0.08532144184318097, "grad_norm": 44.859988251148636, "learning_rate": 1.8959537572254336e-05, "loss": 0.9087, "step": 1148 }, { "epoch": 0.08539576365663322, "grad_norm": 3.17813262424358, "learning_rate": 1.897605284888522e-05, "loss": 0.7447, "step": 1149 }, { "epoch": 0.08547008547008547, "grad_norm": 4.8755483376685165, "learning_rate": 1.8992568125516104e-05, "loss": 1.0216, "step": 1150 }, { "epoch": 0.08554440728353772, "grad_norm": 3.5914020351019627, "learning_rate": 1.900908340214699e-05, "loss": 0.9513, "step": 1151 }, { "epoch": 0.08561872909698996, "grad_norm": 3.7501943871728725, "learning_rate": 1.902559867877787e-05, "loss": 0.9625, "step": 1152 }, { "epoch": 0.08569305091044221, "grad_norm": 3.7280344037860664, "learning_rate": 1.9042113955408754e-05, "loss": 0.8777, "step": 1153 }, { "epoch": 0.08576737272389447, "grad_norm": 2.878018243887153, "learning_rate": 1.9058629232039638e-05, "loss": 0.9193, "step": 1154 }, { "epoch": 0.0858416945373467, "grad_norm": 3.27212077288606, "learning_rate": 1.9075144508670522e-05, "loss": 0.5272, "step": 1155 }, { "epoch": 0.08591601635079896, "grad_norm": 3.91629773749927, "learning_rate": 1.9091659785301406e-05, "loss": 0.7395, "step": 1156 }, { "epoch": 0.08599033816425121, "grad_norm": 3.4783616146858134, "learning_rate": 1.9108175061932287e-05, "loss": 1.0407, "step": 1157 }, { "epoch": 0.08606465997770346, "grad_norm": 3.304594297932733, "learning_rate": 1.912469033856317e-05, "loss": 0.8633, "step": 1158 }, { "epoch": 0.0861389817911557, "grad_norm": 3.6554095076075512, "learning_rate": 1.9141205615194056e-05, "loss": 0.9483, "step": 1159 }, { "epoch": 0.08621330360460795, "grad_norm": 3.118434446947244, "learning_rate": 1.915772089182494e-05, "loss": 0.8518, "step": 1160 }, { "epoch": 0.0862876254180602, "grad_norm": 3.40251148574385, "learning_rate": 1.9174236168455824e-05, "loss": 0.9787, "step": 1161 }, { "epoch": 0.08636194723151244, "grad_norm": 3.4076585550408045, "learning_rate": 1.9190751445086705e-05, "loss": 0.7497, "step": 1162 }, { "epoch": 0.0864362690449647, "grad_norm": 4.113260609221913, "learning_rate": 1.920726672171759e-05, "loss": 0.9206, "step": 1163 }, { "epoch": 0.08651059085841695, "grad_norm": 2.936202120191866, "learning_rate": 1.9223781998348473e-05, "loss": 0.8598, "step": 1164 }, { "epoch": 0.0865849126718692, "grad_norm": 4.546576071282859, "learning_rate": 1.9240297274979358e-05, "loss": 0.9019, "step": 1165 }, { "epoch": 0.08665923448532144, "grad_norm": 3.7549766963127094, "learning_rate": 1.925681255161024e-05, "loss": 0.8762, "step": 1166 }, { "epoch": 0.08673355629877369, "grad_norm": 2.6122694799095556, "learning_rate": 1.9273327828241123e-05, "loss": 0.7191, "step": 1167 }, { "epoch": 0.08680787811222594, "grad_norm": 3.4703066721086686, "learning_rate": 1.9289843104872007e-05, "loss": 0.9409, "step": 1168 }, { "epoch": 0.08688219992567818, "grad_norm": 4.305012756176413, "learning_rate": 1.930635838150289e-05, "loss": 0.8826, "step": 1169 }, { "epoch": 0.08695652173913043, "grad_norm": 3.6675352485160535, "learning_rate": 1.9322873658133775e-05, "loss": 0.9649, "step": 1170 }, { "epoch": 0.08703084355258268, "grad_norm": 3.1323580746660165, "learning_rate": 1.933938893476466e-05, "loss": 0.7862, "step": 1171 }, { "epoch": 0.08710516536603494, "grad_norm": 3.4809658795606246, "learning_rate": 1.935590421139554e-05, "loss": 0.9412, "step": 1172 }, { "epoch": 0.08717948717948718, "grad_norm": 4.144848260497084, "learning_rate": 1.9372419488026425e-05, "loss": 0.7959, "step": 1173 }, { "epoch": 0.08725380899293943, "grad_norm": 9.164054558019764, "learning_rate": 1.938893476465731e-05, "loss": 0.973, "step": 1174 }, { "epoch": 0.08732813080639168, "grad_norm": 3.5621918782669044, "learning_rate": 1.9405450041288193e-05, "loss": 0.6942, "step": 1175 }, { "epoch": 0.08740245261984392, "grad_norm": 5.268266658808816, "learning_rate": 1.9421965317919077e-05, "loss": 1.0699, "step": 1176 }, { "epoch": 0.08747677443329617, "grad_norm": 4.715623583508353, "learning_rate": 1.943848059454996e-05, "loss": 0.8288, "step": 1177 }, { "epoch": 0.08755109624674842, "grad_norm": 4.798065977015128, "learning_rate": 1.9454995871180842e-05, "loss": 0.8432, "step": 1178 }, { "epoch": 0.08762541806020067, "grad_norm": 4.099483524549802, "learning_rate": 1.9471511147811727e-05, "loss": 1.0864, "step": 1179 }, { "epoch": 0.08769973987365291, "grad_norm": 10.30903912363354, "learning_rate": 1.948802642444261e-05, "loss": 1.2212, "step": 1180 }, { "epoch": 0.08777406168710516, "grad_norm": 3.378401278888377, "learning_rate": 1.9504541701073495e-05, "loss": 1.0745, "step": 1181 }, { "epoch": 0.08784838350055742, "grad_norm": 4.323863641403465, "learning_rate": 1.952105697770438e-05, "loss": 1.0375, "step": 1182 }, { "epoch": 0.08792270531400966, "grad_norm": 4.254264817131702, "learning_rate": 1.9537572254335264e-05, "loss": 0.934, "step": 1183 }, { "epoch": 0.08799702712746191, "grad_norm": 4.167141693326024, "learning_rate": 1.9554087530966144e-05, "loss": 0.8508, "step": 1184 }, { "epoch": 0.08807134894091416, "grad_norm": 4.676525903502156, "learning_rate": 1.957060280759703e-05, "loss": 1.0083, "step": 1185 }, { "epoch": 0.08814567075436641, "grad_norm": 5.884317902405335, "learning_rate": 1.9587118084227913e-05, "loss": 1.1619, "step": 1186 }, { "epoch": 0.08821999256781865, "grad_norm": 6.987699081509636, "learning_rate": 1.9603633360858797e-05, "loss": 0.9445, "step": 1187 }, { "epoch": 0.0882943143812709, "grad_norm": 3.831825473502172, "learning_rate": 1.962014863748968e-05, "loss": 0.967, "step": 1188 }, { "epoch": 0.08836863619472315, "grad_norm": 4.588417567962876, "learning_rate": 1.9636663914120566e-05, "loss": 1.1308, "step": 1189 }, { "epoch": 0.08844295800817539, "grad_norm": 5.40551232631741, "learning_rate": 1.9653179190751446e-05, "loss": 0.8508, "step": 1190 }, { "epoch": 0.08851727982162765, "grad_norm": 3.3524508779738764, "learning_rate": 1.966969446738233e-05, "loss": 0.7994, "step": 1191 }, { "epoch": 0.0885916016350799, "grad_norm": 3.8804775277090267, "learning_rate": 1.9686209744013215e-05, "loss": 0.7596, "step": 1192 }, { "epoch": 0.08866592344853215, "grad_norm": 4.87076798265173, "learning_rate": 1.97027250206441e-05, "loss": 1.0179, "step": 1193 }, { "epoch": 0.08874024526198439, "grad_norm": 9.93625939948356, "learning_rate": 1.9719240297274983e-05, "loss": 0.7744, "step": 1194 }, { "epoch": 0.08881456707543664, "grad_norm": 4.259046148114474, "learning_rate": 1.9735755573905864e-05, "loss": 0.9588, "step": 1195 }, { "epoch": 0.08888888888888889, "grad_norm": 3.4485975295497657, "learning_rate": 1.975227085053675e-05, "loss": 0.9411, "step": 1196 }, { "epoch": 0.08896321070234113, "grad_norm": 5.066804639900693, "learning_rate": 1.9768786127167633e-05, "loss": 1.0433, "step": 1197 }, { "epoch": 0.08903753251579338, "grad_norm": 388.93851123550024, "learning_rate": 1.9785301403798517e-05, "loss": 0.8264, "step": 1198 }, { "epoch": 0.08911185432924563, "grad_norm": 4.304959394268829, "learning_rate": 1.98018166804294e-05, "loss": 0.8149, "step": 1199 }, { "epoch": 0.08918617614269789, "grad_norm": 3.2783162315414525, "learning_rate": 1.9818331957060282e-05, "loss": 1.0157, "step": 1200 }, { "epoch": 0.08926049795615013, "grad_norm": 3.761812981974261, "learning_rate": 1.9834847233691166e-05, "loss": 0.9327, "step": 1201 }, { "epoch": 0.08933481976960238, "grad_norm": 10.52215270181456, "learning_rate": 1.985136251032205e-05, "loss": 0.7299, "step": 1202 }, { "epoch": 0.08940914158305463, "grad_norm": 3.7838731247268043, "learning_rate": 1.9867877786952935e-05, "loss": 1.0499, "step": 1203 }, { "epoch": 0.08948346339650687, "grad_norm": 2.9230909472324167, "learning_rate": 1.9884393063583815e-05, "loss": 0.8311, "step": 1204 }, { "epoch": 0.08955778520995912, "grad_norm": 3.2360181118821867, "learning_rate": 1.99009083402147e-05, "loss": 0.9078, "step": 1205 }, { "epoch": 0.08963210702341137, "grad_norm": 9.498679626973109, "learning_rate": 1.9917423616845584e-05, "loss": 1.1612, "step": 1206 }, { "epoch": 0.08970642883686362, "grad_norm": 4.399816927311497, "learning_rate": 1.9933938893476468e-05, "loss": 0.9389, "step": 1207 }, { "epoch": 0.08978075065031586, "grad_norm": 4.084603007163887, "learning_rate": 1.9950454170107352e-05, "loss": 1.0023, "step": 1208 }, { "epoch": 0.08985507246376812, "grad_norm": 4.452638743627758, "learning_rate": 1.9966969446738233e-05, "loss": 1.1445, "step": 1209 }, { "epoch": 0.08992939427722037, "grad_norm": 3.778043553961837, "learning_rate": 1.9983484723369117e-05, "loss": 1.0184, "step": 1210 }, { "epoch": 0.0900037160906726, "grad_norm": 3.5861228898975166, "learning_rate": 2e-05, "loss": 0.8005, "step": 1211 }, { "epoch": 0.09007803790412486, "grad_norm": 4.690543262933937, "learning_rate": 1.9999999967810263e-05, "loss": 1.1057, "step": 1212 }, { "epoch": 0.09015235971757711, "grad_norm": 3.5352691450265565, "learning_rate": 1.999999987124104e-05, "loss": 0.9896, "step": 1213 }, { "epoch": 0.09022668153102936, "grad_norm": 3.088282726606562, "learning_rate": 1.9999999710292338e-05, "loss": 0.8206, "step": 1214 }, { "epoch": 0.0903010033444816, "grad_norm": 2.9875848006373675, "learning_rate": 1.9999999484964156e-05, "loss": 0.8848, "step": 1215 }, { "epoch": 0.09037532515793385, "grad_norm": 3.9708990368879094, "learning_rate": 1.99999991952565e-05, "loss": 1.1921, "step": 1216 }, { "epoch": 0.0904496469713861, "grad_norm": 4.225923669612627, "learning_rate": 1.999999884116936e-05, "loss": 0.8981, "step": 1217 }, { "epoch": 0.09052396878483834, "grad_norm": 5.231135861292553, "learning_rate": 1.9999998422702752e-05, "loss": 1.0463, "step": 1218 }, { "epoch": 0.0905982905982906, "grad_norm": 3.1575132893502023, "learning_rate": 1.9999997939856676e-05, "loss": 0.8219, "step": 1219 }, { "epoch": 0.09067261241174285, "grad_norm": 4.487238145654264, "learning_rate": 1.9999997392631128e-05, "loss": 0.925, "step": 1220 }, { "epoch": 0.0907469342251951, "grad_norm": 5.432796691624254, "learning_rate": 1.9999996781026113e-05, "loss": 0.7916, "step": 1221 }, { "epoch": 0.09082125603864734, "grad_norm": 3.9658204144636358, "learning_rate": 1.9999996105041644e-05, "loss": 1.0047, "step": 1222 }, { "epoch": 0.09089557785209959, "grad_norm": 4.762933596821416, "learning_rate": 1.9999995364677714e-05, "loss": 1.0249, "step": 1223 }, { "epoch": 0.09096989966555184, "grad_norm": 3.029583192023026, "learning_rate": 1.9999994559934337e-05, "loss": 1.0431, "step": 1224 }, { "epoch": 0.09104422147900408, "grad_norm": 4.027909467211791, "learning_rate": 1.999999369081151e-05, "loss": 1.1478, "step": 1225 }, { "epoch": 0.09111854329245633, "grad_norm": 3.116799304763702, "learning_rate": 1.9999992757309244e-05, "loss": 0.8822, "step": 1226 }, { "epoch": 0.09119286510590859, "grad_norm": 3.6933308380052154, "learning_rate": 1.9999991759427545e-05, "loss": 0.8634, "step": 1227 }, { "epoch": 0.09126718691936084, "grad_norm": 6.247102574552966, "learning_rate": 1.9999990697166413e-05, "loss": 0.9895, "step": 1228 }, { "epoch": 0.09134150873281308, "grad_norm": 3.671833574790707, "learning_rate": 1.9999989570525863e-05, "loss": 1.0026, "step": 1229 }, { "epoch": 0.09141583054626533, "grad_norm": 4.348908703086609, "learning_rate": 1.99999883795059e-05, "loss": 1.1021, "step": 1230 }, { "epoch": 0.09149015235971758, "grad_norm": 6.593511915770547, "learning_rate": 1.999998712410653e-05, "loss": 1.2132, "step": 1231 }, { "epoch": 0.09156447417316982, "grad_norm": 3.883143069228041, "learning_rate": 1.9999985804327762e-05, "loss": 1.0406, "step": 1232 }, { "epoch": 0.09163879598662207, "grad_norm": 4.490865942898051, "learning_rate": 1.9999984420169604e-05, "loss": 0.856, "step": 1233 }, { "epoch": 0.09171311780007432, "grad_norm": 3.338315367654042, "learning_rate": 1.9999982971632062e-05, "loss": 0.85, "step": 1234 }, { "epoch": 0.09178743961352658, "grad_norm": 3.2603781796625873, "learning_rate": 1.9999981458715154e-05, "loss": 1.1419, "step": 1235 }, { "epoch": 0.09186176142697881, "grad_norm": 3.6847136511017173, "learning_rate": 1.999997988141888e-05, "loss": 1.069, "step": 1236 }, { "epoch": 0.09193608324043107, "grad_norm": 4.098766055258948, "learning_rate": 1.9999978239743258e-05, "loss": 0.9897, "step": 1237 }, { "epoch": 0.09201040505388332, "grad_norm": 3.2059310471109277, "learning_rate": 1.9999976533688293e-05, "loss": 0.9681, "step": 1238 }, { "epoch": 0.09208472686733556, "grad_norm": 3.1203661986172184, "learning_rate": 1.9999974763253996e-05, "loss": 1.0275, "step": 1239 }, { "epoch": 0.09215904868078781, "grad_norm": 2.940788563170273, "learning_rate": 1.9999972928440384e-05, "loss": 0.9504, "step": 1240 }, { "epoch": 0.09223337049424006, "grad_norm": 3.028864628550068, "learning_rate": 1.9999971029247464e-05, "loss": 0.8187, "step": 1241 }, { "epoch": 0.09230769230769231, "grad_norm": 3.8006584663969343, "learning_rate": 1.999996906567525e-05, "loss": 1.068, "step": 1242 }, { "epoch": 0.09238201412114455, "grad_norm": 2.99572724582791, "learning_rate": 1.9999967037723752e-05, "loss": 0.9906, "step": 1243 }, { "epoch": 0.0924563359345968, "grad_norm": 3.455299474997504, "learning_rate": 1.9999964945392984e-05, "loss": 0.9959, "step": 1244 }, { "epoch": 0.09253065774804906, "grad_norm": 3.096569402810664, "learning_rate": 1.9999962788682966e-05, "loss": 0.9996, "step": 1245 }, { "epoch": 0.0926049795615013, "grad_norm": 6.397010892828831, "learning_rate": 1.99999605675937e-05, "loss": 0.817, "step": 1246 }, { "epoch": 0.09267930137495355, "grad_norm": 2.8349630909296484, "learning_rate": 1.999995828212521e-05, "loss": 0.7423, "step": 1247 }, { "epoch": 0.0927536231884058, "grad_norm": 3.306400449363334, "learning_rate": 1.9999955932277508e-05, "loss": 1.024, "step": 1248 }, { "epoch": 0.09282794500185805, "grad_norm": 3.494511387579915, "learning_rate": 1.9999953518050605e-05, "loss": 1.0174, "step": 1249 }, { "epoch": 0.09290226681531029, "grad_norm": 7.117212536632835, "learning_rate": 1.9999951039444526e-05, "loss": 1.0552, "step": 1250 }, { "epoch": 0.09297658862876254, "grad_norm": 2.86316812305091, "learning_rate": 1.9999948496459278e-05, "loss": 0.778, "step": 1251 }, { "epoch": 0.0930509104422148, "grad_norm": 3.359063267775832, "learning_rate": 1.9999945889094877e-05, "loss": 0.8583, "step": 1252 }, { "epoch": 0.09312523225566703, "grad_norm": 5.283754813567089, "learning_rate": 1.9999943217351347e-05, "loss": 1.1104, "step": 1253 }, { "epoch": 0.09319955406911928, "grad_norm": 3.2126906333163188, "learning_rate": 1.9999940481228702e-05, "loss": 0.9951, "step": 1254 }, { "epoch": 0.09327387588257154, "grad_norm": 3.025067290123847, "learning_rate": 1.999993768072696e-05, "loss": 0.8808, "step": 1255 }, { "epoch": 0.09334819769602379, "grad_norm": 5.196643539934442, "learning_rate": 1.9999934815846134e-05, "loss": 1.0517, "step": 1256 }, { "epoch": 0.09342251950947603, "grad_norm": 3.084970853806186, "learning_rate": 1.9999931886586248e-05, "loss": 1.0683, "step": 1257 }, { "epoch": 0.09349684132292828, "grad_norm": 3.685891016055335, "learning_rate": 1.999992889294732e-05, "loss": 0.9652, "step": 1258 }, { "epoch": 0.09357116313638053, "grad_norm": 2.881650128907744, "learning_rate": 1.999992583492937e-05, "loss": 0.8386, "step": 1259 }, { "epoch": 0.09364548494983277, "grad_norm": 2.4564004389851, "learning_rate": 1.9999922712532414e-05, "loss": 0.8473, "step": 1260 }, { "epoch": 0.09371980676328502, "grad_norm": 3.211135394922271, "learning_rate": 1.9999919525756474e-05, "loss": 1.0231, "step": 1261 }, { "epoch": 0.09379412857673727, "grad_norm": 3.823297317993813, "learning_rate": 1.9999916274601575e-05, "loss": 0.9217, "step": 1262 }, { "epoch": 0.09386845039018953, "grad_norm": 2.634913506976756, "learning_rate": 1.999991295906773e-05, "loss": 1.0768, "step": 1263 }, { "epoch": 0.09394277220364176, "grad_norm": 2.5457292146227615, "learning_rate": 1.9999909579154962e-05, "loss": 1.151, "step": 1264 }, { "epoch": 0.09401709401709402, "grad_norm": 2.8231196342514013, "learning_rate": 1.99999061348633e-05, "loss": 0.9811, "step": 1265 }, { "epoch": 0.09409141583054627, "grad_norm": 3.1707911528796444, "learning_rate": 1.999990262619276e-05, "loss": 1.1898, "step": 1266 }, { "epoch": 0.0941657376439985, "grad_norm": 2.677888896862764, "learning_rate": 1.9999899053143364e-05, "loss": 0.9279, "step": 1267 }, { "epoch": 0.09424005945745076, "grad_norm": 3.0989723228330246, "learning_rate": 1.999989541571514e-05, "loss": 1.1129, "step": 1268 }, { "epoch": 0.09431438127090301, "grad_norm": 6.235276113176436, "learning_rate": 1.9999891713908106e-05, "loss": 0.9384, "step": 1269 }, { "epoch": 0.09438870308435526, "grad_norm": 2.9849029837876295, "learning_rate": 1.9999887947722287e-05, "loss": 0.8124, "step": 1270 }, { "epoch": 0.0944630248978075, "grad_norm": 3.304585079277581, "learning_rate": 1.9999884117157712e-05, "loss": 0.91, "step": 1271 }, { "epoch": 0.09453734671125975, "grad_norm": 4.134997578259915, "learning_rate": 1.9999880222214398e-05, "loss": 1.153, "step": 1272 }, { "epoch": 0.094611668524712, "grad_norm": 2.2870041799850824, "learning_rate": 1.9999876262892375e-05, "loss": 0.7249, "step": 1273 }, { "epoch": 0.09468599033816426, "grad_norm": 3.5092708613908132, "learning_rate": 1.999987223919167e-05, "loss": 0.7974, "step": 1274 }, { "epoch": 0.0947603121516165, "grad_norm": 3.305279884397941, "learning_rate": 1.99998681511123e-05, "loss": 1.066, "step": 1275 }, { "epoch": 0.09483463396506875, "grad_norm": 2.8732634606121987, "learning_rate": 1.9999863998654305e-05, "loss": 1.1567, "step": 1276 }, { "epoch": 0.094908955778521, "grad_norm": 2.999157802326461, "learning_rate": 1.99998597818177e-05, "loss": 0.9271, "step": 1277 }, { "epoch": 0.09498327759197324, "grad_norm": 5.157338699691267, "learning_rate": 1.9999855500602518e-05, "loss": 0.921, "step": 1278 }, { "epoch": 0.09505759940542549, "grad_norm": 3.7281964513817427, "learning_rate": 1.9999851155008785e-05, "loss": 0.9801, "step": 1279 }, { "epoch": 0.09513192121887774, "grad_norm": 2.974011826116991, "learning_rate": 1.999984674503653e-05, "loss": 0.932, "step": 1280 }, { "epoch": 0.09520624303233, "grad_norm": 3.7394157820867973, "learning_rate": 1.999984227068578e-05, "loss": 0.9029, "step": 1281 }, { "epoch": 0.09528056484578223, "grad_norm": 4.012690032989441, "learning_rate": 1.9999837731956565e-05, "loss": 0.9627, "step": 1282 }, { "epoch": 0.09535488665923449, "grad_norm": 2.8141886592178103, "learning_rate": 1.9999833128848912e-05, "loss": 1.0232, "step": 1283 }, { "epoch": 0.09542920847268674, "grad_norm": 2.7070959088698645, "learning_rate": 1.9999828461362852e-05, "loss": 0.9128, "step": 1284 }, { "epoch": 0.09550353028613898, "grad_norm": 3.6649154984252044, "learning_rate": 1.9999823729498416e-05, "loss": 0.9381, "step": 1285 }, { "epoch": 0.09557785209959123, "grad_norm": 3.2165182525748346, "learning_rate": 1.9999818933255633e-05, "loss": 1.0127, "step": 1286 }, { "epoch": 0.09565217391304348, "grad_norm": 3.9886183572814162, "learning_rate": 1.9999814072634535e-05, "loss": 1.0513, "step": 1287 }, { "epoch": 0.09572649572649573, "grad_norm": 2.8943724769954398, "learning_rate": 1.9999809147635155e-05, "loss": 0.7736, "step": 1288 }, { "epoch": 0.09580081753994797, "grad_norm": 2.9347857145958534, "learning_rate": 1.999980415825752e-05, "loss": 0.8698, "step": 1289 }, { "epoch": 0.09587513935340022, "grad_norm": 3.530639983236615, "learning_rate": 1.9999799104501665e-05, "loss": 1.0007, "step": 1290 }, { "epoch": 0.09594946116685248, "grad_norm": 2.8031662364641377, "learning_rate": 1.9999793986367623e-05, "loss": 1.1018, "step": 1291 }, { "epoch": 0.09602378298030471, "grad_norm": 3.2068223548420423, "learning_rate": 1.9999788803855424e-05, "loss": 0.7851, "step": 1292 }, { "epoch": 0.09609810479375697, "grad_norm": 2.9481074192263343, "learning_rate": 1.9999783556965106e-05, "loss": 0.8624, "step": 1293 }, { "epoch": 0.09617242660720922, "grad_norm": 2.985218743669599, "learning_rate": 1.99997782456967e-05, "loss": 0.8768, "step": 1294 }, { "epoch": 0.09624674842066147, "grad_norm": 2.986190201020164, "learning_rate": 1.999977287005024e-05, "loss": 1.1071, "step": 1295 }, { "epoch": 0.09632107023411371, "grad_norm": 2.9748059155534916, "learning_rate": 1.9999767430025764e-05, "loss": 0.9928, "step": 1296 }, { "epoch": 0.09639539204756596, "grad_norm": 6.50959165233299, "learning_rate": 1.99997619256233e-05, "loss": 0.7105, "step": 1297 }, { "epoch": 0.09646971386101821, "grad_norm": 3.2706832707951388, "learning_rate": 1.999975635684289e-05, "loss": 1.177, "step": 1298 }, { "epoch": 0.09654403567447045, "grad_norm": 3.4589948060893354, "learning_rate": 1.9999750723684565e-05, "loss": 1.0887, "step": 1299 }, { "epoch": 0.0966183574879227, "grad_norm": 2.1531448183067887, "learning_rate": 1.9999745026148365e-05, "loss": 0.7984, "step": 1300 }, { "epoch": 0.09669267930137496, "grad_norm": 3.2281088644706397, "learning_rate": 1.9999739264234324e-05, "loss": 0.9937, "step": 1301 }, { "epoch": 0.09676700111482721, "grad_norm": 5.949554865377063, "learning_rate": 1.9999733437942483e-05, "loss": 0.8973, "step": 1302 }, { "epoch": 0.09684132292827945, "grad_norm": 3.088739123793115, "learning_rate": 1.9999727547272874e-05, "loss": 1.0392, "step": 1303 }, { "epoch": 0.0969156447417317, "grad_norm": 2.927034390883376, "learning_rate": 1.999972159222554e-05, "loss": 0.9467, "step": 1304 }, { "epoch": 0.09698996655518395, "grad_norm": 3.26547896511471, "learning_rate": 1.9999715572800517e-05, "loss": 1.0585, "step": 1305 }, { "epoch": 0.09706428836863619, "grad_norm": 3.673711986380008, "learning_rate": 1.9999709488997845e-05, "loss": 0.9562, "step": 1306 }, { "epoch": 0.09713861018208844, "grad_norm": 2.700120473166899, "learning_rate": 1.999970334081756e-05, "loss": 0.9183, "step": 1307 }, { "epoch": 0.0972129319955407, "grad_norm": 2.9207302124290573, "learning_rate": 1.9999697128259704e-05, "loss": 0.946, "step": 1308 }, { "epoch": 0.09728725380899295, "grad_norm": 2.73902174984518, "learning_rate": 1.999969085132432e-05, "loss": 1.1207, "step": 1309 }, { "epoch": 0.09736157562244518, "grad_norm": 3.3650004866829235, "learning_rate": 1.999968451001144e-05, "loss": 0.7501, "step": 1310 }, { "epoch": 0.09743589743589744, "grad_norm": 2.7443802125663264, "learning_rate": 1.9999678104321114e-05, "loss": 0.9, "step": 1311 }, { "epoch": 0.09751021924934969, "grad_norm": 3.056969945209944, "learning_rate": 1.9999671634253377e-05, "loss": 0.9357, "step": 1312 }, { "epoch": 0.09758454106280193, "grad_norm": 5.949888855124094, "learning_rate": 1.9999665099808276e-05, "loss": 0.835, "step": 1313 }, { "epoch": 0.09765886287625418, "grad_norm": 2.78528618896673, "learning_rate": 1.9999658500985845e-05, "loss": 0.8367, "step": 1314 }, { "epoch": 0.09773318468970643, "grad_norm": 2.459269676939006, "learning_rate": 1.9999651837786136e-05, "loss": 0.8186, "step": 1315 }, { "epoch": 0.09780750650315868, "grad_norm": 3.413995416269313, "learning_rate": 1.9999645110209185e-05, "loss": 0.9262, "step": 1316 }, { "epoch": 0.09788182831661092, "grad_norm": 2.8896872352611194, "learning_rate": 1.999963831825504e-05, "loss": 1.1631, "step": 1317 }, { "epoch": 0.09795615013006317, "grad_norm": 2.8407286888239214, "learning_rate": 1.9999631461923738e-05, "loss": 0.8133, "step": 1318 }, { "epoch": 0.09803047194351543, "grad_norm": 2.6893881718343393, "learning_rate": 1.999962454121533e-05, "loss": 0.8599, "step": 1319 }, { "epoch": 0.09810479375696766, "grad_norm": 2.814568566046172, "learning_rate": 1.999961755612986e-05, "loss": 0.8418, "step": 1320 }, { "epoch": 0.09817911557041992, "grad_norm": 3.015934908450139, "learning_rate": 1.999961050666737e-05, "loss": 0.8383, "step": 1321 }, { "epoch": 0.09825343738387217, "grad_norm": 2.7258274020544317, "learning_rate": 1.9999603392827903e-05, "loss": 1.0892, "step": 1322 }, { "epoch": 0.09832775919732442, "grad_norm": 3.0128182091892786, "learning_rate": 1.999959621461151e-05, "loss": 0.9819, "step": 1323 }, { "epoch": 0.09840208101077666, "grad_norm": 2.710419499398285, "learning_rate": 1.9999588972018237e-05, "loss": 0.9587, "step": 1324 }, { "epoch": 0.09847640282422891, "grad_norm": 3.1879892307927595, "learning_rate": 1.9999581665048127e-05, "loss": 1.0379, "step": 1325 }, { "epoch": 0.09855072463768116, "grad_norm": 3.2251654837847745, "learning_rate": 1.999957429370123e-05, "loss": 1.1191, "step": 1326 }, { "epoch": 0.0986250464511334, "grad_norm": 2.3890904336214698, "learning_rate": 1.9999566857977593e-05, "loss": 0.8459, "step": 1327 }, { "epoch": 0.09869936826458565, "grad_norm": 3.0153620342044047, "learning_rate": 1.9999559357877267e-05, "loss": 1.0088, "step": 1328 }, { "epoch": 0.0987736900780379, "grad_norm": 2.5935991473858664, "learning_rate": 1.999955179340029e-05, "loss": 0.9363, "step": 1329 }, { "epoch": 0.09884801189149016, "grad_norm": 2.585919325161893, "learning_rate": 1.9999544164546722e-05, "loss": 0.8943, "step": 1330 }, { "epoch": 0.0989223337049424, "grad_norm": 2.99866229476512, "learning_rate": 1.9999536471316605e-05, "loss": 0.9106, "step": 1331 }, { "epoch": 0.09899665551839465, "grad_norm": 3.3816789741717312, "learning_rate": 1.999952871370999e-05, "loss": 1.0757, "step": 1332 }, { "epoch": 0.0990709773318469, "grad_norm": 3.040895406646219, "learning_rate": 1.999952089172693e-05, "loss": 0.9484, "step": 1333 }, { "epoch": 0.09914529914529914, "grad_norm": 3.4435926674230384, "learning_rate": 1.9999513005367474e-05, "loss": 1.0376, "step": 1334 }, { "epoch": 0.09921962095875139, "grad_norm": 2.8185753940823544, "learning_rate": 1.999950505463167e-05, "loss": 0.9015, "step": 1335 }, { "epoch": 0.09929394277220364, "grad_norm": 2.9311531818330367, "learning_rate": 1.9999497039519575e-05, "loss": 0.8074, "step": 1336 }, { "epoch": 0.0993682645856559, "grad_norm": 2.920339098519994, "learning_rate": 1.9999488960031233e-05, "loss": 0.7571, "step": 1337 }, { "epoch": 0.09944258639910813, "grad_norm": 2.9764907335163575, "learning_rate": 1.99994808161667e-05, "loss": 0.9787, "step": 1338 }, { "epoch": 0.09951690821256039, "grad_norm": 2.473815631635639, "learning_rate": 1.999947260792603e-05, "loss": 0.7832, "step": 1339 }, { "epoch": 0.09959123002601264, "grad_norm": 2.9684993547852976, "learning_rate": 1.9999464335309273e-05, "loss": 1.0488, "step": 1340 }, { "epoch": 0.09966555183946488, "grad_norm": 3.230969622135266, "learning_rate": 1.9999455998316485e-05, "loss": 1.0032, "step": 1341 }, { "epoch": 0.09973987365291713, "grad_norm": 2.820522260545994, "learning_rate": 1.999944759694772e-05, "loss": 1.0135, "step": 1342 }, { "epoch": 0.09981419546636938, "grad_norm": 3.8112499036535366, "learning_rate": 1.9999439131203025e-05, "loss": 0.7501, "step": 1343 }, { "epoch": 0.09988851727982163, "grad_norm": 2.3282936599069437, "learning_rate": 1.9999430601082463e-05, "loss": 0.8911, "step": 1344 }, { "epoch": 0.09996283909327387, "grad_norm": 2.1686007991364606, "learning_rate": 1.9999422006586085e-05, "loss": 0.8539, "step": 1345 }, { "epoch": 0.10003716090672612, "grad_norm": 2.5832067722030074, "learning_rate": 1.9999413347713946e-05, "loss": 0.9141, "step": 1346 }, { "epoch": 0.10011148272017838, "grad_norm": 2.62085451227052, "learning_rate": 1.9999404624466107e-05, "loss": 0.9179, "step": 1347 }, { "epoch": 0.10018580453363062, "grad_norm": 3.0104791385138823, "learning_rate": 1.9999395836842616e-05, "loss": 1.225, "step": 1348 }, { "epoch": 0.10026012634708287, "grad_norm": 2.580085506246412, "learning_rate": 1.9999386984843533e-05, "loss": 0.9291, "step": 1349 }, { "epoch": 0.10033444816053512, "grad_norm": 2.5173237007781846, "learning_rate": 1.9999378068468912e-05, "loss": 0.7759, "step": 1350 }, { "epoch": 0.10040876997398737, "grad_norm": 3.1756763945303206, "learning_rate": 1.9999369087718822e-05, "loss": 0.8059, "step": 1351 }, { "epoch": 0.10048309178743961, "grad_norm": 2.412306711665318, "learning_rate": 1.9999360042593307e-05, "loss": 0.9022, "step": 1352 }, { "epoch": 0.10055741360089186, "grad_norm": 2.972552637685109, "learning_rate": 1.9999350933092433e-05, "loss": 0.9461, "step": 1353 }, { "epoch": 0.10063173541434411, "grad_norm": 3.088397153483373, "learning_rate": 1.9999341759216256e-05, "loss": 1.0027, "step": 1354 }, { "epoch": 0.10070605722779635, "grad_norm": 2.8695230146105337, "learning_rate": 1.9999332520964837e-05, "loss": 0.9369, "step": 1355 }, { "epoch": 0.1007803790412486, "grad_norm": 2.7433936974049566, "learning_rate": 1.9999323218338232e-05, "loss": 0.9723, "step": 1356 }, { "epoch": 0.10085470085470086, "grad_norm": 2.457359983223342, "learning_rate": 1.9999313851336504e-05, "loss": 1.032, "step": 1357 }, { "epoch": 0.10092902266815311, "grad_norm": 2.5370041487917594, "learning_rate": 1.9999304419959712e-05, "loss": 0.9736, "step": 1358 }, { "epoch": 0.10100334448160535, "grad_norm": 2.694668680182735, "learning_rate": 1.999929492420792e-05, "loss": 1.1257, "step": 1359 }, { "epoch": 0.1010776662950576, "grad_norm": 11.128640453680832, "learning_rate": 1.999928536408118e-05, "loss": 1.052, "step": 1360 }, { "epoch": 0.10115198810850985, "grad_norm": 4.727185970303763, "learning_rate": 1.9999275739579562e-05, "loss": 1.0225, "step": 1361 }, { "epoch": 0.10122630992196209, "grad_norm": 3.6184914428575583, "learning_rate": 1.9999266050703125e-05, "loss": 0.9249, "step": 1362 }, { "epoch": 0.10130063173541434, "grad_norm": 3.0789578267041953, "learning_rate": 1.9999256297451936e-05, "loss": 1.0235, "step": 1363 }, { "epoch": 0.1013749535488666, "grad_norm": 2.7029815230986505, "learning_rate": 1.999924647982605e-05, "loss": 1.1207, "step": 1364 }, { "epoch": 0.10144927536231885, "grad_norm": 10.609289642522324, "learning_rate": 1.9999236597825536e-05, "loss": 0.8538, "step": 1365 }, { "epoch": 0.10152359717577109, "grad_norm": 2.962045635888934, "learning_rate": 1.9999226651450456e-05, "loss": 0.9755, "step": 1366 }, { "epoch": 0.10159791898922334, "grad_norm": 2.346998416612706, "learning_rate": 1.999921664070087e-05, "loss": 0.8032, "step": 1367 }, { "epoch": 0.10167224080267559, "grad_norm": 3.733619906617187, "learning_rate": 1.999920656557685e-05, "loss": 0.8769, "step": 1368 }, { "epoch": 0.10174656261612783, "grad_norm": 3.881793775381414, "learning_rate": 1.9999196426078456e-05, "loss": 0.9383, "step": 1369 }, { "epoch": 0.10182088442958008, "grad_norm": 3.879473583719631, "learning_rate": 1.9999186222205753e-05, "loss": 1.0211, "step": 1370 }, { "epoch": 0.10189520624303233, "grad_norm": 3.0651447305505766, "learning_rate": 1.9999175953958807e-05, "loss": 0.9028, "step": 1371 }, { "epoch": 0.10196952805648458, "grad_norm": 3.5103006905557357, "learning_rate": 1.999916562133769e-05, "loss": 0.7199, "step": 1372 }, { "epoch": 0.10204384986993682, "grad_norm": 3.352623562440213, "learning_rate": 1.999915522434246e-05, "loss": 0.8582, "step": 1373 }, { "epoch": 0.10211817168338908, "grad_norm": 3.180915510077358, "learning_rate": 1.9999144762973188e-05, "loss": 0.9273, "step": 1374 }, { "epoch": 0.10219249349684133, "grad_norm": 3.009850291682952, "learning_rate": 1.9999134237229936e-05, "loss": 1.0455, "step": 1375 }, { "epoch": 0.10226681531029357, "grad_norm": 2.9527248275100884, "learning_rate": 1.9999123647112785e-05, "loss": 0.9483, "step": 1376 }, { "epoch": 0.10234113712374582, "grad_norm": 2.3803323026904555, "learning_rate": 1.9999112992621786e-05, "loss": 0.8816, "step": 1377 }, { "epoch": 0.10241545893719807, "grad_norm": 2.923300639754296, "learning_rate": 1.9999102273757022e-05, "loss": 0.768, "step": 1378 }, { "epoch": 0.10248978075065032, "grad_norm": 3.092429518382567, "learning_rate": 1.9999091490518557e-05, "loss": 1.0603, "step": 1379 }, { "epoch": 0.10256410256410256, "grad_norm": 2.658732874726217, "learning_rate": 1.999908064290646e-05, "loss": 0.9781, "step": 1380 }, { "epoch": 0.10263842437755481, "grad_norm": 5.227609156973888, "learning_rate": 1.9999069730920798e-05, "loss": 1.0088, "step": 1381 }, { "epoch": 0.10271274619100706, "grad_norm": 5.192817606789304, "learning_rate": 1.9999058754561647e-05, "loss": 1.1441, "step": 1382 }, { "epoch": 0.1027870680044593, "grad_norm": 2.700154234580636, "learning_rate": 1.999904771382907e-05, "loss": 0.8205, "step": 1383 }, { "epoch": 0.10286138981791156, "grad_norm": 2.482996238934551, "learning_rate": 1.9999036608723146e-05, "loss": 0.7479, "step": 1384 }, { "epoch": 0.10293571163136381, "grad_norm": 3.070334685561585, "learning_rate": 1.9999025439243942e-05, "loss": 1.0894, "step": 1385 }, { "epoch": 0.10301003344481606, "grad_norm": 2.4704689161384215, "learning_rate": 1.9999014205391532e-05, "loss": 0.7841, "step": 1386 }, { "epoch": 0.1030843552582683, "grad_norm": 2.474567892749351, "learning_rate": 1.9999002907165986e-05, "loss": 0.9294, "step": 1387 }, { "epoch": 0.10315867707172055, "grad_norm": 2.3285067539888415, "learning_rate": 1.999899154456738e-05, "loss": 0.9228, "step": 1388 }, { "epoch": 0.1032329988851728, "grad_norm": 3.142270414621441, "learning_rate": 1.9998980117595784e-05, "loss": 0.8507, "step": 1389 }, { "epoch": 0.10330732069862504, "grad_norm": 2.8563643811615105, "learning_rate": 1.999896862625127e-05, "loss": 0.8465, "step": 1390 }, { "epoch": 0.10338164251207729, "grad_norm": 2.6593832156487416, "learning_rate": 1.999895707053392e-05, "loss": 0.6651, "step": 1391 }, { "epoch": 0.10345596432552955, "grad_norm": 2.94644682521064, "learning_rate": 1.99989454504438e-05, "loss": 1.1827, "step": 1392 }, { "epoch": 0.1035302861389818, "grad_norm": 2.825364638984798, "learning_rate": 1.9998933765980987e-05, "loss": 0.9068, "step": 1393 }, { "epoch": 0.10360460795243404, "grad_norm": 2.7061339264435103, "learning_rate": 1.9998922017145562e-05, "loss": 1.0482, "step": 1394 }, { "epoch": 0.10367892976588629, "grad_norm": 2.940838746861372, "learning_rate": 1.9998910203937592e-05, "loss": 1.0697, "step": 1395 }, { "epoch": 0.10375325157933854, "grad_norm": 2.7516080589981824, "learning_rate": 1.9998898326357157e-05, "loss": 0.9407, "step": 1396 }, { "epoch": 0.10382757339279078, "grad_norm": 2.350242506860295, "learning_rate": 1.9998886384404335e-05, "loss": 0.8478, "step": 1397 }, { "epoch": 0.10390189520624303, "grad_norm": 2.2723586627371253, "learning_rate": 1.9998874378079202e-05, "loss": 0.7392, "step": 1398 }, { "epoch": 0.10397621701969528, "grad_norm": 2.988485579170927, "learning_rate": 1.999886230738183e-05, "loss": 0.9023, "step": 1399 }, { "epoch": 0.10405053883314754, "grad_norm": 2.507930074416529, "learning_rate": 1.9998850172312305e-05, "loss": 0.9194, "step": 1400 }, { "epoch": 0.10412486064659977, "grad_norm": 2.9022692290304857, "learning_rate": 1.9998837972870703e-05, "loss": 0.9228, "step": 1401 }, { "epoch": 0.10419918246005203, "grad_norm": 2.630976589977493, "learning_rate": 1.99988257090571e-05, "loss": 1.0658, "step": 1402 }, { "epoch": 0.10427350427350428, "grad_norm": 2.6160780139156348, "learning_rate": 1.999881338087157e-05, "loss": 1.0575, "step": 1403 }, { "epoch": 0.10434782608695652, "grad_norm": 2.527068127984758, "learning_rate": 1.9998800988314203e-05, "loss": 1.0308, "step": 1404 }, { "epoch": 0.10442214790040877, "grad_norm": 2.858164261166414, "learning_rate": 1.9998788531385075e-05, "loss": 1.0284, "step": 1405 }, { "epoch": 0.10449646971386102, "grad_norm": 2.323233403432805, "learning_rate": 1.9998776010084263e-05, "loss": 0.7566, "step": 1406 }, { "epoch": 0.10457079152731327, "grad_norm": 2.6417636682820036, "learning_rate": 1.9998763424411853e-05, "loss": 0.8506, "step": 1407 }, { "epoch": 0.10464511334076551, "grad_norm": 2.422572388719444, "learning_rate": 1.999875077436792e-05, "loss": 0.8597, "step": 1408 }, { "epoch": 0.10471943515421776, "grad_norm": 3.119223361046767, "learning_rate": 1.9998738059952548e-05, "loss": 1.1715, "step": 1409 }, { "epoch": 0.10479375696767002, "grad_norm": 2.6147132692032002, "learning_rate": 1.999872528116582e-05, "loss": 0.9695, "step": 1410 }, { "epoch": 0.10486807878112225, "grad_norm": 2.4906899987960394, "learning_rate": 1.999871243800782e-05, "loss": 0.8698, "step": 1411 }, { "epoch": 0.1049424005945745, "grad_norm": 3.051381843311185, "learning_rate": 1.9998699530478625e-05, "loss": 1.1556, "step": 1412 }, { "epoch": 0.10501672240802676, "grad_norm": 2.6194245577958952, "learning_rate": 1.999868655857832e-05, "loss": 0.9305, "step": 1413 }, { "epoch": 0.10509104422147901, "grad_norm": 2.3815840115715554, "learning_rate": 1.9998673522306992e-05, "loss": 0.9935, "step": 1414 }, { "epoch": 0.10516536603493125, "grad_norm": 4.7836860938513395, "learning_rate": 1.9998660421664723e-05, "loss": 0.8065, "step": 1415 }, { "epoch": 0.1052396878483835, "grad_norm": 2.288344436942323, "learning_rate": 1.9998647256651597e-05, "loss": 0.8064, "step": 1416 }, { "epoch": 0.10531400966183575, "grad_norm": 2.9488741303706827, "learning_rate": 1.99986340272677e-05, "loss": 1.0244, "step": 1417 }, { "epoch": 0.10538833147528799, "grad_norm": 2.2086910641402855, "learning_rate": 1.999862073351311e-05, "loss": 0.8766, "step": 1418 }, { "epoch": 0.10546265328874024, "grad_norm": 3.1903798180816336, "learning_rate": 1.9998607375387924e-05, "loss": 1.1136, "step": 1419 }, { "epoch": 0.1055369751021925, "grad_norm": 2.4056404652129117, "learning_rate": 1.999859395289222e-05, "loss": 0.8205, "step": 1420 }, { "epoch": 0.10561129691564475, "grad_norm": 2.616253098623982, "learning_rate": 1.999858046602609e-05, "loss": 0.9558, "step": 1421 }, { "epoch": 0.10568561872909699, "grad_norm": 2.8840861154889135, "learning_rate": 1.9998566914789615e-05, "loss": 0.7903, "step": 1422 }, { "epoch": 0.10575994054254924, "grad_norm": 2.705613746797681, "learning_rate": 1.9998553299182886e-05, "loss": 0.9611, "step": 1423 }, { "epoch": 0.10583426235600149, "grad_norm": 2.374201792171346, "learning_rate": 1.999853961920599e-05, "loss": 0.8575, "step": 1424 }, { "epoch": 0.10590858416945373, "grad_norm": 2.8777019327513265, "learning_rate": 1.9998525874859013e-05, "loss": 0.8852, "step": 1425 }, { "epoch": 0.10598290598290598, "grad_norm": 2.827936836835458, "learning_rate": 1.9998512066142046e-05, "loss": 0.9171, "step": 1426 }, { "epoch": 0.10605722779635823, "grad_norm": 3.1972427351274364, "learning_rate": 1.999849819305518e-05, "loss": 1.1348, "step": 1427 }, { "epoch": 0.10613154960981049, "grad_norm": 2.6124651613215164, "learning_rate": 1.99984842555985e-05, "loss": 0.8277, "step": 1428 }, { "epoch": 0.10620587142326272, "grad_norm": 2.475256263936719, "learning_rate": 1.9998470253772094e-05, "loss": 0.8592, "step": 1429 }, { "epoch": 0.10628019323671498, "grad_norm": 2.6621548761941947, "learning_rate": 1.9998456187576057e-05, "loss": 1.0916, "step": 1430 }, { "epoch": 0.10635451505016723, "grad_norm": 2.6638899112704895, "learning_rate": 1.9998442057010477e-05, "loss": 0.958, "step": 1431 }, { "epoch": 0.10642883686361947, "grad_norm": 2.8339894941741406, "learning_rate": 1.9998427862075448e-05, "loss": 1.0863, "step": 1432 }, { "epoch": 0.10650315867707172, "grad_norm": 2.920935017352174, "learning_rate": 1.9998413602771058e-05, "loss": 1.0465, "step": 1433 }, { "epoch": 0.10657748049052397, "grad_norm": 3.5788284937779697, "learning_rate": 1.99983992790974e-05, "loss": 1.0052, "step": 1434 }, { "epoch": 0.10665180230397622, "grad_norm": 2.4055947609159154, "learning_rate": 1.999838489105457e-05, "loss": 0.7456, "step": 1435 }, { "epoch": 0.10672612411742846, "grad_norm": 2.5808713189827457, "learning_rate": 1.9998370438642652e-05, "loss": 0.8877, "step": 1436 }, { "epoch": 0.10680044593088071, "grad_norm": 2.693247763848516, "learning_rate": 1.9998355921861744e-05, "loss": 0.9291, "step": 1437 }, { "epoch": 0.10687476774433297, "grad_norm": 2.6988428303547236, "learning_rate": 1.9998341340711942e-05, "loss": 0.988, "step": 1438 }, { "epoch": 0.1069490895577852, "grad_norm": 2.4131768157393547, "learning_rate": 1.9998326695193336e-05, "loss": 0.8303, "step": 1439 }, { "epoch": 0.10702341137123746, "grad_norm": 2.5263665852787685, "learning_rate": 1.999831198530602e-05, "loss": 0.8351, "step": 1440 }, { "epoch": 0.10709773318468971, "grad_norm": 3.605527976792583, "learning_rate": 1.999829721105009e-05, "loss": 0.9529, "step": 1441 }, { "epoch": 0.10717205499814196, "grad_norm": 2.8739495452672412, "learning_rate": 1.9998282372425644e-05, "loss": 0.9446, "step": 1442 }, { "epoch": 0.1072463768115942, "grad_norm": 2.7842021247778312, "learning_rate": 1.9998267469432772e-05, "loss": 0.8451, "step": 1443 }, { "epoch": 0.10732069862504645, "grad_norm": 2.868128827565844, "learning_rate": 1.9998252502071576e-05, "loss": 1.0121, "step": 1444 }, { "epoch": 0.1073950204384987, "grad_norm": 5.602499171640463, "learning_rate": 1.9998237470342148e-05, "loss": 0.9064, "step": 1445 }, { "epoch": 0.10746934225195094, "grad_norm": 3.09608151135329, "learning_rate": 1.9998222374244585e-05, "loss": 1.016, "step": 1446 }, { "epoch": 0.1075436640654032, "grad_norm": 3.032115526326774, "learning_rate": 1.9998207213778987e-05, "loss": 0.8654, "step": 1447 }, { "epoch": 0.10761798587885545, "grad_norm": 2.763920123631645, "learning_rate": 1.9998191988945447e-05, "loss": 0.9132, "step": 1448 }, { "epoch": 0.1076923076923077, "grad_norm": 2.97679395465006, "learning_rate": 1.9998176699744067e-05, "loss": 0.9404, "step": 1449 }, { "epoch": 0.10776662950575994, "grad_norm": 3.7862074358686213, "learning_rate": 1.9998161346174943e-05, "loss": 1.0473, "step": 1450 }, { "epoch": 0.10784095131921219, "grad_norm": 2.337498489509428, "learning_rate": 1.999814592823818e-05, "loss": 0.9016, "step": 1451 }, { "epoch": 0.10791527313266444, "grad_norm": 2.7525247558951875, "learning_rate": 1.9998130445933868e-05, "loss": 1.0812, "step": 1452 }, { "epoch": 0.10798959494611668, "grad_norm": 2.5011453620970268, "learning_rate": 1.9998114899262113e-05, "loss": 0.9914, "step": 1453 }, { "epoch": 0.10806391675956893, "grad_norm": 2.6067033682691694, "learning_rate": 1.999809928822301e-05, "loss": 0.8185, "step": 1454 }, { "epoch": 0.10813823857302118, "grad_norm": 2.4843949656018243, "learning_rate": 1.9998083612816663e-05, "loss": 0.7858, "step": 1455 }, { "epoch": 0.10821256038647344, "grad_norm": 2.6924689308389427, "learning_rate": 1.9998067873043175e-05, "loss": 0.9984, "step": 1456 }, { "epoch": 0.10828688219992567, "grad_norm": 2.8831476633118283, "learning_rate": 1.9998052068902643e-05, "loss": 0.711, "step": 1457 }, { "epoch": 0.10836120401337793, "grad_norm": 3.3395149314558084, "learning_rate": 1.9998036200395174e-05, "loss": 1.1152, "step": 1458 }, { "epoch": 0.10843552582683018, "grad_norm": 2.9428017334078804, "learning_rate": 1.999802026752086e-05, "loss": 0.8483, "step": 1459 }, { "epoch": 0.10850984764028242, "grad_norm": 2.679308729271803, "learning_rate": 1.9998004270279816e-05, "loss": 0.9884, "step": 1460 }, { "epoch": 0.10858416945373467, "grad_norm": 2.461988065384676, "learning_rate": 1.9997988208672138e-05, "loss": 0.893, "step": 1461 }, { "epoch": 0.10865849126718692, "grad_norm": 2.5892368549671345, "learning_rate": 1.9997972082697928e-05, "loss": 0.8262, "step": 1462 }, { "epoch": 0.10873281308063917, "grad_norm": 2.455559099252416, "learning_rate": 1.9997955892357295e-05, "loss": 1.0037, "step": 1463 }, { "epoch": 0.10880713489409141, "grad_norm": 2.4675805275045506, "learning_rate": 1.999793963765034e-05, "loss": 0.757, "step": 1464 }, { "epoch": 0.10888145670754366, "grad_norm": 2.690251337570261, "learning_rate": 1.999792331857717e-05, "loss": 0.9994, "step": 1465 }, { "epoch": 0.10895577852099592, "grad_norm": 2.625396672436627, "learning_rate": 1.9997906935137886e-05, "loss": 0.7185, "step": 1466 }, { "epoch": 0.10903010033444815, "grad_norm": 2.7733034020518206, "learning_rate": 1.9997890487332596e-05, "loss": 0.847, "step": 1467 }, { "epoch": 0.1091044221479004, "grad_norm": 2.7406479806783004, "learning_rate": 1.9997873975161405e-05, "loss": 0.9927, "step": 1468 }, { "epoch": 0.10917874396135266, "grad_norm": 2.5658447612712814, "learning_rate": 1.9997857398624424e-05, "loss": 0.891, "step": 1469 }, { "epoch": 0.10925306577480491, "grad_norm": 2.2395819078137382, "learning_rate": 1.9997840757721752e-05, "loss": 0.7079, "step": 1470 }, { "epoch": 0.10932738758825715, "grad_norm": 2.607651549917496, "learning_rate": 1.9997824052453504e-05, "loss": 0.967, "step": 1471 }, { "epoch": 0.1094017094017094, "grad_norm": 3.124929911880305, "learning_rate": 1.9997807282819777e-05, "loss": 0.8624, "step": 1472 }, { "epoch": 0.10947603121516165, "grad_norm": 3.0225755465698345, "learning_rate": 1.999779044882069e-05, "loss": 1.1024, "step": 1473 }, { "epoch": 0.10955035302861389, "grad_norm": 2.7154555215747793, "learning_rate": 1.9997773550456348e-05, "loss": 1.003, "step": 1474 }, { "epoch": 0.10962467484206614, "grad_norm": 2.092722630841548, "learning_rate": 1.9997756587726858e-05, "loss": 0.7654, "step": 1475 }, { "epoch": 0.1096989966555184, "grad_norm": 2.293485058799309, "learning_rate": 1.999773956063233e-05, "loss": 0.8825, "step": 1476 }, { "epoch": 0.10977331846897065, "grad_norm": 2.5289045334399614, "learning_rate": 1.999772246917287e-05, "loss": 1.0503, "step": 1477 }, { "epoch": 0.10984764028242289, "grad_norm": 2.466329434114051, "learning_rate": 1.9997705313348594e-05, "loss": 0.9022, "step": 1478 }, { "epoch": 0.10992196209587514, "grad_norm": 2.502213796440484, "learning_rate": 1.999768809315961e-05, "loss": 0.8793, "step": 1479 }, { "epoch": 0.10999628390932739, "grad_norm": 2.5669987878042155, "learning_rate": 1.999767080860603e-05, "loss": 0.6672, "step": 1480 }, { "epoch": 0.11007060572277963, "grad_norm": 3.7282519947255683, "learning_rate": 1.999765345968796e-05, "loss": 1.0114, "step": 1481 }, { "epoch": 0.11014492753623188, "grad_norm": 2.790717601511051, "learning_rate": 1.9997636046405518e-05, "loss": 1.0346, "step": 1482 }, { "epoch": 0.11021924934968413, "grad_norm": 2.561995939438522, "learning_rate": 1.9997618568758818e-05, "loss": 0.8904, "step": 1483 }, { "epoch": 0.11029357116313639, "grad_norm": 2.8283810092390054, "learning_rate": 1.999760102674796e-05, "loss": 0.9041, "step": 1484 }, { "epoch": 0.11036789297658862, "grad_norm": 2.7993079414721103, "learning_rate": 1.9997583420373073e-05, "loss": 0.9856, "step": 1485 }, { "epoch": 0.11044221479004088, "grad_norm": 2.6209201870355523, "learning_rate": 1.999756574963426e-05, "loss": 1.2489, "step": 1486 }, { "epoch": 0.11051653660349313, "grad_norm": 2.7754578168829496, "learning_rate": 1.9997548014531637e-05, "loss": 1.0322, "step": 1487 }, { "epoch": 0.11059085841694537, "grad_norm": 3.1217663301902987, "learning_rate": 1.999753021506532e-05, "loss": 0.6555, "step": 1488 }, { "epoch": 0.11066518023039762, "grad_norm": 3.2024013992425044, "learning_rate": 1.999751235123542e-05, "loss": 1.0182, "step": 1489 }, { "epoch": 0.11073950204384987, "grad_norm": 3.222827110650575, "learning_rate": 1.9997494423042054e-05, "loss": 0.942, "step": 1490 }, { "epoch": 0.11081382385730212, "grad_norm": 2.4788081392276946, "learning_rate": 1.999747643048534e-05, "loss": 0.8991, "step": 1491 }, { "epoch": 0.11088814567075436, "grad_norm": 2.5818512394471975, "learning_rate": 1.9997458373565387e-05, "loss": 0.8761, "step": 1492 }, { "epoch": 0.11096246748420661, "grad_norm": 3.5460435750393406, "learning_rate": 1.9997440252282325e-05, "loss": 0.829, "step": 1493 }, { "epoch": 0.11103678929765887, "grad_norm": 2.831035975212287, "learning_rate": 1.999742206663625e-05, "loss": 0.8527, "step": 1494 }, { "epoch": 0.1111111111111111, "grad_norm": 4.077540631311367, "learning_rate": 1.9997403816627298e-05, "loss": 1.1383, "step": 1495 }, { "epoch": 0.11118543292456336, "grad_norm": 3.126044449562334, "learning_rate": 1.9997385502255574e-05, "loss": 0.926, "step": 1496 }, { "epoch": 0.11125975473801561, "grad_norm": 2.7464335905531736, "learning_rate": 1.9997367123521204e-05, "loss": 0.9953, "step": 1497 }, { "epoch": 0.11133407655146786, "grad_norm": 2.539802665701912, "learning_rate": 1.9997348680424308e-05, "loss": 1.0183, "step": 1498 }, { "epoch": 0.1114083983649201, "grad_norm": 2.5884669133637974, "learning_rate": 1.999733017296499e-05, "loss": 0.9241, "step": 1499 }, { "epoch": 0.11148272017837235, "grad_norm": 2.6211543613840083, "learning_rate": 1.9997311601143385e-05, "loss": 0.8495, "step": 1500 }, { "epoch": 0.1115570419918246, "grad_norm": 2.903701984635433, "learning_rate": 1.9997292964959606e-05, "loss": 0.8386, "step": 1501 }, { "epoch": 0.11163136380527684, "grad_norm": 2.8585303957599932, "learning_rate": 1.9997274264413774e-05, "loss": 0.9337, "step": 1502 }, { "epoch": 0.1117056856187291, "grad_norm": 2.7412405278184018, "learning_rate": 1.9997255499506008e-05, "loss": 1.0013, "step": 1503 }, { "epoch": 0.11178000743218135, "grad_norm": 2.92993342918039, "learning_rate": 1.999723667023643e-05, "loss": 0.9381, "step": 1504 }, { "epoch": 0.1118543292456336, "grad_norm": 2.7054930766988603, "learning_rate": 1.999721777660516e-05, "loss": 1.0035, "step": 1505 }, { "epoch": 0.11192865105908584, "grad_norm": 3.215323877367, "learning_rate": 1.9997198818612322e-05, "loss": 0.9833, "step": 1506 }, { "epoch": 0.11200297287253809, "grad_norm": 5.235477118970676, "learning_rate": 1.9997179796258036e-05, "loss": 1.0205, "step": 1507 }, { "epoch": 0.11207729468599034, "grad_norm": 2.295288443026966, "learning_rate": 1.9997160709542423e-05, "loss": 0.8736, "step": 1508 }, { "epoch": 0.11215161649944258, "grad_norm": 2.5594404152379213, "learning_rate": 1.9997141558465615e-05, "loss": 0.9697, "step": 1509 }, { "epoch": 0.11222593831289483, "grad_norm": 2.9323515692683935, "learning_rate": 1.9997122343027722e-05, "loss": 1.0976, "step": 1510 }, { "epoch": 0.11230026012634708, "grad_norm": 2.1980803001093783, "learning_rate": 1.9997103063228875e-05, "loss": 0.9048, "step": 1511 }, { "epoch": 0.11237458193979934, "grad_norm": 2.4318146602495756, "learning_rate": 1.9997083719069202e-05, "loss": 0.9505, "step": 1512 }, { "epoch": 0.11244890375325158, "grad_norm": 3.0874974153872454, "learning_rate": 1.9997064310548818e-05, "loss": 0.9751, "step": 1513 }, { "epoch": 0.11252322556670383, "grad_norm": 2.6181691108339167, "learning_rate": 1.9997044837667855e-05, "loss": 0.9346, "step": 1514 }, { "epoch": 0.11259754738015608, "grad_norm": 2.1609398031846134, "learning_rate": 1.9997025300426432e-05, "loss": 0.7673, "step": 1515 }, { "epoch": 0.11267186919360832, "grad_norm": 2.3976341451901733, "learning_rate": 1.9997005698824684e-05, "loss": 0.8497, "step": 1516 }, { "epoch": 0.11274619100706057, "grad_norm": 2.1071996576888394, "learning_rate": 1.9996986032862727e-05, "loss": 0.8113, "step": 1517 }, { "epoch": 0.11282051282051282, "grad_norm": 2.4032053511961298, "learning_rate": 1.9996966302540696e-05, "loss": 0.9498, "step": 1518 }, { "epoch": 0.11289483463396507, "grad_norm": 2.6081290146120275, "learning_rate": 1.9996946507858713e-05, "loss": 0.8569, "step": 1519 }, { "epoch": 0.11296915644741731, "grad_norm": 2.5275484168975, "learning_rate": 1.9996926648816907e-05, "loss": 0.9812, "step": 1520 }, { "epoch": 0.11304347826086956, "grad_norm": 2.802281730020511, "learning_rate": 1.999690672541541e-05, "loss": 0.9998, "step": 1521 }, { "epoch": 0.11311780007432182, "grad_norm": 2.7253891097201604, "learning_rate": 1.9996886737654343e-05, "loss": 0.9534, "step": 1522 }, { "epoch": 0.11319212188777406, "grad_norm": 2.62708005685512, "learning_rate": 1.9996866685533836e-05, "loss": 0.8049, "step": 1523 }, { "epoch": 0.11326644370122631, "grad_norm": 2.6997835270963755, "learning_rate": 1.9996846569054024e-05, "loss": 1.0192, "step": 1524 }, { "epoch": 0.11334076551467856, "grad_norm": 3.359980401579859, "learning_rate": 1.999682638821503e-05, "loss": 0.8586, "step": 1525 }, { "epoch": 0.11341508732813081, "grad_norm": 2.8259312675902732, "learning_rate": 1.9996806143016988e-05, "loss": 1.0085, "step": 1526 }, { "epoch": 0.11348940914158305, "grad_norm": 2.933807000245269, "learning_rate": 1.999678583346002e-05, "loss": 0.8994, "step": 1527 }, { "epoch": 0.1135637309550353, "grad_norm": 2.43791375764605, "learning_rate": 1.999676545954427e-05, "loss": 0.9326, "step": 1528 }, { "epoch": 0.11363805276848755, "grad_norm": 2.9494732085511144, "learning_rate": 1.9996745021269866e-05, "loss": 1.0956, "step": 1529 }, { "epoch": 0.11371237458193979, "grad_norm": 2.2756668738304415, "learning_rate": 1.999672451863693e-05, "loss": 0.9331, "step": 1530 }, { "epoch": 0.11378669639539205, "grad_norm": 2.523718914003022, "learning_rate": 1.99967039516456e-05, "loss": 0.9456, "step": 1531 }, { "epoch": 0.1138610182088443, "grad_norm": 2.790779926523535, "learning_rate": 1.9996683320296013e-05, "loss": 1.0125, "step": 1532 }, { "epoch": 0.11393534002229655, "grad_norm": 2.8133785306062076, "learning_rate": 1.9996662624588296e-05, "loss": 0.9273, "step": 1533 }, { "epoch": 0.11400966183574879, "grad_norm": 2.474876163988188, "learning_rate": 1.9996641864522585e-05, "loss": 0.7437, "step": 1534 }, { "epoch": 0.11408398364920104, "grad_norm": 2.917172623937328, "learning_rate": 1.9996621040099006e-05, "loss": 0.8955, "step": 1535 }, { "epoch": 0.11415830546265329, "grad_norm": 2.4608662536175685, "learning_rate": 1.9996600151317707e-05, "loss": 1.0817, "step": 1536 }, { "epoch": 0.11423262727610553, "grad_norm": 7.228255411720721, "learning_rate": 1.999657919817881e-05, "loss": 0.9537, "step": 1537 }, { "epoch": 0.11430694908955778, "grad_norm": 3.790636607180376, "learning_rate": 1.9996558180682457e-05, "loss": 0.7843, "step": 1538 }, { "epoch": 0.11438127090301003, "grad_norm": 2.994879054335652, "learning_rate": 1.999653709882878e-05, "loss": 1.1206, "step": 1539 }, { "epoch": 0.11445559271646229, "grad_norm": 2.5914704881704975, "learning_rate": 1.9996515952617917e-05, "loss": 0.9784, "step": 1540 }, { "epoch": 0.11452991452991453, "grad_norm": 2.9841217936489395, "learning_rate": 1.9996494742050004e-05, "loss": 0.9039, "step": 1541 }, { "epoch": 0.11460423634336678, "grad_norm": 3.417554373006639, "learning_rate": 1.9996473467125176e-05, "loss": 1.0708, "step": 1542 }, { "epoch": 0.11467855815681903, "grad_norm": 2.5465074773618475, "learning_rate": 1.9996452127843566e-05, "loss": 0.8286, "step": 1543 }, { "epoch": 0.11475287997027127, "grad_norm": 2.664935730171361, "learning_rate": 1.9996430724205322e-05, "loss": 0.9812, "step": 1544 }, { "epoch": 0.11482720178372352, "grad_norm": 2.427655170818996, "learning_rate": 1.9996409256210572e-05, "loss": 0.8756, "step": 1545 }, { "epoch": 0.11490152359717577, "grad_norm": 2.850240379435858, "learning_rate": 1.999638772385946e-05, "loss": 1.1391, "step": 1546 }, { "epoch": 0.11497584541062802, "grad_norm": 2.6125442939983503, "learning_rate": 1.999636612715212e-05, "loss": 1.1492, "step": 1547 }, { "epoch": 0.11505016722408026, "grad_norm": 2.599466416500161, "learning_rate": 1.9996344466088696e-05, "loss": 0.9515, "step": 1548 }, { "epoch": 0.11512448903753252, "grad_norm": 2.3408510709596597, "learning_rate": 1.9996322740669326e-05, "loss": 0.8576, "step": 1549 }, { "epoch": 0.11519881085098477, "grad_norm": 3.631177117772499, "learning_rate": 1.9996300950894145e-05, "loss": 1.1285, "step": 1550 }, { "epoch": 0.115273132664437, "grad_norm": 2.7956225443118456, "learning_rate": 1.99962790967633e-05, "loss": 1.0481, "step": 1551 }, { "epoch": 0.11534745447788926, "grad_norm": 2.522418001063541, "learning_rate": 1.9996257178276928e-05, "loss": 0.9673, "step": 1552 }, { "epoch": 0.11542177629134151, "grad_norm": 2.077777410749589, "learning_rate": 1.999623519543517e-05, "loss": 0.7759, "step": 1553 }, { "epoch": 0.11549609810479376, "grad_norm": 2.9040483151100567, "learning_rate": 1.9996213148238168e-05, "loss": 1.0112, "step": 1554 }, { "epoch": 0.115570419918246, "grad_norm": 1.8759300243394428, "learning_rate": 1.9996191036686066e-05, "loss": 0.6163, "step": 1555 }, { "epoch": 0.11564474173169825, "grad_norm": 2.5135821191668457, "learning_rate": 1.9996168860779007e-05, "loss": 0.8972, "step": 1556 }, { "epoch": 0.1157190635451505, "grad_norm": 2.7289777783317417, "learning_rate": 1.999614662051713e-05, "loss": 0.7973, "step": 1557 }, { "epoch": 0.11579338535860274, "grad_norm": 3.2985993461175944, "learning_rate": 1.9996124315900577e-05, "loss": 1.1472, "step": 1558 }, { "epoch": 0.115867707172055, "grad_norm": 3.116448278524157, "learning_rate": 1.9996101946929495e-05, "loss": 0.9407, "step": 1559 }, { "epoch": 0.11594202898550725, "grad_norm": 2.697109254499594, "learning_rate": 1.999607951360403e-05, "loss": 0.954, "step": 1560 }, { "epoch": 0.1160163507989595, "grad_norm": 2.7374564695955654, "learning_rate": 1.9996057015924323e-05, "loss": 1.0241, "step": 1561 }, { "epoch": 0.11609067261241174, "grad_norm": 3.483679659729278, "learning_rate": 1.999603445389052e-05, "loss": 0.6063, "step": 1562 }, { "epoch": 0.11616499442586399, "grad_norm": 3.6854567707690093, "learning_rate": 1.9996011827502763e-05, "loss": 1.0032, "step": 1563 }, { "epoch": 0.11623931623931624, "grad_norm": 3.3836571184838657, "learning_rate": 1.9995989136761204e-05, "loss": 1.0854, "step": 1564 }, { "epoch": 0.11631363805276848, "grad_norm": 7.222016761694828, "learning_rate": 1.9995966381665982e-05, "loss": 0.9993, "step": 1565 }, { "epoch": 0.11638795986622073, "grad_norm": 2.7792345897931883, "learning_rate": 1.999594356221725e-05, "loss": 1.0052, "step": 1566 }, { "epoch": 0.11646228167967299, "grad_norm": 2.761956805625191, "learning_rate": 1.999592067841515e-05, "loss": 1.0127, "step": 1567 }, { "epoch": 0.11653660349312524, "grad_norm": 2.682221414582019, "learning_rate": 1.999589773025983e-05, "loss": 0.8321, "step": 1568 }, { "epoch": 0.11661092530657748, "grad_norm": 2.367370558704093, "learning_rate": 1.9995874717751445e-05, "loss": 0.8579, "step": 1569 }, { "epoch": 0.11668524712002973, "grad_norm": 3.215117566832298, "learning_rate": 1.999585164089013e-05, "loss": 0.8808, "step": 1570 }, { "epoch": 0.11675956893348198, "grad_norm": 6.278413119379232, "learning_rate": 1.9995828499676048e-05, "loss": 0.8693, "step": 1571 }, { "epoch": 0.11683389074693422, "grad_norm": 2.5360911781151922, "learning_rate": 1.9995805294109334e-05, "loss": 0.8262, "step": 1572 }, { "epoch": 0.11690821256038647, "grad_norm": 3.662949574388134, "learning_rate": 1.9995782024190146e-05, "loss": 0.8762, "step": 1573 }, { "epoch": 0.11698253437383872, "grad_norm": 2.7801042357961308, "learning_rate": 1.9995758689918636e-05, "loss": 0.7847, "step": 1574 }, { "epoch": 0.11705685618729098, "grad_norm": 2.975001461085267, "learning_rate": 1.9995735291294947e-05, "loss": 0.8574, "step": 1575 }, { "epoch": 0.11713117800074321, "grad_norm": 2.7392132735000887, "learning_rate": 1.9995711828319234e-05, "loss": 1.0596, "step": 1576 }, { "epoch": 0.11720549981419547, "grad_norm": 2.368026351783738, "learning_rate": 1.9995688300991646e-05, "loss": 0.8444, "step": 1577 }, { "epoch": 0.11727982162764772, "grad_norm": 2.2698721347789994, "learning_rate": 1.9995664709312336e-05, "loss": 0.8485, "step": 1578 }, { "epoch": 0.11735414344109996, "grad_norm": 2.9913050916632393, "learning_rate": 1.9995641053281453e-05, "loss": 1.0425, "step": 1579 }, { "epoch": 0.11742846525455221, "grad_norm": 2.6348953248936473, "learning_rate": 1.9995617332899153e-05, "loss": 0.8939, "step": 1580 }, { "epoch": 0.11750278706800446, "grad_norm": 2.5116152509141503, "learning_rate": 1.9995593548165588e-05, "loss": 1.0614, "step": 1581 }, { "epoch": 0.11757710888145671, "grad_norm": 2.7523956843977055, "learning_rate": 1.999556969908091e-05, "loss": 1.1966, "step": 1582 }, { "epoch": 0.11765143069490895, "grad_norm": 2.407374691052401, "learning_rate": 1.9995545785645273e-05, "loss": 0.8516, "step": 1583 }, { "epoch": 0.1177257525083612, "grad_norm": 3.1102251588504526, "learning_rate": 1.999552180785883e-05, "loss": 1.1902, "step": 1584 }, { "epoch": 0.11780007432181346, "grad_norm": 3.185319158457428, "learning_rate": 1.999549776572174e-05, "loss": 1.1355, "step": 1585 }, { "epoch": 0.1178743961352657, "grad_norm": 2.805318811201278, "learning_rate": 1.9995473659234147e-05, "loss": 0.9513, "step": 1586 }, { "epoch": 0.11794871794871795, "grad_norm": 2.7271902762919598, "learning_rate": 1.9995449488396218e-05, "loss": 0.8503, "step": 1587 }, { "epoch": 0.1180230397621702, "grad_norm": 2.4506613948408624, "learning_rate": 1.9995425253208106e-05, "loss": 0.9627, "step": 1588 }, { "epoch": 0.11809736157562245, "grad_norm": 2.9215449577023933, "learning_rate": 1.999540095366996e-05, "loss": 1.1074, "step": 1589 }, { "epoch": 0.11817168338907469, "grad_norm": 2.6080060887118486, "learning_rate": 1.9995376589781942e-05, "loss": 0.8959, "step": 1590 }, { "epoch": 0.11824600520252694, "grad_norm": 2.5281821734899665, "learning_rate": 1.999535216154421e-05, "loss": 0.8464, "step": 1591 }, { "epoch": 0.1183203270159792, "grad_norm": 3.1211237304219064, "learning_rate": 1.999532766895692e-05, "loss": 1.2141, "step": 1592 }, { "epoch": 0.11839464882943143, "grad_norm": 2.4294012073312325, "learning_rate": 1.9995303112020226e-05, "loss": 0.7579, "step": 1593 }, { "epoch": 0.11846897064288368, "grad_norm": 2.4702323403110586, "learning_rate": 1.9995278490734292e-05, "loss": 1.0526, "step": 1594 }, { "epoch": 0.11854329245633594, "grad_norm": 2.586539193107247, "learning_rate": 1.999525380509927e-05, "loss": 1.0319, "step": 1595 }, { "epoch": 0.11861761426978819, "grad_norm": 2.0151710923413853, "learning_rate": 1.9995229055115327e-05, "loss": 0.5783, "step": 1596 }, { "epoch": 0.11869193608324043, "grad_norm": 2.4259627007062647, "learning_rate": 1.9995204240782616e-05, "loss": 1.033, "step": 1597 }, { "epoch": 0.11876625789669268, "grad_norm": 3.8520260723233166, "learning_rate": 1.99951793621013e-05, "loss": 1.0782, "step": 1598 }, { "epoch": 0.11884057971014493, "grad_norm": 2.3105700633969075, "learning_rate": 1.9995154419071535e-05, "loss": 0.8714, "step": 1599 }, { "epoch": 0.11891490152359717, "grad_norm": 3.404365630557174, "learning_rate": 1.9995129411693486e-05, "loss": 1.0872, "step": 1600 }, { "epoch": 0.11898922333704942, "grad_norm": 3.0459464288479468, "learning_rate": 1.9995104339967314e-05, "loss": 1.0182, "step": 1601 }, { "epoch": 0.11906354515050167, "grad_norm": 3.1727302434522664, "learning_rate": 1.9995079203893177e-05, "loss": 0.6419, "step": 1602 }, { "epoch": 0.11913786696395393, "grad_norm": 2.3402792925769362, "learning_rate": 1.9995054003471238e-05, "loss": 0.7945, "step": 1603 }, { "epoch": 0.11921218877740616, "grad_norm": 2.517936906656007, "learning_rate": 1.9995028738701662e-05, "loss": 0.9085, "step": 1604 }, { "epoch": 0.11928651059085842, "grad_norm": 3.1398774175529542, "learning_rate": 1.999500340958461e-05, "loss": 1.2531, "step": 1605 }, { "epoch": 0.11936083240431067, "grad_norm": 3.2517322251522573, "learning_rate": 1.9994978016120244e-05, "loss": 1.0804, "step": 1606 }, { "epoch": 0.1194351542177629, "grad_norm": 1.9784910948117105, "learning_rate": 1.9994952558308726e-05, "loss": 0.7092, "step": 1607 }, { "epoch": 0.11950947603121516, "grad_norm": 2.7644328894707533, "learning_rate": 1.9994927036150226e-05, "loss": 0.9345, "step": 1608 }, { "epoch": 0.11958379784466741, "grad_norm": 2.5852403931242596, "learning_rate": 1.9994901449644902e-05, "loss": 0.9822, "step": 1609 }, { "epoch": 0.11965811965811966, "grad_norm": 2.6952473243273336, "learning_rate": 1.999487579879292e-05, "loss": 1.0982, "step": 1610 }, { "epoch": 0.1197324414715719, "grad_norm": 2.5735643300019815, "learning_rate": 1.9994850083594444e-05, "loss": 0.7458, "step": 1611 }, { "epoch": 0.11980676328502415, "grad_norm": 3.0212505223519104, "learning_rate": 1.999482430404965e-05, "loss": 0.9933, "step": 1612 }, { "epoch": 0.1198810850984764, "grad_norm": 2.516612601293048, "learning_rate": 1.9994798460158687e-05, "loss": 0.807, "step": 1613 }, { "epoch": 0.11995540691192864, "grad_norm": 2.5130155322880485, "learning_rate": 1.9994772551921736e-05, "loss": 1.0171, "step": 1614 }, { "epoch": 0.1200297287253809, "grad_norm": 4.5699650805449625, "learning_rate": 1.9994746579338954e-05, "loss": 1.0594, "step": 1615 }, { "epoch": 0.12010405053883315, "grad_norm": 2.848628377376723, "learning_rate": 1.9994720542410512e-05, "loss": 1.0638, "step": 1616 }, { "epoch": 0.1201783723522854, "grad_norm": 2.6020148149850457, "learning_rate": 1.9994694441136582e-05, "loss": 0.9322, "step": 1617 }, { "epoch": 0.12025269416573764, "grad_norm": 2.561410851873235, "learning_rate": 1.9994668275517327e-05, "loss": 1.1049, "step": 1618 }, { "epoch": 0.12032701597918989, "grad_norm": 2.492144042195121, "learning_rate": 1.999464204555291e-05, "loss": 0.9446, "step": 1619 }, { "epoch": 0.12040133779264214, "grad_norm": 2.59389804126774, "learning_rate": 1.9994615751243514e-05, "loss": 1.0779, "step": 1620 }, { "epoch": 0.12047565960609438, "grad_norm": 2.281392070390913, "learning_rate": 1.9994589392589295e-05, "loss": 0.9018, "step": 1621 }, { "epoch": 0.12054998141954663, "grad_norm": 2.435478574206549, "learning_rate": 1.9994562969590432e-05, "loss": 1.1252, "step": 1622 }, { "epoch": 0.12062430323299889, "grad_norm": 2.4107721746379163, "learning_rate": 1.999453648224709e-05, "loss": 0.9719, "step": 1623 }, { "epoch": 0.12069862504645114, "grad_norm": 2.802117957304048, "learning_rate": 1.9994509930559436e-05, "loss": 1.0581, "step": 1624 }, { "epoch": 0.12077294685990338, "grad_norm": 2.773883721650724, "learning_rate": 1.9994483314527653e-05, "loss": 0.921, "step": 1625 }, { "epoch": 0.12084726867335563, "grad_norm": 3.0826796829917353, "learning_rate": 1.9994456634151903e-05, "loss": 0.9577, "step": 1626 }, { "epoch": 0.12092159048680788, "grad_norm": 2.1745534616957394, "learning_rate": 1.9994429889432356e-05, "loss": 0.7957, "step": 1627 }, { "epoch": 0.12099591230026012, "grad_norm": 2.4244473541579454, "learning_rate": 1.999440308036919e-05, "loss": 0.8619, "step": 1628 }, { "epoch": 0.12107023411371237, "grad_norm": 2.9726806072695124, "learning_rate": 1.9994376206962577e-05, "loss": 0.859, "step": 1629 }, { "epoch": 0.12114455592716462, "grad_norm": 2.9488884503444814, "learning_rate": 1.999434926921269e-05, "loss": 1.1457, "step": 1630 }, { "epoch": 0.12121887774061688, "grad_norm": 2.4374140135301934, "learning_rate": 1.9994322267119693e-05, "loss": 0.7484, "step": 1631 }, { "epoch": 0.12129319955406911, "grad_norm": 2.386970865196459, "learning_rate": 1.9994295200683775e-05, "loss": 0.799, "step": 1632 }, { "epoch": 0.12136752136752137, "grad_norm": 3.0171086263562996, "learning_rate": 1.9994268069905102e-05, "loss": 1.0435, "step": 1633 }, { "epoch": 0.12144184318097362, "grad_norm": 4.63606971471562, "learning_rate": 1.999424087478385e-05, "loss": 1.0544, "step": 1634 }, { "epoch": 0.12151616499442586, "grad_norm": 3.0295407354135886, "learning_rate": 1.999421361532019e-05, "loss": 1.0474, "step": 1635 }, { "epoch": 0.12159048680787811, "grad_norm": 2.591298232089964, "learning_rate": 1.9994186291514304e-05, "loss": 0.8916, "step": 1636 }, { "epoch": 0.12166480862133036, "grad_norm": 2.354946787996241, "learning_rate": 1.999415890336637e-05, "loss": 0.8371, "step": 1637 }, { "epoch": 0.12173913043478261, "grad_norm": 2.4437205631057997, "learning_rate": 1.9994131450876553e-05, "loss": 0.7559, "step": 1638 }, { "epoch": 0.12181345224823485, "grad_norm": 2.5129785877312494, "learning_rate": 1.999410393404504e-05, "loss": 0.8395, "step": 1639 }, { "epoch": 0.1218877740616871, "grad_norm": 2.349037691571159, "learning_rate": 1.9994076352872e-05, "loss": 0.8969, "step": 1640 }, { "epoch": 0.12196209587513936, "grad_norm": 2.3288015230148753, "learning_rate": 1.9994048707357617e-05, "loss": 0.8428, "step": 1641 }, { "epoch": 0.12203641768859161, "grad_norm": 2.918530994944077, "learning_rate": 1.999402099750207e-05, "loss": 1.1075, "step": 1642 }, { "epoch": 0.12211073950204385, "grad_norm": 2.4327167992333627, "learning_rate": 1.999399322330553e-05, "loss": 0.9622, "step": 1643 }, { "epoch": 0.1221850613154961, "grad_norm": 2.887206843503859, "learning_rate": 1.9993965384768182e-05, "loss": 0.7193, "step": 1644 }, { "epoch": 0.12225938312894835, "grad_norm": 2.4889139301968117, "learning_rate": 1.9993937481890206e-05, "loss": 0.8022, "step": 1645 }, { "epoch": 0.12233370494240059, "grad_norm": 2.560883830125784, "learning_rate": 1.9993909514671777e-05, "loss": 0.8961, "step": 1646 }, { "epoch": 0.12240802675585284, "grad_norm": 2.324625380192152, "learning_rate": 1.9993881483113075e-05, "loss": 0.7983, "step": 1647 }, { "epoch": 0.1224823485693051, "grad_norm": 2.6247293742796116, "learning_rate": 1.9993853387214285e-05, "loss": 0.9146, "step": 1648 }, { "epoch": 0.12255667038275735, "grad_norm": 2.825804969084089, "learning_rate": 1.9993825226975583e-05, "loss": 0.9293, "step": 1649 }, { "epoch": 0.12263099219620958, "grad_norm": 2.0921686608813035, "learning_rate": 1.9993797002397154e-05, "loss": 0.764, "step": 1650 }, { "epoch": 0.12270531400966184, "grad_norm": 2.3117521387473907, "learning_rate": 1.9993768713479183e-05, "loss": 1.025, "step": 1651 }, { "epoch": 0.12277963582311409, "grad_norm": 2.5936798205231772, "learning_rate": 1.999374036022184e-05, "loss": 0.7616, "step": 1652 }, { "epoch": 0.12285395763656633, "grad_norm": 2.5470676773530263, "learning_rate": 1.999371194262532e-05, "loss": 0.796, "step": 1653 }, { "epoch": 0.12292827945001858, "grad_norm": 2.733892735172918, "learning_rate": 1.99936834606898e-05, "loss": 0.9935, "step": 1654 }, { "epoch": 0.12300260126347083, "grad_norm": 2.5669964396339964, "learning_rate": 1.9993654914415464e-05, "loss": 0.9709, "step": 1655 }, { "epoch": 0.12307692307692308, "grad_norm": 2.8393801338695503, "learning_rate": 1.9993626303802497e-05, "loss": 1.0638, "step": 1656 }, { "epoch": 0.12315124489037532, "grad_norm": 2.5113997123934393, "learning_rate": 1.999359762885108e-05, "loss": 1.0426, "step": 1657 }, { "epoch": 0.12322556670382757, "grad_norm": 3.0637862005283143, "learning_rate": 1.99935688895614e-05, "loss": 0.8852, "step": 1658 }, { "epoch": 0.12329988851727983, "grad_norm": 6.88555577094336, "learning_rate": 1.9993540085933648e-05, "loss": 1.0526, "step": 1659 }, { "epoch": 0.12337421033073206, "grad_norm": 2.1993974512454195, "learning_rate": 1.9993511217968e-05, "loss": 0.8587, "step": 1660 }, { "epoch": 0.12344853214418432, "grad_norm": 2.9074734949999512, "learning_rate": 1.999348228566464e-05, "loss": 0.8392, "step": 1661 }, { "epoch": 0.12352285395763657, "grad_norm": 2.9062758275666587, "learning_rate": 1.9993453289023765e-05, "loss": 1.0252, "step": 1662 }, { "epoch": 0.12359717577108882, "grad_norm": 2.3309804418808326, "learning_rate": 1.9993424228045555e-05, "loss": 0.8442, "step": 1663 }, { "epoch": 0.12367149758454106, "grad_norm": 2.373458388014964, "learning_rate": 1.99933951027302e-05, "loss": 0.9481, "step": 1664 }, { "epoch": 0.12374581939799331, "grad_norm": 2.6338844272688533, "learning_rate": 1.9993365913077883e-05, "loss": 1.0588, "step": 1665 }, { "epoch": 0.12382014121144556, "grad_norm": 2.6113577956462835, "learning_rate": 1.9993336659088797e-05, "loss": 0.8789, "step": 1666 }, { "epoch": 0.1238944630248978, "grad_norm": 2.8187600378145805, "learning_rate": 1.999330734076313e-05, "loss": 0.7665, "step": 1667 }, { "epoch": 0.12396878483835005, "grad_norm": 2.716870984089794, "learning_rate": 1.999327795810106e-05, "loss": 0.8817, "step": 1668 }, { "epoch": 0.1240431066518023, "grad_norm": 2.94203560891853, "learning_rate": 1.9993248511102796e-05, "loss": 0.896, "step": 1669 }, { "epoch": 0.12411742846525456, "grad_norm": 2.418626309099307, "learning_rate": 1.999321899976851e-05, "loss": 0.9534, "step": 1670 }, { "epoch": 0.1241917502787068, "grad_norm": 2.608872456838268, "learning_rate": 1.99931894240984e-05, "loss": 1.0087, "step": 1671 }, { "epoch": 0.12426607209215905, "grad_norm": 3.0786528454918134, "learning_rate": 1.9993159784092654e-05, "loss": 1.0677, "step": 1672 }, { "epoch": 0.1243403939056113, "grad_norm": 2.763760858536128, "learning_rate": 1.9993130079751468e-05, "loss": 0.9259, "step": 1673 }, { "epoch": 0.12441471571906354, "grad_norm": 2.445584638868484, "learning_rate": 1.9993100311075025e-05, "loss": 0.9145, "step": 1674 }, { "epoch": 0.12448903753251579, "grad_norm": 2.381446517517468, "learning_rate": 1.9993070478063523e-05, "loss": 0.8012, "step": 1675 }, { "epoch": 0.12456335934596804, "grad_norm": 2.53574554648628, "learning_rate": 1.999304058071715e-05, "loss": 0.7966, "step": 1676 }, { "epoch": 0.1246376811594203, "grad_norm": 2.2576592695999724, "learning_rate": 1.99930106190361e-05, "loss": 0.921, "step": 1677 }, { "epoch": 0.12471200297287253, "grad_norm": 2.1827251858023744, "learning_rate": 1.999298059302057e-05, "loss": 0.8198, "step": 1678 }, { "epoch": 0.12478632478632479, "grad_norm": 3.0501690115632205, "learning_rate": 1.9992950502670743e-05, "loss": 1.025, "step": 1679 }, { "epoch": 0.12486064659977704, "grad_norm": 2.8161795199147304, "learning_rate": 1.9992920347986825e-05, "loss": 0.9687, "step": 1680 }, { "epoch": 0.12493496841322928, "grad_norm": 2.4022884479550815, "learning_rate": 1.9992890128969e-05, "loss": 0.9158, "step": 1681 }, { "epoch": 0.12500929022668153, "grad_norm": 3.9289287595434437, "learning_rate": 1.9992859845617468e-05, "loss": 1.0201, "step": 1682 }, { "epoch": 0.12508361204013377, "grad_norm": 2.805057873971704, "learning_rate": 1.9992829497932423e-05, "loss": 0.8628, "step": 1683 }, { "epoch": 0.12515793385358603, "grad_norm": 2.770360902928394, "learning_rate": 1.9992799085914064e-05, "loss": 0.9404, "step": 1684 }, { "epoch": 0.12523225566703827, "grad_norm": 2.6655570521707403, "learning_rate": 1.999276860956258e-05, "loss": 1.0647, "step": 1685 }, { "epoch": 0.1253065774804905, "grad_norm": 2.4736949579751566, "learning_rate": 1.9992738068878168e-05, "loss": 0.9571, "step": 1686 }, { "epoch": 0.12538089929394278, "grad_norm": 2.64990345363724, "learning_rate": 1.999270746386103e-05, "loss": 0.9443, "step": 1687 }, { "epoch": 0.12545522110739502, "grad_norm": 2.489981198746955, "learning_rate": 1.999267679451136e-05, "loss": 0.9876, "step": 1688 }, { "epoch": 0.12552954292084728, "grad_norm": 3.3999813205214693, "learning_rate": 1.9992646060829354e-05, "loss": 0.9979, "step": 1689 }, { "epoch": 0.12560386473429952, "grad_norm": 3.7292644231932206, "learning_rate": 1.9992615262815208e-05, "loss": 0.7919, "step": 1690 }, { "epoch": 0.12567818654775176, "grad_norm": 6.929114194123864, "learning_rate": 1.999258440046913e-05, "loss": 0.6966, "step": 1691 }, { "epoch": 0.12575250836120402, "grad_norm": 3.379648636585466, "learning_rate": 1.9992553473791308e-05, "loss": 0.9267, "step": 1692 }, { "epoch": 0.12582683017465626, "grad_norm": 2.5764377055430407, "learning_rate": 1.9992522482781945e-05, "loss": 0.8487, "step": 1693 }, { "epoch": 0.1259011519881085, "grad_norm": 3.0689185333538918, "learning_rate": 1.9992491427441242e-05, "loss": 1.1489, "step": 1694 }, { "epoch": 0.12597547380156077, "grad_norm": 4.215951715793102, "learning_rate": 1.9992460307769397e-05, "loss": 1.0022, "step": 1695 }, { "epoch": 0.126049795615013, "grad_norm": 2.7588846323282357, "learning_rate": 1.9992429123766612e-05, "loss": 0.7305, "step": 1696 }, { "epoch": 0.12612411742846524, "grad_norm": 2.395809290215633, "learning_rate": 1.9992397875433085e-05, "loss": 1.0987, "step": 1697 }, { "epoch": 0.1261984392419175, "grad_norm": 2.741105319680744, "learning_rate": 1.999236656276902e-05, "loss": 1.0041, "step": 1698 }, { "epoch": 0.12627276105536975, "grad_norm": 3.0202983972670863, "learning_rate": 1.9992335185774618e-05, "loss": 0.9664, "step": 1699 }, { "epoch": 0.12634708286882199, "grad_norm": 2.7291032362380347, "learning_rate": 1.9992303744450078e-05, "loss": 0.7784, "step": 1700 }, { "epoch": 0.12642140468227425, "grad_norm": 2.9431723318515726, "learning_rate": 1.9992272238795608e-05, "loss": 0.854, "step": 1701 }, { "epoch": 0.1264957264957265, "grad_norm": 2.888348445586527, "learning_rate": 1.9992240668811405e-05, "loss": 0.8119, "step": 1702 }, { "epoch": 0.12657004830917876, "grad_norm": 3.0515384259774097, "learning_rate": 1.999220903449768e-05, "loss": 0.9375, "step": 1703 }, { "epoch": 0.126644370122631, "grad_norm": 2.8435900777180607, "learning_rate": 1.9992177335854627e-05, "loss": 1.1708, "step": 1704 }, { "epoch": 0.12671869193608323, "grad_norm": 9.034887582670768, "learning_rate": 1.9992145572882458e-05, "loss": 0.8589, "step": 1705 }, { "epoch": 0.1267930137495355, "grad_norm": 30.872438203145666, "learning_rate": 1.999211374558137e-05, "loss": 1.0505, "step": 1706 }, { "epoch": 0.12686733556298774, "grad_norm": 2.6988806511142127, "learning_rate": 1.9992081853951576e-05, "loss": 1.0885, "step": 1707 }, { "epoch": 0.12694165737643998, "grad_norm": 3.04623501222839, "learning_rate": 1.9992049897993277e-05, "loss": 1.0006, "step": 1708 }, { "epoch": 0.12701597918989224, "grad_norm": 2.9087320990500007, "learning_rate": 1.9992017877706678e-05, "loss": 0.7999, "step": 1709 }, { "epoch": 0.12709030100334448, "grad_norm": 2.3134720572141716, "learning_rate": 1.9991985793091985e-05, "loss": 0.922, "step": 1710 }, { "epoch": 0.12716462281679672, "grad_norm": 2.667373431728717, "learning_rate": 1.999195364414941e-05, "loss": 0.9526, "step": 1711 }, { "epoch": 0.12723894463024898, "grad_norm": 2.5628094255928544, "learning_rate": 1.9991921430879153e-05, "loss": 0.9382, "step": 1712 }, { "epoch": 0.12731326644370122, "grad_norm": 3.2914093423560344, "learning_rate": 1.9991889153281424e-05, "loss": 0.9529, "step": 1713 }, { "epoch": 0.12738758825715346, "grad_norm": 3.2059698713128273, "learning_rate": 1.999185681135643e-05, "loss": 1.0332, "step": 1714 }, { "epoch": 0.12746191007060573, "grad_norm": 2.8685660593254014, "learning_rate": 1.9991824405104382e-05, "loss": 0.9325, "step": 1715 }, { "epoch": 0.12753623188405797, "grad_norm": 2.507984580092174, "learning_rate": 1.999179193452549e-05, "loss": 0.9914, "step": 1716 }, { "epoch": 0.12761055369751023, "grad_norm": 2.857757951697553, "learning_rate": 1.9991759399619958e-05, "loss": 0.8512, "step": 1717 }, { "epoch": 0.12768487551096247, "grad_norm": 2.8143835595137974, "learning_rate": 1.9991726800387993e-05, "loss": 1.0079, "step": 1718 }, { "epoch": 0.1277591973244147, "grad_norm": 2.9896342451553974, "learning_rate": 1.9991694136829815e-05, "loss": 0.9704, "step": 1719 }, { "epoch": 0.12783351913786697, "grad_norm": 2.2191708165515585, "learning_rate": 1.9991661408945627e-05, "loss": 0.7212, "step": 1720 }, { "epoch": 0.1279078409513192, "grad_norm": 2.800449936696587, "learning_rate": 1.999162861673564e-05, "loss": 0.8714, "step": 1721 }, { "epoch": 0.12798216276477145, "grad_norm": 2.967009479227521, "learning_rate": 1.9991595760200066e-05, "loss": 0.8896, "step": 1722 }, { "epoch": 0.12805648457822372, "grad_norm": 5.3406829039813015, "learning_rate": 1.999156283933912e-05, "loss": 0.9942, "step": 1723 }, { "epoch": 0.12813080639167596, "grad_norm": 3.027033267475339, "learning_rate": 1.9991529854153007e-05, "loss": 1.0425, "step": 1724 }, { "epoch": 0.1282051282051282, "grad_norm": 2.6919581293108967, "learning_rate": 1.9991496804641946e-05, "loss": 1.0718, "step": 1725 }, { "epoch": 0.12827945001858046, "grad_norm": 3.5617026681016233, "learning_rate": 1.9991463690806145e-05, "loss": 1.0635, "step": 1726 }, { "epoch": 0.1283537718320327, "grad_norm": 2.584667371077603, "learning_rate": 1.9991430512645822e-05, "loss": 0.885, "step": 1727 }, { "epoch": 0.12842809364548494, "grad_norm": 3.0333505169008697, "learning_rate": 1.9991397270161187e-05, "loss": 0.9915, "step": 1728 }, { "epoch": 0.1285024154589372, "grad_norm": 4.309611867175332, "learning_rate": 1.9991363963352453e-05, "loss": 0.8848, "step": 1729 }, { "epoch": 0.12857673727238944, "grad_norm": 2.9953957779625666, "learning_rate": 1.9991330592219834e-05, "loss": 0.7516, "step": 1730 }, { "epoch": 0.1286510590858417, "grad_norm": 2.7621318806311073, "learning_rate": 1.999129715676355e-05, "loss": 0.9564, "step": 1731 }, { "epoch": 0.12872538089929395, "grad_norm": 2.609957209476538, "learning_rate": 1.9991263656983814e-05, "loss": 0.8741, "step": 1732 }, { "epoch": 0.12879970271274618, "grad_norm": 3.0095101794713837, "learning_rate": 1.9991230092880844e-05, "loss": 0.8644, "step": 1733 }, { "epoch": 0.12887402452619845, "grad_norm": 2.5242475416229886, "learning_rate": 1.9991196464454848e-05, "loss": 0.9332, "step": 1734 }, { "epoch": 0.1289483463396507, "grad_norm": 2.258611467223512, "learning_rate": 1.999116277170605e-05, "loss": 0.8694, "step": 1735 }, { "epoch": 0.12902266815310293, "grad_norm": 3.032092043800648, "learning_rate": 1.999112901463466e-05, "loss": 0.9618, "step": 1736 }, { "epoch": 0.1290969899665552, "grad_norm": 2.740863772230298, "learning_rate": 1.9991095193240903e-05, "loss": 0.9768, "step": 1737 }, { "epoch": 0.12917131178000743, "grad_norm": 3.0468486454736494, "learning_rate": 1.9991061307524994e-05, "loss": 0.9667, "step": 1738 }, { "epoch": 0.12924563359345967, "grad_norm": 2.7374909454467544, "learning_rate": 1.999102735748715e-05, "loss": 1.0851, "step": 1739 }, { "epoch": 0.12931995540691194, "grad_norm": 2.142254579889065, "learning_rate": 1.9990993343127592e-05, "loss": 0.9305, "step": 1740 }, { "epoch": 0.12939427722036417, "grad_norm": 2.349675651661148, "learning_rate": 1.9990959264446536e-05, "loss": 0.8969, "step": 1741 }, { "epoch": 0.1294685990338164, "grad_norm": 2.753746323197815, "learning_rate": 1.9990925121444203e-05, "loss": 0.9772, "step": 1742 }, { "epoch": 0.12954292084726868, "grad_norm": 2.1907392702829886, "learning_rate": 1.9990890914120814e-05, "loss": 0.9475, "step": 1743 }, { "epoch": 0.12961724266072092, "grad_norm": 3.1595289166031053, "learning_rate": 1.9990856642476583e-05, "loss": 0.9013, "step": 1744 }, { "epoch": 0.12969156447417318, "grad_norm": 2.643864473366485, "learning_rate": 1.999082230651174e-05, "loss": 0.9784, "step": 1745 }, { "epoch": 0.12976588628762542, "grad_norm": 2.882519531916645, "learning_rate": 1.99907879062265e-05, "loss": 1.007, "step": 1746 }, { "epoch": 0.12984020810107766, "grad_norm": 2.3463853869900517, "learning_rate": 1.9990753441621084e-05, "loss": 0.8993, "step": 1747 }, { "epoch": 0.12991452991452992, "grad_norm": 2.3005240372124303, "learning_rate": 1.9990718912695718e-05, "loss": 0.9196, "step": 1748 }, { "epoch": 0.12998885172798216, "grad_norm": 3.356260367292613, "learning_rate": 1.999068431945062e-05, "loss": 1.1576, "step": 1749 }, { "epoch": 0.1300631735414344, "grad_norm": 2.781483739391748, "learning_rate": 1.9990649661886018e-05, "loss": 0.8735, "step": 1750 }, { "epoch": 0.13013749535488667, "grad_norm": 3.5311281278774977, "learning_rate": 1.9990614940002128e-05, "loss": 1.1659, "step": 1751 }, { "epoch": 0.1302118171683389, "grad_norm": 2.733989065412256, "learning_rate": 1.999058015379918e-05, "loss": 0.9447, "step": 1752 }, { "epoch": 0.13028613898179114, "grad_norm": 2.3719241553604697, "learning_rate": 1.9990545303277393e-05, "loss": 0.7419, "step": 1753 }, { "epoch": 0.1303604607952434, "grad_norm": 2.440371342465496, "learning_rate": 1.9990510388436996e-05, "loss": 0.9898, "step": 1754 }, { "epoch": 0.13043478260869565, "grad_norm": 2.3399413405780867, "learning_rate": 1.9990475409278212e-05, "loss": 0.8388, "step": 1755 }, { "epoch": 0.1305091044221479, "grad_norm": 2.461818015747163, "learning_rate": 1.999044036580126e-05, "loss": 0.8095, "step": 1756 }, { "epoch": 0.13058342623560015, "grad_norm": 2.4273548207676416, "learning_rate": 1.999040525800638e-05, "loss": 0.7697, "step": 1757 }, { "epoch": 0.1306577480490524, "grad_norm": 2.62826571037615, "learning_rate": 1.9990370085893785e-05, "loss": 0.8262, "step": 1758 }, { "epoch": 0.13073206986250466, "grad_norm": 2.443257046338752, "learning_rate": 1.9990334849463705e-05, "loss": 1.035, "step": 1759 }, { "epoch": 0.1308063916759569, "grad_norm": 2.5295068576924313, "learning_rate": 1.9990299548716367e-05, "loss": 0.9212, "step": 1760 }, { "epoch": 0.13088071348940913, "grad_norm": 2.8728028538559274, "learning_rate": 1.9990264183652002e-05, "loss": 1.0027, "step": 1761 }, { "epoch": 0.1309550353028614, "grad_norm": 2.262276723732461, "learning_rate": 1.9990228754270832e-05, "loss": 0.847, "step": 1762 }, { "epoch": 0.13102935711631364, "grad_norm": 16.547567200895795, "learning_rate": 1.999019326057309e-05, "loss": 0.9748, "step": 1763 }, { "epoch": 0.13110367892976588, "grad_norm": 2.731209286403231, "learning_rate": 1.9990157702559002e-05, "loss": 1.1824, "step": 1764 }, { "epoch": 0.13117800074321814, "grad_norm": 2.4260091581870684, "learning_rate": 1.9990122080228793e-05, "loss": 0.8635, "step": 1765 }, { "epoch": 0.13125232255667038, "grad_norm": 2.7969872864385095, "learning_rate": 1.99900863935827e-05, "loss": 0.9638, "step": 1766 }, { "epoch": 0.13132664437012262, "grad_norm": 3.3986097451665516, "learning_rate": 1.999005064262095e-05, "loss": 0.958, "step": 1767 }, { "epoch": 0.13140096618357489, "grad_norm": 7.718633432989514, "learning_rate": 1.999001482734377e-05, "loss": 0.8996, "step": 1768 }, { "epoch": 0.13147528799702712, "grad_norm": 2.9873188759484735, "learning_rate": 1.9989978947751392e-05, "loss": 1.0479, "step": 1769 }, { "epoch": 0.13154960981047936, "grad_norm": 2.569205360496457, "learning_rate": 1.9989943003844046e-05, "loss": 0.8974, "step": 1770 }, { "epoch": 0.13162393162393163, "grad_norm": 2.7261228071662744, "learning_rate": 1.9989906995621974e-05, "loss": 0.9418, "step": 1771 }, { "epoch": 0.13169825343738387, "grad_norm": 4.615536339892498, "learning_rate": 1.9989870923085394e-05, "loss": 1.1894, "step": 1772 }, { "epoch": 0.13177257525083613, "grad_norm": 2.7588390556480147, "learning_rate": 1.9989834786234538e-05, "loss": 1.0289, "step": 1773 }, { "epoch": 0.13184689706428837, "grad_norm": 2.7510513506238503, "learning_rate": 1.9989798585069652e-05, "loss": 0.9979, "step": 1774 }, { "epoch": 0.1319212188777406, "grad_norm": 3.2007341438646875, "learning_rate": 1.9989762319590958e-05, "loss": 0.9495, "step": 1775 }, { "epoch": 0.13199554069119288, "grad_norm": 2.9094047700761365, "learning_rate": 1.998972598979869e-05, "loss": 1.144, "step": 1776 }, { "epoch": 0.1320698625046451, "grad_norm": 3.9232785440801443, "learning_rate": 1.9989689595693087e-05, "loss": 1.0409, "step": 1777 }, { "epoch": 0.13214418431809735, "grad_norm": 7.165508990006684, "learning_rate": 1.9989653137274382e-05, "loss": 1.1117, "step": 1778 }, { "epoch": 0.13221850613154962, "grad_norm": 3.19831519681471, "learning_rate": 1.998961661454281e-05, "loss": 0.8397, "step": 1779 }, { "epoch": 0.13229282794500186, "grad_norm": 2.8435285870284814, "learning_rate": 1.9989580027498597e-05, "loss": 1.0923, "step": 1780 }, { "epoch": 0.1323671497584541, "grad_norm": 2.665039583910207, "learning_rate": 1.9989543376141992e-05, "loss": 0.9871, "step": 1781 }, { "epoch": 0.13244147157190636, "grad_norm": 2.9422079604945504, "learning_rate": 1.9989506660473224e-05, "loss": 0.8932, "step": 1782 }, { "epoch": 0.1325157933853586, "grad_norm": 2.895789451818053, "learning_rate": 1.9989469880492528e-05, "loss": 0.9715, "step": 1783 }, { "epoch": 0.13259011519881084, "grad_norm": 2.3848237228411717, "learning_rate": 1.9989433036200148e-05, "loss": 0.9335, "step": 1784 }, { "epoch": 0.1326644370122631, "grad_norm": 2.8194232947136086, "learning_rate": 1.9989396127596316e-05, "loss": 0.9603, "step": 1785 }, { "epoch": 0.13273875882571534, "grad_norm": 2.661608171156234, "learning_rate": 1.9989359154681267e-05, "loss": 0.9441, "step": 1786 }, { "epoch": 0.1328130806391676, "grad_norm": 2.666655141693615, "learning_rate": 1.9989322117455242e-05, "loss": 0.9679, "step": 1787 }, { "epoch": 0.13288740245261985, "grad_norm": 2.8499315273435695, "learning_rate": 1.998928501591848e-05, "loss": 1.0027, "step": 1788 }, { "epoch": 0.13296172426607208, "grad_norm": 3.250583256196365, "learning_rate": 1.998924785007122e-05, "loss": 0.9454, "step": 1789 }, { "epoch": 0.13303604607952435, "grad_norm": 2.4945833626779224, "learning_rate": 1.9989210619913703e-05, "loss": 1.0152, "step": 1790 }, { "epoch": 0.1331103678929766, "grad_norm": 2.5879852161719286, "learning_rate": 1.998917332544616e-05, "loss": 0.9039, "step": 1791 }, { "epoch": 0.13318468970642883, "grad_norm": 2.9890035657756044, "learning_rate": 1.9989135966668843e-05, "loss": 0.867, "step": 1792 }, { "epoch": 0.1332590115198811, "grad_norm": 3.2413816157871427, "learning_rate": 1.9989098543581987e-05, "loss": 0.9463, "step": 1793 }, { "epoch": 0.13333333333333333, "grad_norm": 2.1069999115689373, "learning_rate": 1.998906105618583e-05, "loss": 0.9146, "step": 1794 }, { "epoch": 0.13340765514678557, "grad_norm": 2.9671945060024774, "learning_rate": 1.9989023504480617e-05, "loss": 1.0096, "step": 1795 }, { "epoch": 0.13348197696023784, "grad_norm": 2.768400881200326, "learning_rate": 1.998898588846659e-05, "loss": 1.0532, "step": 1796 }, { "epoch": 0.13355629877369007, "grad_norm": 2.3798556832455855, "learning_rate": 1.9988948208143987e-05, "loss": 0.8182, "step": 1797 }, { "epoch": 0.1336306205871423, "grad_norm": 2.104503784787865, "learning_rate": 1.998891046351306e-05, "loss": 0.9986, "step": 1798 }, { "epoch": 0.13370494240059458, "grad_norm": 2.9369391123947763, "learning_rate": 1.9988872654574042e-05, "loss": 0.8353, "step": 1799 }, { "epoch": 0.13377926421404682, "grad_norm": 2.5248211215989147, "learning_rate": 1.998883478132718e-05, "loss": 0.8726, "step": 1800 }, { "epoch": 0.13385358602749908, "grad_norm": 2.597655087068619, "learning_rate": 1.9988796843772714e-05, "loss": 0.9903, "step": 1801 }, { "epoch": 0.13392790784095132, "grad_norm": 2.858912563297303, "learning_rate": 1.9988758841910898e-05, "loss": 0.9903, "step": 1802 }, { "epoch": 0.13400222965440356, "grad_norm": 3.0736840815778925, "learning_rate": 1.998872077574197e-05, "loss": 0.848, "step": 1803 }, { "epoch": 0.13407655146785583, "grad_norm": 2.393744167919102, "learning_rate": 1.9988682645266177e-05, "loss": 0.8321, "step": 1804 }, { "epoch": 0.13415087328130806, "grad_norm": 3.315990891812362, "learning_rate": 1.9988644450483757e-05, "loss": 0.8886, "step": 1805 }, { "epoch": 0.1342251950947603, "grad_norm": 3.2021012492457, "learning_rate": 1.998860619139497e-05, "loss": 1.0167, "step": 1806 }, { "epoch": 0.13429951690821257, "grad_norm": 3.417784003978022, "learning_rate": 1.9988567868000052e-05, "loss": 1.026, "step": 1807 }, { "epoch": 0.1343738387216648, "grad_norm": 2.7307989990216375, "learning_rate": 1.998852948029925e-05, "loss": 1.037, "step": 1808 }, { "epoch": 0.13444816053511705, "grad_norm": 2.3148275697740064, "learning_rate": 1.9988491028292817e-05, "loss": 0.9417, "step": 1809 }, { "epoch": 0.1345224823485693, "grad_norm": 3.0650006726119066, "learning_rate": 1.9988452511980993e-05, "loss": 1.0154, "step": 1810 }, { "epoch": 0.13459680416202155, "grad_norm": 2.7045784429151265, "learning_rate": 1.998841393136403e-05, "loss": 1.0898, "step": 1811 }, { "epoch": 0.1346711259754738, "grad_norm": 3.31942071550301, "learning_rate": 1.998837528644218e-05, "loss": 1.1246, "step": 1812 }, { "epoch": 0.13474544778892605, "grad_norm": 2.358404931435446, "learning_rate": 1.998833657721569e-05, "loss": 0.785, "step": 1813 }, { "epoch": 0.1348197696023783, "grad_norm": 2.2821380730535674, "learning_rate": 1.99882978036848e-05, "loss": 0.8731, "step": 1814 }, { "epoch": 0.13489409141583056, "grad_norm": 2.71081164950406, "learning_rate": 1.998825896584977e-05, "loss": 1.2223, "step": 1815 }, { "epoch": 0.1349684132292828, "grad_norm": 2.4990636860836797, "learning_rate": 1.998822006371085e-05, "loss": 0.8571, "step": 1816 }, { "epoch": 0.13504273504273503, "grad_norm": 2.59588453837726, "learning_rate": 1.9988181097268287e-05, "loss": 1.1038, "step": 1817 }, { "epoch": 0.1351170568561873, "grad_norm": 3.681399521768391, "learning_rate": 1.998814206652233e-05, "loss": 0.8189, "step": 1818 }, { "epoch": 0.13519137866963954, "grad_norm": 3.03588075560186, "learning_rate": 1.9988102971473236e-05, "loss": 0.9544, "step": 1819 }, { "epoch": 0.13526570048309178, "grad_norm": 2.711945012822383, "learning_rate": 1.998806381212125e-05, "loss": 0.9746, "step": 1820 }, { "epoch": 0.13534002229654404, "grad_norm": 2.4712207410987452, "learning_rate": 1.9988024588466628e-05, "loss": 1.0831, "step": 1821 }, { "epoch": 0.13541434410999628, "grad_norm": 3.7087621567774685, "learning_rate": 1.9987985300509622e-05, "loss": 0.9581, "step": 1822 }, { "epoch": 0.13548866592344852, "grad_norm": 2.2309826680068556, "learning_rate": 1.9987945948250486e-05, "loss": 0.889, "step": 1823 }, { "epoch": 0.1355629877369008, "grad_norm": 2.1641578496346865, "learning_rate": 1.998790653168947e-05, "loss": 0.8779, "step": 1824 }, { "epoch": 0.13563730955035302, "grad_norm": 2.801601881025812, "learning_rate": 1.9987867050826834e-05, "loss": 1.1037, "step": 1825 }, { "epoch": 0.1357116313638053, "grad_norm": 2.612886430817474, "learning_rate": 1.9987827505662826e-05, "loss": 0.8395, "step": 1826 }, { "epoch": 0.13578595317725753, "grad_norm": 3.863916532516253, "learning_rate": 1.99877878961977e-05, "loss": 0.8302, "step": 1827 }, { "epoch": 0.13586027499070977, "grad_norm": 2.650980797967167, "learning_rate": 1.9987748222431716e-05, "loss": 0.946, "step": 1828 }, { "epoch": 0.13593459680416203, "grad_norm": 2.3217024058713798, "learning_rate": 1.9987708484365125e-05, "loss": 0.9155, "step": 1829 }, { "epoch": 0.13600891861761427, "grad_norm": 2.5826450301888957, "learning_rate": 1.998766868199819e-05, "loss": 1.0662, "step": 1830 }, { "epoch": 0.1360832404310665, "grad_norm": 2.623681474889519, "learning_rate": 1.998762881533116e-05, "loss": 1.0609, "step": 1831 }, { "epoch": 0.13615756224451878, "grad_norm": 2.2786599087133554, "learning_rate": 1.998758888436429e-05, "loss": 0.8381, "step": 1832 }, { "epoch": 0.13623188405797101, "grad_norm": 2.797648929567689, "learning_rate": 1.9987548889097846e-05, "loss": 0.9997, "step": 1833 }, { "epoch": 0.13630620587142325, "grad_norm": 2.5216968274113047, "learning_rate": 1.9987508829532078e-05, "loss": 0.7076, "step": 1834 }, { "epoch": 0.13638052768487552, "grad_norm": 2.5444375603865956, "learning_rate": 1.998746870566725e-05, "loss": 0.9856, "step": 1835 }, { "epoch": 0.13645484949832776, "grad_norm": 2.6401695331685753, "learning_rate": 1.998742851750361e-05, "loss": 0.9274, "step": 1836 }, { "epoch": 0.13652917131178, "grad_norm": 2.760290210985605, "learning_rate": 1.998738826504143e-05, "loss": 0.7826, "step": 1837 }, { "epoch": 0.13660349312523226, "grad_norm": 2.7613136548762545, "learning_rate": 1.998734794828096e-05, "loss": 0.9852, "step": 1838 }, { "epoch": 0.1366778149386845, "grad_norm": 2.7807046896140495, "learning_rate": 1.998730756722246e-05, "loss": 0.7562, "step": 1839 }, { "epoch": 0.13675213675213677, "grad_norm": 2.2498516412758764, "learning_rate": 1.9987267121866195e-05, "loss": 0.8448, "step": 1840 }, { "epoch": 0.136826458565589, "grad_norm": 3.0875422858683925, "learning_rate": 1.9987226612212424e-05, "loss": 1.0604, "step": 1841 }, { "epoch": 0.13690078037904124, "grad_norm": 2.0827162752487838, "learning_rate": 1.9987186038261402e-05, "loss": 0.7479, "step": 1842 }, { "epoch": 0.1369751021924935, "grad_norm": 2.457749330002466, "learning_rate": 1.9987145400013397e-05, "loss": 1.0401, "step": 1843 }, { "epoch": 0.13704942400594575, "grad_norm": 2.7965283156199763, "learning_rate": 1.9987104697468665e-05, "loss": 0.9292, "step": 1844 }, { "epoch": 0.13712374581939799, "grad_norm": 2.327401048695715, "learning_rate": 1.9987063930627475e-05, "loss": 0.7911, "step": 1845 }, { "epoch": 0.13719806763285025, "grad_norm": 2.640550260880073, "learning_rate": 1.9987023099490088e-05, "loss": 1.0794, "step": 1846 }, { "epoch": 0.1372723894463025, "grad_norm": 3.909163319491569, "learning_rate": 1.9986982204056757e-05, "loss": 0.9526, "step": 1847 }, { "epoch": 0.13734671125975473, "grad_norm": 2.6493300343595916, "learning_rate": 1.9986941244327757e-05, "loss": 0.9342, "step": 1848 }, { "epoch": 0.137421033073207, "grad_norm": 2.822617159200003, "learning_rate": 1.9986900220303346e-05, "loss": 1.026, "step": 1849 }, { "epoch": 0.13749535488665923, "grad_norm": 2.437554373620445, "learning_rate": 1.998685913198379e-05, "loss": 0.8287, "step": 1850 }, { "epoch": 0.13756967670011147, "grad_norm": 2.3758589384691713, "learning_rate": 1.9986817979369356e-05, "loss": 1.0557, "step": 1851 }, { "epoch": 0.13764399851356374, "grad_norm": 2.432788605017986, "learning_rate": 1.9986776762460303e-05, "loss": 0.7825, "step": 1852 }, { "epoch": 0.13771832032701598, "grad_norm": 2.2115980563192426, "learning_rate": 1.99867354812569e-05, "loss": 0.7024, "step": 1853 }, { "epoch": 0.13779264214046824, "grad_norm": 3.1096000620761894, "learning_rate": 1.998669413575941e-05, "loss": 0.9569, "step": 1854 }, { "epoch": 0.13786696395392048, "grad_norm": 2.9825288717856835, "learning_rate": 1.9986652725968105e-05, "loss": 0.9001, "step": 1855 }, { "epoch": 0.13794128576737272, "grad_norm": 2.5186688506440778, "learning_rate": 1.9986611251883244e-05, "loss": 1.0416, "step": 1856 }, { "epoch": 0.13801560758082498, "grad_norm": 2.8734036915983103, "learning_rate": 1.9986569713505103e-05, "loss": 1.1221, "step": 1857 }, { "epoch": 0.13808992939427722, "grad_norm": 2.4382000813980187, "learning_rate": 1.9986528110833942e-05, "loss": 0.9775, "step": 1858 }, { "epoch": 0.13816425120772946, "grad_norm": 2.913232591355329, "learning_rate": 1.998648644387003e-05, "loss": 1.0782, "step": 1859 }, { "epoch": 0.13823857302118173, "grad_norm": 2.5296970170932496, "learning_rate": 1.9986444712613636e-05, "loss": 1.0076, "step": 1860 }, { "epoch": 0.13831289483463396, "grad_norm": 2.7762361479470856, "learning_rate": 1.998640291706503e-05, "loss": 1.1154, "step": 1861 }, { "epoch": 0.1383872166480862, "grad_norm": 2.9739266122520696, "learning_rate": 1.998636105722448e-05, "loss": 0.8453, "step": 1862 }, { "epoch": 0.13846153846153847, "grad_norm": 2.668514781184231, "learning_rate": 1.9986319133092258e-05, "loss": 0.9956, "step": 1863 }, { "epoch": 0.1385358602749907, "grad_norm": 2.785181022285728, "learning_rate": 1.9986277144668632e-05, "loss": 0.9957, "step": 1864 }, { "epoch": 0.13861018208844295, "grad_norm": 2.813415072593299, "learning_rate": 1.9986235091953867e-05, "loss": 1.0863, "step": 1865 }, { "epoch": 0.1386845039018952, "grad_norm": 2.515197776845408, "learning_rate": 1.998619297494824e-05, "loss": 0.9248, "step": 1866 }, { "epoch": 0.13875882571534745, "grad_norm": 2.8654008779049382, "learning_rate": 1.9986150793652025e-05, "loss": 1.0004, "step": 1867 }, { "epoch": 0.13883314752879972, "grad_norm": 2.6256413711802375, "learning_rate": 1.9986108548065486e-05, "loss": 0.7842, "step": 1868 }, { "epoch": 0.13890746934225195, "grad_norm": 2.5137503898919125, "learning_rate": 1.9986066238188896e-05, "loss": 1.0394, "step": 1869 }, { "epoch": 0.1389817911557042, "grad_norm": 2.2413322537431553, "learning_rate": 1.9986023864022535e-05, "loss": 0.8251, "step": 1870 }, { "epoch": 0.13905611296915646, "grad_norm": 2.2082115249597982, "learning_rate": 1.998598142556667e-05, "loss": 0.7676, "step": 1871 }, { "epoch": 0.1391304347826087, "grad_norm": 2.6840659889932987, "learning_rate": 1.9985938922821568e-05, "loss": 1.0872, "step": 1872 }, { "epoch": 0.13920475659606094, "grad_norm": 2.2737809008453236, "learning_rate": 1.9985896355787516e-05, "loss": 0.9224, "step": 1873 }, { "epoch": 0.1392790784095132, "grad_norm": 2.086917959129587, "learning_rate": 1.998585372446478e-05, "loss": 0.7396, "step": 1874 }, { "epoch": 0.13935340022296544, "grad_norm": 2.755101532916515, "learning_rate": 1.9985811028853633e-05, "loss": 0.8869, "step": 1875 }, { "epoch": 0.13942772203641768, "grad_norm": 2.731094229370772, "learning_rate": 1.9985768268954357e-05, "loss": 0.8868, "step": 1876 }, { "epoch": 0.13950204384986994, "grad_norm": 2.1703965630428343, "learning_rate": 1.9985725444767218e-05, "loss": 0.6516, "step": 1877 }, { "epoch": 0.13957636566332218, "grad_norm": 3.2958313120575298, "learning_rate": 1.99856825562925e-05, "loss": 1.1231, "step": 1878 }, { "epoch": 0.13965068747677442, "grad_norm": 3.3617620487259936, "learning_rate": 1.9985639603530478e-05, "loss": 1.0118, "step": 1879 }, { "epoch": 0.1397250092902267, "grad_norm": 2.5864289522180206, "learning_rate": 1.9985596586481425e-05, "loss": 1.0318, "step": 1880 }, { "epoch": 0.13979933110367893, "grad_norm": 2.7551842014068946, "learning_rate": 1.9985553505145617e-05, "loss": 0.9642, "step": 1881 }, { "epoch": 0.1398736529171312, "grad_norm": 2.649586251719117, "learning_rate": 1.9985510359523335e-05, "loss": 1.0446, "step": 1882 }, { "epoch": 0.13994797473058343, "grad_norm": 2.5070748469943935, "learning_rate": 1.9985467149614855e-05, "loss": 0.8057, "step": 1883 }, { "epoch": 0.14002229654403567, "grad_norm": 3.5125583610998663, "learning_rate": 1.9985423875420457e-05, "loss": 1.0029, "step": 1884 }, { "epoch": 0.14009661835748793, "grad_norm": 3.1558811906379733, "learning_rate": 1.9985380536940415e-05, "loss": 0.9096, "step": 1885 }, { "epoch": 0.14017094017094017, "grad_norm": 2.711926773322268, "learning_rate": 1.9985337134175014e-05, "loss": 0.8857, "step": 1886 }, { "epoch": 0.1402452619843924, "grad_norm": 2.770326970417122, "learning_rate": 1.998529366712453e-05, "loss": 1.0452, "step": 1887 }, { "epoch": 0.14031958379784468, "grad_norm": 2.37184350216135, "learning_rate": 1.9985250135789244e-05, "loss": 0.9955, "step": 1888 }, { "epoch": 0.14039390561129692, "grad_norm": 2.56383671559597, "learning_rate": 1.9985206540169434e-05, "loss": 0.9588, "step": 1889 }, { "epoch": 0.14046822742474915, "grad_norm": 3.3158461268693538, "learning_rate": 1.9985162880265383e-05, "loss": 0.8906, "step": 1890 }, { "epoch": 0.14054254923820142, "grad_norm": 9.224260469296105, "learning_rate": 1.9985119156077372e-05, "loss": 1.2146, "step": 1891 }, { "epoch": 0.14061687105165366, "grad_norm": 3.1368034869913015, "learning_rate": 1.9985075367605683e-05, "loss": 1.1188, "step": 1892 }, { "epoch": 0.1406911928651059, "grad_norm": 5.722704616410059, "learning_rate": 1.9985031514850594e-05, "loss": 0.9314, "step": 1893 }, { "epoch": 0.14076551467855816, "grad_norm": 2.2474090864949035, "learning_rate": 1.9984987597812392e-05, "loss": 0.9516, "step": 1894 }, { "epoch": 0.1408398364920104, "grad_norm": 2.9324348275777257, "learning_rate": 1.998494361649136e-05, "loss": 0.8706, "step": 1895 }, { "epoch": 0.14091415830546267, "grad_norm": 2.882679831564391, "learning_rate": 1.9984899570887775e-05, "loss": 0.9657, "step": 1896 }, { "epoch": 0.1409884801189149, "grad_norm": 2.308673952661608, "learning_rate": 1.998485546100193e-05, "loss": 0.79, "step": 1897 }, { "epoch": 0.14106280193236714, "grad_norm": 3.3655422513412856, "learning_rate": 1.99848112868341e-05, "loss": 1.0595, "step": 1898 }, { "epoch": 0.1411371237458194, "grad_norm": 2.603115247693465, "learning_rate": 1.9984767048384574e-05, "loss": 0.7618, "step": 1899 }, { "epoch": 0.14121144555927165, "grad_norm": 3.7914343190649107, "learning_rate": 1.998472274565363e-05, "loss": 0.8843, "step": 1900 }, { "epoch": 0.14128576737272389, "grad_norm": 2.5121993723498863, "learning_rate": 1.9984678378641568e-05, "loss": 1.0449, "step": 1901 }, { "epoch": 0.14136008918617615, "grad_norm": 3.2716410154949713, "learning_rate": 1.998463394734866e-05, "loss": 0.9926, "step": 1902 }, { "epoch": 0.1414344109996284, "grad_norm": 2.7828987136273033, "learning_rate": 1.99845894517752e-05, "loss": 0.8205, "step": 1903 }, { "epoch": 0.14150873281308063, "grad_norm": 3.0206697374186846, "learning_rate": 1.9984544891921467e-05, "loss": 1.0341, "step": 1904 }, { "epoch": 0.1415830546265329, "grad_norm": 3.180736056073291, "learning_rate": 1.9984500267787753e-05, "loss": 0.8009, "step": 1905 }, { "epoch": 0.14165737643998513, "grad_norm": 2.8333293917908997, "learning_rate": 1.9984455579374346e-05, "loss": 0.8011, "step": 1906 }, { "epoch": 0.14173169825343737, "grad_norm": 3.0371217544565883, "learning_rate": 1.998441082668153e-05, "loss": 0.9617, "step": 1907 }, { "epoch": 0.14180602006688964, "grad_norm": 2.921976481311702, "learning_rate": 1.9984366009709595e-05, "loss": 1.0801, "step": 1908 }, { "epoch": 0.14188034188034188, "grad_norm": 2.271667117768716, "learning_rate": 1.9984321128458833e-05, "loss": 0.8857, "step": 1909 }, { "epoch": 0.14195466369379414, "grad_norm": 2.6644346206771976, "learning_rate": 1.9984276182929523e-05, "loss": 0.991, "step": 1910 }, { "epoch": 0.14202898550724638, "grad_norm": 11.474744787181729, "learning_rate": 1.998423117312197e-05, "loss": 0.8264, "step": 1911 }, { "epoch": 0.14210330732069862, "grad_norm": 2.8830022012671424, "learning_rate": 1.998418609903645e-05, "loss": 0.8888, "step": 1912 }, { "epoch": 0.14217762913415088, "grad_norm": 2.2425840017991923, "learning_rate": 1.9984140960673256e-05, "loss": 0.7141, "step": 1913 }, { "epoch": 0.14225195094760312, "grad_norm": 2.694797440538187, "learning_rate": 1.9984095758032685e-05, "loss": 0.9348, "step": 1914 }, { "epoch": 0.14232627276105536, "grad_norm": 5.024628666784952, "learning_rate": 1.9984050491115023e-05, "loss": 1.074, "step": 1915 }, { "epoch": 0.14240059457450763, "grad_norm": 2.779852461528821, "learning_rate": 1.9984005159920556e-05, "loss": 0.9318, "step": 1916 }, { "epoch": 0.14247491638795987, "grad_norm": 3.159419702151714, "learning_rate": 1.9983959764449588e-05, "loss": 0.9471, "step": 1917 }, { "epoch": 0.1425492382014121, "grad_norm": 2.492880054788939, "learning_rate": 1.99839143047024e-05, "loss": 1.0294, "step": 1918 }, { "epoch": 0.14262356001486437, "grad_norm": 2.407507451088907, "learning_rate": 1.9983868780679296e-05, "loss": 0.8954, "step": 1919 }, { "epoch": 0.1426978818283166, "grad_norm": 1.97104715887189, "learning_rate": 1.9983823192380562e-05, "loss": 0.7647, "step": 1920 }, { "epoch": 0.14277220364176885, "grad_norm": 3.056711742448813, "learning_rate": 1.9983777539806488e-05, "loss": 1.2176, "step": 1921 }, { "epoch": 0.1428465254552211, "grad_norm": 3.1682487864089564, "learning_rate": 1.9983731822957372e-05, "loss": 1.0197, "step": 1922 }, { "epoch": 0.14292084726867335, "grad_norm": 6.345855418142738, "learning_rate": 1.9983686041833512e-05, "loss": 0.9404, "step": 1923 }, { "epoch": 0.14299516908212562, "grad_norm": 2.510503856195621, "learning_rate": 1.99836401964352e-05, "loss": 0.9743, "step": 1924 }, { "epoch": 0.14306949089557786, "grad_norm": 2.3839632444175667, "learning_rate": 1.9983594286762726e-05, "loss": 0.9349, "step": 1925 }, { "epoch": 0.1431438127090301, "grad_norm": 6.972471538307161, "learning_rate": 1.9983548312816393e-05, "loss": 0.8282, "step": 1926 }, { "epoch": 0.14321813452248236, "grad_norm": 11.095204411240715, "learning_rate": 1.9983502274596492e-05, "loss": 0.8344, "step": 1927 }, { "epoch": 0.1432924563359346, "grad_norm": 2.539966415098051, "learning_rate": 1.9983456172103324e-05, "loss": 0.9012, "step": 1928 }, { "epoch": 0.14336677814938684, "grad_norm": 2.6800786779472325, "learning_rate": 1.998341000533718e-05, "loss": 1.0214, "step": 1929 }, { "epoch": 0.1434410999628391, "grad_norm": 3.16910741468417, "learning_rate": 1.9983363774298365e-05, "loss": 1.0175, "step": 1930 }, { "epoch": 0.14351542177629134, "grad_norm": 2.9145477753630926, "learning_rate": 1.998331747898717e-05, "loss": 1.1094, "step": 1931 }, { "epoch": 0.14358974358974358, "grad_norm": 5.990544664349578, "learning_rate": 1.9983271119403893e-05, "loss": 0.9812, "step": 1932 }, { "epoch": 0.14366406540319585, "grad_norm": 3.1978085081588534, "learning_rate": 1.9983224695548837e-05, "loss": 1.1133, "step": 1933 }, { "epoch": 0.14373838721664808, "grad_norm": 3.339064965033307, "learning_rate": 1.9983178207422297e-05, "loss": 1.0345, "step": 1934 }, { "epoch": 0.14381270903010032, "grad_norm": 2.6174435234699622, "learning_rate": 1.9983131655024574e-05, "loss": 0.8312, "step": 1935 }, { "epoch": 0.1438870308435526, "grad_norm": 2.6839670100053605, "learning_rate": 1.9983085038355967e-05, "loss": 1.0368, "step": 1936 }, { "epoch": 0.14396135265700483, "grad_norm": 3.308669736618353, "learning_rate": 1.9983038357416776e-05, "loss": 1.0183, "step": 1937 }, { "epoch": 0.1440356744704571, "grad_norm": 2.6129148351235627, "learning_rate": 1.9982991612207307e-05, "loss": 1.0852, "step": 1938 }, { "epoch": 0.14410999628390933, "grad_norm": 2.5704212946436904, "learning_rate": 1.998294480272785e-05, "loss": 0.7654, "step": 1939 }, { "epoch": 0.14418431809736157, "grad_norm": 2.8970900922063763, "learning_rate": 1.9982897928978716e-05, "loss": 0.8513, "step": 1940 }, { "epoch": 0.14425863991081384, "grad_norm": 2.772138445640162, "learning_rate": 1.99828509909602e-05, "loss": 0.9245, "step": 1941 }, { "epoch": 0.14433296172426607, "grad_norm": 2.4338864549099526, "learning_rate": 1.998280398867261e-05, "loss": 0.8784, "step": 1942 }, { "epoch": 0.1444072835377183, "grad_norm": 3.14152975610179, "learning_rate": 1.9982756922116242e-05, "loss": 1.0609, "step": 1943 }, { "epoch": 0.14448160535117058, "grad_norm": 3.0705404306675623, "learning_rate": 1.998270979129141e-05, "loss": 1.0037, "step": 1944 }, { "epoch": 0.14455592716462282, "grad_norm": 2.4245981432231267, "learning_rate": 1.9982662596198404e-05, "loss": 0.8912, "step": 1945 }, { "epoch": 0.14463024897807505, "grad_norm": 2.3155695038036543, "learning_rate": 1.9982615336837537e-05, "loss": 0.91, "step": 1946 }, { "epoch": 0.14470457079152732, "grad_norm": 2.8457526608557457, "learning_rate": 1.9982568013209106e-05, "loss": 1.0912, "step": 1947 }, { "epoch": 0.14477889260497956, "grad_norm": 3.358186360004104, "learning_rate": 1.9982520625313425e-05, "loss": 0.8428, "step": 1948 }, { "epoch": 0.1448532144184318, "grad_norm": 2.3207919281549705, "learning_rate": 1.9982473173150794e-05, "loss": 0.8481, "step": 1949 }, { "epoch": 0.14492753623188406, "grad_norm": 2.7090491102203935, "learning_rate": 1.998242565672152e-05, "loss": 1.0757, "step": 1950 }, { "epoch": 0.1450018580453363, "grad_norm": 2.957763716129604, "learning_rate": 1.9982378076025903e-05, "loss": 0.8167, "step": 1951 }, { "epoch": 0.14507617985878857, "grad_norm": 3.339483937389788, "learning_rate": 1.9982330431064258e-05, "loss": 0.8102, "step": 1952 }, { "epoch": 0.1451505016722408, "grad_norm": 2.259217938922976, "learning_rate": 1.9982282721836884e-05, "loss": 0.7479, "step": 1953 }, { "epoch": 0.14522482348569304, "grad_norm": 2.958995017304778, "learning_rate": 1.998223494834409e-05, "loss": 1.0, "step": 1954 }, { "epoch": 0.1452991452991453, "grad_norm": 1.8643519213648223, "learning_rate": 1.998218711058619e-05, "loss": 0.8384, "step": 1955 }, { "epoch": 0.14537346711259755, "grad_norm": 2.7388032688095096, "learning_rate": 1.9982139208563486e-05, "loss": 0.7541, "step": 1956 }, { "epoch": 0.1454477889260498, "grad_norm": 2.3858552909896655, "learning_rate": 1.998209124227629e-05, "loss": 0.9502, "step": 1957 }, { "epoch": 0.14552211073950205, "grad_norm": 2.4843779419982344, "learning_rate": 1.9982043211724905e-05, "loss": 0.9077, "step": 1958 }, { "epoch": 0.1455964325529543, "grad_norm": 3.662639552461659, "learning_rate": 1.9981995116909645e-05, "loss": 0.9765, "step": 1959 }, { "epoch": 0.14567075436640653, "grad_norm": 2.4458021801064835, "learning_rate": 1.9981946957830818e-05, "loss": 0.9545, "step": 1960 }, { "epoch": 0.1457450761798588, "grad_norm": 3.2101861109884946, "learning_rate": 1.9981898734488734e-05, "loss": 1.0472, "step": 1961 }, { "epoch": 0.14581939799331103, "grad_norm": 2.8822489280571615, "learning_rate": 1.9981850446883703e-05, "loss": 0.8459, "step": 1962 }, { "epoch": 0.14589371980676327, "grad_norm": 2.549427931462103, "learning_rate": 1.998180209501604e-05, "loss": 0.9168, "step": 1963 }, { "epoch": 0.14596804162021554, "grad_norm": 2.2616702597077265, "learning_rate": 1.9981753678886052e-05, "loss": 0.809, "step": 1964 }, { "epoch": 0.14604236343366778, "grad_norm": 3.3096194837630293, "learning_rate": 1.998170519849405e-05, "loss": 1.0945, "step": 1965 }, { "epoch": 0.14611668524712004, "grad_norm": 3.2874260556171584, "learning_rate": 1.998165665384035e-05, "loss": 1.0361, "step": 1966 }, { "epoch": 0.14619100706057228, "grad_norm": 2.332589894838091, "learning_rate": 1.998160804492526e-05, "loss": 0.6497, "step": 1967 }, { "epoch": 0.14626532887402452, "grad_norm": 2.724976474026902, "learning_rate": 1.9981559371749096e-05, "loss": 1.0886, "step": 1968 }, { "epoch": 0.14633965068747679, "grad_norm": 5.499771548111626, "learning_rate": 1.9981510634312172e-05, "loss": 0.91, "step": 1969 }, { "epoch": 0.14641397250092902, "grad_norm": 2.391803214416953, "learning_rate": 1.99814618326148e-05, "loss": 0.8188, "step": 1970 }, { "epoch": 0.14648829431438126, "grad_norm": 2.5297186466059522, "learning_rate": 1.9981412966657294e-05, "loss": 0.9742, "step": 1971 }, { "epoch": 0.14656261612783353, "grad_norm": 2.985775023716218, "learning_rate": 1.998136403643997e-05, "loss": 1.0461, "step": 1972 }, { "epoch": 0.14663693794128577, "grad_norm": 2.6660203587015254, "learning_rate": 1.9981315041963143e-05, "loss": 0.9767, "step": 1973 }, { "epoch": 0.146711259754738, "grad_norm": 2.7050080331670747, "learning_rate": 1.9981265983227126e-05, "loss": 1.0342, "step": 1974 }, { "epoch": 0.14678558156819027, "grad_norm": 2.9237608708505562, "learning_rate": 1.9981216860232236e-05, "loss": 0.7237, "step": 1975 }, { "epoch": 0.1468599033816425, "grad_norm": 2.6748892691514925, "learning_rate": 1.998116767297879e-05, "loss": 0.8233, "step": 1976 }, { "epoch": 0.14693422519509475, "grad_norm": 2.407372745422854, "learning_rate": 1.9981118421467108e-05, "loss": 0.85, "step": 1977 }, { "epoch": 0.147008547008547, "grad_norm": 2.9591465817408924, "learning_rate": 1.99810691056975e-05, "loss": 1.0987, "step": 1978 }, { "epoch": 0.14708286882199925, "grad_norm": 3.093246588465748, "learning_rate": 1.9981019725670286e-05, "loss": 0.9378, "step": 1979 }, { "epoch": 0.14715719063545152, "grad_norm": 2.0673928990475936, "learning_rate": 1.9980970281385787e-05, "loss": 0.797, "step": 1980 }, { "epoch": 0.14723151244890376, "grad_norm": 3.001797811867421, "learning_rate": 1.998092077284432e-05, "loss": 0.942, "step": 1981 }, { "epoch": 0.147305834262356, "grad_norm": 2.5804062925211415, "learning_rate": 1.99808712000462e-05, "loss": 0.6978, "step": 1982 }, { "epoch": 0.14738015607580826, "grad_norm": 3.0613772023596906, "learning_rate": 1.9980821562991752e-05, "loss": 1.0767, "step": 1983 }, { "epoch": 0.1474544778892605, "grad_norm": 2.485025017688634, "learning_rate": 1.9980771861681292e-05, "loss": 0.9763, "step": 1984 }, { "epoch": 0.14752879970271274, "grad_norm": 2.4460874378019635, "learning_rate": 1.9980722096115143e-05, "loss": 0.897, "step": 1985 }, { "epoch": 0.147603121516165, "grad_norm": 2.3658780518129925, "learning_rate": 1.9980672266293616e-05, "loss": 0.8151, "step": 1986 }, { "epoch": 0.14767744332961724, "grad_norm": 2.6681036343104485, "learning_rate": 1.998062237221704e-05, "loss": 0.9419, "step": 1987 }, { "epoch": 0.14775176514306948, "grad_norm": 2.88546864554887, "learning_rate": 1.9980572413885743e-05, "loss": 1.1284, "step": 1988 }, { "epoch": 0.14782608695652175, "grad_norm": 2.3012754743738992, "learning_rate": 1.9980522391300033e-05, "loss": 0.8766, "step": 1989 }, { "epoch": 0.14790040876997398, "grad_norm": 2.6787749917947137, "learning_rate": 1.998047230446024e-05, "loss": 1.0196, "step": 1990 }, { "epoch": 0.14797473058342622, "grad_norm": 3.4790106586375984, "learning_rate": 1.9980422153366682e-05, "loss": 1.0581, "step": 1991 }, { "epoch": 0.1480490523968785, "grad_norm": 4.502190647728056, "learning_rate": 1.998037193801968e-05, "loss": 1.0245, "step": 1992 }, { "epoch": 0.14812337421033073, "grad_norm": 3.311743907246146, "learning_rate": 1.9980321658419567e-05, "loss": 1.0019, "step": 1993 }, { "epoch": 0.148197696023783, "grad_norm": 2.6261848945687554, "learning_rate": 1.998027131456666e-05, "loss": 0.8897, "step": 1994 }, { "epoch": 0.14827201783723523, "grad_norm": 2.4430143612191824, "learning_rate": 1.9980220906461284e-05, "loss": 0.8633, "step": 1995 }, { "epoch": 0.14834633965068747, "grad_norm": 2.655417352862499, "learning_rate": 1.9980170434103762e-05, "loss": 1.0962, "step": 1996 }, { "epoch": 0.14842066146413974, "grad_norm": 2.9620915476430616, "learning_rate": 1.9980119897494424e-05, "loss": 1.0848, "step": 1997 }, { "epoch": 0.14849498327759197, "grad_norm": 2.1282269662194744, "learning_rate": 1.998006929663359e-05, "loss": 0.7409, "step": 1998 }, { "epoch": 0.1485693050910442, "grad_norm": 2.7381869527481415, "learning_rate": 1.9980018631521586e-05, "loss": 0.9389, "step": 1999 }, { "epoch": 0.14864362690449648, "grad_norm": 3.226073581447415, "learning_rate": 1.9979967902158744e-05, "loss": 0.7298, "step": 2000 }, { "epoch": 0.14871794871794872, "grad_norm": 2.397372982642193, "learning_rate": 1.9979917108545383e-05, "loss": 0.7231, "step": 2001 }, { "epoch": 0.14879227053140096, "grad_norm": 2.29641770504886, "learning_rate": 1.9979866250681835e-05, "loss": 0.7118, "step": 2002 }, { "epoch": 0.14886659234485322, "grad_norm": 2.9086079622214727, "learning_rate": 1.9979815328568426e-05, "loss": 0.8222, "step": 2003 }, { "epoch": 0.14894091415830546, "grad_norm": 2.6161735134489787, "learning_rate": 1.997976434220548e-05, "loss": 1.072, "step": 2004 }, { "epoch": 0.1490152359717577, "grad_norm": 3.0091548801745063, "learning_rate": 1.9979713291593335e-05, "loss": 1.0444, "step": 2005 }, { "epoch": 0.14908955778520996, "grad_norm": 2.4373563233751794, "learning_rate": 1.997966217673231e-05, "loss": 1.0007, "step": 2006 }, { "epoch": 0.1491638795986622, "grad_norm": 2.3832481061913247, "learning_rate": 1.997961099762274e-05, "loss": 0.9692, "step": 2007 }, { "epoch": 0.14923820141211447, "grad_norm": 2.915403757756788, "learning_rate": 1.997955975426495e-05, "loss": 0.9158, "step": 2008 }, { "epoch": 0.1493125232255667, "grad_norm": 2.413421973996041, "learning_rate": 1.9979508446659273e-05, "loss": 0.8901, "step": 2009 }, { "epoch": 0.14938684503901895, "grad_norm": 4.810026695391419, "learning_rate": 1.9979457074806036e-05, "loss": 1.0713, "step": 2010 }, { "epoch": 0.1494611668524712, "grad_norm": 2.4657233036334625, "learning_rate": 1.9979405638705574e-05, "loss": 0.9378, "step": 2011 }, { "epoch": 0.14953548866592345, "grad_norm": 3.1105790089026293, "learning_rate": 1.997935413835822e-05, "loss": 0.9099, "step": 2012 }, { "epoch": 0.1496098104793757, "grad_norm": 2.1912097391964385, "learning_rate": 1.99793025737643e-05, "loss": 0.7968, "step": 2013 }, { "epoch": 0.14968413229282795, "grad_norm": 2.566159401801546, "learning_rate": 1.9979250944924145e-05, "loss": 0.8989, "step": 2014 }, { "epoch": 0.1497584541062802, "grad_norm": 2.7176246651993217, "learning_rate": 1.997919925183809e-05, "loss": 0.9392, "step": 2015 }, { "epoch": 0.14983277591973243, "grad_norm": 2.6273471909025066, "learning_rate": 1.997914749450647e-05, "loss": 1.0448, "step": 2016 }, { "epoch": 0.1499070977331847, "grad_norm": 2.887536387254828, "learning_rate": 1.9979095672929617e-05, "loss": 0.8568, "step": 2017 }, { "epoch": 0.14998141954663693, "grad_norm": 2.387507246160384, "learning_rate": 1.997904378710786e-05, "loss": 0.7061, "step": 2018 }, { "epoch": 0.15005574136008917, "grad_norm": 2.5127403319107287, "learning_rate": 1.997899183704154e-05, "loss": 1.0443, "step": 2019 }, { "epoch": 0.15013006317354144, "grad_norm": 3.2226488486517106, "learning_rate": 1.997893982273099e-05, "loss": 0.9428, "step": 2020 }, { "epoch": 0.15020438498699368, "grad_norm": 2.8429591011580015, "learning_rate": 1.9978887744176538e-05, "loss": 0.7746, "step": 2021 }, { "epoch": 0.15027870680044594, "grad_norm": 3.339101913787062, "learning_rate": 1.997883560137853e-05, "loss": 0.8002, "step": 2022 }, { "epoch": 0.15035302861389818, "grad_norm": 2.9105887887026207, "learning_rate": 1.9978783394337292e-05, "loss": 0.8822, "step": 2023 }, { "epoch": 0.15042735042735042, "grad_norm": 2.421827819266559, "learning_rate": 1.9978731123053166e-05, "loss": 1.0171, "step": 2024 }, { "epoch": 0.1505016722408027, "grad_norm": 2.530159989318788, "learning_rate": 1.997867878752649e-05, "loss": 0.7586, "step": 2025 }, { "epoch": 0.15057599405425492, "grad_norm": 2.5664859012476566, "learning_rate": 1.9978626387757592e-05, "loss": 0.8962, "step": 2026 }, { "epoch": 0.15065031586770716, "grad_norm": 2.366256209390995, "learning_rate": 1.9978573923746815e-05, "loss": 1.0747, "step": 2027 }, { "epoch": 0.15072463768115943, "grad_norm": 2.430889237739934, "learning_rate": 1.9978521395494502e-05, "loss": 0.8789, "step": 2028 }, { "epoch": 0.15079895949461167, "grad_norm": 2.2828222710023796, "learning_rate": 1.9978468803000984e-05, "loss": 0.8058, "step": 2029 }, { "epoch": 0.1508732813080639, "grad_norm": 2.646585646356997, "learning_rate": 1.99784161462666e-05, "loss": 0.8775, "step": 2030 }, { "epoch": 0.15094760312151617, "grad_norm": 3.0981249580867356, "learning_rate": 1.9978363425291693e-05, "loss": 0.9255, "step": 2031 }, { "epoch": 0.1510219249349684, "grad_norm": 2.7677016564106616, "learning_rate": 1.9978310640076597e-05, "loss": 1.0444, "step": 2032 }, { "epoch": 0.15109624674842065, "grad_norm": 2.6807038367221243, "learning_rate": 1.9978257790621655e-05, "loss": 1.0553, "step": 2033 }, { "epoch": 0.15117056856187291, "grad_norm": 1.628316462049548, "learning_rate": 1.997820487692721e-05, "loss": 0.6126, "step": 2034 }, { "epoch": 0.15124489037532515, "grad_norm": 2.4270833411116004, "learning_rate": 1.9978151898993598e-05, "loss": 0.9705, "step": 2035 }, { "epoch": 0.15131921218877742, "grad_norm": 2.132653762043394, "learning_rate": 1.997809885682116e-05, "loss": 0.8926, "step": 2036 }, { "epoch": 0.15139353400222966, "grad_norm": 2.2366205984188556, "learning_rate": 1.9978045750410243e-05, "loss": 0.6716, "step": 2037 }, { "epoch": 0.1514678558156819, "grad_norm": 2.6118415121210647, "learning_rate": 1.9977992579761186e-05, "loss": 0.9109, "step": 2038 }, { "epoch": 0.15154217762913416, "grad_norm": 2.1969997438481155, "learning_rate": 1.9977939344874327e-05, "loss": 0.9628, "step": 2039 }, { "epoch": 0.1516164994425864, "grad_norm": 3.6839648582169837, "learning_rate": 1.9977886045750014e-05, "loss": 0.9128, "step": 2040 }, { "epoch": 0.15169082125603864, "grad_norm": 2.145149398834432, "learning_rate": 1.9977832682388585e-05, "loss": 0.7298, "step": 2041 }, { "epoch": 0.1517651430694909, "grad_norm": 2.543851945254098, "learning_rate": 1.997777925479039e-05, "loss": 0.904, "step": 2042 }, { "epoch": 0.15183946488294314, "grad_norm": 2.6033530072325357, "learning_rate": 1.9977725762955768e-05, "loss": 0.8685, "step": 2043 }, { "epoch": 0.15191378669639538, "grad_norm": 3.2323259146489813, "learning_rate": 1.9977672206885067e-05, "loss": 1.1137, "step": 2044 }, { "epoch": 0.15198810850984765, "grad_norm": 3.2279347108378635, "learning_rate": 1.997761858657863e-05, "loss": 1.0537, "step": 2045 }, { "epoch": 0.15206243032329989, "grad_norm": 2.435278245689658, "learning_rate": 1.9977564902036802e-05, "loss": 0.9077, "step": 2046 }, { "epoch": 0.15213675213675212, "grad_norm": 3.3251409193307464, "learning_rate": 1.997751115325993e-05, "loss": 0.8126, "step": 2047 }, { "epoch": 0.1522110739502044, "grad_norm": 2.581212774873684, "learning_rate": 1.9977457340248356e-05, "loss": 0.9831, "step": 2048 }, { "epoch": 0.15228539576365663, "grad_norm": 2.9047414060151335, "learning_rate": 1.997740346300243e-05, "loss": 1.1121, "step": 2049 }, { "epoch": 0.1523597175771089, "grad_norm": 2.847358698749491, "learning_rate": 1.99773495215225e-05, "loss": 0.945, "step": 2050 }, { "epoch": 0.15243403939056113, "grad_norm": 3.7012006802007558, "learning_rate": 1.997729551580891e-05, "loss": 0.9066, "step": 2051 }, { "epoch": 0.15250836120401337, "grad_norm": 2.5358842201179117, "learning_rate": 1.997724144586201e-05, "loss": 0.8594, "step": 2052 }, { "epoch": 0.15258268301746564, "grad_norm": 2.204801963128395, "learning_rate": 1.9977187311682148e-05, "loss": 0.6811, "step": 2053 }, { "epoch": 0.15265700483091788, "grad_norm": 2.8793994735110546, "learning_rate": 1.997713311326967e-05, "loss": 1.0527, "step": 2054 }, { "epoch": 0.1527313266443701, "grad_norm": 3.057146650408635, "learning_rate": 1.9977078850624928e-05, "loss": 1.1951, "step": 2055 }, { "epoch": 0.15280564845782238, "grad_norm": 2.609917634609832, "learning_rate": 1.997702452374827e-05, "loss": 0.8931, "step": 2056 }, { "epoch": 0.15287997027127462, "grad_norm": 2.474901265015364, "learning_rate": 1.9976970132640043e-05, "loss": 1.0129, "step": 2057 }, { "epoch": 0.15295429208472686, "grad_norm": 2.7663995990528076, "learning_rate": 1.9976915677300602e-05, "loss": 0.7058, "step": 2058 }, { "epoch": 0.15302861389817912, "grad_norm": 2.4029004702940804, "learning_rate": 1.99768611577303e-05, "loss": 0.9474, "step": 2059 }, { "epoch": 0.15310293571163136, "grad_norm": 2.1950720345211883, "learning_rate": 1.9976806573929475e-05, "loss": 0.7737, "step": 2060 }, { "epoch": 0.1531772575250836, "grad_norm": 2.39205218669239, "learning_rate": 1.9976751925898492e-05, "loss": 0.6323, "step": 2061 }, { "epoch": 0.15325157933853586, "grad_norm": 2.0036973998949357, "learning_rate": 1.9976697213637698e-05, "loss": 0.6263, "step": 2062 }, { "epoch": 0.1533259011519881, "grad_norm": 2.260392304190362, "learning_rate": 1.9976642437147446e-05, "loss": 0.9705, "step": 2063 }, { "epoch": 0.15340022296544037, "grad_norm": 3.1245773544510778, "learning_rate": 1.9976587596428084e-05, "loss": 1.1333, "step": 2064 }, { "epoch": 0.1534745447788926, "grad_norm": 2.542465968773685, "learning_rate": 1.997653269147997e-05, "loss": 0.8387, "step": 2065 }, { "epoch": 0.15354886659234485, "grad_norm": 2.7043647653806255, "learning_rate": 1.9976477722303458e-05, "loss": 0.8908, "step": 2066 }, { "epoch": 0.1536231884057971, "grad_norm": 2.800554636961504, "learning_rate": 1.9976422688898897e-05, "loss": 1.0943, "step": 2067 }, { "epoch": 0.15369751021924935, "grad_norm": 3.2965488709763155, "learning_rate": 1.9976367591266647e-05, "loss": 0.8839, "step": 2068 }, { "epoch": 0.1537718320327016, "grad_norm": 3.4341535384033324, "learning_rate": 1.997631242940706e-05, "loss": 1.0617, "step": 2069 }, { "epoch": 0.15384615384615385, "grad_norm": 2.511290633508282, "learning_rate": 1.9976257203320487e-05, "loss": 0.86, "step": 2070 }, { "epoch": 0.1539204756596061, "grad_norm": 2.3853419735251973, "learning_rate": 1.9976201913007293e-05, "loss": 0.8402, "step": 2071 }, { "epoch": 0.15399479747305833, "grad_norm": 2.844316337267902, "learning_rate": 1.9976146558467825e-05, "loss": 0.799, "step": 2072 }, { "epoch": 0.1540691192865106, "grad_norm": 2.597807068861321, "learning_rate": 1.9976091139702447e-05, "loss": 1.0262, "step": 2073 }, { "epoch": 0.15414344109996284, "grad_norm": 2.0883314531233874, "learning_rate": 1.9976035656711507e-05, "loss": 0.9342, "step": 2074 }, { "epoch": 0.15421776291341507, "grad_norm": 2.21074114092842, "learning_rate": 1.997598010949537e-05, "loss": 0.6233, "step": 2075 }, { "epoch": 0.15429208472686734, "grad_norm": 2.206815156548797, "learning_rate": 1.9975924498054392e-05, "loss": 0.9357, "step": 2076 }, { "epoch": 0.15436640654031958, "grad_norm": 2.3518655774287835, "learning_rate": 1.997586882238893e-05, "loss": 0.8271, "step": 2077 }, { "epoch": 0.15444072835377184, "grad_norm": 2.6435539194895736, "learning_rate": 1.997581308249934e-05, "loss": 1.0741, "step": 2078 }, { "epoch": 0.15451505016722408, "grad_norm": 2.8007270077213615, "learning_rate": 1.9975757278385982e-05, "loss": 1.0139, "step": 2079 }, { "epoch": 0.15458937198067632, "grad_norm": 2.578832612979317, "learning_rate": 1.9975701410049218e-05, "loss": 0.8229, "step": 2080 }, { "epoch": 0.1546636937941286, "grad_norm": 2.972618151459155, "learning_rate": 1.9975645477489405e-05, "loss": 0.9592, "step": 2081 }, { "epoch": 0.15473801560758083, "grad_norm": 2.6515179666589246, "learning_rate": 1.9975589480706904e-05, "loss": 1.0828, "step": 2082 }, { "epoch": 0.15481233742103306, "grad_norm": 2.8498116937922986, "learning_rate": 1.9975533419702077e-05, "loss": 1.1774, "step": 2083 }, { "epoch": 0.15488665923448533, "grad_norm": 3.330478122297653, "learning_rate": 1.9975477294475282e-05, "loss": 0.9773, "step": 2084 }, { "epoch": 0.15496098104793757, "grad_norm": 2.350663120054506, "learning_rate": 1.997542110502688e-05, "loss": 1.0036, "step": 2085 }, { "epoch": 0.1550353028613898, "grad_norm": 2.551916453716635, "learning_rate": 1.997536485135724e-05, "loss": 0.9758, "step": 2086 }, { "epoch": 0.15510962467484207, "grad_norm": 2.823480685946435, "learning_rate": 1.9975308533466714e-05, "loss": 0.8109, "step": 2087 }, { "epoch": 0.1551839464882943, "grad_norm": 3.0523277673641434, "learning_rate": 1.997525215135567e-05, "loss": 1.0284, "step": 2088 }, { "epoch": 0.15525826830174655, "grad_norm": 3.447919131332459, "learning_rate": 1.9975195705024473e-05, "loss": 0.9292, "step": 2089 }, { "epoch": 0.15533259011519882, "grad_norm": 2.7711959283087673, "learning_rate": 1.9975139194473477e-05, "loss": 1.1772, "step": 2090 }, { "epoch": 0.15540691192865105, "grad_norm": 2.9652902360903712, "learning_rate": 1.997508261970306e-05, "loss": 0.8998, "step": 2091 }, { "epoch": 0.15548123374210332, "grad_norm": 2.696662625888205, "learning_rate": 1.9975025980713574e-05, "loss": 0.9966, "step": 2092 }, { "epoch": 0.15555555555555556, "grad_norm": 3.3423526944718644, "learning_rate": 1.997496927750539e-05, "loss": 1.12, "step": 2093 }, { "epoch": 0.1556298773690078, "grad_norm": 2.8967231722290006, "learning_rate": 1.997491251007887e-05, "loss": 1.0107, "step": 2094 }, { "epoch": 0.15570419918246006, "grad_norm": 3.1715455685847544, "learning_rate": 1.9974855678434384e-05, "loss": 0.9525, "step": 2095 }, { "epoch": 0.1557785209959123, "grad_norm": 2.5289987027881167, "learning_rate": 1.997479878257229e-05, "loss": 0.7815, "step": 2096 }, { "epoch": 0.15585284280936454, "grad_norm": 3.4320711322843707, "learning_rate": 1.9974741822492965e-05, "loss": 1.0366, "step": 2097 }, { "epoch": 0.1559271646228168, "grad_norm": 2.7162884258225892, "learning_rate": 1.9974684798196765e-05, "loss": 0.8332, "step": 2098 }, { "epoch": 0.15600148643626904, "grad_norm": 2.4797852643521083, "learning_rate": 1.9974627709684065e-05, "loss": 0.7781, "step": 2099 }, { "epoch": 0.15607580824972128, "grad_norm": 2.6586531278281282, "learning_rate": 1.997457055695523e-05, "loss": 1.034, "step": 2100 }, { "epoch": 0.15615013006317355, "grad_norm": 2.5758684898844484, "learning_rate": 1.997451334001062e-05, "loss": 1.0231, "step": 2101 }, { "epoch": 0.1562244518766258, "grad_norm": 2.352251645950956, "learning_rate": 1.997445605885062e-05, "loss": 0.9408, "step": 2102 }, { "epoch": 0.15629877369007802, "grad_norm": 2.6876941001875125, "learning_rate": 1.997439871347558e-05, "loss": 1.2451, "step": 2103 }, { "epoch": 0.1563730955035303, "grad_norm": 2.426017987851689, "learning_rate": 1.9974341303885885e-05, "loss": 0.9381, "step": 2104 }, { "epoch": 0.15644741731698253, "grad_norm": 2.191388087545205, "learning_rate": 1.99742838300819e-05, "loss": 0.9048, "step": 2105 }, { "epoch": 0.1565217391304348, "grad_norm": 2.773899792988625, "learning_rate": 1.997422629206399e-05, "loss": 1.0304, "step": 2106 }, { "epoch": 0.15659606094388703, "grad_norm": 2.689562450997032, "learning_rate": 1.9974168689832528e-05, "loss": 0.6771, "step": 2107 }, { "epoch": 0.15667038275733927, "grad_norm": 3.0375735104521904, "learning_rate": 1.9974111023387887e-05, "loss": 0.8034, "step": 2108 }, { "epoch": 0.15674470457079154, "grad_norm": 2.668777050573074, "learning_rate": 1.9974053292730433e-05, "loss": 0.8975, "step": 2109 }, { "epoch": 0.15681902638424378, "grad_norm": 3.2476966936395155, "learning_rate": 1.9973995497860547e-05, "loss": 0.9209, "step": 2110 }, { "epoch": 0.15689334819769601, "grad_norm": 2.298057774665012, "learning_rate": 1.997393763877859e-05, "loss": 0.8881, "step": 2111 }, { "epoch": 0.15696767001114828, "grad_norm": 2.4764244102134145, "learning_rate": 1.9973879715484944e-05, "loss": 1.005, "step": 2112 }, { "epoch": 0.15704199182460052, "grad_norm": 2.7208339436195415, "learning_rate": 1.997382172797998e-05, "loss": 0.8544, "step": 2113 }, { "epoch": 0.15711631363805276, "grad_norm": 2.887632598888072, "learning_rate": 1.9973763676264065e-05, "loss": 1.0455, "step": 2114 }, { "epoch": 0.15719063545150502, "grad_norm": 2.1944443849410216, "learning_rate": 1.9973705560337575e-05, "loss": 0.7857, "step": 2115 }, { "epoch": 0.15726495726495726, "grad_norm": 2.3039502090970956, "learning_rate": 1.997364738020089e-05, "loss": 0.9171, "step": 2116 }, { "epoch": 0.1573392790784095, "grad_norm": 2.297200757864147, "learning_rate": 1.997358913585438e-05, "loss": 0.781, "step": 2117 }, { "epoch": 0.15741360089186177, "grad_norm": 2.4253081951100954, "learning_rate": 1.997353082729842e-05, "loss": 0.8993, "step": 2118 }, { "epoch": 0.157487922705314, "grad_norm": 2.2532892195086993, "learning_rate": 1.9973472454533388e-05, "loss": 0.9713, "step": 2119 }, { "epoch": 0.15756224451876627, "grad_norm": 2.5096318838962146, "learning_rate": 1.9973414017559656e-05, "loss": 0.9589, "step": 2120 }, { "epoch": 0.1576365663322185, "grad_norm": 3.5334404836338513, "learning_rate": 1.99733555163776e-05, "loss": 1.1332, "step": 2121 }, { "epoch": 0.15771088814567075, "grad_norm": 2.7905911684548173, "learning_rate": 1.9973296950987603e-05, "loss": 1.0895, "step": 2122 }, { "epoch": 0.157785209959123, "grad_norm": 2.164311833628591, "learning_rate": 1.9973238321390034e-05, "loss": 0.9941, "step": 2123 }, { "epoch": 0.15785953177257525, "grad_norm": 2.3409587472792337, "learning_rate": 1.9973179627585277e-05, "loss": 0.9293, "step": 2124 }, { "epoch": 0.1579338535860275, "grad_norm": 4.625977845813874, "learning_rate": 1.9973120869573705e-05, "loss": 1.0768, "step": 2125 }, { "epoch": 0.15800817539947976, "grad_norm": 2.6097535028806513, "learning_rate": 1.99730620473557e-05, "loss": 1.0047, "step": 2126 }, { "epoch": 0.158082497212932, "grad_norm": 2.72888813788881, "learning_rate": 1.997300316093164e-05, "loss": 0.9157, "step": 2127 }, { "epoch": 0.15815681902638423, "grad_norm": 2.6717654158278608, "learning_rate": 1.9972944210301903e-05, "loss": 1.0631, "step": 2128 }, { "epoch": 0.1582311408398365, "grad_norm": 2.527042046565525, "learning_rate": 1.9972885195466865e-05, "loss": 0.7827, "step": 2129 }, { "epoch": 0.15830546265328874, "grad_norm": 2.06811482302314, "learning_rate": 1.9972826116426912e-05, "loss": 0.8183, "step": 2130 }, { "epoch": 0.15837978446674097, "grad_norm": 2.259877144491423, "learning_rate": 1.9972766973182418e-05, "loss": 1.0945, "step": 2131 }, { "epoch": 0.15845410628019324, "grad_norm": 3.2891492056027363, "learning_rate": 1.997270776573377e-05, "loss": 1.1419, "step": 2132 }, { "epoch": 0.15852842809364548, "grad_norm": 3.2730011106545023, "learning_rate": 1.997264849408135e-05, "loss": 1.097, "step": 2133 }, { "epoch": 0.15860274990709775, "grad_norm": 2.2719260924002045, "learning_rate": 1.9972589158225533e-05, "loss": 0.9723, "step": 2134 }, { "epoch": 0.15867707172054998, "grad_norm": 2.1455838019967435, "learning_rate": 1.9972529758166707e-05, "loss": 1.0222, "step": 2135 }, { "epoch": 0.15875139353400222, "grad_norm": 2.7415184505199894, "learning_rate": 1.997247029390525e-05, "loss": 0.9362, "step": 2136 }, { "epoch": 0.1588257153474545, "grad_norm": 2.373104418626499, "learning_rate": 1.997241076544155e-05, "loss": 1.0716, "step": 2137 }, { "epoch": 0.15890003716090673, "grad_norm": 2.363625083077908, "learning_rate": 1.9972351172775983e-05, "loss": 0.6666, "step": 2138 }, { "epoch": 0.15897435897435896, "grad_norm": 2.2726940634014565, "learning_rate": 1.9972291515908938e-05, "loss": 0.8681, "step": 2139 }, { "epoch": 0.15904868078781123, "grad_norm": 2.5560768517778985, "learning_rate": 1.9972231794840796e-05, "loss": 1.0317, "step": 2140 }, { "epoch": 0.15912300260126347, "grad_norm": 2.2835490217004573, "learning_rate": 1.9972172009571945e-05, "loss": 0.9925, "step": 2141 }, { "epoch": 0.1591973244147157, "grad_norm": 2.2535777386107982, "learning_rate": 1.997211216010277e-05, "loss": 0.953, "step": 2142 }, { "epoch": 0.15927164622816797, "grad_norm": 10.861570151581192, "learning_rate": 1.997205224643365e-05, "loss": 0.9071, "step": 2143 }, { "epoch": 0.1593459680416202, "grad_norm": 2.525130955282035, "learning_rate": 1.997199226856498e-05, "loss": 1.0571, "step": 2144 }, { "epoch": 0.15942028985507245, "grad_norm": 3.138001971630013, "learning_rate": 1.9971932226497137e-05, "loss": 0.8221, "step": 2145 }, { "epoch": 0.15949461166852472, "grad_norm": 3.147741768254442, "learning_rate": 1.997187212023051e-05, "loss": 0.9694, "step": 2146 }, { "epoch": 0.15956893348197695, "grad_norm": 2.684816566877436, "learning_rate": 1.997181194976549e-05, "loss": 0.8628, "step": 2147 }, { "epoch": 0.15964325529542922, "grad_norm": 3.152330499776882, "learning_rate": 1.9971751715102467e-05, "loss": 1.063, "step": 2148 }, { "epoch": 0.15971757710888146, "grad_norm": 2.3049080933111554, "learning_rate": 1.997169141624182e-05, "loss": 0.9429, "step": 2149 }, { "epoch": 0.1597918989223337, "grad_norm": 12.482057153987185, "learning_rate": 1.9971631053183937e-05, "loss": 0.8821, "step": 2150 }, { "epoch": 0.15986622073578596, "grad_norm": 2.790648625271152, "learning_rate": 1.9971570625929218e-05, "loss": 0.9986, "step": 2151 }, { "epoch": 0.1599405425492382, "grad_norm": 3.198719116911587, "learning_rate": 1.997151013447804e-05, "loss": 0.9997, "step": 2152 }, { "epoch": 0.16001486436269044, "grad_norm": 2.125566677002232, "learning_rate": 1.9971449578830797e-05, "loss": 0.7842, "step": 2153 }, { "epoch": 0.1600891861761427, "grad_norm": 2.315228563382102, "learning_rate": 1.997138895898788e-05, "loss": 0.9616, "step": 2154 }, { "epoch": 0.16016350798959494, "grad_norm": 2.4516843334475396, "learning_rate": 1.997132827494968e-05, "loss": 0.8995, "step": 2155 }, { "epoch": 0.16023782980304718, "grad_norm": 2.7770227309343576, "learning_rate": 1.9971267526716582e-05, "loss": 1.0685, "step": 2156 }, { "epoch": 0.16031215161649945, "grad_norm": 2.2155485609979793, "learning_rate": 1.9971206714288986e-05, "loss": 0.8639, "step": 2157 }, { "epoch": 0.1603864734299517, "grad_norm": 2.7141419283007457, "learning_rate": 1.9971145837667272e-05, "loss": 0.9186, "step": 2158 }, { "epoch": 0.16046079524340393, "grad_norm": 3.575455294169358, "learning_rate": 1.9971084896851846e-05, "loss": 0.9679, "step": 2159 }, { "epoch": 0.1605351170568562, "grad_norm": 3.275371402149305, "learning_rate": 1.997102389184309e-05, "loss": 0.923, "step": 2160 }, { "epoch": 0.16060943887030843, "grad_norm": 3.884488076487698, "learning_rate": 1.99709628226414e-05, "loss": 0.8431, "step": 2161 }, { "epoch": 0.1606837606837607, "grad_norm": 2.4973167398307936, "learning_rate": 1.9970901689247164e-05, "loss": 1.0346, "step": 2162 }, { "epoch": 0.16075808249721293, "grad_norm": 2.5384462320250827, "learning_rate": 1.9970840491660787e-05, "loss": 0.9966, "step": 2163 }, { "epoch": 0.16083240431066517, "grad_norm": 2.9006031436591853, "learning_rate": 1.997077922988265e-05, "loss": 0.9853, "step": 2164 }, { "epoch": 0.16090672612411744, "grad_norm": 2.701140001055706, "learning_rate": 1.9970717903913156e-05, "loss": 0.9298, "step": 2165 }, { "epoch": 0.16098104793756968, "grad_norm": 2.1150931649931843, "learning_rate": 1.99706565137527e-05, "loss": 0.9988, "step": 2166 }, { "epoch": 0.16105536975102192, "grad_norm": 3.333472118933205, "learning_rate": 1.9970595059401673e-05, "loss": 0.8168, "step": 2167 }, { "epoch": 0.16112969156447418, "grad_norm": 2.4099002695458718, "learning_rate": 1.997053354086047e-05, "loss": 0.8142, "step": 2168 }, { "epoch": 0.16120401337792642, "grad_norm": 2.6451629244991492, "learning_rate": 1.9970471958129492e-05, "loss": 0.9883, "step": 2169 }, { "epoch": 0.16127833519137866, "grad_norm": 2.1639846655703145, "learning_rate": 1.9970410311209133e-05, "loss": 0.8047, "step": 2170 }, { "epoch": 0.16135265700483092, "grad_norm": 2.274778886309213, "learning_rate": 1.997034860009979e-05, "loss": 1.025, "step": 2171 }, { "epoch": 0.16142697881828316, "grad_norm": 2.736133603479137, "learning_rate": 1.997028682480186e-05, "loss": 0.9139, "step": 2172 }, { "epoch": 0.1615013006317354, "grad_norm": 2.324344392472266, "learning_rate": 1.9970224985315738e-05, "loss": 0.9867, "step": 2173 }, { "epoch": 0.16157562244518767, "grad_norm": 2.608658184623505, "learning_rate": 1.9970163081641826e-05, "loss": 0.9399, "step": 2174 }, { "epoch": 0.1616499442586399, "grad_norm": 2.126024265224311, "learning_rate": 1.9970101113780523e-05, "loss": 0.6905, "step": 2175 }, { "epoch": 0.16172426607209217, "grad_norm": 2.189314729873458, "learning_rate": 1.9970039081732225e-05, "loss": 0.9704, "step": 2176 }, { "epoch": 0.1617985878855444, "grad_norm": 2.191971506555856, "learning_rate": 1.9969976985497336e-05, "loss": 0.9038, "step": 2177 }, { "epoch": 0.16187290969899665, "grad_norm": 2.2424228984381305, "learning_rate": 1.996991482507625e-05, "loss": 0.8809, "step": 2178 }, { "epoch": 0.1619472315124489, "grad_norm": 2.863455691497385, "learning_rate": 1.996985260046937e-05, "loss": 0.9101, "step": 2179 }, { "epoch": 0.16202155332590115, "grad_norm": 2.35081954346471, "learning_rate": 1.9969790311677095e-05, "loss": 0.8328, "step": 2180 }, { "epoch": 0.1620958751393534, "grad_norm": 2.2520247444506944, "learning_rate": 1.996972795869983e-05, "loss": 0.9898, "step": 2181 }, { "epoch": 0.16217019695280566, "grad_norm": 2.0098774026173665, "learning_rate": 1.996966554153797e-05, "loss": 0.7518, "step": 2182 }, { "epoch": 0.1622445187662579, "grad_norm": 3.2035737089731726, "learning_rate": 1.996960306019192e-05, "loss": 0.8783, "step": 2183 }, { "epoch": 0.16231884057971013, "grad_norm": 2.2803986251931465, "learning_rate": 1.9969540514662086e-05, "loss": 0.916, "step": 2184 }, { "epoch": 0.1623931623931624, "grad_norm": 2.6407763329295726, "learning_rate": 1.9969477904948865e-05, "loss": 0.8132, "step": 2185 }, { "epoch": 0.16246748420661464, "grad_norm": 2.5333960589511344, "learning_rate": 1.9969415231052665e-05, "loss": 0.9179, "step": 2186 }, { "epoch": 0.1625418060200669, "grad_norm": 2.3037914439725102, "learning_rate": 1.9969352492973884e-05, "loss": 1.0044, "step": 2187 }, { "epoch": 0.16261612783351914, "grad_norm": 3.1520776104053017, "learning_rate": 1.996928969071293e-05, "loss": 1.0614, "step": 2188 }, { "epoch": 0.16269044964697138, "grad_norm": 2.4531427067591154, "learning_rate": 1.996922682427021e-05, "loss": 0.7182, "step": 2189 }, { "epoch": 0.16276477146042365, "grad_norm": 2.369253907391909, "learning_rate": 1.9969163893646123e-05, "loss": 0.9348, "step": 2190 }, { "epoch": 0.16283909327387588, "grad_norm": 2.891271911357659, "learning_rate": 1.9969100898841075e-05, "loss": 1.102, "step": 2191 }, { "epoch": 0.16291341508732812, "grad_norm": 2.582984420819272, "learning_rate": 1.996903783985547e-05, "loss": 1.0335, "step": 2192 }, { "epoch": 0.1629877369007804, "grad_norm": 2.075977296868258, "learning_rate": 1.9968974716689722e-05, "loss": 0.7299, "step": 2193 }, { "epoch": 0.16306205871423263, "grad_norm": 2.4659939372600546, "learning_rate": 1.996891152934423e-05, "loss": 0.9617, "step": 2194 }, { "epoch": 0.16313638052768487, "grad_norm": 2.7312684324920333, "learning_rate": 1.9968848277819403e-05, "loss": 0.8926, "step": 2195 }, { "epoch": 0.16321070234113713, "grad_norm": 2.293488980528426, "learning_rate": 1.9968784962115645e-05, "loss": 0.9185, "step": 2196 }, { "epoch": 0.16328502415458937, "grad_norm": 2.332732904924003, "learning_rate": 1.996872158223337e-05, "loss": 1.0307, "step": 2197 }, { "epoch": 0.1633593459680416, "grad_norm": 3.061656748045755, "learning_rate": 1.9968658138172984e-05, "loss": 1.0422, "step": 2198 }, { "epoch": 0.16343366778149387, "grad_norm": 3.5686510422581743, "learning_rate": 1.996859462993489e-05, "loss": 0.8389, "step": 2199 }, { "epoch": 0.1635079895949461, "grad_norm": 3.091008447288015, "learning_rate": 1.9968531057519502e-05, "loss": 1.0758, "step": 2200 }, { "epoch": 0.16358231140839838, "grad_norm": 2.3411121562266732, "learning_rate": 1.996846742092723e-05, "loss": 1.0487, "step": 2201 }, { "epoch": 0.16365663322185062, "grad_norm": 2.701769143786672, "learning_rate": 1.9968403720158482e-05, "loss": 1.0754, "step": 2202 }, { "epoch": 0.16373095503530286, "grad_norm": 2.2545671079029654, "learning_rate": 1.9968339955213668e-05, "loss": 0.9152, "step": 2203 }, { "epoch": 0.16380527684875512, "grad_norm": 2.0998792598783558, "learning_rate": 1.9968276126093195e-05, "loss": 0.8145, "step": 2204 }, { "epoch": 0.16387959866220736, "grad_norm": 2.026870054677082, "learning_rate": 1.9968212232797484e-05, "loss": 0.8487, "step": 2205 }, { "epoch": 0.1639539204756596, "grad_norm": 2.305150190539749, "learning_rate": 1.9968148275326932e-05, "loss": 0.9518, "step": 2206 }, { "epoch": 0.16402824228911186, "grad_norm": 2.1181887412849507, "learning_rate": 1.9968084253681966e-05, "loss": 0.8845, "step": 2207 }, { "epoch": 0.1641025641025641, "grad_norm": 2.305402660681178, "learning_rate": 1.9968020167862988e-05, "loss": 0.9046, "step": 2208 }, { "epoch": 0.16417688591601634, "grad_norm": 2.1183636357511397, "learning_rate": 1.9967956017870412e-05, "loss": 0.8165, "step": 2209 }, { "epoch": 0.1642512077294686, "grad_norm": 2.155588136005924, "learning_rate": 1.996789180370465e-05, "loss": 0.7316, "step": 2210 }, { "epoch": 0.16432552954292085, "grad_norm": 2.6530329027563964, "learning_rate": 1.9967827525366123e-05, "loss": 1.2254, "step": 2211 }, { "epoch": 0.16439985135637308, "grad_norm": 2.417239978192243, "learning_rate": 1.996776318285524e-05, "loss": 1.1063, "step": 2212 }, { "epoch": 0.16447417316982535, "grad_norm": 2.081741197407199, "learning_rate": 1.9967698776172414e-05, "loss": 0.8757, "step": 2213 }, { "epoch": 0.1645484949832776, "grad_norm": 2.531472793334557, "learning_rate": 1.9967634305318055e-05, "loss": 0.97, "step": 2214 }, { "epoch": 0.16462281679672985, "grad_norm": 2.408421146250649, "learning_rate": 1.996756977029259e-05, "loss": 0.8847, "step": 2215 }, { "epoch": 0.1646971386101821, "grad_norm": 2.7217969094517778, "learning_rate": 1.9967505171096425e-05, "loss": 0.8927, "step": 2216 }, { "epoch": 0.16477146042363433, "grad_norm": 4.146820506208441, "learning_rate": 1.996744050772998e-05, "loss": 1.0676, "step": 2217 }, { "epoch": 0.1648457822370866, "grad_norm": 3.2125179435073123, "learning_rate": 1.996737578019367e-05, "loss": 0.8415, "step": 2218 }, { "epoch": 0.16492010405053883, "grad_norm": 2.4011408232659486, "learning_rate": 1.9967310988487912e-05, "loss": 1.0555, "step": 2219 }, { "epoch": 0.16499442586399107, "grad_norm": 2.1936691527793672, "learning_rate": 1.996724613261312e-05, "loss": 0.8021, "step": 2220 }, { "epoch": 0.16506874767744334, "grad_norm": 2.5543664022494954, "learning_rate": 1.9967181212569717e-05, "loss": 0.8253, "step": 2221 }, { "epoch": 0.16514306949089558, "grad_norm": 2.81148533165266, "learning_rate": 1.996711622835812e-05, "loss": 1.0465, "step": 2222 }, { "epoch": 0.16521739130434782, "grad_norm": 2.419133994790502, "learning_rate": 1.9967051179978746e-05, "loss": 1.1554, "step": 2223 }, { "epoch": 0.16529171311780008, "grad_norm": 2.7612697866972433, "learning_rate": 1.996698606743201e-05, "loss": 0.9428, "step": 2224 }, { "epoch": 0.16536603493125232, "grad_norm": 2.982636310020004, "learning_rate": 1.9966920890718338e-05, "loss": 0.8733, "step": 2225 }, { "epoch": 0.16544035674470456, "grad_norm": 2.4224156255461087, "learning_rate": 1.9966855649838145e-05, "loss": 1.0123, "step": 2226 }, { "epoch": 0.16551467855815682, "grad_norm": 2.584541551198198, "learning_rate": 1.9966790344791852e-05, "loss": 0.8033, "step": 2227 }, { "epoch": 0.16558900037160906, "grad_norm": 2.6170119601490907, "learning_rate": 1.9966724975579882e-05, "loss": 1.0004, "step": 2228 }, { "epoch": 0.16566332218506133, "grad_norm": 3.007057001451003, "learning_rate": 1.9966659542202653e-05, "loss": 0.8489, "step": 2229 }, { "epoch": 0.16573764399851357, "grad_norm": 2.381848629196486, "learning_rate": 1.9966594044660588e-05, "loss": 1.0205, "step": 2230 }, { "epoch": 0.1658119658119658, "grad_norm": 2.51832385433257, "learning_rate": 1.9966528482954106e-05, "loss": 0.9324, "step": 2231 }, { "epoch": 0.16588628762541807, "grad_norm": 2.37675549370961, "learning_rate": 1.996646285708363e-05, "loss": 0.894, "step": 2232 }, { "epoch": 0.1659606094388703, "grad_norm": 3.4208113772198416, "learning_rate": 1.9966397167049582e-05, "loss": 0.944, "step": 2233 }, { "epoch": 0.16603493125232255, "grad_norm": 3.1862069066004244, "learning_rate": 1.996633141285239e-05, "loss": 1.1286, "step": 2234 }, { "epoch": 0.16610925306577481, "grad_norm": 2.3528246199431977, "learning_rate": 1.9966265594492473e-05, "loss": 0.7975, "step": 2235 }, { "epoch": 0.16618357487922705, "grad_norm": 2.9341427892338516, "learning_rate": 1.9966199711970253e-05, "loss": 0.9408, "step": 2236 }, { "epoch": 0.1662578966926793, "grad_norm": 2.284490929499915, "learning_rate": 1.996613376528616e-05, "loss": 0.8105, "step": 2237 }, { "epoch": 0.16633221850613156, "grad_norm": 2.9705111727074134, "learning_rate": 1.996606775444061e-05, "loss": 1.1599, "step": 2238 }, { "epoch": 0.1664065403195838, "grad_norm": 13.559312688741267, "learning_rate": 1.9966001679434036e-05, "loss": 1.1247, "step": 2239 }, { "epoch": 0.16648086213303603, "grad_norm": 3.125317572470351, "learning_rate": 1.9965935540266857e-05, "loss": 1.105, "step": 2240 }, { "epoch": 0.1665551839464883, "grad_norm": 2.4940542247199673, "learning_rate": 1.9965869336939508e-05, "loss": 0.8374, "step": 2241 }, { "epoch": 0.16662950575994054, "grad_norm": 2.7502644603316, "learning_rate": 1.9965803069452405e-05, "loss": 0.8403, "step": 2242 }, { "epoch": 0.1667038275733928, "grad_norm": 2.75940744811414, "learning_rate": 1.9965736737805978e-05, "loss": 0.9204, "step": 2243 }, { "epoch": 0.16677814938684504, "grad_norm": 2.4570066731815916, "learning_rate": 1.9965670342000658e-05, "loss": 0.7804, "step": 2244 }, { "epoch": 0.16685247120029728, "grad_norm": 2.549370118594342, "learning_rate": 1.9965603882036867e-05, "loss": 1.0478, "step": 2245 }, { "epoch": 0.16692679301374955, "grad_norm": 2.805380491093818, "learning_rate": 1.996553735791504e-05, "loss": 0.9268, "step": 2246 }, { "epoch": 0.16700111482720179, "grad_norm": 3.0972641790955175, "learning_rate": 1.9965470769635595e-05, "loss": 1.1465, "step": 2247 }, { "epoch": 0.16707543664065402, "grad_norm": 2.6796338381309655, "learning_rate": 1.996540411719897e-05, "loss": 0.8869, "step": 2248 }, { "epoch": 0.1671497584541063, "grad_norm": 7.032771234833605, "learning_rate": 1.9965337400605586e-05, "loss": 1.0202, "step": 2249 }, { "epoch": 0.16722408026755853, "grad_norm": 2.5042066659047064, "learning_rate": 1.9965270619855883e-05, "loss": 0.6948, "step": 2250 }, { "epoch": 0.16729840208101077, "grad_norm": 2.8866634074532826, "learning_rate": 1.9965203774950282e-05, "loss": 0.9295, "step": 2251 }, { "epoch": 0.16737272389446303, "grad_norm": 2.962179390743704, "learning_rate": 1.9965136865889213e-05, "loss": 1.0927, "step": 2252 }, { "epoch": 0.16744704570791527, "grad_norm": 2.7341901485151268, "learning_rate": 1.9965069892673113e-05, "loss": 1.0454, "step": 2253 }, { "epoch": 0.1675213675213675, "grad_norm": 3.1218915625005685, "learning_rate": 1.996500285530241e-05, "loss": 0.8885, "step": 2254 }, { "epoch": 0.16759568933481978, "grad_norm": 2.830554244378876, "learning_rate": 1.9964935753777535e-05, "loss": 1.0909, "step": 2255 }, { "epoch": 0.167670011148272, "grad_norm": 2.6794785160827996, "learning_rate": 1.996486858809892e-05, "loss": 1.1293, "step": 2256 }, { "epoch": 0.16774433296172428, "grad_norm": 2.7560532902097377, "learning_rate": 1.9964801358267e-05, "loss": 0.8384, "step": 2257 }, { "epoch": 0.16781865477517652, "grad_norm": 2.5881646175180757, "learning_rate": 1.9964734064282206e-05, "loss": 1.2353, "step": 2258 }, { "epoch": 0.16789297658862876, "grad_norm": 3.14959086447097, "learning_rate": 1.9964666706144967e-05, "loss": 1.1163, "step": 2259 }, { "epoch": 0.16796729840208102, "grad_norm": 3.0394992961853387, "learning_rate": 1.996459928385573e-05, "loss": 1.2387, "step": 2260 }, { "epoch": 0.16804162021553326, "grad_norm": 2.9411303207125643, "learning_rate": 1.996453179741491e-05, "loss": 0.9953, "step": 2261 }, { "epoch": 0.1681159420289855, "grad_norm": 2.3654827399930665, "learning_rate": 1.9964464246822954e-05, "loss": 0.7165, "step": 2262 }, { "epoch": 0.16819026384243776, "grad_norm": 2.5688115622150276, "learning_rate": 1.9964396632080296e-05, "loss": 0.955, "step": 2263 }, { "epoch": 0.16826458565589, "grad_norm": 2.898162394711946, "learning_rate": 1.996432895318737e-05, "loss": 1.1598, "step": 2264 }, { "epoch": 0.16833890746934224, "grad_norm": 2.1496033648318074, "learning_rate": 1.996426121014461e-05, "loss": 0.9303, "step": 2265 }, { "epoch": 0.1684132292827945, "grad_norm": 2.41222564665506, "learning_rate": 1.9964193402952454e-05, "loss": 0.8014, "step": 2266 }, { "epoch": 0.16848755109624675, "grad_norm": 10.730417215086383, "learning_rate": 1.9964125531611335e-05, "loss": 0.9567, "step": 2267 }, { "epoch": 0.16856187290969898, "grad_norm": 2.726530538526869, "learning_rate": 1.9964057596121697e-05, "loss": 0.9565, "step": 2268 }, { "epoch": 0.16863619472315125, "grad_norm": 3.245534371867977, "learning_rate": 1.996398959648397e-05, "loss": 0.9101, "step": 2269 }, { "epoch": 0.1687105165366035, "grad_norm": 4.111093449631658, "learning_rate": 1.9963921532698595e-05, "loss": 0.948, "step": 2270 }, { "epoch": 0.16878483835005575, "grad_norm": 8.41647177454001, "learning_rate": 1.9963853404766012e-05, "loss": 1.1908, "step": 2271 }, { "epoch": 0.168859160163508, "grad_norm": 2.8779370195422103, "learning_rate": 1.996378521268666e-05, "loss": 0.7664, "step": 2272 }, { "epoch": 0.16893348197696023, "grad_norm": 2.9663508271811487, "learning_rate": 1.996371695646097e-05, "loss": 1.052, "step": 2273 }, { "epoch": 0.1690078037904125, "grad_norm": 4.409950436385808, "learning_rate": 1.996364863608939e-05, "loss": 0.7847, "step": 2274 }, { "epoch": 0.16908212560386474, "grad_norm": 2.448252528559285, "learning_rate": 1.9963580251572355e-05, "loss": 0.7706, "step": 2275 }, { "epoch": 0.16915644741731697, "grad_norm": 2.806433445213408, "learning_rate": 1.9963511802910313e-05, "loss": 0.9311, "step": 2276 }, { "epoch": 0.16923076923076924, "grad_norm": 3.4165500778818085, "learning_rate": 1.9963443290103696e-05, "loss": 1.168, "step": 2277 }, { "epoch": 0.16930509104422148, "grad_norm": 2.6013939841833698, "learning_rate": 1.9963374713152944e-05, "loss": 0.8828, "step": 2278 }, { "epoch": 0.16937941285767372, "grad_norm": 2.052774179958701, "learning_rate": 1.9963306072058505e-05, "loss": 0.9884, "step": 2279 }, { "epoch": 0.16945373467112598, "grad_norm": 2.362633679973444, "learning_rate": 1.996323736682082e-05, "loss": 0.784, "step": 2280 }, { "epoch": 0.16952805648457822, "grad_norm": 2.5473132179446205, "learning_rate": 1.9963168597440332e-05, "loss": 0.8385, "step": 2281 }, { "epoch": 0.16960237829803046, "grad_norm": 2.477863046644869, "learning_rate": 1.9963099763917476e-05, "loss": 1.0412, "step": 2282 }, { "epoch": 0.16967670011148273, "grad_norm": 2.7277615871272856, "learning_rate": 1.9963030866252705e-05, "loss": 0.827, "step": 2283 }, { "epoch": 0.16975102192493496, "grad_norm": 3.092334993744543, "learning_rate": 1.9962961904446455e-05, "loss": 0.8016, "step": 2284 }, { "epoch": 0.16982534373838723, "grad_norm": 3.307522568249891, "learning_rate": 1.9962892878499177e-05, "loss": 0.9667, "step": 2285 }, { "epoch": 0.16989966555183947, "grad_norm": 2.7009076937674807, "learning_rate": 1.996282378841131e-05, "loss": 1.1079, "step": 2286 }, { "epoch": 0.1699739873652917, "grad_norm": 2.5476406305256263, "learning_rate": 1.99627546341833e-05, "loss": 0.9803, "step": 2287 }, { "epoch": 0.17004830917874397, "grad_norm": 3.0411807844615297, "learning_rate": 1.9962685415815592e-05, "loss": 1.1192, "step": 2288 }, { "epoch": 0.1701226309921962, "grad_norm": 3.958115538368741, "learning_rate": 1.9962616133308632e-05, "loss": 0.8342, "step": 2289 }, { "epoch": 0.17019695280564845, "grad_norm": 3.7093446581017355, "learning_rate": 1.996254678666287e-05, "loss": 0.9813, "step": 2290 }, { "epoch": 0.17027127461910072, "grad_norm": 2.621257991244305, "learning_rate": 1.9962477375878742e-05, "loss": 0.935, "step": 2291 }, { "epoch": 0.17034559643255295, "grad_norm": 2.768878688023005, "learning_rate": 1.9962407900956708e-05, "loss": 0.7793, "step": 2292 }, { "epoch": 0.1704199182460052, "grad_norm": 2.6843025019303965, "learning_rate": 1.9962338361897206e-05, "loss": 0.937, "step": 2293 }, { "epoch": 0.17049424005945746, "grad_norm": 3.3618524376353505, "learning_rate": 1.996226875870069e-05, "loss": 0.932, "step": 2294 }, { "epoch": 0.1705685618729097, "grad_norm": 2.2957203897441296, "learning_rate": 1.9962199091367598e-05, "loss": 0.8335, "step": 2295 }, { "epoch": 0.17064288368636193, "grad_norm": 2.292247154400236, "learning_rate": 1.9962129359898386e-05, "loss": 0.6935, "step": 2296 }, { "epoch": 0.1707172054998142, "grad_norm": 2.4245550631821597, "learning_rate": 1.9962059564293506e-05, "loss": 0.926, "step": 2297 }, { "epoch": 0.17079152731326644, "grad_norm": 2.579663144748812, "learning_rate": 1.9961989704553403e-05, "loss": 0.7541, "step": 2298 }, { "epoch": 0.1708658491267187, "grad_norm": 2.6773250689421135, "learning_rate": 1.9961919780678525e-05, "loss": 0.8893, "step": 2299 }, { "epoch": 0.17094017094017094, "grad_norm": 2.4634306040602154, "learning_rate": 1.9961849792669326e-05, "loss": 0.6216, "step": 2300 }, { "epoch": 0.17101449275362318, "grad_norm": 3.005023766582915, "learning_rate": 1.9961779740526254e-05, "loss": 1.062, "step": 2301 }, { "epoch": 0.17108881456707545, "grad_norm": 3.508122093615157, "learning_rate": 1.996170962424976e-05, "loss": 0.737, "step": 2302 }, { "epoch": 0.1711631363805277, "grad_norm": 2.6128190799778266, "learning_rate": 1.99616394438403e-05, "loss": 0.8723, "step": 2303 }, { "epoch": 0.17123745819397992, "grad_norm": 3.6883766187974585, "learning_rate": 1.996156919929832e-05, "loss": 1.069, "step": 2304 }, { "epoch": 0.1713117800074322, "grad_norm": 2.7765687000418597, "learning_rate": 1.9961498890624275e-05, "loss": 0.7361, "step": 2305 }, { "epoch": 0.17138610182088443, "grad_norm": 2.30984974474655, "learning_rate": 1.9961428517818615e-05, "loss": 0.9415, "step": 2306 }, { "epoch": 0.17146042363433667, "grad_norm": 2.4270692353317025, "learning_rate": 1.9961358080881794e-05, "loss": 1.1018, "step": 2307 }, { "epoch": 0.17153474544778893, "grad_norm": 2.0726790189605033, "learning_rate": 1.996128757981427e-05, "loss": 0.8779, "step": 2308 }, { "epoch": 0.17160906726124117, "grad_norm": 2.533426069276118, "learning_rate": 1.9961217014616493e-05, "loss": 1.0627, "step": 2309 }, { "epoch": 0.1716833890746934, "grad_norm": 2.153570314213445, "learning_rate": 1.9961146385288918e-05, "loss": 0.8047, "step": 2310 }, { "epoch": 0.17175771088814568, "grad_norm": 2.1913256452954513, "learning_rate": 1.9961075691831997e-05, "loss": 0.9543, "step": 2311 }, { "epoch": 0.17183203270159791, "grad_norm": 2.7867671825931946, "learning_rate": 1.996100493424619e-05, "loss": 1.1629, "step": 2312 }, { "epoch": 0.17190635451505018, "grad_norm": 2.698654884239293, "learning_rate": 1.9960934112531946e-05, "loss": 0.932, "step": 2313 }, { "epoch": 0.17198067632850242, "grad_norm": 2.29546347673357, "learning_rate": 1.996086322668973e-05, "loss": 0.778, "step": 2314 }, { "epoch": 0.17205499814195466, "grad_norm": 2.794982405171022, "learning_rate": 1.996079227671999e-05, "loss": 1.042, "step": 2315 }, { "epoch": 0.17212931995540692, "grad_norm": 2.150805254867613, "learning_rate": 1.996072126262319e-05, "loss": 0.9321, "step": 2316 }, { "epoch": 0.17220364176885916, "grad_norm": 4.006938352551178, "learning_rate": 1.9960650184399777e-05, "loss": 0.9333, "step": 2317 }, { "epoch": 0.1722779635823114, "grad_norm": 3.165249520781134, "learning_rate": 1.996057904205022e-05, "loss": 0.8265, "step": 2318 }, { "epoch": 0.17235228539576367, "grad_norm": 2.022351020782344, "learning_rate": 1.9960507835574972e-05, "loss": 0.8287, "step": 2319 }, { "epoch": 0.1724266072092159, "grad_norm": 2.3567983078614367, "learning_rate": 1.9960436564974487e-05, "loss": 0.7093, "step": 2320 }, { "epoch": 0.17250092902266814, "grad_norm": 2.050303069953295, "learning_rate": 1.996036523024923e-05, "loss": 0.698, "step": 2321 }, { "epoch": 0.1725752508361204, "grad_norm": 3.0260042708951063, "learning_rate": 1.9960293831399663e-05, "loss": 1.0347, "step": 2322 }, { "epoch": 0.17264957264957265, "grad_norm": 2.467360508032951, "learning_rate": 1.996022236842624e-05, "loss": 0.9341, "step": 2323 }, { "epoch": 0.17272389446302489, "grad_norm": 2.566913139552629, "learning_rate": 1.9960150841329416e-05, "loss": 0.8111, "step": 2324 }, { "epoch": 0.17279821627647715, "grad_norm": 2.6200581001191843, "learning_rate": 1.9960079250109664e-05, "loss": 0.8097, "step": 2325 }, { "epoch": 0.1728725380899294, "grad_norm": 2.1867291521475694, "learning_rate": 1.9960007594767434e-05, "loss": 0.8833, "step": 2326 }, { "epoch": 0.17294685990338166, "grad_norm": 3.2160748236859806, "learning_rate": 1.9959935875303195e-05, "loss": 1.0788, "step": 2327 }, { "epoch": 0.1730211817168339, "grad_norm": 2.5397674637251546, "learning_rate": 1.9959864091717405e-05, "loss": 0.9464, "step": 2328 }, { "epoch": 0.17309550353028613, "grad_norm": 2.83205146304881, "learning_rate": 1.9959792244010527e-05, "loss": 0.9381, "step": 2329 }, { "epoch": 0.1731698253437384, "grad_norm": 2.544082444753728, "learning_rate": 1.995972033218302e-05, "loss": 0.8237, "step": 2330 }, { "epoch": 0.17324414715719064, "grad_norm": 2.711760702871251, "learning_rate": 1.9959648356235358e-05, "loss": 0.9086, "step": 2331 }, { "epoch": 0.17331846897064287, "grad_norm": 2.6463440492182024, "learning_rate": 1.995957631616799e-05, "loss": 1.246, "step": 2332 }, { "epoch": 0.17339279078409514, "grad_norm": 2.502733139183666, "learning_rate": 1.9959504211981388e-05, "loss": 0.9305, "step": 2333 }, { "epoch": 0.17346711259754738, "grad_norm": 2.624761581787047, "learning_rate": 1.9959432043676015e-05, "loss": 1.1044, "step": 2334 }, { "epoch": 0.17354143441099962, "grad_norm": 2.9285517367558254, "learning_rate": 1.9959359811252336e-05, "loss": 1.0046, "step": 2335 }, { "epoch": 0.17361575622445188, "grad_norm": 5.364946044799287, "learning_rate": 1.9959287514710814e-05, "loss": 0.9789, "step": 2336 }, { "epoch": 0.17369007803790412, "grad_norm": 3.0363241572538713, "learning_rate": 1.9959215154051916e-05, "loss": 1.0208, "step": 2337 }, { "epoch": 0.17376439985135636, "grad_norm": 2.6239048480165947, "learning_rate": 1.9959142729276108e-05, "loss": 0.762, "step": 2338 }, { "epoch": 0.17383872166480863, "grad_norm": 2.9571574600798542, "learning_rate": 1.9959070240383856e-05, "loss": 0.7231, "step": 2339 }, { "epoch": 0.17391304347826086, "grad_norm": 2.94689263405827, "learning_rate": 1.9958997687375624e-05, "loss": 1.0828, "step": 2340 }, { "epoch": 0.17398736529171313, "grad_norm": 2.359876460881203, "learning_rate": 1.9958925070251884e-05, "loss": 0.9702, "step": 2341 }, { "epoch": 0.17406168710516537, "grad_norm": 2.556178676171441, "learning_rate": 1.99588523890131e-05, "loss": 0.8433, "step": 2342 }, { "epoch": 0.1741360089186176, "grad_norm": 2.297693973273737, "learning_rate": 1.9958779643659746e-05, "loss": 0.9331, "step": 2343 }, { "epoch": 0.17421033073206987, "grad_norm": 3.2777989108970083, "learning_rate": 1.995870683419228e-05, "loss": 1.1394, "step": 2344 }, { "epoch": 0.1742846525455221, "grad_norm": 2.0386474552696345, "learning_rate": 1.9958633960611178e-05, "loss": 0.9805, "step": 2345 }, { "epoch": 0.17435897435897435, "grad_norm": 2.5812560921159067, "learning_rate": 1.9958561022916906e-05, "loss": 1.0092, "step": 2346 }, { "epoch": 0.17443329617242662, "grad_norm": 2.7008508438576118, "learning_rate": 1.9958488021109934e-05, "loss": 1.0516, "step": 2347 }, { "epoch": 0.17450761798587885, "grad_norm": 2.3517157631354246, "learning_rate": 1.9958414955190734e-05, "loss": 0.7222, "step": 2348 }, { "epoch": 0.1745819397993311, "grad_norm": 2.8759621764263468, "learning_rate": 1.995834182515978e-05, "loss": 1.0863, "step": 2349 }, { "epoch": 0.17465626161278336, "grad_norm": 2.3817529118350107, "learning_rate": 1.995826863101753e-05, "loss": 1.0243, "step": 2350 }, { "epoch": 0.1747305834262356, "grad_norm": 2.618442803683885, "learning_rate": 1.9958195372764466e-05, "loss": 1.0346, "step": 2351 }, { "epoch": 0.17480490523968784, "grad_norm": 2.812757692920294, "learning_rate": 1.9958122050401057e-05, "loss": 0.9963, "step": 2352 }, { "epoch": 0.1748792270531401, "grad_norm": 2.551519040322345, "learning_rate": 1.9958048663927777e-05, "loss": 0.9763, "step": 2353 }, { "epoch": 0.17495354886659234, "grad_norm": 2.1798884610675793, "learning_rate": 1.9957975213345095e-05, "loss": 0.8477, "step": 2354 }, { "epoch": 0.1750278706800446, "grad_norm": 2.5260883135421888, "learning_rate": 1.9957901698653487e-05, "loss": 0.8665, "step": 2355 }, { "epoch": 0.17510219249349684, "grad_norm": 2.260034680753321, "learning_rate": 1.995782811985342e-05, "loss": 0.9141, "step": 2356 }, { "epoch": 0.17517651430694908, "grad_norm": 2.548230062580561, "learning_rate": 1.9957754476945375e-05, "loss": 1.0265, "step": 2357 }, { "epoch": 0.17525083612040135, "grad_norm": 6.791058394166412, "learning_rate": 1.9957680769929827e-05, "loss": 0.9193, "step": 2358 }, { "epoch": 0.1753251579338536, "grad_norm": 2.8445357047811357, "learning_rate": 1.995760699880724e-05, "loss": 0.7381, "step": 2359 }, { "epoch": 0.17539947974730583, "grad_norm": 2.7050469615993293, "learning_rate": 1.99575331635781e-05, "loss": 0.8513, "step": 2360 }, { "epoch": 0.1754738015607581, "grad_norm": 3.3634345914567274, "learning_rate": 1.995745926424288e-05, "loss": 0.9891, "step": 2361 }, { "epoch": 0.17554812337421033, "grad_norm": 2.389201572319487, "learning_rate": 1.995738530080205e-05, "loss": 0.8743, "step": 2362 }, { "epoch": 0.17562244518766257, "grad_norm": 2.523271338571582, "learning_rate": 1.995731127325609e-05, "loss": 1.1617, "step": 2363 }, { "epoch": 0.17569676700111483, "grad_norm": 4.29368515038068, "learning_rate": 1.995723718160548e-05, "loss": 0.7475, "step": 2364 }, { "epoch": 0.17577108881456707, "grad_norm": 2.5639964549152534, "learning_rate": 1.995716302585069e-05, "loss": 0.8174, "step": 2365 }, { "epoch": 0.1758454106280193, "grad_norm": 4.4402367788744845, "learning_rate": 1.9957088805992205e-05, "loss": 1.1932, "step": 2366 }, { "epoch": 0.17591973244147158, "grad_norm": 2.4705381286435624, "learning_rate": 1.9957014522030495e-05, "loss": 1.1302, "step": 2367 }, { "epoch": 0.17599405425492382, "grad_norm": 2.1790744833050555, "learning_rate": 1.9956940173966044e-05, "loss": 0.8311, "step": 2368 }, { "epoch": 0.17606837606837608, "grad_norm": 2.728207257639415, "learning_rate": 1.9956865761799328e-05, "loss": 1.0538, "step": 2369 }, { "epoch": 0.17614269788182832, "grad_norm": 2.1504285944573778, "learning_rate": 1.995679128553083e-05, "loss": 0.7961, "step": 2370 }, { "epoch": 0.17621701969528056, "grad_norm": 2.8971833612489153, "learning_rate": 1.9956716745161022e-05, "loss": 1.0905, "step": 2371 }, { "epoch": 0.17629134150873282, "grad_norm": 3.265513253702625, "learning_rate": 1.9956642140690393e-05, "loss": 0.6502, "step": 2372 }, { "epoch": 0.17636566332218506, "grad_norm": 2.603562161907197, "learning_rate": 1.995656747211941e-05, "loss": 0.9324, "step": 2373 }, { "epoch": 0.1764399851356373, "grad_norm": 2.3574870419678353, "learning_rate": 1.995649273944857e-05, "loss": 0.8539, "step": 2374 }, { "epoch": 0.17651430694908957, "grad_norm": 2.4084829891119117, "learning_rate": 1.9956417942678343e-05, "loss": 0.8613, "step": 2375 }, { "epoch": 0.1765886287625418, "grad_norm": 2.819882842666215, "learning_rate": 1.995634308180922e-05, "loss": 0.8888, "step": 2376 }, { "epoch": 0.17666295057599404, "grad_norm": 2.492112497567111, "learning_rate": 1.995626815684167e-05, "loss": 1.1407, "step": 2377 }, { "epoch": 0.1767372723894463, "grad_norm": 2.5224679672316412, "learning_rate": 1.995619316777618e-05, "loss": 0.9088, "step": 2378 }, { "epoch": 0.17681159420289855, "grad_norm": 2.3242100629095006, "learning_rate": 1.9956118114613244e-05, "loss": 0.8635, "step": 2379 }, { "epoch": 0.17688591601635079, "grad_norm": 2.5042712292831975, "learning_rate": 1.9956042997353328e-05, "loss": 0.6978, "step": 2380 }, { "epoch": 0.17696023782980305, "grad_norm": 6.840993837134059, "learning_rate": 1.9955967815996925e-05, "loss": 1.0126, "step": 2381 }, { "epoch": 0.1770345596432553, "grad_norm": 6.173892062290851, "learning_rate": 1.9955892570544525e-05, "loss": 0.8346, "step": 2382 }, { "epoch": 0.17710888145670756, "grad_norm": 2.561809925923765, "learning_rate": 1.9955817260996597e-05, "loss": 0.7443, "step": 2383 }, { "epoch": 0.1771832032701598, "grad_norm": 2.1351623446616483, "learning_rate": 1.9955741887353638e-05, "loss": 0.9489, "step": 2384 }, { "epoch": 0.17725752508361203, "grad_norm": 2.3502038774358756, "learning_rate": 1.995566644961613e-05, "loss": 1.1093, "step": 2385 }, { "epoch": 0.1773318468970643, "grad_norm": 2.2557962901082664, "learning_rate": 1.9955590947784557e-05, "loss": 0.964, "step": 2386 }, { "epoch": 0.17740616871051654, "grad_norm": 2.33163523105372, "learning_rate": 1.9955515381859403e-05, "loss": 1.0239, "step": 2387 }, { "epoch": 0.17748049052396878, "grad_norm": 2.5047478731874415, "learning_rate": 1.995543975184116e-05, "loss": 0.7738, "step": 2388 }, { "epoch": 0.17755481233742104, "grad_norm": 2.038920593230072, "learning_rate": 1.995536405773031e-05, "loss": 0.7753, "step": 2389 }, { "epoch": 0.17762913415087328, "grad_norm": 2.1768967469545317, "learning_rate": 1.9955288299527346e-05, "loss": 0.8831, "step": 2390 }, { "epoch": 0.17770345596432552, "grad_norm": 2.7667156812262963, "learning_rate": 1.995521247723275e-05, "loss": 1.0037, "step": 2391 }, { "epoch": 0.17777777777777778, "grad_norm": 2.953796683061694, "learning_rate": 1.9955136590847016e-05, "loss": 0.8939, "step": 2392 }, { "epoch": 0.17785209959123002, "grad_norm": 4.314639171160394, "learning_rate": 1.995506064037063e-05, "loss": 0.9038, "step": 2393 }, { "epoch": 0.17792642140468226, "grad_norm": 2.6090925505243105, "learning_rate": 1.9954984625804073e-05, "loss": 0.9323, "step": 2394 }, { "epoch": 0.17800074321813453, "grad_norm": 2.2219227276177484, "learning_rate": 1.9954908547147847e-05, "loss": 0.9106, "step": 2395 }, { "epoch": 0.17807506503158677, "grad_norm": 2.383628701591438, "learning_rate": 1.9954832404402435e-05, "loss": 0.9407, "step": 2396 }, { "epoch": 0.17814938684503903, "grad_norm": 2.602822071168724, "learning_rate": 1.995475619756833e-05, "loss": 0.9043, "step": 2397 }, { "epoch": 0.17822370865849127, "grad_norm": 2.3290473543148096, "learning_rate": 1.9954679926646018e-05, "loss": 0.9132, "step": 2398 }, { "epoch": 0.1782980304719435, "grad_norm": 2.1808906289341645, "learning_rate": 1.9954603591635995e-05, "loss": 1.0366, "step": 2399 }, { "epoch": 0.17837235228539577, "grad_norm": 2.5773867471994274, "learning_rate": 1.995452719253875e-05, "loss": 0.9131, "step": 2400 }, { "epoch": 0.178446674098848, "grad_norm": 2.27380491393145, "learning_rate": 1.995445072935478e-05, "loss": 1.0696, "step": 2401 }, { "epoch": 0.17852099591230025, "grad_norm": 2.276225156294802, "learning_rate": 1.9954374202084567e-05, "loss": 0.8377, "step": 2402 }, { "epoch": 0.17859531772575252, "grad_norm": 2.4968396989051143, "learning_rate": 1.995429761072861e-05, "loss": 0.9787, "step": 2403 }, { "epoch": 0.17866963953920476, "grad_norm": 3.0298178341762894, "learning_rate": 1.99542209552874e-05, "loss": 0.9661, "step": 2404 }, { "epoch": 0.178743961352657, "grad_norm": 2.163263943423682, "learning_rate": 1.9954144235761436e-05, "loss": 0.8687, "step": 2405 }, { "epoch": 0.17881828316610926, "grad_norm": 2.8656652652801604, "learning_rate": 1.9954067452151207e-05, "loss": 0.994, "step": 2406 }, { "epoch": 0.1788926049795615, "grad_norm": 2.5758164867704925, "learning_rate": 1.9953990604457207e-05, "loss": 0.9511, "step": 2407 }, { "epoch": 0.17896692679301374, "grad_norm": 2.699172710574005, "learning_rate": 1.9953913692679933e-05, "loss": 0.8282, "step": 2408 }, { "epoch": 0.179041248606466, "grad_norm": 2.0424673178687507, "learning_rate": 1.9953836716819877e-05, "loss": 0.8791, "step": 2409 }, { "epoch": 0.17911557041991824, "grad_norm": 2.286093944453155, "learning_rate": 1.9953759676877543e-05, "loss": 0.8204, "step": 2410 }, { "epoch": 0.1791898922333705, "grad_norm": 2.455278507282846, "learning_rate": 1.9953682572853413e-05, "loss": 0.7961, "step": 2411 }, { "epoch": 0.17926421404682275, "grad_norm": 2.8185181017573946, "learning_rate": 1.9953605404747994e-05, "loss": 0.8346, "step": 2412 }, { "epoch": 0.17933853586027498, "grad_norm": 2.3716524624410553, "learning_rate": 1.995352817256178e-05, "loss": 1.116, "step": 2413 }, { "epoch": 0.17941285767372725, "grad_norm": 2.2322381489599, "learning_rate": 1.9953450876295267e-05, "loss": 0.9177, "step": 2414 }, { "epoch": 0.1794871794871795, "grad_norm": 2.994638453595344, "learning_rate": 1.9953373515948958e-05, "loss": 0.9977, "step": 2415 }, { "epoch": 0.17956150130063173, "grad_norm": 2.4654983375613133, "learning_rate": 1.9953296091523343e-05, "loss": 0.9674, "step": 2416 }, { "epoch": 0.179635823114084, "grad_norm": 2.707770964519982, "learning_rate": 1.9953218603018923e-05, "loss": 0.8707, "step": 2417 }, { "epoch": 0.17971014492753623, "grad_norm": 3.0356872625006788, "learning_rate": 1.99531410504362e-05, "loss": 0.938, "step": 2418 }, { "epoch": 0.17978446674098847, "grad_norm": 11.761864585377227, "learning_rate": 1.995306343377567e-05, "loss": 0.8838, "step": 2419 }, { "epoch": 0.17985878855444073, "grad_norm": 2.496460556287733, "learning_rate": 1.9952985753037836e-05, "loss": 0.7174, "step": 2420 }, { "epoch": 0.17993311036789297, "grad_norm": 2.243283462467614, "learning_rate": 1.9952908008223194e-05, "loss": 0.737, "step": 2421 }, { "epoch": 0.1800074321813452, "grad_norm": 1.8956019626907972, "learning_rate": 1.9952830199332252e-05, "loss": 0.8126, "step": 2422 }, { "epoch": 0.18008175399479748, "grad_norm": 2.3173145966110646, "learning_rate": 1.99527523263655e-05, "loss": 0.9077, "step": 2423 }, { "epoch": 0.18015607580824972, "grad_norm": 2.921269828074155, "learning_rate": 1.9952674389323446e-05, "loss": 0.9945, "step": 2424 }, { "epoch": 0.18023039762170198, "grad_norm": 2.958921466888027, "learning_rate": 1.9952596388206592e-05, "loss": 0.8886, "step": 2425 }, { "epoch": 0.18030471943515422, "grad_norm": 3.7158644435110117, "learning_rate": 1.9952518323015437e-05, "loss": 1.0743, "step": 2426 }, { "epoch": 0.18037904124860646, "grad_norm": 3.1153129171248017, "learning_rate": 1.9952440193750487e-05, "loss": 1.1421, "step": 2427 }, { "epoch": 0.18045336306205872, "grad_norm": 2.4740849367662467, "learning_rate": 1.9952362000412246e-05, "loss": 0.9319, "step": 2428 }, { "epoch": 0.18052768487551096, "grad_norm": 2.789306419461318, "learning_rate": 1.995228374300121e-05, "loss": 1.0098, "step": 2429 }, { "epoch": 0.1806020066889632, "grad_norm": 2.6271181776697734, "learning_rate": 1.9952205421517888e-05, "loss": 0.9141, "step": 2430 }, { "epoch": 0.18067632850241547, "grad_norm": 3.663286098030393, "learning_rate": 1.9952127035962785e-05, "loss": 0.7911, "step": 2431 }, { "epoch": 0.1807506503158677, "grad_norm": 5.672249839068829, "learning_rate": 1.9952048586336405e-05, "loss": 1.0355, "step": 2432 }, { "epoch": 0.18082497212931994, "grad_norm": 2.1149661937009427, "learning_rate": 1.9951970072639254e-05, "loss": 0.8629, "step": 2433 }, { "epoch": 0.1808992939427722, "grad_norm": 2.5270090966012893, "learning_rate": 1.9951891494871833e-05, "loss": 1.0763, "step": 2434 }, { "epoch": 0.18097361575622445, "grad_norm": 2.6947458351104667, "learning_rate": 1.995181285303465e-05, "loss": 0.6052, "step": 2435 }, { "epoch": 0.1810479375696767, "grad_norm": 5.623504263684225, "learning_rate": 1.9951734147128217e-05, "loss": 1.0337, "step": 2436 }, { "epoch": 0.18112225938312895, "grad_norm": 2.411058461319538, "learning_rate": 1.9951655377153035e-05, "loss": 1.0742, "step": 2437 }, { "epoch": 0.1811965811965812, "grad_norm": 1.94387302668099, "learning_rate": 1.995157654310961e-05, "loss": 0.8877, "step": 2438 }, { "epoch": 0.18127090301003346, "grad_norm": 2.3143948652851707, "learning_rate": 1.995149764499845e-05, "loss": 0.8036, "step": 2439 }, { "epoch": 0.1813452248234857, "grad_norm": 3.0235913152620624, "learning_rate": 1.995141868282007e-05, "loss": 1.1776, "step": 2440 }, { "epoch": 0.18141954663693793, "grad_norm": 2.3426472509946747, "learning_rate": 1.995133965657497e-05, "loss": 1.0278, "step": 2441 }, { "epoch": 0.1814938684503902, "grad_norm": 2.4628262540615857, "learning_rate": 1.995126056626366e-05, "loss": 0.9405, "step": 2442 }, { "epoch": 0.18156819026384244, "grad_norm": 2.5566811104334146, "learning_rate": 1.995118141188665e-05, "loss": 0.8171, "step": 2443 }, { "epoch": 0.18164251207729468, "grad_norm": 2.3684806461596004, "learning_rate": 1.9951102193444454e-05, "loss": 0.8881, "step": 2444 }, { "epoch": 0.18171683389074694, "grad_norm": 2.4688054486023585, "learning_rate": 1.9951022910937574e-05, "loss": 0.8872, "step": 2445 }, { "epoch": 0.18179115570419918, "grad_norm": 2.106041017801445, "learning_rate": 1.995094356436653e-05, "loss": 0.8052, "step": 2446 }, { "epoch": 0.18186547751765142, "grad_norm": 2.950367724496076, "learning_rate": 1.9950864153731825e-05, "loss": 0.8582, "step": 2447 }, { "epoch": 0.18193979933110369, "grad_norm": 2.8261179619286416, "learning_rate": 1.9950784679033972e-05, "loss": 0.9725, "step": 2448 }, { "epoch": 0.18201412114455592, "grad_norm": 2.39390860016022, "learning_rate": 1.9950705140273486e-05, "loss": 0.8322, "step": 2449 }, { "epoch": 0.18208844295800816, "grad_norm": 2.6994493139012437, "learning_rate": 1.9950625537450875e-05, "loss": 0.821, "step": 2450 }, { "epoch": 0.18216276477146043, "grad_norm": 2.1083089147051552, "learning_rate": 1.9950545870566654e-05, "loss": 0.7985, "step": 2451 }, { "epoch": 0.18223708658491267, "grad_norm": 2.4534544319052705, "learning_rate": 1.995046613962133e-05, "loss": 0.953, "step": 2452 }, { "epoch": 0.18231140839836493, "grad_norm": 2.607823841291625, "learning_rate": 1.995038634461543e-05, "loss": 0.8818, "step": 2453 }, { "epoch": 0.18238573021181717, "grad_norm": 2.898546333424414, "learning_rate": 1.9950306485549454e-05, "loss": 0.9391, "step": 2454 }, { "epoch": 0.1824600520252694, "grad_norm": 2.212462780342331, "learning_rate": 1.9950226562423923e-05, "loss": 1.0466, "step": 2455 }, { "epoch": 0.18253437383872168, "grad_norm": 3.392930296751146, "learning_rate": 1.9950146575239346e-05, "loss": 0.926, "step": 2456 }, { "epoch": 0.1826086956521739, "grad_norm": 1.9460580442887254, "learning_rate": 1.9950066523996242e-05, "loss": 0.8296, "step": 2457 }, { "epoch": 0.18268301746562615, "grad_norm": 3.1776871847745607, "learning_rate": 1.994998640869513e-05, "loss": 0.9831, "step": 2458 }, { "epoch": 0.18275733927907842, "grad_norm": 2.080675884136862, "learning_rate": 1.994990622933652e-05, "loss": 0.8605, "step": 2459 }, { "epoch": 0.18283166109253066, "grad_norm": 2.6425603733653054, "learning_rate": 1.994982598592093e-05, "loss": 0.8211, "step": 2460 }, { "epoch": 0.1829059829059829, "grad_norm": 2.08088605363844, "learning_rate": 1.9949745678448877e-05, "loss": 0.8496, "step": 2461 }, { "epoch": 0.18298030471943516, "grad_norm": 2.2262847821199974, "learning_rate": 1.9949665306920875e-05, "loss": 1.0289, "step": 2462 }, { "epoch": 0.1830546265328874, "grad_norm": 2.5136190738557476, "learning_rate": 1.9949584871337445e-05, "loss": 0.8846, "step": 2463 }, { "epoch": 0.18312894834633964, "grad_norm": 2.2925718009431515, "learning_rate": 1.9949504371699108e-05, "loss": 0.9391, "step": 2464 }, { "epoch": 0.1832032701597919, "grad_norm": 2.281875009355111, "learning_rate": 1.9949423808006374e-05, "loss": 0.7327, "step": 2465 }, { "epoch": 0.18327759197324414, "grad_norm": 2.1080593561274696, "learning_rate": 1.9949343180259765e-05, "loss": 0.6341, "step": 2466 }, { "epoch": 0.1833519137866964, "grad_norm": 2.2228803243478796, "learning_rate": 1.9949262488459803e-05, "loss": 1.1375, "step": 2467 }, { "epoch": 0.18342623560014865, "grad_norm": 2.4399937815482216, "learning_rate": 1.9949181732607006e-05, "loss": 1.0242, "step": 2468 }, { "epoch": 0.18350055741360088, "grad_norm": 2.333092846248175, "learning_rate": 1.9949100912701892e-05, "loss": 1.0149, "step": 2469 }, { "epoch": 0.18357487922705315, "grad_norm": 2.155093040714588, "learning_rate": 1.9949020028744984e-05, "loss": 0.7393, "step": 2470 }, { "epoch": 0.1836492010405054, "grad_norm": 3.008402560203373, "learning_rate": 1.9948939080736797e-05, "loss": 1.0535, "step": 2471 }, { "epoch": 0.18372352285395763, "grad_norm": 2.3439301029192774, "learning_rate": 1.994885806867786e-05, "loss": 0.92, "step": 2472 }, { "epoch": 0.1837978446674099, "grad_norm": 2.479378188245203, "learning_rate": 1.9948776992568694e-05, "loss": 0.994, "step": 2473 }, { "epoch": 0.18387216648086213, "grad_norm": 3.1504076943931496, "learning_rate": 1.994869585240981e-05, "loss": 0.8758, "step": 2474 }, { "epoch": 0.18394648829431437, "grad_norm": 2.498072581612907, "learning_rate": 1.9948614648201743e-05, "loss": 1.0896, "step": 2475 }, { "epoch": 0.18402081010776664, "grad_norm": 2.312842915329032, "learning_rate": 1.994853337994501e-05, "loss": 0.9548, "step": 2476 }, { "epoch": 0.18409513192121887, "grad_norm": 2.01039629169714, "learning_rate": 1.9948452047640137e-05, "loss": 0.9442, "step": 2477 }, { "epoch": 0.1841694537346711, "grad_norm": 2.94568441822084, "learning_rate": 1.9948370651287646e-05, "loss": 1.0548, "step": 2478 }, { "epoch": 0.18424377554812338, "grad_norm": 2.7856852991186787, "learning_rate": 1.994828919088806e-05, "loss": 0.9811, "step": 2479 }, { "epoch": 0.18431809736157562, "grad_norm": 2.030403388106962, "learning_rate": 1.9948207666441903e-05, "loss": 0.9484, "step": 2480 }, { "epoch": 0.18439241917502788, "grad_norm": 2.488691969075978, "learning_rate": 1.9948126077949702e-05, "loss": 0.9978, "step": 2481 }, { "epoch": 0.18446674098848012, "grad_norm": 4.537304430849446, "learning_rate": 1.9948044425411984e-05, "loss": 0.8767, "step": 2482 }, { "epoch": 0.18454106280193236, "grad_norm": 2.4972428006267062, "learning_rate": 1.9947962708829268e-05, "loss": 1.0573, "step": 2483 }, { "epoch": 0.18461538461538463, "grad_norm": 2.1669431533161947, "learning_rate": 1.994788092820209e-05, "loss": 0.8705, "step": 2484 }, { "epoch": 0.18468970642883686, "grad_norm": 2.680357101979217, "learning_rate": 1.9947799083530963e-05, "loss": 0.9869, "step": 2485 }, { "epoch": 0.1847640282422891, "grad_norm": 2.562073235939279, "learning_rate": 1.9947717174816425e-05, "loss": 0.9665, "step": 2486 }, { "epoch": 0.18483835005574137, "grad_norm": 2.961070595014167, "learning_rate": 1.9947635202059002e-05, "loss": 1.0117, "step": 2487 }, { "epoch": 0.1849126718691936, "grad_norm": 2.082069099278851, "learning_rate": 1.9947553165259218e-05, "loss": 0.8192, "step": 2488 }, { "epoch": 0.18498699368264584, "grad_norm": 2.3624957574542385, "learning_rate": 1.9947471064417603e-05, "loss": 0.9251, "step": 2489 }, { "epoch": 0.1850613154960981, "grad_norm": 2.0873627224036797, "learning_rate": 1.9947388899534684e-05, "loss": 0.8265, "step": 2490 }, { "epoch": 0.18513563730955035, "grad_norm": 2.174215347689804, "learning_rate": 1.9947306670610995e-05, "loss": 0.8369, "step": 2491 }, { "epoch": 0.1852099591230026, "grad_norm": 2.405609610644162, "learning_rate": 1.9947224377647058e-05, "loss": 1.003, "step": 2492 }, { "epoch": 0.18528428093645485, "grad_norm": 2.4424071586811213, "learning_rate": 1.994714202064341e-05, "loss": 1.0624, "step": 2493 }, { "epoch": 0.1853586027499071, "grad_norm": 2.3287780612483333, "learning_rate": 1.9947059599600575e-05, "loss": 0.927, "step": 2494 }, { "epoch": 0.18543292456335936, "grad_norm": 2.134949247182786, "learning_rate": 1.994697711451909e-05, "loss": 0.8885, "step": 2495 }, { "epoch": 0.1855072463768116, "grad_norm": 2.4790586415332334, "learning_rate": 1.994689456539948e-05, "loss": 0.9621, "step": 2496 }, { "epoch": 0.18558156819026383, "grad_norm": 2.802092156498932, "learning_rate": 1.9946811952242276e-05, "loss": 0.7561, "step": 2497 }, { "epoch": 0.1856558900037161, "grad_norm": 1.9774398902769743, "learning_rate": 1.9946729275048016e-05, "loss": 0.7595, "step": 2498 }, { "epoch": 0.18573021181716834, "grad_norm": 2.700973238085767, "learning_rate": 1.9946646533817228e-05, "loss": 1.1615, "step": 2499 }, { "epoch": 0.18580453363062058, "grad_norm": 3.0841048809937446, "learning_rate": 1.9946563728550448e-05, "loss": 0.8997, "step": 2500 }, { "epoch": 0.18587885544407284, "grad_norm": 2.535280553817109, "learning_rate": 1.9946480859248204e-05, "loss": 1.0912, "step": 2501 }, { "epoch": 0.18595317725752508, "grad_norm": 2.8823840507032914, "learning_rate": 1.9946397925911035e-05, "loss": 0.8318, "step": 2502 }, { "epoch": 0.18602749907097732, "grad_norm": 2.420896680774368, "learning_rate": 1.9946314928539473e-05, "loss": 0.8531, "step": 2503 }, { "epoch": 0.1861018208844296, "grad_norm": 2.2612426087318296, "learning_rate": 1.9946231867134046e-05, "loss": 0.9068, "step": 2504 }, { "epoch": 0.18617614269788182, "grad_norm": 2.36266998601156, "learning_rate": 1.99461487416953e-05, "loss": 0.8324, "step": 2505 }, { "epoch": 0.18625046451133406, "grad_norm": 2.2885307326917763, "learning_rate": 1.994606555222376e-05, "loss": 1.0858, "step": 2506 }, { "epoch": 0.18632478632478633, "grad_norm": 2.7018278758760728, "learning_rate": 1.994598229871997e-05, "loss": 1.0275, "step": 2507 }, { "epoch": 0.18639910813823857, "grad_norm": 2.325377829283216, "learning_rate": 1.994589898118446e-05, "loss": 0.9733, "step": 2508 }, { "epoch": 0.18647342995169083, "grad_norm": 2.6787545819892533, "learning_rate": 1.9945815599617767e-05, "loss": 0.9853, "step": 2509 }, { "epoch": 0.18654775176514307, "grad_norm": 2.6558493944420585, "learning_rate": 1.994573215402043e-05, "loss": 0.9108, "step": 2510 }, { "epoch": 0.1866220735785953, "grad_norm": 3.048978652043044, "learning_rate": 1.994564864439299e-05, "loss": 1.1476, "step": 2511 }, { "epoch": 0.18669639539204758, "grad_norm": 3.3632605415894012, "learning_rate": 1.9945565070735974e-05, "loss": 0.8386, "step": 2512 }, { "epoch": 0.18677071720549981, "grad_norm": 2.6089181440119944, "learning_rate": 1.994548143304993e-05, "loss": 1.1401, "step": 2513 }, { "epoch": 0.18684503901895205, "grad_norm": 5.782737919907058, "learning_rate": 1.994539773133539e-05, "loss": 0.813, "step": 2514 }, { "epoch": 0.18691936083240432, "grad_norm": 2.497327062185511, "learning_rate": 1.9945313965592897e-05, "loss": 1.0051, "step": 2515 }, { "epoch": 0.18699368264585656, "grad_norm": 2.7292790124703115, "learning_rate": 1.9945230135822985e-05, "loss": 1.0305, "step": 2516 }, { "epoch": 0.1870680044593088, "grad_norm": 1.9748625359799277, "learning_rate": 1.9945146242026203e-05, "loss": 0.8124, "step": 2517 }, { "epoch": 0.18714232627276106, "grad_norm": 3.1074684802474346, "learning_rate": 1.9945062284203084e-05, "loss": 0.9896, "step": 2518 }, { "epoch": 0.1872166480862133, "grad_norm": 2.519952061550545, "learning_rate": 1.9944978262354167e-05, "loss": 0.9755, "step": 2519 }, { "epoch": 0.18729096989966554, "grad_norm": 2.214070882069167, "learning_rate": 1.9944894176480002e-05, "loss": 0.7586, "step": 2520 }, { "epoch": 0.1873652917131178, "grad_norm": 2.6179104210481685, "learning_rate": 1.9944810026581117e-05, "loss": 0.8882, "step": 2521 }, { "epoch": 0.18743961352657004, "grad_norm": 2.648443133019922, "learning_rate": 1.9944725812658064e-05, "loss": 0.8995, "step": 2522 }, { "epoch": 0.1875139353400223, "grad_norm": 2.3728347191908408, "learning_rate": 1.9944641534711385e-05, "loss": 1.0935, "step": 2523 }, { "epoch": 0.18758825715347455, "grad_norm": 2.524945155766906, "learning_rate": 1.9944557192741616e-05, "loss": 0.9018, "step": 2524 }, { "epoch": 0.18766257896692679, "grad_norm": 2.8840018415304463, "learning_rate": 1.9944472786749306e-05, "loss": 0.9437, "step": 2525 }, { "epoch": 0.18773690078037905, "grad_norm": 2.865891822300113, "learning_rate": 1.9944388316734996e-05, "loss": 0.9041, "step": 2526 }, { "epoch": 0.1878112225938313, "grad_norm": 2.140933513784674, "learning_rate": 1.994430378269923e-05, "loss": 0.8252, "step": 2527 }, { "epoch": 0.18788554440728353, "grad_norm": 2.151037950248297, "learning_rate": 1.994421918464255e-05, "loss": 0.9471, "step": 2528 }, { "epoch": 0.1879598662207358, "grad_norm": 1.9148134402173669, "learning_rate": 1.9944134522565505e-05, "loss": 0.8065, "step": 2529 }, { "epoch": 0.18803418803418803, "grad_norm": 2.2770776394297405, "learning_rate": 1.9944049796468636e-05, "loss": 0.9069, "step": 2530 }, { "epoch": 0.18810850984764027, "grad_norm": 2.436208058659591, "learning_rate": 1.994396500635249e-05, "loss": 0.962, "step": 2531 }, { "epoch": 0.18818283166109254, "grad_norm": 2.7555354835897736, "learning_rate": 1.9943880152217617e-05, "loss": 1.0533, "step": 2532 }, { "epoch": 0.18825715347454477, "grad_norm": 2.244559536350067, "learning_rate": 1.9943795234064558e-05, "loss": 0.8641, "step": 2533 }, { "epoch": 0.188331475287997, "grad_norm": 2.8208771274556788, "learning_rate": 1.994371025189386e-05, "loss": 0.9995, "step": 2534 }, { "epoch": 0.18840579710144928, "grad_norm": 2.630009189830406, "learning_rate": 1.9943625205706073e-05, "loss": 1.0258, "step": 2535 }, { "epoch": 0.18848011891490152, "grad_norm": 3.277697971725459, "learning_rate": 1.994354009550174e-05, "loss": 0.9778, "step": 2536 }, { "epoch": 0.18855444072835378, "grad_norm": 1.9340291995308576, "learning_rate": 1.9943454921281417e-05, "loss": 0.6626, "step": 2537 }, { "epoch": 0.18862876254180602, "grad_norm": 2.4798300974035503, "learning_rate": 1.9943369683045642e-05, "loss": 0.7296, "step": 2538 }, { "epoch": 0.18870308435525826, "grad_norm": 2.1702555720604613, "learning_rate": 1.9943284380794973e-05, "loss": 0.7909, "step": 2539 }, { "epoch": 0.18877740616871053, "grad_norm": 2.3409748320444232, "learning_rate": 1.994319901452995e-05, "loss": 1.0575, "step": 2540 }, { "epoch": 0.18885172798216276, "grad_norm": 2.1466525310212528, "learning_rate": 1.9943113584251134e-05, "loss": 0.9666, "step": 2541 }, { "epoch": 0.188926049795615, "grad_norm": 2.2798432273411495, "learning_rate": 1.994302808995906e-05, "loss": 0.9602, "step": 2542 }, { "epoch": 0.18900037160906727, "grad_norm": 2.404738661217327, "learning_rate": 1.9942942531654297e-05, "loss": 0.9048, "step": 2543 }, { "epoch": 0.1890746934225195, "grad_norm": 3.115228473714863, "learning_rate": 1.994285690933738e-05, "loss": 1.0324, "step": 2544 }, { "epoch": 0.18914901523597175, "grad_norm": 2.3941146357743066, "learning_rate": 1.9942771223008866e-05, "loss": 0.8759, "step": 2545 }, { "epoch": 0.189223337049424, "grad_norm": 2.2053523836858697, "learning_rate": 1.994268547266931e-05, "loss": 0.9212, "step": 2546 }, { "epoch": 0.18929765886287625, "grad_norm": 2.3593169548252617, "learning_rate": 1.9942599658319257e-05, "loss": 1.036, "step": 2547 }, { "epoch": 0.18937198067632852, "grad_norm": 3.6432158774085144, "learning_rate": 1.9942513779959264e-05, "loss": 0.9534, "step": 2548 }, { "epoch": 0.18944630248978075, "grad_norm": 2.2519485199011084, "learning_rate": 1.9942427837589886e-05, "loss": 0.9849, "step": 2549 }, { "epoch": 0.189520624303233, "grad_norm": 2.905949809366697, "learning_rate": 1.9942341831211672e-05, "loss": 0.9092, "step": 2550 }, { "epoch": 0.18959494611668526, "grad_norm": 2.4439841974868006, "learning_rate": 1.9942255760825177e-05, "loss": 0.7591, "step": 2551 }, { "epoch": 0.1896692679301375, "grad_norm": 2.0929570551900816, "learning_rate": 1.9942169626430957e-05, "loss": 0.7327, "step": 2552 }, { "epoch": 0.18974358974358974, "grad_norm": 14.09193818210659, "learning_rate": 1.994208342802956e-05, "loss": 0.8635, "step": 2553 }, { "epoch": 0.189817911557042, "grad_norm": 2.5227409899729043, "learning_rate": 1.9941997165621553e-05, "loss": 0.9629, "step": 2554 }, { "epoch": 0.18989223337049424, "grad_norm": 2.596992021887361, "learning_rate": 1.9941910839207478e-05, "loss": 1.0063, "step": 2555 }, { "epoch": 0.18996655518394648, "grad_norm": 9.482589608821344, "learning_rate": 1.99418244487879e-05, "loss": 1.3374, "step": 2556 }, { "epoch": 0.19004087699739874, "grad_norm": 3.1058347513806455, "learning_rate": 1.9941737994363368e-05, "loss": 1.1357, "step": 2557 }, { "epoch": 0.19011519881085098, "grad_norm": 2.219348154629677, "learning_rate": 1.9941651475934445e-05, "loss": 1.0316, "step": 2558 }, { "epoch": 0.19018952062430322, "grad_norm": 1.9246037596022798, "learning_rate": 1.9941564893501687e-05, "loss": 0.7329, "step": 2559 }, { "epoch": 0.1902638424377555, "grad_norm": 2.1406051780025335, "learning_rate": 1.994147824706565e-05, "loss": 0.9701, "step": 2560 }, { "epoch": 0.19033816425120773, "grad_norm": 2.6245290861648853, "learning_rate": 1.994139153662689e-05, "loss": 0.9829, "step": 2561 }, { "epoch": 0.19041248606466, "grad_norm": 2.340596517399912, "learning_rate": 1.9941304762185968e-05, "loss": 0.783, "step": 2562 }, { "epoch": 0.19048680787811223, "grad_norm": 2.709819963840101, "learning_rate": 1.9941217923743444e-05, "loss": 1.0653, "step": 2563 }, { "epoch": 0.19056112969156447, "grad_norm": 2.47662627161176, "learning_rate": 1.9941131021299873e-05, "loss": 0.9148, "step": 2564 }, { "epoch": 0.19063545150501673, "grad_norm": 2.168569328886184, "learning_rate": 1.9941044054855813e-05, "loss": 0.9078, "step": 2565 }, { "epoch": 0.19070977331846897, "grad_norm": 2.308408659729731, "learning_rate": 1.9940957024411832e-05, "loss": 0.9443, "step": 2566 }, { "epoch": 0.1907840951319212, "grad_norm": 2.7396122672319834, "learning_rate": 1.9940869929968484e-05, "loss": 0.9285, "step": 2567 }, { "epoch": 0.19085841694537348, "grad_norm": 2.039887303345884, "learning_rate": 1.994078277152633e-05, "loss": 0.7926, "step": 2568 }, { "epoch": 0.19093273875882572, "grad_norm": 2.4016232497602727, "learning_rate": 1.9940695549085933e-05, "loss": 0.8172, "step": 2569 }, { "epoch": 0.19100706057227795, "grad_norm": 1.9413070303389544, "learning_rate": 1.9940608262647853e-05, "loss": 0.9217, "step": 2570 }, { "epoch": 0.19108138238573022, "grad_norm": 2.4312509249520096, "learning_rate": 1.9940520912212656e-05, "loss": 0.949, "step": 2571 }, { "epoch": 0.19115570419918246, "grad_norm": 2.4050246854528736, "learning_rate": 1.9940433497780897e-05, "loss": 1.0735, "step": 2572 }, { "epoch": 0.1912300260126347, "grad_norm": 1.9079544655979, "learning_rate": 1.9940346019353146e-05, "loss": 0.5993, "step": 2573 }, { "epoch": 0.19130434782608696, "grad_norm": 2.2718204155288038, "learning_rate": 1.9940258476929963e-05, "loss": 0.6037, "step": 2574 }, { "epoch": 0.1913786696395392, "grad_norm": 2.6019653626431443, "learning_rate": 1.9940170870511908e-05, "loss": 0.7249, "step": 2575 }, { "epoch": 0.19145299145299147, "grad_norm": 2.427974264915125, "learning_rate": 1.9940083200099553e-05, "loss": 0.8598, "step": 2576 }, { "epoch": 0.1915273132664437, "grad_norm": 2.508814054206034, "learning_rate": 1.9939995465693454e-05, "loss": 1.0155, "step": 2577 }, { "epoch": 0.19160163507989594, "grad_norm": 3.716050044105616, "learning_rate": 1.9939907667294183e-05, "loss": 0.822, "step": 2578 }, { "epoch": 0.1916759568933482, "grad_norm": 2.1439621871733543, "learning_rate": 1.99398198049023e-05, "loss": 0.9528, "step": 2579 }, { "epoch": 0.19175027870680045, "grad_norm": 2.407047994666112, "learning_rate": 1.9939731878518378e-05, "loss": 0.9505, "step": 2580 }, { "epoch": 0.19182460052025269, "grad_norm": 2.8142613976198243, "learning_rate": 1.9939643888142975e-05, "loss": 0.9422, "step": 2581 }, { "epoch": 0.19189892233370495, "grad_norm": 3.002124322806169, "learning_rate": 1.993955583377666e-05, "loss": 0.8938, "step": 2582 }, { "epoch": 0.1919732441471572, "grad_norm": 2.7417153393838665, "learning_rate": 1.993946771542e-05, "loss": 0.8557, "step": 2583 }, { "epoch": 0.19204756596060943, "grad_norm": 2.5236378913266932, "learning_rate": 1.993937953307356e-05, "loss": 0.8106, "step": 2584 }, { "epoch": 0.1921218877740617, "grad_norm": 2.1505434103102155, "learning_rate": 1.9939291286737915e-05, "loss": 0.8617, "step": 2585 }, { "epoch": 0.19219620958751393, "grad_norm": 3.0178061462932284, "learning_rate": 1.9939202976413624e-05, "loss": 0.9841, "step": 2586 }, { "epoch": 0.19227053140096617, "grad_norm": 2.9356837922067736, "learning_rate": 1.9939114602101263e-05, "loss": 1.0184, "step": 2587 }, { "epoch": 0.19234485321441844, "grad_norm": 2.274422390579256, "learning_rate": 1.9939026163801398e-05, "loss": 0.9692, "step": 2588 }, { "epoch": 0.19241917502787068, "grad_norm": 3.217275288648243, "learning_rate": 1.9938937661514596e-05, "loss": 0.9934, "step": 2589 }, { "epoch": 0.19249349684132294, "grad_norm": 2.6731073579670164, "learning_rate": 1.9938849095241432e-05, "loss": 1.1533, "step": 2590 }, { "epoch": 0.19256781865477518, "grad_norm": 1.9925805587735583, "learning_rate": 1.993876046498247e-05, "loss": 0.7162, "step": 2591 }, { "epoch": 0.19264214046822742, "grad_norm": 2.294109877388066, "learning_rate": 1.9938671770738284e-05, "loss": 0.7464, "step": 2592 }, { "epoch": 0.19271646228167968, "grad_norm": 2.28516234051883, "learning_rate": 1.9938583012509446e-05, "loss": 0.9614, "step": 2593 }, { "epoch": 0.19279078409513192, "grad_norm": 2.135503536814247, "learning_rate": 1.993849419029653e-05, "loss": 0.8188, "step": 2594 }, { "epoch": 0.19286510590858416, "grad_norm": 2.4184019442501343, "learning_rate": 1.9938405304100097e-05, "loss": 0.834, "step": 2595 }, { "epoch": 0.19293942772203643, "grad_norm": 2.266122109766537, "learning_rate": 1.9938316353920732e-05, "loss": 1.0421, "step": 2596 }, { "epoch": 0.19301374953548867, "grad_norm": 2.2447558582122693, "learning_rate": 1.9938227339759e-05, "loss": 1.0317, "step": 2597 }, { "epoch": 0.1930880713489409, "grad_norm": 2.563863190299843, "learning_rate": 1.9938138261615476e-05, "loss": 0.7712, "step": 2598 }, { "epoch": 0.19316239316239317, "grad_norm": 1.699191379477109, "learning_rate": 1.9938049119490733e-05, "loss": 0.5659, "step": 2599 }, { "epoch": 0.1932367149758454, "grad_norm": 2.1875050547751327, "learning_rate": 1.9937959913385347e-05, "loss": 0.6635, "step": 2600 }, { "epoch": 0.19331103678929765, "grad_norm": 2.5321930274425317, "learning_rate": 1.993787064329989e-05, "loss": 1.0934, "step": 2601 }, { "epoch": 0.1933853586027499, "grad_norm": 2.174347346227227, "learning_rate": 1.9937781309234936e-05, "loss": 0.9917, "step": 2602 }, { "epoch": 0.19345968041620215, "grad_norm": 2.137458438094302, "learning_rate": 1.993769191119106e-05, "loss": 0.8858, "step": 2603 }, { "epoch": 0.19353400222965442, "grad_norm": 2.5487954869713585, "learning_rate": 1.9937602449168842e-05, "loss": 0.9471, "step": 2604 }, { "epoch": 0.19360832404310666, "grad_norm": 2.033359916266019, "learning_rate": 1.9937512923168856e-05, "loss": 0.8266, "step": 2605 }, { "epoch": 0.1936826458565589, "grad_norm": 2.8247390029098653, "learning_rate": 1.9937423333191678e-05, "loss": 1.0273, "step": 2606 }, { "epoch": 0.19375696767001116, "grad_norm": 2.46317697207111, "learning_rate": 1.9937333679237882e-05, "loss": 1.1205, "step": 2607 }, { "epoch": 0.1938312894834634, "grad_norm": 2.7866475098348085, "learning_rate": 1.9937243961308046e-05, "loss": 0.9044, "step": 2608 }, { "epoch": 0.19390561129691564, "grad_norm": 2.114402281914421, "learning_rate": 1.993715417940275e-05, "loss": 0.9013, "step": 2609 }, { "epoch": 0.1939799331103679, "grad_norm": 2.6122320556275977, "learning_rate": 1.9937064333522575e-05, "loss": 0.904, "step": 2610 }, { "epoch": 0.19405425492382014, "grad_norm": 2.2345712141120257, "learning_rate": 1.993697442366809e-05, "loss": 0.8283, "step": 2611 }, { "epoch": 0.19412857673727238, "grad_norm": 2.4052401408555952, "learning_rate": 1.993688444983988e-05, "loss": 0.8808, "step": 2612 }, { "epoch": 0.19420289855072465, "grad_norm": 2.101934770540088, "learning_rate": 1.9936794412038526e-05, "loss": 0.7602, "step": 2613 }, { "epoch": 0.19427722036417688, "grad_norm": 2.8262926331913425, "learning_rate": 1.9936704310264604e-05, "loss": 0.7061, "step": 2614 }, { "epoch": 0.19435154217762912, "grad_norm": 2.2576255823831186, "learning_rate": 1.9936614144518695e-05, "loss": 0.8328, "step": 2615 }, { "epoch": 0.1944258639910814, "grad_norm": 2.7441820531431755, "learning_rate": 1.993652391480138e-05, "loss": 1.0422, "step": 2616 }, { "epoch": 0.19450018580453363, "grad_norm": 2.446576051676756, "learning_rate": 1.993643362111324e-05, "loss": 0.9389, "step": 2617 }, { "epoch": 0.1945745076179859, "grad_norm": 2.7863008237945013, "learning_rate": 1.9936343263454855e-05, "loss": 0.9755, "step": 2618 }, { "epoch": 0.19464882943143813, "grad_norm": 1.9065847187320326, "learning_rate": 1.9936252841826808e-05, "loss": 0.6611, "step": 2619 }, { "epoch": 0.19472315124489037, "grad_norm": 2.459060603073153, "learning_rate": 1.9936162356229684e-05, "loss": 0.8166, "step": 2620 }, { "epoch": 0.19479747305834264, "grad_norm": 2.3507303637972083, "learning_rate": 1.9936071806664056e-05, "loss": 0.8272, "step": 2621 }, { "epoch": 0.19487179487179487, "grad_norm": 2.3603519367853854, "learning_rate": 1.9935981193130517e-05, "loss": 1.0197, "step": 2622 }, { "epoch": 0.1949461166852471, "grad_norm": 2.2691469043088097, "learning_rate": 1.993589051562965e-05, "loss": 0.8373, "step": 2623 }, { "epoch": 0.19502043849869938, "grad_norm": 2.526823087616381, "learning_rate": 1.993579977416203e-05, "loss": 0.9045, "step": 2624 }, { "epoch": 0.19509476031215162, "grad_norm": 2.4240972621160686, "learning_rate": 1.9935708968728246e-05, "loss": 0.9721, "step": 2625 }, { "epoch": 0.19516908212560385, "grad_norm": 2.240177150464679, "learning_rate": 1.9935618099328886e-05, "loss": 0.7395, "step": 2626 }, { "epoch": 0.19524340393905612, "grad_norm": 2.3871189265256354, "learning_rate": 1.9935527165964532e-05, "loss": 0.8756, "step": 2627 }, { "epoch": 0.19531772575250836, "grad_norm": 2.5923369925136677, "learning_rate": 1.9935436168635766e-05, "loss": 0.8305, "step": 2628 }, { "epoch": 0.1953920475659606, "grad_norm": 2.5250545351472, "learning_rate": 1.9935345107343183e-05, "loss": 1.0236, "step": 2629 }, { "epoch": 0.19546636937941286, "grad_norm": 2.4590049078361482, "learning_rate": 1.993525398208736e-05, "loss": 0.8379, "step": 2630 }, { "epoch": 0.1955406911928651, "grad_norm": 2.1420902490178837, "learning_rate": 1.9935162792868888e-05, "loss": 0.8759, "step": 2631 }, { "epoch": 0.19561501300631737, "grad_norm": 2.350873326203709, "learning_rate": 1.9935071539688356e-05, "loss": 1.0878, "step": 2632 }, { "epoch": 0.1956893348197696, "grad_norm": 3.8583073946077975, "learning_rate": 1.9934980222546347e-05, "loss": 1.1036, "step": 2633 }, { "epoch": 0.19576365663322184, "grad_norm": 2.38856802143323, "learning_rate": 1.9934888841443453e-05, "loss": 0.9108, "step": 2634 }, { "epoch": 0.1958379784466741, "grad_norm": 2.436051014861316, "learning_rate": 1.9934797396380255e-05, "loss": 0.9362, "step": 2635 }, { "epoch": 0.19591230026012635, "grad_norm": 2.5588938684845908, "learning_rate": 1.993470588735735e-05, "loss": 1.0885, "step": 2636 }, { "epoch": 0.1959866220735786, "grad_norm": 2.676041558495654, "learning_rate": 1.9934614314375325e-05, "loss": 0.8424, "step": 2637 }, { "epoch": 0.19606094388703085, "grad_norm": 2.4028978385757966, "learning_rate": 1.9934522677434766e-05, "loss": 0.9795, "step": 2638 }, { "epoch": 0.1961352657004831, "grad_norm": 2.7713499215585133, "learning_rate": 1.9934430976536267e-05, "loss": 1.0948, "step": 2639 }, { "epoch": 0.19620958751393533, "grad_norm": 2.305814128317759, "learning_rate": 1.9934339211680415e-05, "loss": 1.0545, "step": 2640 }, { "epoch": 0.1962839093273876, "grad_norm": 2.0446180390691726, "learning_rate": 1.9934247382867808e-05, "loss": 0.8566, "step": 2641 }, { "epoch": 0.19635823114083983, "grad_norm": 2.5209720195592658, "learning_rate": 1.9934155490099026e-05, "loss": 0.9207, "step": 2642 }, { "epoch": 0.19643255295429207, "grad_norm": 2.3386737268039295, "learning_rate": 1.9934063533374666e-05, "loss": 0.9856, "step": 2643 }, { "epoch": 0.19650687476774434, "grad_norm": 2.791980540151571, "learning_rate": 1.9933971512695324e-05, "loss": 1.0213, "step": 2644 }, { "epoch": 0.19658119658119658, "grad_norm": 2.6745087976201156, "learning_rate": 1.993387942806159e-05, "loss": 0.9996, "step": 2645 }, { "epoch": 0.19665551839464884, "grad_norm": 2.7044081965547733, "learning_rate": 1.9933787279474052e-05, "loss": 1.0669, "step": 2646 }, { "epoch": 0.19672984020810108, "grad_norm": 2.227711875227924, "learning_rate": 1.993369506693331e-05, "loss": 1.1725, "step": 2647 }, { "epoch": 0.19680416202155332, "grad_norm": 2.632042956361664, "learning_rate": 1.9933602790439953e-05, "loss": 0.9174, "step": 2648 }, { "epoch": 0.19687848383500559, "grad_norm": 2.4106936334354336, "learning_rate": 1.9933510449994573e-05, "loss": 1.0881, "step": 2649 }, { "epoch": 0.19695280564845782, "grad_norm": 2.3783487651681923, "learning_rate": 1.9933418045597773e-05, "loss": 1.0074, "step": 2650 }, { "epoch": 0.19702712746191006, "grad_norm": 2.492388878793689, "learning_rate": 1.993332557725014e-05, "loss": 1.0583, "step": 2651 }, { "epoch": 0.19710144927536233, "grad_norm": 2.596934648774863, "learning_rate": 1.9933233044952277e-05, "loss": 0.9907, "step": 2652 }, { "epoch": 0.19717577108881457, "grad_norm": 2.371890250178802, "learning_rate": 1.993314044870477e-05, "loss": 0.7431, "step": 2653 }, { "epoch": 0.1972500929022668, "grad_norm": 2.30862767962302, "learning_rate": 1.9933047788508225e-05, "loss": 0.8395, "step": 2654 }, { "epoch": 0.19732441471571907, "grad_norm": 2.4715258832720806, "learning_rate": 1.9932955064363232e-05, "loss": 0.9502, "step": 2655 }, { "epoch": 0.1973987365291713, "grad_norm": 2.4806619208448453, "learning_rate": 1.9932862276270387e-05, "loss": 0.9121, "step": 2656 }, { "epoch": 0.19747305834262355, "grad_norm": 2.3907065323696974, "learning_rate": 1.993276942423029e-05, "loss": 0.8267, "step": 2657 }, { "epoch": 0.1975473801560758, "grad_norm": 2.3791355107296295, "learning_rate": 1.993267650824354e-05, "loss": 0.9383, "step": 2658 }, { "epoch": 0.19762170196952805, "grad_norm": 2.2295204114193092, "learning_rate": 1.9932583528310736e-05, "loss": 0.8182, "step": 2659 }, { "epoch": 0.19769602378298032, "grad_norm": 2.2359686156498086, "learning_rate": 1.9932490484432473e-05, "loss": 0.7799, "step": 2660 }, { "epoch": 0.19777034559643256, "grad_norm": 2.6218317455978575, "learning_rate": 1.9932397376609352e-05, "loss": 1.0027, "step": 2661 }, { "epoch": 0.1978446674098848, "grad_norm": 2.7674184087598572, "learning_rate": 1.993230420484197e-05, "loss": 0.9947, "step": 2662 }, { "epoch": 0.19791898922333706, "grad_norm": 2.6578799421666663, "learning_rate": 1.9932210969130933e-05, "loss": 0.8768, "step": 2663 }, { "epoch": 0.1979933110367893, "grad_norm": 5.416209341892815, "learning_rate": 1.9932117669476835e-05, "loss": 0.6735, "step": 2664 }, { "epoch": 0.19806763285024154, "grad_norm": 3.046655164889917, "learning_rate": 1.9932024305880278e-05, "loss": 0.7977, "step": 2665 }, { "epoch": 0.1981419546636938, "grad_norm": 2.485117504461607, "learning_rate": 1.9931930878341867e-05, "loss": 1.0491, "step": 2666 }, { "epoch": 0.19821627647714604, "grad_norm": 2.8253378341598476, "learning_rate": 1.9931837386862196e-05, "loss": 0.9414, "step": 2667 }, { "epoch": 0.19829059829059828, "grad_norm": 2.109054909265061, "learning_rate": 1.993174383144187e-05, "loss": 0.8234, "step": 2668 }, { "epoch": 0.19836492010405055, "grad_norm": 3.335787410141243, "learning_rate": 1.9931650212081496e-05, "loss": 0.9468, "step": 2669 }, { "epoch": 0.19843924191750278, "grad_norm": 2.5933572612531512, "learning_rate": 1.9931556528781676e-05, "loss": 0.9697, "step": 2670 }, { "epoch": 0.19851356373095502, "grad_norm": 2.748981686135672, "learning_rate": 1.9931462781543006e-05, "loss": 0.988, "step": 2671 }, { "epoch": 0.1985878855444073, "grad_norm": 2.030956254564249, "learning_rate": 1.9931368970366094e-05, "loss": 0.892, "step": 2672 }, { "epoch": 0.19866220735785953, "grad_norm": 1.9915387121735828, "learning_rate": 1.9931275095251547e-05, "loss": 0.7347, "step": 2673 }, { "epoch": 0.1987365291713118, "grad_norm": 3.470206712705726, "learning_rate": 1.9931181156199965e-05, "loss": 0.8463, "step": 2674 }, { "epoch": 0.19881085098476403, "grad_norm": 2.44355987322643, "learning_rate": 1.993108715321195e-05, "loss": 0.8519, "step": 2675 }, { "epoch": 0.19888517279821627, "grad_norm": 2.1716628728019747, "learning_rate": 1.9930993086288114e-05, "loss": 0.959, "step": 2676 }, { "epoch": 0.19895949461166854, "grad_norm": 2.2859499419702254, "learning_rate": 1.9930898955429062e-05, "loss": 0.8062, "step": 2677 }, { "epoch": 0.19903381642512077, "grad_norm": 2.106467210742878, "learning_rate": 1.9930804760635397e-05, "loss": 0.8133, "step": 2678 }, { "epoch": 0.199108138238573, "grad_norm": 2.1480465864441296, "learning_rate": 1.9930710501907726e-05, "loss": 0.8726, "step": 2679 }, { "epoch": 0.19918246005202528, "grad_norm": 3.2277775703937026, "learning_rate": 1.9930616179246655e-05, "loss": 1.1274, "step": 2680 }, { "epoch": 0.19925678186547752, "grad_norm": 1.7571799656367293, "learning_rate": 1.993052179265279e-05, "loss": 0.6513, "step": 2681 }, { "epoch": 0.19933110367892976, "grad_norm": 2.2255549750426082, "learning_rate": 1.9930427342126743e-05, "loss": 0.8864, "step": 2682 }, { "epoch": 0.19940542549238202, "grad_norm": 2.50071131981777, "learning_rate": 1.9930332827669122e-05, "loss": 0.7956, "step": 2683 }, { "epoch": 0.19947974730583426, "grad_norm": 5.602414266903226, "learning_rate": 1.993023824928053e-05, "loss": 1.0163, "step": 2684 }, { "epoch": 0.1995540691192865, "grad_norm": 2.7958625241314494, "learning_rate": 1.9930143606961586e-05, "loss": 1.0033, "step": 2685 }, { "epoch": 0.19962839093273876, "grad_norm": 3.2282401293007505, "learning_rate": 1.9930048900712886e-05, "loss": 1.0429, "step": 2686 }, { "epoch": 0.199702712746191, "grad_norm": 2.8334662247035043, "learning_rate": 1.992995413053505e-05, "loss": 0.9306, "step": 2687 }, { "epoch": 0.19977703455964327, "grad_norm": 2.1361102991447236, "learning_rate": 1.9929859296428682e-05, "loss": 0.8718, "step": 2688 }, { "epoch": 0.1998513563730955, "grad_norm": 2.6601233159169024, "learning_rate": 1.9929764398394396e-05, "loss": 1.0148, "step": 2689 }, { "epoch": 0.19992567818654775, "grad_norm": 2.4379555770298675, "learning_rate": 1.9929669436432803e-05, "loss": 1.0353, "step": 2690 }, { "epoch": 0.2, "grad_norm": 2.6397989964756086, "learning_rate": 1.992957441054451e-05, "loss": 1.1081, "step": 2691 }, { "epoch": 0.20007432181345225, "grad_norm": 3.622021962459294, "learning_rate": 1.9929479320730136e-05, "loss": 0.7939, "step": 2692 }, { "epoch": 0.2001486436269045, "grad_norm": 3.195589682583794, "learning_rate": 1.9929384166990288e-05, "loss": 1.1734, "step": 2693 }, { "epoch": 0.20022296544035675, "grad_norm": 2.674625023226308, "learning_rate": 1.992928894932558e-05, "loss": 0.8537, "step": 2694 }, { "epoch": 0.200297287253809, "grad_norm": 4.213937232760127, "learning_rate": 1.9929193667736624e-05, "loss": 1.1052, "step": 2695 }, { "epoch": 0.20037160906726123, "grad_norm": 2.2727809970757313, "learning_rate": 1.9929098322224035e-05, "loss": 0.9352, "step": 2696 }, { "epoch": 0.2004459308807135, "grad_norm": 2.888536680076197, "learning_rate": 1.9929002912788427e-05, "loss": 0.8292, "step": 2697 }, { "epoch": 0.20052025269416573, "grad_norm": 2.191530283846301, "learning_rate": 1.9928907439430418e-05, "loss": 0.9684, "step": 2698 }, { "epoch": 0.20059457450761797, "grad_norm": 2.399335670501056, "learning_rate": 1.992881190215061e-05, "loss": 0.9126, "step": 2699 }, { "epoch": 0.20066889632107024, "grad_norm": 2.83450053746089, "learning_rate": 1.992871630094963e-05, "loss": 1.1091, "step": 2700 }, { "epoch": 0.20074321813452248, "grad_norm": 3.361602557490186, "learning_rate": 1.9928620635828086e-05, "loss": 0.8184, "step": 2701 }, { "epoch": 0.20081753994797474, "grad_norm": 3.120506239972859, "learning_rate": 1.99285249067866e-05, "loss": 0.6863, "step": 2702 }, { "epoch": 0.20089186176142698, "grad_norm": 2.0106897187680173, "learning_rate": 1.9928429113825788e-05, "loss": 0.876, "step": 2703 }, { "epoch": 0.20096618357487922, "grad_norm": 2.307029071701834, "learning_rate": 1.9928333256946263e-05, "loss": 0.8399, "step": 2704 }, { "epoch": 0.2010405053883315, "grad_norm": 2.5045358986983177, "learning_rate": 1.9928237336148645e-05, "loss": 0.8005, "step": 2705 }, { "epoch": 0.20111482720178372, "grad_norm": 2.5922566005641756, "learning_rate": 1.9928141351433545e-05, "loss": 0.945, "step": 2706 }, { "epoch": 0.20118914901523596, "grad_norm": 2.3098070731992033, "learning_rate": 1.992804530280159e-05, "loss": 0.9508, "step": 2707 }, { "epoch": 0.20126347082868823, "grad_norm": 5.1876478558826715, "learning_rate": 1.9927949190253395e-05, "loss": 0.8244, "step": 2708 }, { "epoch": 0.20133779264214047, "grad_norm": 2.2460156680121104, "learning_rate": 1.992785301378958e-05, "loss": 0.8178, "step": 2709 }, { "epoch": 0.2014121144555927, "grad_norm": 2.6856123796078264, "learning_rate": 1.9927756773410757e-05, "loss": 0.9103, "step": 2710 }, { "epoch": 0.20148643626904497, "grad_norm": 2.4164676022030567, "learning_rate": 1.9927660469117556e-05, "loss": 0.8023, "step": 2711 }, { "epoch": 0.2015607580824972, "grad_norm": 2.4091411262203315, "learning_rate": 1.992756410091059e-05, "loss": 0.8941, "step": 2712 }, { "epoch": 0.20163507989594945, "grad_norm": 2.542291001968204, "learning_rate": 1.9927467668790478e-05, "loss": 1.002, "step": 2713 }, { "epoch": 0.20170940170940171, "grad_norm": 2.470641930154289, "learning_rate": 1.9927371172757847e-05, "loss": 1.1124, "step": 2714 }, { "epoch": 0.20178372352285395, "grad_norm": 2.5886635871046084, "learning_rate": 1.9927274612813318e-05, "loss": 1.0073, "step": 2715 }, { "epoch": 0.20185804533630622, "grad_norm": 2.6172553662559084, "learning_rate": 1.992717798895751e-05, "loss": 0.9486, "step": 2716 }, { "epoch": 0.20193236714975846, "grad_norm": 3.0904057598041286, "learning_rate": 1.992708130119104e-05, "loss": 1.1959, "step": 2717 }, { "epoch": 0.2020066889632107, "grad_norm": 2.862767251981805, "learning_rate": 1.992698454951454e-05, "loss": 0.9569, "step": 2718 }, { "epoch": 0.20208101077666296, "grad_norm": 2.2525729006363076, "learning_rate": 1.9926887733928626e-05, "loss": 0.9266, "step": 2719 }, { "epoch": 0.2021553325901152, "grad_norm": 2.3925726622017427, "learning_rate": 1.9926790854433928e-05, "loss": 1.0593, "step": 2720 }, { "epoch": 0.20222965440356744, "grad_norm": 2.1340437882909082, "learning_rate": 1.992669391103106e-05, "loss": 0.7143, "step": 2721 }, { "epoch": 0.2023039762170197, "grad_norm": 2.167105638783137, "learning_rate": 1.9926596903720657e-05, "loss": 0.7464, "step": 2722 }, { "epoch": 0.20237829803047194, "grad_norm": 5.2779761297727195, "learning_rate": 1.9926499832503334e-05, "loss": 1.0433, "step": 2723 }, { "epoch": 0.20245261984392418, "grad_norm": 2.14393106197177, "learning_rate": 1.9926402697379723e-05, "loss": 0.8388, "step": 2724 }, { "epoch": 0.20252694165737645, "grad_norm": 2.1136495959297563, "learning_rate": 1.9926305498350445e-05, "loss": 0.71, "step": 2725 }, { "epoch": 0.20260126347082869, "grad_norm": 2.6580047187374714, "learning_rate": 1.9926208235416125e-05, "loss": 0.6866, "step": 2726 }, { "epoch": 0.20267558528428092, "grad_norm": 2.407417442171666, "learning_rate": 1.9926110908577395e-05, "loss": 0.8058, "step": 2727 }, { "epoch": 0.2027499070977332, "grad_norm": 2.3130679157732934, "learning_rate": 1.992601351783488e-05, "loss": 1.0749, "step": 2728 }, { "epoch": 0.20282422891118543, "grad_norm": 2.1975578593492644, "learning_rate": 1.9925916063189202e-05, "loss": 0.888, "step": 2729 }, { "epoch": 0.2028985507246377, "grad_norm": 3.17420291402702, "learning_rate": 1.992581854464099e-05, "loss": 0.858, "step": 2730 }, { "epoch": 0.20297287253808993, "grad_norm": 2.1477668794595832, "learning_rate": 1.9925720962190874e-05, "loss": 0.8737, "step": 2731 }, { "epoch": 0.20304719435154217, "grad_norm": 7.053358309509445, "learning_rate": 1.992562331583948e-05, "loss": 0.7813, "step": 2732 }, { "epoch": 0.20312151616499444, "grad_norm": 3.342317177250268, "learning_rate": 1.9925525605587445e-05, "loss": 1.0911, "step": 2733 }, { "epoch": 0.20319583797844668, "grad_norm": 2.946912260250149, "learning_rate": 1.9925427831435386e-05, "loss": 0.862, "step": 2734 }, { "epoch": 0.2032701597918989, "grad_norm": 2.0686071566365785, "learning_rate": 1.9925329993383937e-05, "loss": 0.8719, "step": 2735 }, { "epoch": 0.20334448160535118, "grad_norm": 2.8981008544809828, "learning_rate": 1.9925232091433728e-05, "loss": 1.1582, "step": 2736 }, { "epoch": 0.20341880341880342, "grad_norm": 2.649988075626344, "learning_rate": 1.992513412558539e-05, "loss": 1.0478, "step": 2737 }, { "epoch": 0.20349312523225566, "grad_norm": 2.0757521243821677, "learning_rate": 1.9925036095839557e-05, "loss": 0.6785, "step": 2738 }, { "epoch": 0.20356744704570792, "grad_norm": 2.432587811102554, "learning_rate": 1.9924938002196855e-05, "loss": 0.8436, "step": 2739 }, { "epoch": 0.20364176885916016, "grad_norm": 4.959441839007029, "learning_rate": 1.9924839844657914e-05, "loss": 0.9616, "step": 2740 }, { "epoch": 0.2037160906726124, "grad_norm": 2.228958324198406, "learning_rate": 1.992474162322337e-05, "loss": 0.9147, "step": 2741 }, { "epoch": 0.20379041248606466, "grad_norm": 2.6406095770715337, "learning_rate": 1.9924643337893857e-05, "loss": 0.8873, "step": 2742 }, { "epoch": 0.2038647342995169, "grad_norm": 2.325102434767355, "learning_rate": 1.992454498867e-05, "loss": 0.8813, "step": 2743 }, { "epoch": 0.20393905611296917, "grad_norm": 2.9511104084834976, "learning_rate": 1.9924446575552444e-05, "loss": 1.0865, "step": 2744 }, { "epoch": 0.2040133779264214, "grad_norm": 4.309715068602753, "learning_rate": 1.9924348098541814e-05, "loss": 0.7353, "step": 2745 }, { "epoch": 0.20408769973987365, "grad_norm": 1.969867628832912, "learning_rate": 1.9924249557638746e-05, "loss": 0.8978, "step": 2746 }, { "epoch": 0.2041620215533259, "grad_norm": 2.1058833141304545, "learning_rate": 1.9924150952843874e-05, "loss": 0.8068, "step": 2747 }, { "epoch": 0.20423634336677815, "grad_norm": 2.5595638161110705, "learning_rate": 1.9924052284157832e-05, "loss": 1.0716, "step": 2748 }, { "epoch": 0.2043106651802304, "grad_norm": 2.368810823067351, "learning_rate": 1.9923953551581254e-05, "loss": 0.9505, "step": 2749 }, { "epoch": 0.20438498699368265, "grad_norm": 7.280076654155079, "learning_rate": 1.9923854755114783e-05, "loss": 0.9215, "step": 2750 }, { "epoch": 0.2044593088071349, "grad_norm": 2.4708918130023054, "learning_rate": 1.9923755894759047e-05, "loss": 0.7795, "step": 2751 }, { "epoch": 0.20453363062058713, "grad_norm": 1.6934942332903897, "learning_rate": 1.9923656970514688e-05, "loss": 0.7395, "step": 2752 }, { "epoch": 0.2046079524340394, "grad_norm": 1.9901209331874974, "learning_rate": 1.9923557982382337e-05, "loss": 0.7812, "step": 2753 }, { "epoch": 0.20468227424749164, "grad_norm": 2.2493005256222243, "learning_rate": 1.9923458930362637e-05, "loss": 0.9784, "step": 2754 }, { "epoch": 0.20475659606094387, "grad_norm": 2.437247489523108, "learning_rate": 1.992335981445622e-05, "loss": 0.9306, "step": 2755 }, { "epoch": 0.20483091787439614, "grad_norm": 2.4757535840540714, "learning_rate": 1.992326063466373e-05, "loss": 0.6849, "step": 2756 }, { "epoch": 0.20490523968784838, "grad_norm": 2.9586735702425244, "learning_rate": 1.9923161390985803e-05, "loss": 0.8661, "step": 2757 }, { "epoch": 0.20497956150130064, "grad_norm": 1.9631887998859059, "learning_rate": 1.9923062083423077e-05, "loss": 0.7169, "step": 2758 }, { "epoch": 0.20505388331475288, "grad_norm": 2.3690777019227154, "learning_rate": 1.992296271197619e-05, "loss": 0.7886, "step": 2759 }, { "epoch": 0.20512820512820512, "grad_norm": 7.185240622594491, "learning_rate": 1.9922863276645793e-05, "loss": 0.8048, "step": 2760 }, { "epoch": 0.2052025269416574, "grad_norm": 2.200061347107385, "learning_rate": 1.9922763777432508e-05, "loss": 0.7893, "step": 2761 }, { "epoch": 0.20527684875510963, "grad_norm": 2.506686723059642, "learning_rate": 1.9922664214336988e-05, "loss": 0.7936, "step": 2762 }, { "epoch": 0.20535117056856186, "grad_norm": 2.343988319524583, "learning_rate": 1.992256458735987e-05, "loss": 0.939, "step": 2763 }, { "epoch": 0.20542549238201413, "grad_norm": 5.022804995087222, "learning_rate": 1.9922464896501795e-05, "loss": 0.8862, "step": 2764 }, { "epoch": 0.20549981419546637, "grad_norm": 2.3017575615078476, "learning_rate": 1.992236514176341e-05, "loss": 0.7713, "step": 2765 }, { "epoch": 0.2055741360089186, "grad_norm": 3.4055007830725588, "learning_rate": 1.9922265323145348e-05, "loss": 0.8648, "step": 2766 }, { "epoch": 0.20564845782237087, "grad_norm": 2.715111915990085, "learning_rate": 1.992216544064826e-05, "loss": 0.8774, "step": 2767 }, { "epoch": 0.2057227796358231, "grad_norm": 2.1319518146701624, "learning_rate": 1.9922065494272783e-05, "loss": 0.9123, "step": 2768 }, { "epoch": 0.20579710144927535, "grad_norm": 2.568970259548525, "learning_rate": 1.9921965484019564e-05, "loss": 0.9719, "step": 2769 }, { "epoch": 0.20587142326272762, "grad_norm": 2.4674220241211224, "learning_rate": 1.9921865409889246e-05, "loss": 1.008, "step": 2770 }, { "epoch": 0.20594574507617985, "grad_norm": 2.990373722466233, "learning_rate": 1.9921765271882472e-05, "loss": 0.8958, "step": 2771 }, { "epoch": 0.20602006688963212, "grad_norm": 2.8609854515779265, "learning_rate": 1.9921665069999894e-05, "loss": 0.9678, "step": 2772 }, { "epoch": 0.20609438870308436, "grad_norm": 2.400736847132813, "learning_rate": 1.9921564804242144e-05, "loss": 1.0305, "step": 2773 }, { "epoch": 0.2061687105165366, "grad_norm": 2.2136925068181124, "learning_rate": 1.992146447460988e-05, "loss": 0.8209, "step": 2774 }, { "epoch": 0.20624303232998886, "grad_norm": 2.6551190054169624, "learning_rate": 1.992136408110374e-05, "loss": 0.9323, "step": 2775 }, { "epoch": 0.2063173541434411, "grad_norm": 3.2406428577288082, "learning_rate": 1.9921263623724373e-05, "loss": 1.0409, "step": 2776 }, { "epoch": 0.20639167595689334, "grad_norm": 2.442045767357704, "learning_rate": 1.9921163102472423e-05, "loss": 0.8658, "step": 2777 }, { "epoch": 0.2064659977703456, "grad_norm": 2.6184500661447716, "learning_rate": 1.992106251734854e-05, "loss": 1.0522, "step": 2778 }, { "epoch": 0.20654031958379784, "grad_norm": 2.6685980803731426, "learning_rate": 1.9920961868353378e-05, "loss": 1.0489, "step": 2779 }, { "epoch": 0.20661464139725008, "grad_norm": 2.3711235632680623, "learning_rate": 1.9920861155487572e-05, "loss": 0.9271, "step": 2780 }, { "epoch": 0.20668896321070235, "grad_norm": 3.271031346446389, "learning_rate": 1.9920760378751776e-05, "loss": 1.1656, "step": 2781 }, { "epoch": 0.20676328502415459, "grad_norm": 2.3711152866912752, "learning_rate": 1.9920659538146644e-05, "loss": 0.8831, "step": 2782 }, { "epoch": 0.20683760683760682, "grad_norm": 2.635021217575822, "learning_rate": 1.992055863367282e-05, "loss": 0.9212, "step": 2783 }, { "epoch": 0.2069119286510591, "grad_norm": 2.0996335241086763, "learning_rate": 1.992045766533095e-05, "loss": 0.96, "step": 2784 }, { "epoch": 0.20698625046451133, "grad_norm": 2.5295321940287563, "learning_rate": 1.992035663312169e-05, "loss": 1.0062, "step": 2785 }, { "epoch": 0.2070605722779636, "grad_norm": 2.3277069411799407, "learning_rate": 1.9920255537045692e-05, "loss": 0.7865, "step": 2786 }, { "epoch": 0.20713489409141583, "grad_norm": 2.6700879607471273, "learning_rate": 1.99201543771036e-05, "loss": 0.8792, "step": 2787 }, { "epoch": 0.20720921590486807, "grad_norm": 2.2685078087741344, "learning_rate": 1.992005315329607e-05, "loss": 1.042, "step": 2788 }, { "epoch": 0.20728353771832034, "grad_norm": 2.5532603569283268, "learning_rate": 1.9919951865623753e-05, "loss": 0.9601, "step": 2789 }, { "epoch": 0.20735785953177258, "grad_norm": 2.3680748051107363, "learning_rate": 1.99198505140873e-05, "loss": 0.754, "step": 2790 }, { "epoch": 0.20743218134522481, "grad_norm": 2.2490500509683184, "learning_rate": 1.9919749098687364e-05, "loss": 0.7523, "step": 2791 }, { "epoch": 0.20750650315867708, "grad_norm": 2.0590571726426403, "learning_rate": 1.99196476194246e-05, "loss": 0.7552, "step": 2792 }, { "epoch": 0.20758082497212932, "grad_norm": 2.957402892443312, "learning_rate": 1.9919546076299658e-05, "loss": 0.9732, "step": 2793 }, { "epoch": 0.20765514678558156, "grad_norm": 2.546660193529261, "learning_rate": 1.9919444469313192e-05, "loss": 0.9877, "step": 2794 }, { "epoch": 0.20772946859903382, "grad_norm": 2.701883657892641, "learning_rate": 1.9919342798465856e-05, "loss": 1.1088, "step": 2795 }, { "epoch": 0.20780379041248606, "grad_norm": 2.379611541587761, "learning_rate": 1.991924106375831e-05, "loss": 1.0829, "step": 2796 }, { "epoch": 0.2078781122259383, "grad_norm": 2.2534669107148937, "learning_rate": 1.99191392651912e-05, "loss": 1.0331, "step": 2797 }, { "epoch": 0.20795243403939057, "grad_norm": 2.4306404046277916, "learning_rate": 1.9919037402765188e-05, "loss": 0.8233, "step": 2798 }, { "epoch": 0.2080267558528428, "grad_norm": 2.4668159736495965, "learning_rate": 1.9918935476480928e-05, "loss": 0.8947, "step": 2799 }, { "epoch": 0.20810107766629507, "grad_norm": 2.2782337536781334, "learning_rate": 1.9918833486339078e-05, "loss": 0.769, "step": 2800 }, { "epoch": 0.2081753994797473, "grad_norm": 2.8430669982448893, "learning_rate": 1.991873143234029e-05, "loss": 0.9693, "step": 2801 }, { "epoch": 0.20824972129319955, "grad_norm": 1.8995144772848196, "learning_rate": 1.9918629314485226e-05, "loss": 0.8263, "step": 2802 }, { "epoch": 0.2083240431066518, "grad_norm": 2.2363938927829867, "learning_rate": 1.991852713277454e-05, "loss": 1.018, "step": 2803 }, { "epoch": 0.20839836492010405, "grad_norm": 1.8669397466096365, "learning_rate": 1.991842488720889e-05, "loss": 0.6554, "step": 2804 }, { "epoch": 0.2084726867335563, "grad_norm": 1.9505258320189625, "learning_rate": 1.9918322577788935e-05, "loss": 0.8699, "step": 2805 }, { "epoch": 0.20854700854700856, "grad_norm": 2.1173315348263864, "learning_rate": 1.9918220204515336e-05, "loss": 0.756, "step": 2806 }, { "epoch": 0.2086213303604608, "grad_norm": 9.256300692966084, "learning_rate": 1.9918117767388748e-05, "loss": 1.0366, "step": 2807 }, { "epoch": 0.20869565217391303, "grad_norm": 2.8422007812835326, "learning_rate": 1.9918015266409835e-05, "loss": 0.8907, "step": 2808 }, { "epoch": 0.2087699739873653, "grad_norm": 2.073578438069387, "learning_rate": 1.991791270157925e-05, "loss": 0.6931, "step": 2809 }, { "epoch": 0.20884429580081754, "grad_norm": 2.0458414875302267, "learning_rate": 1.991781007289766e-05, "loss": 0.8813, "step": 2810 }, { "epoch": 0.20891861761426977, "grad_norm": 2.9822582075917716, "learning_rate": 1.9917707380365725e-05, "loss": 0.936, "step": 2811 }, { "epoch": 0.20899293942772204, "grad_norm": 3.5695414647020285, "learning_rate": 1.99176046239841e-05, "loss": 1.1184, "step": 2812 }, { "epoch": 0.20906726124117428, "grad_norm": 2.405180846408041, "learning_rate": 1.9917501803753456e-05, "loss": 0.9629, "step": 2813 }, { "epoch": 0.20914158305462655, "grad_norm": 2.44773555707083, "learning_rate": 1.991739891967445e-05, "loss": 0.8265, "step": 2814 }, { "epoch": 0.20921590486807878, "grad_norm": 2.1484815703448987, "learning_rate": 1.991729597174774e-05, "loss": 0.9037, "step": 2815 }, { "epoch": 0.20929022668153102, "grad_norm": 2.5981773792967284, "learning_rate": 1.9917192959973997e-05, "loss": 0.9264, "step": 2816 }, { "epoch": 0.2093645484949833, "grad_norm": 2.680295294862662, "learning_rate": 1.991708988435388e-05, "loss": 1.0493, "step": 2817 }, { "epoch": 0.20943887030843553, "grad_norm": 2.448232468762752, "learning_rate": 1.991698674488805e-05, "loss": 0.7237, "step": 2818 }, { "epoch": 0.20951319212188776, "grad_norm": 2.4849231667050753, "learning_rate": 1.9916883541577175e-05, "loss": 0.9924, "step": 2819 }, { "epoch": 0.20958751393534003, "grad_norm": 3.5709826646043727, "learning_rate": 1.9916780274421918e-05, "loss": 0.8971, "step": 2820 }, { "epoch": 0.20966183574879227, "grad_norm": 2.2589983552125514, "learning_rate": 1.9916676943422945e-05, "loss": 0.7779, "step": 2821 }, { "epoch": 0.2097361575622445, "grad_norm": 2.5961109788395023, "learning_rate": 1.991657354858092e-05, "loss": 1.0632, "step": 2822 }, { "epoch": 0.20981047937569677, "grad_norm": 2.070001766639026, "learning_rate": 1.991647008989651e-05, "loss": 0.9225, "step": 2823 }, { "epoch": 0.209884801189149, "grad_norm": 2.175787050943417, "learning_rate": 1.991636656737038e-05, "loss": 0.9643, "step": 2824 }, { "epoch": 0.20995912300260125, "grad_norm": 2.2693250829540905, "learning_rate": 1.9916262981003196e-05, "loss": 0.8633, "step": 2825 }, { "epoch": 0.21003344481605352, "grad_norm": 2.0433902783742397, "learning_rate": 1.9916159330795624e-05, "loss": 0.819, "step": 2826 }, { "epoch": 0.21010776662950575, "grad_norm": 2.3815086365329163, "learning_rate": 1.9916055616748336e-05, "loss": 0.9475, "step": 2827 }, { "epoch": 0.21018208844295802, "grad_norm": 2.221270435181959, "learning_rate": 1.9915951838861996e-05, "loss": 0.8261, "step": 2828 }, { "epoch": 0.21025641025641026, "grad_norm": 2.4684993502545765, "learning_rate": 1.991584799713727e-05, "loss": 0.8988, "step": 2829 }, { "epoch": 0.2103307320698625, "grad_norm": 2.8439039704972173, "learning_rate": 1.9915744091574833e-05, "loss": 0.8726, "step": 2830 }, { "epoch": 0.21040505388331476, "grad_norm": 2.21520017234583, "learning_rate": 1.9915640122175343e-05, "loss": 0.8195, "step": 2831 }, { "epoch": 0.210479375696767, "grad_norm": 2.7822830646985497, "learning_rate": 1.9915536088939484e-05, "loss": 0.7841, "step": 2832 }, { "epoch": 0.21055369751021924, "grad_norm": 2.0726367348459336, "learning_rate": 1.9915431991867914e-05, "loss": 0.9482, "step": 2833 }, { "epoch": 0.2106280193236715, "grad_norm": 3.073319298561098, "learning_rate": 1.9915327830961306e-05, "loss": 1.0275, "step": 2834 }, { "epoch": 0.21070234113712374, "grad_norm": 2.4876272221836255, "learning_rate": 1.9915223606220337e-05, "loss": 1.0025, "step": 2835 }, { "epoch": 0.21077666295057598, "grad_norm": 2.0597310798430897, "learning_rate": 1.9915119317645667e-05, "loss": 0.8261, "step": 2836 }, { "epoch": 0.21085098476402825, "grad_norm": 2.844792386640211, "learning_rate": 1.9915014965237977e-05, "loss": 1.0607, "step": 2837 }, { "epoch": 0.2109253065774805, "grad_norm": 2.8561717133341062, "learning_rate": 1.9914910548997936e-05, "loss": 0.8644, "step": 2838 }, { "epoch": 0.21099962839093273, "grad_norm": 2.9485561942817404, "learning_rate": 1.9914806068926213e-05, "loss": 1.0379, "step": 2839 }, { "epoch": 0.211073950204385, "grad_norm": 2.1860337188239423, "learning_rate": 1.9914701525023483e-05, "loss": 1.0127, "step": 2840 }, { "epoch": 0.21114827201783723, "grad_norm": 2.350097620728505, "learning_rate": 1.991459691729042e-05, "loss": 0.9315, "step": 2841 }, { "epoch": 0.2112225938312895, "grad_norm": 2.390896106764851, "learning_rate": 1.9914492245727696e-05, "loss": 1.1346, "step": 2842 }, { "epoch": 0.21129691564474173, "grad_norm": 2.094264531200689, "learning_rate": 1.9914387510335987e-05, "loss": 0.8879, "step": 2843 }, { "epoch": 0.21137123745819397, "grad_norm": 2.3363612517529746, "learning_rate": 1.991428271111596e-05, "loss": 1.0191, "step": 2844 }, { "epoch": 0.21144555927164624, "grad_norm": 2.4739983777764705, "learning_rate": 1.99141778480683e-05, "loss": 0.9821, "step": 2845 }, { "epoch": 0.21151988108509848, "grad_norm": 2.257258292001381, "learning_rate": 1.991407292119368e-05, "loss": 1.0815, "step": 2846 }, { "epoch": 0.21159420289855072, "grad_norm": 2.2000052833176014, "learning_rate": 1.991396793049277e-05, "loss": 0.8232, "step": 2847 }, { "epoch": 0.21166852471200298, "grad_norm": 5.579094948776446, "learning_rate": 1.9913862875966246e-05, "loss": 0.8291, "step": 2848 }, { "epoch": 0.21174284652545522, "grad_norm": 2.2563591701247914, "learning_rate": 1.991375775761479e-05, "loss": 0.7934, "step": 2849 }, { "epoch": 0.21181716833890746, "grad_norm": 2.1303550313137767, "learning_rate": 1.9913652575439077e-05, "loss": 0.6821, "step": 2850 }, { "epoch": 0.21189149015235972, "grad_norm": 2.312624243379462, "learning_rate": 1.991354732943978e-05, "loss": 0.7082, "step": 2851 }, { "epoch": 0.21196581196581196, "grad_norm": 2.3311865998196826, "learning_rate": 1.9913442019617584e-05, "loss": 1.1539, "step": 2852 }, { "epoch": 0.2120401337792642, "grad_norm": 2.6130547903935035, "learning_rate": 1.991333664597316e-05, "loss": 0.732, "step": 2853 }, { "epoch": 0.21211445559271647, "grad_norm": 2.601719436596689, "learning_rate": 1.9913231208507188e-05, "loss": 0.9241, "step": 2854 }, { "epoch": 0.2121887774061687, "grad_norm": 2.3621329746924267, "learning_rate": 1.991312570722035e-05, "loss": 0.9497, "step": 2855 }, { "epoch": 0.21226309921962097, "grad_norm": 2.777629454410214, "learning_rate": 1.9913020142113323e-05, "loss": 1.1541, "step": 2856 }, { "epoch": 0.2123374210330732, "grad_norm": 3.611422923169215, "learning_rate": 1.9912914513186785e-05, "loss": 0.5755, "step": 2857 }, { "epoch": 0.21241174284652545, "grad_norm": 3.043562050087131, "learning_rate": 1.9912808820441417e-05, "loss": 1.1542, "step": 2858 }, { "epoch": 0.2124860646599777, "grad_norm": 2.3112178088742925, "learning_rate": 1.9912703063877904e-05, "loss": 0.9142, "step": 2859 }, { "epoch": 0.21256038647342995, "grad_norm": 2.929845240082746, "learning_rate": 1.9912597243496922e-05, "loss": 0.7485, "step": 2860 }, { "epoch": 0.2126347082868822, "grad_norm": 2.812385527755352, "learning_rate": 1.991249135929915e-05, "loss": 0.8603, "step": 2861 }, { "epoch": 0.21270903010033446, "grad_norm": 2.740955329211589, "learning_rate": 1.9912385411285275e-05, "loss": 0.8186, "step": 2862 }, { "epoch": 0.2127833519137867, "grad_norm": 2.1706419540936777, "learning_rate": 1.991227939945598e-05, "loss": 0.9214, "step": 2863 }, { "epoch": 0.21285767372723893, "grad_norm": 2.204959796973964, "learning_rate": 1.991217332381194e-05, "loss": 0.8672, "step": 2864 }, { "epoch": 0.2129319955406912, "grad_norm": 2.4963304742571184, "learning_rate": 1.9912067184353843e-05, "loss": 0.9618, "step": 2865 }, { "epoch": 0.21300631735414344, "grad_norm": 2.0753074792898, "learning_rate": 1.991196098108237e-05, "loss": 0.8615, "step": 2866 }, { "epoch": 0.21308063916759568, "grad_norm": 1.9102833911362276, "learning_rate": 1.9911854713998212e-05, "loss": 0.7907, "step": 2867 }, { "epoch": 0.21315496098104794, "grad_norm": 2.12936488237434, "learning_rate": 1.9911748383102044e-05, "loss": 0.8969, "step": 2868 }, { "epoch": 0.21322928279450018, "grad_norm": 2.288596628081873, "learning_rate": 1.9911641988394557e-05, "loss": 0.7735, "step": 2869 }, { "epoch": 0.21330360460795245, "grad_norm": 3.021714751813208, "learning_rate": 1.9911535529876432e-05, "loss": 1.0114, "step": 2870 }, { "epoch": 0.21337792642140468, "grad_norm": 3.4008480404526744, "learning_rate": 1.9911429007548354e-05, "loss": 0.9647, "step": 2871 }, { "epoch": 0.21345224823485692, "grad_norm": 2.1464173991986732, "learning_rate": 1.991132242141101e-05, "loss": 0.8526, "step": 2872 }, { "epoch": 0.2135265700483092, "grad_norm": 2.5811974544851704, "learning_rate": 1.9911215771465087e-05, "loss": 0.8312, "step": 2873 }, { "epoch": 0.21360089186176143, "grad_norm": 2.5050259556828216, "learning_rate": 1.991110905771127e-05, "loss": 0.8613, "step": 2874 }, { "epoch": 0.21367521367521367, "grad_norm": 2.1846786862054106, "learning_rate": 1.991100228015025e-05, "loss": 0.9039, "step": 2875 }, { "epoch": 0.21374953548866593, "grad_norm": 3.0801604055272547, "learning_rate": 1.991089543878271e-05, "loss": 1.2745, "step": 2876 }, { "epoch": 0.21382385730211817, "grad_norm": 2.1457747064750987, "learning_rate": 1.9910788533609343e-05, "loss": 1.0063, "step": 2877 }, { "epoch": 0.2138981791155704, "grad_norm": 3.5805209835919047, "learning_rate": 1.9910681564630828e-05, "loss": 1.1353, "step": 2878 }, { "epoch": 0.21397250092902267, "grad_norm": 9.843258857545496, "learning_rate": 1.9910574531847865e-05, "loss": 1.0187, "step": 2879 }, { "epoch": 0.2140468227424749, "grad_norm": 2.2501933912826253, "learning_rate": 1.9910467435261133e-05, "loss": 0.9221, "step": 2880 }, { "epoch": 0.21412114455592715, "grad_norm": 2.1454827358809596, "learning_rate": 1.9910360274871325e-05, "loss": 0.8719, "step": 2881 }, { "epoch": 0.21419546636937942, "grad_norm": 3.258400030174571, "learning_rate": 1.9910253050679136e-05, "loss": 1.1747, "step": 2882 }, { "epoch": 0.21426978818283166, "grad_norm": 2.4744079232366696, "learning_rate": 1.9910145762685248e-05, "loss": 0.9688, "step": 2883 }, { "epoch": 0.21434410999628392, "grad_norm": 2.4243091961858867, "learning_rate": 1.991003841089036e-05, "loss": 0.7396, "step": 2884 }, { "epoch": 0.21441843180973616, "grad_norm": 2.1274461127138515, "learning_rate": 1.9909930995295154e-05, "loss": 0.9147, "step": 2885 }, { "epoch": 0.2144927536231884, "grad_norm": 3.0935458041345867, "learning_rate": 1.990982351590033e-05, "loss": 0.7767, "step": 2886 }, { "epoch": 0.21456707543664066, "grad_norm": 2.716715717836993, "learning_rate": 1.9909715972706576e-05, "loss": 1.1123, "step": 2887 }, { "epoch": 0.2146413972500929, "grad_norm": 2.4421505629201583, "learning_rate": 1.9909608365714585e-05, "loss": 0.9123, "step": 2888 }, { "epoch": 0.21471571906354514, "grad_norm": 2.4284741446868394, "learning_rate": 1.9909500694925047e-05, "loss": 0.825, "step": 2889 }, { "epoch": 0.2147900408769974, "grad_norm": 2.688497340228253, "learning_rate": 1.9909392960338662e-05, "loss": 1.0404, "step": 2890 }, { "epoch": 0.21486436269044965, "grad_norm": 2.4627669735645936, "learning_rate": 1.9909285161956116e-05, "loss": 0.7882, "step": 2891 }, { "epoch": 0.21493868450390188, "grad_norm": 3.2040555069442336, "learning_rate": 1.9909177299778105e-05, "loss": 1.1519, "step": 2892 }, { "epoch": 0.21501300631735415, "grad_norm": 2.4264043585076998, "learning_rate": 1.9909069373805327e-05, "loss": 0.7882, "step": 2893 }, { "epoch": 0.2150873281308064, "grad_norm": 2.582157764441804, "learning_rate": 1.9908961384038476e-05, "loss": 0.8582, "step": 2894 }, { "epoch": 0.21516164994425863, "grad_norm": 2.711175040081802, "learning_rate": 1.9908853330478242e-05, "loss": 0.9036, "step": 2895 }, { "epoch": 0.2152359717577109, "grad_norm": 2.76686269023724, "learning_rate": 1.9908745213125325e-05, "loss": 0.7993, "step": 2896 }, { "epoch": 0.21531029357116313, "grad_norm": 2.47130148396619, "learning_rate": 1.9908637031980426e-05, "loss": 1.1092, "step": 2897 }, { "epoch": 0.2153846153846154, "grad_norm": 2.5263472241829406, "learning_rate": 1.990852878704423e-05, "loss": 0.9965, "step": 2898 }, { "epoch": 0.21545893719806763, "grad_norm": 2.602594853473971, "learning_rate": 1.9908420478317444e-05, "loss": 1.0229, "step": 2899 }, { "epoch": 0.21553325901151987, "grad_norm": 2.631979327014709, "learning_rate": 1.9908312105800754e-05, "loss": 0.8488, "step": 2900 }, { "epoch": 0.21560758082497214, "grad_norm": 3.5253850299466993, "learning_rate": 1.9908203669494874e-05, "loss": 0.9519, "step": 2901 }, { "epoch": 0.21568190263842438, "grad_norm": 3.1213858764887386, "learning_rate": 1.9908095169400484e-05, "loss": 1.0332, "step": 2902 }, { "epoch": 0.21575622445187662, "grad_norm": 2.220942937704716, "learning_rate": 1.9907986605518295e-05, "loss": 0.9045, "step": 2903 }, { "epoch": 0.21583054626532888, "grad_norm": 2.2525433250909406, "learning_rate": 1.9907877977849007e-05, "loss": 0.9889, "step": 2904 }, { "epoch": 0.21590486807878112, "grad_norm": 2.2018182783913716, "learning_rate": 1.990776928639331e-05, "loss": 0.7362, "step": 2905 }, { "epoch": 0.21597918989223336, "grad_norm": 1.9582470164893, "learning_rate": 1.9907660531151908e-05, "loss": 0.7028, "step": 2906 }, { "epoch": 0.21605351170568562, "grad_norm": 2.1749381191060113, "learning_rate": 1.99075517121255e-05, "loss": 0.9362, "step": 2907 }, { "epoch": 0.21612783351913786, "grad_norm": 2.058344878525974, "learning_rate": 1.990744282931479e-05, "loss": 0.8324, "step": 2908 }, { "epoch": 0.2162021553325901, "grad_norm": 5.609038175892348, "learning_rate": 1.990733388272048e-05, "loss": 1.1686, "step": 2909 }, { "epoch": 0.21627647714604237, "grad_norm": 2.805101262110831, "learning_rate": 1.9907224872343268e-05, "loss": 0.9847, "step": 2910 }, { "epoch": 0.2163507989594946, "grad_norm": 1.8372212646772177, "learning_rate": 1.9907115798183855e-05, "loss": 0.6138, "step": 2911 }, { "epoch": 0.21642512077294687, "grad_norm": 3.111203523157323, "learning_rate": 1.9907006660242945e-05, "loss": 1.3394, "step": 2912 }, { "epoch": 0.2164994425863991, "grad_norm": 2.881841016019551, "learning_rate": 1.9906897458521238e-05, "loss": 0.9442, "step": 2913 }, { "epoch": 0.21657376439985135, "grad_norm": 2.5482095532063767, "learning_rate": 1.9906788193019443e-05, "loss": 1.0643, "step": 2914 }, { "epoch": 0.21664808621330361, "grad_norm": 2.534104329667356, "learning_rate": 1.9906678863738253e-05, "loss": 1.0448, "step": 2915 }, { "epoch": 0.21672240802675585, "grad_norm": 2.097282740570247, "learning_rate": 1.9906569470678386e-05, "loss": 0.6198, "step": 2916 }, { "epoch": 0.2167967298402081, "grad_norm": 2.1654986858546974, "learning_rate": 1.9906460013840532e-05, "loss": 0.913, "step": 2917 }, { "epoch": 0.21687105165366036, "grad_norm": 2.569729926303433, "learning_rate": 1.9906350493225408e-05, "loss": 0.9406, "step": 2918 }, { "epoch": 0.2169453734671126, "grad_norm": 2.443408644783977, "learning_rate": 1.9906240908833713e-05, "loss": 1.0138, "step": 2919 }, { "epoch": 0.21701969528056483, "grad_norm": 2.176002401279409, "learning_rate": 1.9906131260666147e-05, "loss": 0.6925, "step": 2920 }, { "epoch": 0.2170940170940171, "grad_norm": 2.087145083498101, "learning_rate": 1.990602154872343e-05, "loss": 0.7203, "step": 2921 }, { "epoch": 0.21716833890746934, "grad_norm": 2.2614012751670236, "learning_rate": 1.9905911773006258e-05, "loss": 0.8244, "step": 2922 }, { "epoch": 0.2172426607209216, "grad_norm": 2.4261074818396735, "learning_rate": 1.9905801933515338e-05, "loss": 0.7505, "step": 2923 }, { "epoch": 0.21731698253437384, "grad_norm": 2.1756425117670672, "learning_rate": 1.9905692030251378e-05, "loss": 0.7915, "step": 2924 }, { "epoch": 0.21739130434782608, "grad_norm": 4.142947164202887, "learning_rate": 1.990558206321509e-05, "loss": 0.8999, "step": 2925 }, { "epoch": 0.21746562616127835, "grad_norm": 2.7771369514187643, "learning_rate": 1.9905472032407177e-05, "loss": 1.0908, "step": 2926 }, { "epoch": 0.21753994797473059, "grad_norm": 2.3699549239999356, "learning_rate": 1.9905361937828348e-05, "loss": 0.8322, "step": 2927 }, { "epoch": 0.21761426978818282, "grad_norm": 3.0346250218777424, "learning_rate": 1.9905251779479315e-05, "loss": 1.0764, "step": 2928 }, { "epoch": 0.2176885916016351, "grad_norm": 2.7187911852391995, "learning_rate": 1.9905141557360784e-05, "loss": 1.1185, "step": 2929 }, { "epoch": 0.21776291341508733, "grad_norm": 2.402925430681388, "learning_rate": 1.9905031271473465e-05, "loss": 0.6982, "step": 2930 }, { "epoch": 0.21783723522853957, "grad_norm": 2.206003232647751, "learning_rate": 1.990492092181807e-05, "loss": 0.8762, "step": 2931 }, { "epoch": 0.21791155704199183, "grad_norm": 2.5381724965554993, "learning_rate": 1.9904810508395304e-05, "loss": 0.9808, "step": 2932 }, { "epoch": 0.21798587885544407, "grad_norm": 3.0946660873928633, "learning_rate": 1.9904700031205886e-05, "loss": 0.9706, "step": 2933 }, { "epoch": 0.2180602006688963, "grad_norm": 4.39726405037615, "learning_rate": 1.9904589490250522e-05, "loss": 0.6456, "step": 2934 }, { "epoch": 0.21813452248234858, "grad_norm": 3.288201821465193, "learning_rate": 1.9904478885529926e-05, "loss": 1.2283, "step": 2935 }, { "epoch": 0.2182088442958008, "grad_norm": 2.2404651186065694, "learning_rate": 1.9904368217044808e-05, "loss": 0.8518, "step": 2936 }, { "epoch": 0.21828316610925308, "grad_norm": 2.585869558424804, "learning_rate": 1.9904257484795883e-05, "loss": 0.8579, "step": 2937 }, { "epoch": 0.21835748792270532, "grad_norm": 3.8849225354163606, "learning_rate": 1.990414668878386e-05, "loss": 1.0528, "step": 2938 }, { "epoch": 0.21843180973615756, "grad_norm": 2.621148468134775, "learning_rate": 1.9904035829009455e-05, "loss": 0.9189, "step": 2939 }, { "epoch": 0.21850613154960982, "grad_norm": 2.9041821457646395, "learning_rate": 1.990392490547338e-05, "loss": 1.1822, "step": 2940 }, { "epoch": 0.21858045336306206, "grad_norm": 2.366780715029185, "learning_rate": 1.9903813918176352e-05, "loss": 0.9837, "step": 2941 }, { "epoch": 0.2186547751765143, "grad_norm": 3.6259332682451775, "learning_rate": 1.9903702867119083e-05, "loss": 0.7888, "step": 2942 }, { "epoch": 0.21872909698996656, "grad_norm": 2.0789220396535373, "learning_rate": 1.990359175230229e-05, "loss": 0.9005, "step": 2943 }, { "epoch": 0.2188034188034188, "grad_norm": 2.4254687506503596, "learning_rate": 1.9903480573726686e-05, "loss": 1.0166, "step": 2944 }, { "epoch": 0.21887774061687104, "grad_norm": 2.927993681905925, "learning_rate": 1.9903369331392986e-05, "loss": 1.0852, "step": 2945 }, { "epoch": 0.2189520624303233, "grad_norm": 2.1800427503087825, "learning_rate": 1.990325802530191e-05, "loss": 0.9204, "step": 2946 }, { "epoch": 0.21902638424377555, "grad_norm": 2.5465195318804215, "learning_rate": 1.9903146655454174e-05, "loss": 0.9105, "step": 2947 }, { "epoch": 0.21910070605722778, "grad_norm": 2.4575491000863745, "learning_rate": 1.990303522185049e-05, "loss": 0.9531, "step": 2948 }, { "epoch": 0.21917502787068005, "grad_norm": 2.3630921349653984, "learning_rate": 1.9902923724491585e-05, "loss": 0.8898, "step": 2949 }, { "epoch": 0.2192493496841323, "grad_norm": 4.668978903980585, "learning_rate": 1.9902812163378166e-05, "loss": 1.0945, "step": 2950 }, { "epoch": 0.21932367149758455, "grad_norm": 3.0555927272448673, "learning_rate": 1.990270053851096e-05, "loss": 0.9087, "step": 2951 }, { "epoch": 0.2193979933110368, "grad_norm": 2.0088355468808423, "learning_rate": 1.990258884989068e-05, "loss": 0.9315, "step": 2952 }, { "epoch": 0.21947231512448903, "grad_norm": 2.506025232636802, "learning_rate": 1.990247709751804e-05, "loss": 0.7333, "step": 2953 }, { "epoch": 0.2195466369379413, "grad_norm": 2.3725396038957345, "learning_rate": 1.9902365281393776e-05, "loss": 0.7131, "step": 2954 }, { "epoch": 0.21962095875139354, "grad_norm": 2.4359635013896113, "learning_rate": 1.9902253401518593e-05, "loss": 1.0599, "step": 2955 }, { "epoch": 0.21969528056484577, "grad_norm": 2.489337132555715, "learning_rate": 1.990214145789322e-05, "loss": 0.9384, "step": 2956 }, { "epoch": 0.21976960237829804, "grad_norm": 2.4085988031564023, "learning_rate": 1.9902029450518372e-05, "loss": 0.8049, "step": 2957 }, { "epoch": 0.21984392419175028, "grad_norm": 2.379048826906679, "learning_rate": 1.990191737939477e-05, "loss": 0.733, "step": 2958 }, { "epoch": 0.21991824600520252, "grad_norm": 2.5200856752768512, "learning_rate": 1.990180524452314e-05, "loss": 0.9712, "step": 2959 }, { "epoch": 0.21999256781865478, "grad_norm": 2.408358369449083, "learning_rate": 1.9901693045904206e-05, "loss": 0.9085, "step": 2960 }, { "epoch": 0.22006688963210702, "grad_norm": 3.22125034012681, "learning_rate": 1.9901580783538678e-05, "loss": 0.831, "step": 2961 }, { "epoch": 0.22014121144555926, "grad_norm": 2.1483882049301473, "learning_rate": 1.990146845742729e-05, "loss": 0.9221, "step": 2962 }, { "epoch": 0.22021553325901153, "grad_norm": 2.4123090949820134, "learning_rate": 1.9901356067570764e-05, "loss": 0.9937, "step": 2963 }, { "epoch": 0.22028985507246376, "grad_norm": 2.9966210903939974, "learning_rate": 1.990124361396982e-05, "loss": 0.8863, "step": 2964 }, { "epoch": 0.22036417688591603, "grad_norm": 2.4692502575579027, "learning_rate": 1.990113109662518e-05, "loss": 0.879, "step": 2965 }, { "epoch": 0.22043849869936827, "grad_norm": 2.271634537209645, "learning_rate": 1.9901018515537575e-05, "loss": 0.9757, "step": 2966 }, { "epoch": 0.2205128205128205, "grad_norm": 2.615965041904778, "learning_rate": 1.9900905870707726e-05, "loss": 1.0358, "step": 2967 }, { "epoch": 0.22058714232627277, "grad_norm": 1.9716261567792663, "learning_rate": 1.9900793162136362e-05, "loss": 0.9171, "step": 2968 }, { "epoch": 0.220661464139725, "grad_norm": 5.060405399384798, "learning_rate": 1.99006803898242e-05, "loss": 1.3803, "step": 2969 }, { "epoch": 0.22073578595317725, "grad_norm": 3.82742169035306, "learning_rate": 1.9900567553771974e-05, "loss": 0.9472, "step": 2970 }, { "epoch": 0.22081010776662952, "grad_norm": 2.0244331391894383, "learning_rate": 1.9900454653980404e-05, "loss": 0.7157, "step": 2971 }, { "epoch": 0.22088442958008175, "grad_norm": 2.2759364507190383, "learning_rate": 1.9900341690450225e-05, "loss": 0.817, "step": 2972 }, { "epoch": 0.220958751393534, "grad_norm": 2.016835124117642, "learning_rate": 1.990022866318216e-05, "loss": 1.0088, "step": 2973 }, { "epoch": 0.22103307320698626, "grad_norm": 2.568131591468326, "learning_rate": 1.9900115572176933e-05, "loss": 1.0098, "step": 2974 }, { "epoch": 0.2211073950204385, "grad_norm": 2.122110090241456, "learning_rate": 1.990000241743528e-05, "loss": 0.6343, "step": 2975 }, { "epoch": 0.22118171683389073, "grad_norm": 1.9599019641487152, "learning_rate": 1.989988919895792e-05, "loss": 0.7782, "step": 2976 }, { "epoch": 0.221256038647343, "grad_norm": 2.1861430310482133, "learning_rate": 1.9899775916745596e-05, "loss": 0.8376, "step": 2977 }, { "epoch": 0.22133036046079524, "grad_norm": 2.414836848323804, "learning_rate": 1.9899662570799018e-05, "loss": 0.8991, "step": 2978 }, { "epoch": 0.2214046822742475, "grad_norm": 2.1231042843630745, "learning_rate": 1.9899549161118933e-05, "loss": 0.9743, "step": 2979 }, { "epoch": 0.22147900408769974, "grad_norm": 2.4200150264434632, "learning_rate": 1.989943568770606e-05, "loss": 0.99, "step": 2980 }, { "epoch": 0.22155332590115198, "grad_norm": 2.405807226935894, "learning_rate": 1.9899322150561134e-05, "loss": 0.9536, "step": 2981 }, { "epoch": 0.22162764771460425, "grad_norm": 2.0072730517439497, "learning_rate": 1.9899208549684887e-05, "loss": 0.8294, "step": 2982 }, { "epoch": 0.2217019695280565, "grad_norm": 2.6808310605683827, "learning_rate": 1.989909488507805e-05, "loss": 1.0678, "step": 2983 }, { "epoch": 0.22177629134150872, "grad_norm": 2.4185567005460307, "learning_rate": 1.989898115674135e-05, "loss": 0.7688, "step": 2984 }, { "epoch": 0.221850613154961, "grad_norm": 2.1825315465939625, "learning_rate": 1.9898867364675527e-05, "loss": 0.9409, "step": 2985 }, { "epoch": 0.22192493496841323, "grad_norm": 2.604467873276771, "learning_rate": 1.989875350888131e-05, "loss": 1.0149, "step": 2986 }, { "epoch": 0.22199925678186547, "grad_norm": 2.493500731763387, "learning_rate": 1.989863958935943e-05, "loss": 1.028, "step": 2987 }, { "epoch": 0.22207357859531773, "grad_norm": 2.486864115288013, "learning_rate": 1.9898525606110622e-05, "loss": 0.92, "step": 2988 }, { "epoch": 0.22214790040876997, "grad_norm": 1.9256604645871058, "learning_rate": 1.9898411559135616e-05, "loss": 0.7272, "step": 2989 }, { "epoch": 0.2222222222222222, "grad_norm": 2.37366170361793, "learning_rate": 1.9898297448435154e-05, "loss": 0.9868, "step": 2990 }, { "epoch": 0.22229654403567448, "grad_norm": 2.5467326125684835, "learning_rate": 1.9898183274009967e-05, "loss": 0.9995, "step": 2991 }, { "epoch": 0.22237086584912671, "grad_norm": 2.943868439954586, "learning_rate": 1.9898069035860787e-05, "loss": 0.8491, "step": 2992 }, { "epoch": 0.22244518766257898, "grad_norm": 2.4898939599738976, "learning_rate": 1.9897954733988353e-05, "loss": 1.0427, "step": 2993 }, { "epoch": 0.22251950947603122, "grad_norm": 3.392347023636662, "learning_rate": 1.9897840368393403e-05, "loss": 0.9259, "step": 2994 }, { "epoch": 0.22259383128948346, "grad_norm": 2.074563412027435, "learning_rate": 1.9897725939076666e-05, "loss": 0.7453, "step": 2995 }, { "epoch": 0.22266815310293572, "grad_norm": 2.6346541985175187, "learning_rate": 1.9897611446038887e-05, "loss": 0.9134, "step": 2996 }, { "epoch": 0.22274247491638796, "grad_norm": 2.2956621771855628, "learning_rate": 1.9897496889280795e-05, "loss": 0.8901, "step": 2997 }, { "epoch": 0.2228167967298402, "grad_norm": 2.75505504334479, "learning_rate": 1.9897382268803135e-05, "loss": 1.1709, "step": 2998 }, { "epoch": 0.22289111854329247, "grad_norm": 2.2645090656384776, "learning_rate": 1.989726758460664e-05, "loss": 0.8303, "step": 2999 }, { "epoch": 0.2229654403567447, "grad_norm": 2.5202562880308466, "learning_rate": 1.9897152836692052e-05, "loss": 1.0638, "step": 3000 }, { "epoch": 0.22303976217019694, "grad_norm": 2.2692113566011143, "learning_rate": 1.9897038025060103e-05, "loss": 0.8575, "step": 3001 }, { "epoch": 0.2231140839836492, "grad_norm": 2.0870498489578115, "learning_rate": 1.989692314971154e-05, "loss": 0.9494, "step": 3002 }, { "epoch": 0.22318840579710145, "grad_norm": 2.5400600454983633, "learning_rate": 1.9896808210647096e-05, "loss": 0.9222, "step": 3003 }, { "epoch": 0.22326272761055369, "grad_norm": 2.5861852611800775, "learning_rate": 1.9896693207867516e-05, "loss": 0.7394, "step": 3004 }, { "epoch": 0.22333704942400595, "grad_norm": 2.252165415794852, "learning_rate": 1.989657814137354e-05, "loss": 0.9945, "step": 3005 }, { "epoch": 0.2234113712374582, "grad_norm": 2.6433038809411156, "learning_rate": 1.9896463011165907e-05, "loss": 1.1039, "step": 3006 }, { "epoch": 0.22348569305091046, "grad_norm": 1.9786614798997524, "learning_rate": 1.989634781724536e-05, "loss": 0.6938, "step": 3007 }, { "epoch": 0.2235600148643627, "grad_norm": 2.3157778739921353, "learning_rate": 1.9896232559612637e-05, "loss": 1.0046, "step": 3008 }, { "epoch": 0.22363433667781493, "grad_norm": 2.412073848211437, "learning_rate": 1.9896117238268485e-05, "loss": 1.0108, "step": 3009 }, { "epoch": 0.2237086584912672, "grad_norm": 2.671583062592471, "learning_rate": 1.989600185321364e-05, "loss": 1.165, "step": 3010 }, { "epoch": 0.22378298030471944, "grad_norm": 2.3351562343502126, "learning_rate": 1.9895886404448846e-05, "loss": 1.0478, "step": 3011 }, { "epoch": 0.22385730211817167, "grad_norm": 1.988350794576733, "learning_rate": 1.9895770891974857e-05, "loss": 0.8664, "step": 3012 }, { "epoch": 0.22393162393162394, "grad_norm": 2.0573725923628206, "learning_rate": 1.9895655315792402e-05, "loss": 0.7454, "step": 3013 }, { "epoch": 0.22400594574507618, "grad_norm": 2.038348080542371, "learning_rate": 1.9895539675902235e-05, "loss": 0.8043, "step": 3014 }, { "epoch": 0.22408026755852842, "grad_norm": 2.13754200725294, "learning_rate": 1.9895423972305097e-05, "loss": 0.9316, "step": 3015 }, { "epoch": 0.22415458937198068, "grad_norm": 2.4582309217414946, "learning_rate": 1.989530820500173e-05, "loss": 0.9631, "step": 3016 }, { "epoch": 0.22422891118543292, "grad_norm": 1.8373251651226068, "learning_rate": 1.9895192373992883e-05, "loss": 0.8306, "step": 3017 }, { "epoch": 0.22430323299888516, "grad_norm": 2.4558217989200544, "learning_rate": 1.9895076479279302e-05, "loss": 0.7945, "step": 3018 }, { "epoch": 0.22437755481233743, "grad_norm": 2.2574964101197788, "learning_rate": 1.9894960520861734e-05, "loss": 1.1009, "step": 3019 }, { "epoch": 0.22445187662578966, "grad_norm": 1.9991027531837893, "learning_rate": 1.989484449874092e-05, "loss": 0.7332, "step": 3020 }, { "epoch": 0.22452619843924193, "grad_norm": 2.256069022406934, "learning_rate": 1.9894728412917616e-05, "loss": 0.8727, "step": 3021 }, { "epoch": 0.22460052025269417, "grad_norm": 2.727875449041902, "learning_rate": 1.9894612263392556e-05, "loss": 0.838, "step": 3022 }, { "epoch": 0.2246748420661464, "grad_norm": 2.218631236887847, "learning_rate": 1.98944960501665e-05, "loss": 0.8392, "step": 3023 }, { "epoch": 0.22474916387959867, "grad_norm": 2.2677293397745837, "learning_rate": 1.9894379773240192e-05, "loss": 1.0017, "step": 3024 }, { "epoch": 0.2248234856930509, "grad_norm": 2.5178825098656925, "learning_rate": 1.989426343261438e-05, "loss": 1.0103, "step": 3025 }, { "epoch": 0.22489780750650315, "grad_norm": 2.3476175744105685, "learning_rate": 1.9894147028289815e-05, "loss": 0.735, "step": 3026 }, { "epoch": 0.22497212931995542, "grad_norm": 3.011547281913358, "learning_rate": 1.989403056026724e-05, "loss": 0.8483, "step": 3027 }, { "epoch": 0.22504645113340765, "grad_norm": 2.2927036282136712, "learning_rate": 1.9893914028547414e-05, "loss": 1.0347, "step": 3028 }, { "epoch": 0.2251207729468599, "grad_norm": 3.160015768383835, "learning_rate": 1.989379743313108e-05, "loss": 0.9594, "step": 3029 }, { "epoch": 0.22519509476031216, "grad_norm": 2.084778097649313, "learning_rate": 1.9893680774018993e-05, "loss": 0.735, "step": 3030 }, { "epoch": 0.2252694165737644, "grad_norm": 1.9883641011322704, "learning_rate": 1.9893564051211904e-05, "loss": 0.8447, "step": 3031 }, { "epoch": 0.22534373838721664, "grad_norm": 2.4662147115055015, "learning_rate": 1.989344726471056e-05, "loss": 0.9083, "step": 3032 }, { "epoch": 0.2254180602006689, "grad_norm": 2.231541151108991, "learning_rate": 1.989333041451572e-05, "loss": 0.7608, "step": 3033 }, { "epoch": 0.22549238201412114, "grad_norm": 2.484983504758712, "learning_rate": 1.9893213500628127e-05, "loss": 0.9399, "step": 3034 }, { "epoch": 0.2255667038275734, "grad_norm": 2.7487249092326578, "learning_rate": 1.989309652304854e-05, "loss": 0.8847, "step": 3035 }, { "epoch": 0.22564102564102564, "grad_norm": 2.799426267279744, "learning_rate": 1.9892979481777714e-05, "loss": 0.9314, "step": 3036 }, { "epoch": 0.22571534745447788, "grad_norm": 2.2468399316998764, "learning_rate": 1.9892862376816396e-05, "loss": 0.9206, "step": 3037 }, { "epoch": 0.22578966926793015, "grad_norm": 2.242279061031069, "learning_rate": 1.9892745208165347e-05, "loss": 0.7434, "step": 3038 }, { "epoch": 0.2258639910813824, "grad_norm": 2.0679802158023484, "learning_rate": 1.9892627975825314e-05, "loss": 0.746, "step": 3039 }, { "epoch": 0.22593831289483463, "grad_norm": 1.831030811916555, "learning_rate": 1.9892510679797056e-05, "loss": 0.6288, "step": 3040 }, { "epoch": 0.2260126347082869, "grad_norm": 2.361527856753326, "learning_rate": 1.989239332008133e-05, "loss": 0.9271, "step": 3041 }, { "epoch": 0.22608695652173913, "grad_norm": 2.6086600202571404, "learning_rate": 1.9892275896678884e-05, "loss": 1.0184, "step": 3042 }, { "epoch": 0.22616127833519137, "grad_norm": 2.5206879939168583, "learning_rate": 1.9892158409590484e-05, "loss": 0.8774, "step": 3043 }, { "epoch": 0.22623560014864363, "grad_norm": 2.458638191318203, "learning_rate": 1.989204085881688e-05, "loss": 0.7387, "step": 3044 }, { "epoch": 0.22630992196209587, "grad_norm": 2.419907523376471, "learning_rate": 1.9891923244358832e-05, "loss": 1.0771, "step": 3045 }, { "epoch": 0.2263842437755481, "grad_norm": 2.273877174774512, "learning_rate": 1.9891805566217092e-05, "loss": 0.7419, "step": 3046 }, { "epoch": 0.22645856558900038, "grad_norm": 2.7949456009965457, "learning_rate": 1.9891687824392423e-05, "loss": 1.1327, "step": 3047 }, { "epoch": 0.22653288740245262, "grad_norm": 2.2051203283365055, "learning_rate": 1.989157001888558e-05, "loss": 0.8804, "step": 3048 }, { "epoch": 0.22660720921590488, "grad_norm": 2.0167697352556586, "learning_rate": 1.9891452149697325e-05, "loss": 0.8378, "step": 3049 }, { "epoch": 0.22668153102935712, "grad_norm": 2.302024444350962, "learning_rate": 1.989133421682841e-05, "loss": 0.9241, "step": 3050 }, { "epoch": 0.22675585284280936, "grad_norm": 2.4453166473132812, "learning_rate": 1.9891216220279602e-05, "loss": 0.9063, "step": 3051 }, { "epoch": 0.22683017465626162, "grad_norm": 2.789322777773754, "learning_rate": 1.9891098160051657e-05, "loss": 0.9369, "step": 3052 }, { "epoch": 0.22690449646971386, "grad_norm": 2.513764878734123, "learning_rate": 1.9890980036145334e-05, "loss": 1.0383, "step": 3053 }, { "epoch": 0.2269788182831661, "grad_norm": 1.9624918531291542, "learning_rate": 1.9890861848561394e-05, "loss": 0.8471, "step": 3054 }, { "epoch": 0.22705314009661837, "grad_norm": 2.0642456844956474, "learning_rate": 1.98907435973006e-05, "loss": 0.9915, "step": 3055 }, { "epoch": 0.2271274619100706, "grad_norm": 2.2966810599375633, "learning_rate": 1.9890625282363712e-05, "loss": 0.8901, "step": 3056 }, { "epoch": 0.22720178372352284, "grad_norm": 2.699836605442353, "learning_rate": 1.989050690375149e-05, "loss": 0.7513, "step": 3057 }, { "epoch": 0.2272761055369751, "grad_norm": 2.3812396733776193, "learning_rate": 1.98903884614647e-05, "loss": 0.8865, "step": 3058 }, { "epoch": 0.22735042735042735, "grad_norm": 2.0747056690478334, "learning_rate": 1.9890269955504102e-05, "loss": 0.8882, "step": 3059 }, { "epoch": 0.22742474916387959, "grad_norm": 2.5478654528659312, "learning_rate": 1.9890151385870458e-05, "loss": 0.9013, "step": 3060 }, { "epoch": 0.22749907097733185, "grad_norm": 2.362511151470868, "learning_rate": 1.9890032752564533e-05, "loss": 0.8406, "step": 3061 }, { "epoch": 0.2275733927907841, "grad_norm": 1.9844441872168996, "learning_rate": 1.988991405558709e-05, "loss": 0.6954, "step": 3062 }, { "epoch": 0.22764771460423636, "grad_norm": 2.594172153417897, "learning_rate": 1.9889795294938895e-05, "loss": 0.941, "step": 3063 }, { "epoch": 0.2277220364176886, "grad_norm": 2.2858067830201825, "learning_rate": 1.988967647062071e-05, "loss": 0.7598, "step": 3064 }, { "epoch": 0.22779635823114083, "grad_norm": 1.9582693206051778, "learning_rate": 1.98895575826333e-05, "loss": 0.7103, "step": 3065 }, { "epoch": 0.2278706800445931, "grad_norm": 2.7745063857382264, "learning_rate": 1.988943863097743e-05, "loss": 0.8574, "step": 3066 }, { "epoch": 0.22794500185804534, "grad_norm": 3.4412823145925238, "learning_rate": 1.988931961565387e-05, "loss": 0.9024, "step": 3067 }, { "epoch": 0.22801932367149758, "grad_norm": 2.1097292050629117, "learning_rate": 1.9889200536663385e-05, "loss": 0.7967, "step": 3068 }, { "epoch": 0.22809364548494984, "grad_norm": 2.5159830370139735, "learning_rate": 1.9889081394006737e-05, "loss": 0.9032, "step": 3069 }, { "epoch": 0.22816796729840208, "grad_norm": 2.2387107534060875, "learning_rate": 1.9888962187684698e-05, "loss": 0.8771, "step": 3070 }, { "epoch": 0.22824228911185432, "grad_norm": 2.5562451983697563, "learning_rate": 1.9888842917698033e-05, "loss": 1.0864, "step": 3071 }, { "epoch": 0.22831661092530658, "grad_norm": 2.1793392461722414, "learning_rate": 1.9888723584047507e-05, "loss": 0.7274, "step": 3072 }, { "epoch": 0.22839093273875882, "grad_norm": 2.3792276996029647, "learning_rate": 1.9888604186733897e-05, "loss": 0.8474, "step": 3073 }, { "epoch": 0.22846525455221106, "grad_norm": 2.6340465040723138, "learning_rate": 1.9888484725757966e-05, "loss": 0.7816, "step": 3074 }, { "epoch": 0.22853957636566333, "grad_norm": 2.0199578929982764, "learning_rate": 1.9888365201120477e-05, "loss": 0.7598, "step": 3075 }, { "epoch": 0.22861389817911557, "grad_norm": 4.507236261364429, "learning_rate": 1.9888245612822212e-05, "loss": 0.9459, "step": 3076 }, { "epoch": 0.22868821999256783, "grad_norm": 2.3833865327728967, "learning_rate": 1.9888125960863935e-05, "loss": 1.0797, "step": 3077 }, { "epoch": 0.22876254180602007, "grad_norm": 2.3551898987696003, "learning_rate": 1.9888006245246413e-05, "loss": 0.9598, "step": 3078 }, { "epoch": 0.2288368636194723, "grad_norm": 2.231332526557199, "learning_rate": 1.988788646597042e-05, "loss": 0.8242, "step": 3079 }, { "epoch": 0.22891118543292457, "grad_norm": 2.151252190677953, "learning_rate": 1.988776662303673e-05, "loss": 0.9828, "step": 3080 }, { "epoch": 0.2289855072463768, "grad_norm": 3.1029615815762064, "learning_rate": 1.988764671644611e-05, "loss": 0.9827, "step": 3081 }, { "epoch": 0.22905982905982905, "grad_norm": 1.7942590947299113, "learning_rate": 1.9887526746199333e-05, "loss": 0.9112, "step": 3082 }, { "epoch": 0.22913415087328132, "grad_norm": 2.6435146761273924, "learning_rate": 1.988740671229717e-05, "loss": 0.9924, "step": 3083 }, { "epoch": 0.22920847268673356, "grad_norm": 2.3744863115441692, "learning_rate": 1.98872866147404e-05, "loss": 0.8106, "step": 3084 }, { "epoch": 0.2292827945001858, "grad_norm": 2.1176283731925016, "learning_rate": 1.9887166453529787e-05, "loss": 0.7696, "step": 3085 }, { "epoch": 0.22935711631363806, "grad_norm": 2.430349104290462, "learning_rate": 1.9887046228666113e-05, "loss": 1.1082, "step": 3086 }, { "epoch": 0.2294314381270903, "grad_norm": 2.5365644004056165, "learning_rate": 1.9886925940150145e-05, "loss": 0.6866, "step": 3087 }, { "epoch": 0.22950575994054254, "grad_norm": 2.1406742470248217, "learning_rate": 1.9886805587982667e-05, "loss": 0.7972, "step": 3088 }, { "epoch": 0.2295800817539948, "grad_norm": 2.2503132143912055, "learning_rate": 1.988668517216444e-05, "loss": 0.8836, "step": 3089 }, { "epoch": 0.22965440356744704, "grad_norm": 2.1292414503148014, "learning_rate": 1.988656469269625e-05, "loss": 0.8728, "step": 3090 }, { "epoch": 0.2297287253808993, "grad_norm": 2.7794260130804616, "learning_rate": 1.9886444149578873e-05, "loss": 1.0444, "step": 3091 }, { "epoch": 0.22980304719435155, "grad_norm": 2.0212821601662188, "learning_rate": 1.9886323542813074e-05, "loss": 0.7372, "step": 3092 }, { "epoch": 0.22987736900780378, "grad_norm": 2.26105290090157, "learning_rate": 1.9886202872399644e-05, "loss": 0.912, "step": 3093 }, { "epoch": 0.22995169082125605, "grad_norm": 2.2125197962913288, "learning_rate": 1.988608213833935e-05, "loss": 0.9557, "step": 3094 }, { "epoch": 0.2300260126347083, "grad_norm": 2.4224952615301905, "learning_rate": 1.9885961340632974e-05, "loss": 0.9258, "step": 3095 }, { "epoch": 0.23010033444816053, "grad_norm": 2.448020435635902, "learning_rate": 1.988584047928129e-05, "loss": 0.8216, "step": 3096 }, { "epoch": 0.2301746562616128, "grad_norm": 2.7667707640536268, "learning_rate": 1.9885719554285076e-05, "loss": 0.9772, "step": 3097 }, { "epoch": 0.23024897807506503, "grad_norm": 2.549416619622717, "learning_rate": 1.9885598565645115e-05, "loss": 0.9708, "step": 3098 }, { "epoch": 0.23032329988851727, "grad_norm": 2.7045770767158124, "learning_rate": 1.9885477513362185e-05, "loss": 0.9331, "step": 3099 }, { "epoch": 0.23039762170196953, "grad_norm": 2.5438112854462354, "learning_rate": 1.988535639743706e-05, "loss": 0.9208, "step": 3100 }, { "epoch": 0.23047194351542177, "grad_norm": 2.1578593707473672, "learning_rate": 1.9885235217870525e-05, "loss": 0.9738, "step": 3101 }, { "epoch": 0.230546265328874, "grad_norm": 2.1989397842247, "learning_rate": 1.988511397466336e-05, "loss": 0.9887, "step": 3102 }, { "epoch": 0.23062058714232628, "grad_norm": 2.057983382036057, "learning_rate": 1.9884992667816342e-05, "loss": 0.7567, "step": 3103 }, { "epoch": 0.23069490895577852, "grad_norm": 2.392946953875924, "learning_rate": 1.9884871297330256e-05, "loss": 0.9384, "step": 3104 }, { "epoch": 0.23076923076923078, "grad_norm": 2.1956109575785763, "learning_rate": 1.9884749863205876e-05, "loss": 0.8542, "step": 3105 }, { "epoch": 0.23084355258268302, "grad_norm": 2.043184653458265, "learning_rate": 1.9884628365443996e-05, "loss": 0.8599, "step": 3106 }, { "epoch": 0.23091787439613526, "grad_norm": 2.526569979891881, "learning_rate": 1.988450680404539e-05, "loss": 1.0449, "step": 3107 }, { "epoch": 0.23099219620958752, "grad_norm": 2.736000033871688, "learning_rate": 1.988438517901084e-05, "loss": 0.9231, "step": 3108 }, { "epoch": 0.23106651802303976, "grad_norm": 2.5500880985758427, "learning_rate": 1.9884263490341133e-05, "loss": 0.9501, "step": 3109 }, { "epoch": 0.231140839836492, "grad_norm": 2.696864713120979, "learning_rate": 1.988414173803705e-05, "loss": 1.1326, "step": 3110 }, { "epoch": 0.23121516164994427, "grad_norm": 2.2694167945031967, "learning_rate": 1.9884019922099372e-05, "loss": 0.9724, "step": 3111 }, { "epoch": 0.2312894834633965, "grad_norm": 2.5825686748938192, "learning_rate": 1.988389804252889e-05, "loss": 0.972, "step": 3112 }, { "epoch": 0.23136380527684874, "grad_norm": 2.339845092875245, "learning_rate": 1.9883776099326385e-05, "loss": 0.8637, "step": 3113 }, { "epoch": 0.231438127090301, "grad_norm": 2.4394857213461014, "learning_rate": 1.9883654092492645e-05, "loss": 0.8829, "step": 3114 }, { "epoch": 0.23151244890375325, "grad_norm": 3.0691514447311588, "learning_rate": 1.9883532022028448e-05, "loss": 0.9389, "step": 3115 }, { "epoch": 0.2315867707172055, "grad_norm": 3.4391789116678178, "learning_rate": 1.9883409887934587e-05, "loss": 1.0405, "step": 3116 }, { "epoch": 0.23166109253065775, "grad_norm": 2.197462348220909, "learning_rate": 1.988328769021185e-05, "loss": 1.0801, "step": 3117 }, { "epoch": 0.23173541434411, "grad_norm": 2.267745417920241, "learning_rate": 1.988316542886101e-05, "loss": 0.8588, "step": 3118 }, { "epoch": 0.23180973615756226, "grad_norm": 2.5936368600112933, "learning_rate": 1.9883043103882872e-05, "loss": 0.9008, "step": 3119 }, { "epoch": 0.2318840579710145, "grad_norm": 2.1078051259831976, "learning_rate": 1.9882920715278213e-05, "loss": 0.8204, "step": 3120 }, { "epoch": 0.23195837978446673, "grad_norm": 2.423872740572647, "learning_rate": 1.988279826304782e-05, "loss": 0.89, "step": 3121 }, { "epoch": 0.232032701597919, "grad_norm": 2.0964569811161966, "learning_rate": 1.988267574719249e-05, "loss": 0.9964, "step": 3122 }, { "epoch": 0.23210702341137124, "grad_norm": 5.894686838220286, "learning_rate": 1.9882553167713e-05, "loss": 0.9871, "step": 3123 }, { "epoch": 0.23218134522482348, "grad_norm": 1.8192614306599515, "learning_rate": 1.988243052461015e-05, "loss": 0.7246, "step": 3124 }, { "epoch": 0.23225566703827574, "grad_norm": 3.0270336575617325, "learning_rate": 1.9882307817884724e-05, "loss": 0.902, "step": 3125 }, { "epoch": 0.23232998885172798, "grad_norm": 2.8572979462950374, "learning_rate": 1.988218504753751e-05, "loss": 0.9647, "step": 3126 }, { "epoch": 0.23240431066518022, "grad_norm": 2.634479875047299, "learning_rate": 1.9882062213569305e-05, "loss": 0.8567, "step": 3127 }, { "epoch": 0.23247863247863249, "grad_norm": 2.2708835745663647, "learning_rate": 1.9881939315980898e-05, "loss": 0.7834, "step": 3128 }, { "epoch": 0.23255295429208472, "grad_norm": 3.070166400308622, "learning_rate": 1.9881816354773076e-05, "loss": 1.0075, "step": 3129 }, { "epoch": 0.23262727610553696, "grad_norm": 2.604225655557469, "learning_rate": 1.9881693329946632e-05, "loss": 0.7946, "step": 3130 }, { "epoch": 0.23270159791898923, "grad_norm": 2.355388951501137, "learning_rate": 1.9881570241502362e-05, "loss": 0.8512, "step": 3131 }, { "epoch": 0.23277591973244147, "grad_norm": 2.5285247238088226, "learning_rate": 1.9881447089441053e-05, "loss": 0.9661, "step": 3132 }, { "epoch": 0.23285024154589373, "grad_norm": 2.575591670135381, "learning_rate": 1.98813238737635e-05, "loss": 0.9237, "step": 3133 }, { "epoch": 0.23292456335934597, "grad_norm": 2.3193054429568436, "learning_rate": 1.98812005944705e-05, "loss": 0.9933, "step": 3134 }, { "epoch": 0.2329988851727982, "grad_norm": 3.1270403713343344, "learning_rate": 1.9881077251562838e-05, "loss": 0.9242, "step": 3135 }, { "epoch": 0.23307320698625048, "grad_norm": 2.7320407391476333, "learning_rate": 1.988095384504132e-05, "loss": 0.9946, "step": 3136 }, { "epoch": 0.2331475287997027, "grad_norm": 9.105176546768087, "learning_rate": 1.9880830374906726e-05, "loss": 0.9992, "step": 3137 }, { "epoch": 0.23322185061315495, "grad_norm": 2.1910913203927707, "learning_rate": 1.9880706841159865e-05, "loss": 0.8771, "step": 3138 }, { "epoch": 0.23329617242660722, "grad_norm": 2.6540165090948444, "learning_rate": 1.9880583243801525e-05, "loss": 1.1142, "step": 3139 }, { "epoch": 0.23337049424005946, "grad_norm": 2.3348092159319127, "learning_rate": 1.98804595828325e-05, "loss": 0.8367, "step": 3140 }, { "epoch": 0.2334448160535117, "grad_norm": 2.469411248808241, "learning_rate": 1.988033585825359e-05, "loss": 0.9202, "step": 3141 }, { "epoch": 0.23351913786696396, "grad_norm": 2.6873679738144904, "learning_rate": 1.988021207006559e-05, "loss": 0.9999, "step": 3142 }, { "epoch": 0.2335934596804162, "grad_norm": 2.1742891438979903, "learning_rate": 1.98800882182693e-05, "loss": 1.1419, "step": 3143 }, { "epoch": 0.23366778149386844, "grad_norm": 3.2915021351975473, "learning_rate": 1.987996430286551e-05, "loss": 0.8698, "step": 3144 }, { "epoch": 0.2337421033073207, "grad_norm": 2.5588625211098877, "learning_rate": 1.987984032385503e-05, "loss": 1.0607, "step": 3145 }, { "epoch": 0.23381642512077294, "grad_norm": 1.7585899783108065, "learning_rate": 1.9879716281238644e-05, "loss": 0.8388, "step": 3146 }, { "epoch": 0.2338907469342252, "grad_norm": 3.075791827803754, "learning_rate": 1.987959217501716e-05, "loss": 0.8863, "step": 3147 }, { "epoch": 0.23396506874767745, "grad_norm": 2.39798884425772, "learning_rate": 1.9879468005191374e-05, "loss": 0.8345, "step": 3148 }, { "epoch": 0.23403939056112968, "grad_norm": 2.553557100760346, "learning_rate": 1.987934377176208e-05, "loss": 0.754, "step": 3149 }, { "epoch": 0.23411371237458195, "grad_norm": 2.4072374394479805, "learning_rate": 1.987921947473009e-05, "loss": 1.0549, "step": 3150 }, { "epoch": 0.2341880341880342, "grad_norm": 5.049804454140719, "learning_rate": 1.98790951140962e-05, "loss": 0.987, "step": 3151 }, { "epoch": 0.23426235600148643, "grad_norm": 2.5526960913771184, "learning_rate": 1.9878970689861202e-05, "loss": 1.0441, "step": 3152 }, { "epoch": 0.2343366778149387, "grad_norm": 2.3894306553868416, "learning_rate": 1.9878846202025905e-05, "loss": 1.0469, "step": 3153 }, { "epoch": 0.23441099962839093, "grad_norm": 2.7397782303457534, "learning_rate": 1.987872165059111e-05, "loss": 1.0059, "step": 3154 }, { "epoch": 0.23448532144184317, "grad_norm": 2.135125236666411, "learning_rate": 1.9878597035557618e-05, "loss": 1.0307, "step": 3155 }, { "epoch": 0.23455964325529544, "grad_norm": 2.0336597265752916, "learning_rate": 1.987847235692623e-05, "loss": 0.8076, "step": 3156 }, { "epoch": 0.23463396506874767, "grad_norm": 2.761128550177793, "learning_rate": 1.9878347614697747e-05, "loss": 1.0266, "step": 3157 }, { "epoch": 0.2347082868821999, "grad_norm": 2.325237234455307, "learning_rate": 1.9878222808872977e-05, "loss": 0.7537, "step": 3158 }, { "epoch": 0.23478260869565218, "grad_norm": 4.047365834624693, "learning_rate": 1.987809793945272e-05, "loss": 0.9173, "step": 3159 }, { "epoch": 0.23485693050910442, "grad_norm": 2.3702650950864363, "learning_rate": 1.9877973006437784e-05, "loss": 0.8023, "step": 3160 }, { "epoch": 0.23493125232255668, "grad_norm": 2.4464705818668504, "learning_rate": 1.9877848009828966e-05, "loss": 0.8555, "step": 3161 }, { "epoch": 0.23500557413600892, "grad_norm": 1.9578868855610791, "learning_rate": 1.987772294962708e-05, "loss": 0.6293, "step": 3162 }, { "epoch": 0.23507989594946116, "grad_norm": 2.2605528490528752, "learning_rate": 1.987759782583292e-05, "loss": 0.6783, "step": 3163 }, { "epoch": 0.23515421776291343, "grad_norm": 2.116838067173037, "learning_rate": 1.9877472638447304e-05, "loss": 0.9152, "step": 3164 }, { "epoch": 0.23522853957636566, "grad_norm": 3.552525405774467, "learning_rate": 1.987734738747103e-05, "loss": 1.0196, "step": 3165 }, { "epoch": 0.2353028613898179, "grad_norm": 2.3898068598547275, "learning_rate": 1.9877222072904902e-05, "loss": 0.908, "step": 3166 }, { "epoch": 0.23537718320327017, "grad_norm": 2.5504833565754303, "learning_rate": 1.9877096694749734e-05, "loss": 1.2128, "step": 3167 }, { "epoch": 0.2354515050167224, "grad_norm": 2.336407863175069, "learning_rate": 1.987697125300633e-05, "loss": 0.7189, "step": 3168 }, { "epoch": 0.23552582683017464, "grad_norm": 2.931580143167688, "learning_rate": 1.9876845747675497e-05, "loss": 0.8817, "step": 3169 }, { "epoch": 0.2356001486436269, "grad_norm": 2.386093526832456, "learning_rate": 1.9876720178758044e-05, "loss": 0.8624, "step": 3170 }, { "epoch": 0.23567447045707915, "grad_norm": 2.7218034093857346, "learning_rate": 1.9876594546254776e-05, "loss": 1.0915, "step": 3171 }, { "epoch": 0.2357487922705314, "grad_norm": 2.7628703694118077, "learning_rate": 1.987646885016651e-05, "loss": 0.8884, "step": 3172 }, { "epoch": 0.23582311408398365, "grad_norm": 2.0280802386221763, "learning_rate": 1.9876343090494048e-05, "loss": 0.7591, "step": 3173 }, { "epoch": 0.2358974358974359, "grad_norm": 2.2186971795931116, "learning_rate": 1.98762172672382e-05, "loss": 1.0087, "step": 3174 }, { "epoch": 0.23597175771088816, "grad_norm": 2.850855374767078, "learning_rate": 1.987609138039978e-05, "loss": 0.9569, "step": 3175 }, { "epoch": 0.2360460795243404, "grad_norm": 2.1552977724245137, "learning_rate": 1.9875965429979594e-05, "loss": 0.8543, "step": 3176 }, { "epoch": 0.23612040133779263, "grad_norm": 3.6658891748509768, "learning_rate": 1.9875839415978457e-05, "loss": 0.9938, "step": 3177 }, { "epoch": 0.2361947231512449, "grad_norm": 2.310132007656103, "learning_rate": 1.9875713338397176e-05, "loss": 0.9011, "step": 3178 }, { "epoch": 0.23626904496469714, "grad_norm": 1.9012266586752695, "learning_rate": 1.9875587197236566e-05, "loss": 0.853, "step": 3179 }, { "epoch": 0.23634336677814938, "grad_norm": 3.66424957757259, "learning_rate": 1.9875460992497436e-05, "loss": 0.9081, "step": 3180 }, { "epoch": 0.23641768859160164, "grad_norm": 1.8389472001451004, "learning_rate": 1.9875334724180605e-05, "loss": 0.7416, "step": 3181 }, { "epoch": 0.23649201040505388, "grad_norm": 2.6180162219888996, "learning_rate": 1.987520839228688e-05, "loss": 0.9582, "step": 3182 }, { "epoch": 0.23656633221850612, "grad_norm": 2.413196223056957, "learning_rate": 1.9875081996817073e-05, "loss": 1.0372, "step": 3183 }, { "epoch": 0.2366406540319584, "grad_norm": 2.5106448042963647, "learning_rate": 1.9874955537772e-05, "loss": 1.0295, "step": 3184 }, { "epoch": 0.23671497584541062, "grad_norm": 1.592505979318448, "learning_rate": 1.9874829015152482e-05, "loss": 0.6602, "step": 3185 }, { "epoch": 0.23678929765886286, "grad_norm": 3.0048795316409818, "learning_rate": 1.987470242895932e-05, "loss": 1.1302, "step": 3186 }, { "epoch": 0.23686361947231513, "grad_norm": 2.3547723525097446, "learning_rate": 1.9874575779193343e-05, "loss": 0.9365, "step": 3187 }, { "epoch": 0.23693794128576737, "grad_norm": 2.568003409446794, "learning_rate": 1.9874449065855356e-05, "loss": 0.9168, "step": 3188 }, { "epoch": 0.23701226309921963, "grad_norm": 2.865283502669033, "learning_rate": 1.9874322288946178e-05, "loss": 0.8542, "step": 3189 }, { "epoch": 0.23708658491267187, "grad_norm": 2.417207757459183, "learning_rate": 1.9874195448466622e-05, "loss": 0.8993, "step": 3190 }, { "epoch": 0.2371609067261241, "grad_norm": 2.51092756200319, "learning_rate": 1.9874068544417514e-05, "loss": 1.0559, "step": 3191 }, { "epoch": 0.23723522853957638, "grad_norm": 1.909312277869202, "learning_rate": 1.9873941576799664e-05, "loss": 0.888, "step": 3192 }, { "epoch": 0.23730955035302861, "grad_norm": 2.6858387119066243, "learning_rate": 1.987381454561389e-05, "loss": 1.0549, "step": 3193 }, { "epoch": 0.23738387216648085, "grad_norm": 2.6788323990107137, "learning_rate": 1.9873687450861005e-05, "loss": 0.9206, "step": 3194 }, { "epoch": 0.23745819397993312, "grad_norm": 2.1500038095736445, "learning_rate": 1.987356029254184e-05, "loss": 0.9379, "step": 3195 }, { "epoch": 0.23753251579338536, "grad_norm": 2.3878877519495942, "learning_rate": 1.9873433070657204e-05, "loss": 0.8482, "step": 3196 }, { "epoch": 0.2376068376068376, "grad_norm": 2.665522407527255, "learning_rate": 1.9873305785207916e-05, "loss": 0.695, "step": 3197 }, { "epoch": 0.23768115942028986, "grad_norm": 2.204433841106173, "learning_rate": 1.9873178436194796e-05, "loss": 0.7878, "step": 3198 }, { "epoch": 0.2377554812337421, "grad_norm": 2.9105921636132015, "learning_rate": 1.9873051023618667e-05, "loss": 0.8701, "step": 3199 }, { "epoch": 0.23782980304719434, "grad_norm": 2.2507151570277935, "learning_rate": 1.9872923547480352e-05, "loss": 0.593, "step": 3200 }, { "epoch": 0.2379041248606466, "grad_norm": 2.3898383452711185, "learning_rate": 1.987279600778066e-05, "loss": 1.034, "step": 3201 }, { "epoch": 0.23797844667409884, "grad_norm": 7.144383991356979, "learning_rate": 1.9872668404520428e-05, "loss": 0.6383, "step": 3202 }, { "epoch": 0.2380527684875511, "grad_norm": 2.3952038051778324, "learning_rate": 1.9872540737700465e-05, "loss": 0.9325, "step": 3203 }, { "epoch": 0.23812709030100335, "grad_norm": 5.1696141331091425, "learning_rate": 1.9872413007321595e-05, "loss": 1.1206, "step": 3204 }, { "epoch": 0.23820141211445559, "grad_norm": 2.3015196911006655, "learning_rate": 1.9872285213384647e-05, "loss": 1.1451, "step": 3205 }, { "epoch": 0.23827573392790785, "grad_norm": 2.636823978413053, "learning_rate": 1.9872157355890435e-05, "loss": 1.0071, "step": 3206 }, { "epoch": 0.2383500557413601, "grad_norm": 2.7109689208839156, "learning_rate": 1.9872029434839788e-05, "loss": 0.9234, "step": 3207 }, { "epoch": 0.23842437755481233, "grad_norm": 2.266489056630255, "learning_rate": 1.9871901450233527e-05, "loss": 0.9853, "step": 3208 }, { "epoch": 0.2384986993682646, "grad_norm": 2.324818760675506, "learning_rate": 1.9871773402072477e-05, "loss": 1.0152, "step": 3209 }, { "epoch": 0.23857302118171683, "grad_norm": 2.7685384953030465, "learning_rate": 1.987164529035746e-05, "loss": 0.8065, "step": 3210 }, { "epoch": 0.23864734299516907, "grad_norm": 3.374855846596971, "learning_rate": 1.987151711508931e-05, "loss": 0.8364, "step": 3211 }, { "epoch": 0.23872166480862134, "grad_norm": 2.356946227348794, "learning_rate": 1.9871388876268836e-05, "loss": 0.897, "step": 3212 }, { "epoch": 0.23879598662207357, "grad_norm": 2.6212821362063243, "learning_rate": 1.9871260573896878e-05, "loss": 1.0184, "step": 3213 }, { "epoch": 0.2388703084355258, "grad_norm": 2.048598725444137, "learning_rate": 1.9871132207974254e-05, "loss": 0.9334, "step": 3214 }, { "epoch": 0.23894463024897808, "grad_norm": 2.286541460127084, "learning_rate": 1.9871003778501797e-05, "loss": 0.9338, "step": 3215 }, { "epoch": 0.23901895206243032, "grad_norm": 2.986259885501039, "learning_rate": 1.9870875285480323e-05, "loss": 0.9274, "step": 3216 }, { "epoch": 0.23909327387588258, "grad_norm": 2.711847213857581, "learning_rate": 1.9870746728910674e-05, "loss": 0.8896, "step": 3217 }, { "epoch": 0.23916759568933482, "grad_norm": 2.0487121750435424, "learning_rate": 1.9870618108793662e-05, "loss": 0.745, "step": 3218 }, { "epoch": 0.23924191750278706, "grad_norm": 2.5362116174330676, "learning_rate": 1.9870489425130128e-05, "loss": 1.0297, "step": 3219 }, { "epoch": 0.23931623931623933, "grad_norm": 2.0854170926934112, "learning_rate": 1.9870360677920894e-05, "loss": 0.8291, "step": 3220 }, { "epoch": 0.23939056112969156, "grad_norm": 2.354229325924667, "learning_rate": 1.9870231867166788e-05, "loss": 0.9399, "step": 3221 }, { "epoch": 0.2394648829431438, "grad_norm": 2.466545911406683, "learning_rate": 1.9870102992868646e-05, "loss": 0.9622, "step": 3222 }, { "epoch": 0.23953920475659607, "grad_norm": 2.259822285135288, "learning_rate": 1.986997405502729e-05, "loss": 0.9056, "step": 3223 }, { "epoch": 0.2396135265700483, "grad_norm": 2.40489334430212, "learning_rate": 1.9869845053643553e-05, "loss": 0.7771, "step": 3224 }, { "epoch": 0.23968784838350055, "grad_norm": 2.37653785386868, "learning_rate": 1.9869715988718264e-05, "loss": 0.9366, "step": 3225 }, { "epoch": 0.2397621701969528, "grad_norm": 2.504987197098112, "learning_rate": 1.986958686025226e-05, "loss": 0.9429, "step": 3226 }, { "epoch": 0.23983649201040505, "grad_norm": 2.196202661038542, "learning_rate": 1.9869457668246365e-05, "loss": 0.811, "step": 3227 }, { "epoch": 0.2399108138238573, "grad_norm": 2.4116052926885794, "learning_rate": 1.9869328412701418e-05, "loss": 0.9686, "step": 3228 }, { "epoch": 0.23998513563730955, "grad_norm": 2.2080687434474564, "learning_rate": 1.9869199093618242e-05, "loss": 1.1166, "step": 3229 }, { "epoch": 0.2400594574507618, "grad_norm": 2.078999965993811, "learning_rate": 1.986906971099767e-05, "loss": 0.8419, "step": 3230 }, { "epoch": 0.24013377926421406, "grad_norm": 2.923451244414184, "learning_rate": 1.9868940264840546e-05, "loss": 0.8772, "step": 3231 }, { "epoch": 0.2402081010776663, "grad_norm": 2.825869322749374, "learning_rate": 1.9868810755147697e-05, "loss": 0.9939, "step": 3232 }, { "epoch": 0.24028242289111854, "grad_norm": 2.460746587853776, "learning_rate": 1.9868681181919953e-05, "loss": 0.9856, "step": 3233 }, { "epoch": 0.2403567447045708, "grad_norm": 2.4430189726400315, "learning_rate": 1.9868551545158154e-05, "loss": 0.9611, "step": 3234 }, { "epoch": 0.24043106651802304, "grad_norm": 1.8674921969045801, "learning_rate": 1.9868421844863132e-05, "loss": 0.84, "step": 3235 }, { "epoch": 0.24050538833147528, "grad_norm": 2.1757249194846517, "learning_rate": 1.986829208103572e-05, "loss": 0.8516, "step": 3236 }, { "epoch": 0.24057971014492754, "grad_norm": 2.4997817978839874, "learning_rate": 1.986816225367676e-05, "loss": 1.0243, "step": 3237 }, { "epoch": 0.24065403195837978, "grad_norm": 1.9397866563103965, "learning_rate": 1.986803236278708e-05, "loss": 0.6827, "step": 3238 }, { "epoch": 0.24072835377183202, "grad_norm": 2.099706114090512, "learning_rate": 1.9867902408367522e-05, "loss": 0.9493, "step": 3239 }, { "epoch": 0.2408026755852843, "grad_norm": 3.239998533013239, "learning_rate": 1.986777239041892e-05, "loss": 0.9162, "step": 3240 }, { "epoch": 0.24087699739873653, "grad_norm": 1.9873621438270699, "learning_rate": 1.9867642308942106e-05, "loss": 0.723, "step": 3241 }, { "epoch": 0.24095131921218876, "grad_norm": 2.056949436629614, "learning_rate": 1.9867512163937926e-05, "loss": 0.8289, "step": 3242 }, { "epoch": 0.24102564102564103, "grad_norm": 2.4692585235971234, "learning_rate": 1.9867381955407218e-05, "loss": 0.9263, "step": 3243 }, { "epoch": 0.24109996283909327, "grad_norm": 2.138868967698501, "learning_rate": 1.9867251683350814e-05, "loss": 0.8539, "step": 3244 }, { "epoch": 0.24117428465254553, "grad_norm": 2.461730273601173, "learning_rate": 1.9867121347769555e-05, "loss": 0.885, "step": 3245 }, { "epoch": 0.24124860646599777, "grad_norm": 2.304173325671598, "learning_rate": 1.986699094866428e-05, "loss": 0.73, "step": 3246 }, { "epoch": 0.24132292827945, "grad_norm": 2.4136217330110576, "learning_rate": 1.986686048603583e-05, "loss": 1.0354, "step": 3247 }, { "epoch": 0.24139725009290228, "grad_norm": 2.59958486064678, "learning_rate": 1.9866729959885048e-05, "loss": 0.93, "step": 3248 }, { "epoch": 0.24147157190635452, "grad_norm": 2.5085799337529, "learning_rate": 1.9866599370212766e-05, "loss": 0.9341, "step": 3249 }, { "epoch": 0.24154589371980675, "grad_norm": 2.597253145077468, "learning_rate": 1.986646871701983e-05, "loss": 0.858, "step": 3250 }, { "epoch": 0.24162021553325902, "grad_norm": 2.855208552109399, "learning_rate": 1.9866338000307084e-05, "loss": 0.9734, "step": 3251 }, { "epoch": 0.24169453734671126, "grad_norm": 3.0250449932847667, "learning_rate": 1.9866207220075364e-05, "loss": 1.0059, "step": 3252 }, { "epoch": 0.2417688591601635, "grad_norm": 2.3440105130641564, "learning_rate": 1.986607637632551e-05, "loss": 0.8256, "step": 3253 }, { "epoch": 0.24184318097361576, "grad_norm": 2.622312441098618, "learning_rate": 1.9865945469058374e-05, "loss": 0.748, "step": 3254 }, { "epoch": 0.241917502787068, "grad_norm": 2.268227477951327, "learning_rate": 1.986581449827479e-05, "loss": 1.0676, "step": 3255 }, { "epoch": 0.24199182460052024, "grad_norm": 1.9300848684970575, "learning_rate": 1.98656834639756e-05, "loss": 0.8761, "step": 3256 }, { "epoch": 0.2420661464139725, "grad_norm": 2.2793712931996257, "learning_rate": 1.986555236616166e-05, "loss": 0.8841, "step": 3257 }, { "epoch": 0.24214046822742474, "grad_norm": 3.3201011024177043, "learning_rate": 1.9865421204833802e-05, "loss": 1.1159, "step": 3258 }, { "epoch": 0.242214790040877, "grad_norm": 2.7339195446069335, "learning_rate": 1.9865289979992872e-05, "loss": 0.8878, "step": 3259 }, { "epoch": 0.24228911185432925, "grad_norm": 2.14225830250598, "learning_rate": 1.9865158691639718e-05, "loss": 0.8369, "step": 3260 }, { "epoch": 0.24236343366778149, "grad_norm": 2.185957310161328, "learning_rate": 1.9865027339775186e-05, "loss": 0.9391, "step": 3261 }, { "epoch": 0.24243775548123375, "grad_norm": 2.101588919723192, "learning_rate": 1.986489592440012e-05, "loss": 0.7161, "step": 3262 }, { "epoch": 0.242512077294686, "grad_norm": 2.246618653491866, "learning_rate": 1.9864764445515364e-05, "loss": 0.8887, "step": 3263 }, { "epoch": 0.24258639910813823, "grad_norm": 2.367953599654717, "learning_rate": 1.986463290312177e-05, "loss": 0.7589, "step": 3264 }, { "epoch": 0.2426607209215905, "grad_norm": 7.047310743400011, "learning_rate": 1.9864501297220177e-05, "loss": 0.7722, "step": 3265 }, { "epoch": 0.24273504273504273, "grad_norm": 2.4034868248156207, "learning_rate": 1.9864369627811436e-05, "loss": 0.7795, "step": 3266 }, { "epoch": 0.24280936454849497, "grad_norm": 2.3878720190716685, "learning_rate": 1.9864237894896397e-05, "loss": 0.8532, "step": 3267 }, { "epoch": 0.24288368636194724, "grad_norm": 2.0438105227804986, "learning_rate": 1.986410609847591e-05, "loss": 0.7665, "step": 3268 }, { "epoch": 0.24295800817539948, "grad_norm": 2.518182208270081, "learning_rate": 1.9863974238550814e-05, "loss": 0.8291, "step": 3269 }, { "epoch": 0.24303232998885171, "grad_norm": 2.125970181951257, "learning_rate": 1.9863842315121965e-05, "loss": 0.7876, "step": 3270 }, { "epoch": 0.24310665180230398, "grad_norm": 2.2546429248435214, "learning_rate": 1.986371032819021e-05, "loss": 0.7273, "step": 3271 }, { "epoch": 0.24318097361575622, "grad_norm": 2.095153882871376, "learning_rate": 1.9863578277756402e-05, "loss": 0.7736, "step": 3272 }, { "epoch": 0.24325529542920848, "grad_norm": 2.277798684575304, "learning_rate": 1.9863446163821393e-05, "loss": 0.8447, "step": 3273 }, { "epoch": 0.24332961724266072, "grad_norm": 2.310610389341247, "learning_rate": 1.986331398638602e-05, "loss": 0.9758, "step": 3274 }, { "epoch": 0.24340393905611296, "grad_norm": 2.982854352309968, "learning_rate": 1.986318174545115e-05, "loss": 0.8324, "step": 3275 }, { "epoch": 0.24347826086956523, "grad_norm": 3.155564754228142, "learning_rate": 1.9863049441017625e-05, "loss": 1.0664, "step": 3276 }, { "epoch": 0.24355258268301747, "grad_norm": 2.473569111640219, "learning_rate": 1.98629170730863e-05, "loss": 1.029, "step": 3277 }, { "epoch": 0.2436269044964697, "grad_norm": 2.3787383466654446, "learning_rate": 1.9862784641658027e-05, "loss": 0.9808, "step": 3278 }, { "epoch": 0.24370122630992197, "grad_norm": 2.918524895665372, "learning_rate": 1.986265214673366e-05, "loss": 0.9835, "step": 3279 }, { "epoch": 0.2437755481233742, "grad_norm": 2.6004565063177663, "learning_rate": 1.9862519588314045e-05, "loss": 0.9374, "step": 3280 }, { "epoch": 0.24384986993682645, "grad_norm": 2.5704620787951638, "learning_rate": 1.9862386966400045e-05, "loss": 0.8927, "step": 3281 }, { "epoch": 0.2439241917502787, "grad_norm": 2.08766811054254, "learning_rate": 1.986225428099251e-05, "loss": 0.8191, "step": 3282 }, { "epoch": 0.24399851356373095, "grad_norm": 2.0558843677466596, "learning_rate": 1.9862121532092292e-05, "loss": 0.7821, "step": 3283 }, { "epoch": 0.24407283537718322, "grad_norm": 2.42861597514796, "learning_rate": 1.9861988719700247e-05, "loss": 0.7728, "step": 3284 }, { "epoch": 0.24414715719063546, "grad_norm": 2.2440020729775734, "learning_rate": 1.986185584381723e-05, "loss": 0.9575, "step": 3285 }, { "epoch": 0.2442214790040877, "grad_norm": 2.351576754867504, "learning_rate": 1.9861722904444095e-05, "loss": 0.9515, "step": 3286 }, { "epoch": 0.24429580081753996, "grad_norm": 2.1740444930310976, "learning_rate": 1.9861589901581703e-05, "loss": 0.9388, "step": 3287 }, { "epoch": 0.2443701226309922, "grad_norm": 2.7191265132145883, "learning_rate": 1.9861456835230904e-05, "loss": 0.8791, "step": 3288 }, { "epoch": 0.24444444444444444, "grad_norm": 2.255995634130125, "learning_rate": 1.986132370539256e-05, "loss": 0.8021, "step": 3289 }, { "epoch": 0.2445187662578967, "grad_norm": 2.5989154856980217, "learning_rate": 1.986119051206753e-05, "loss": 1.0719, "step": 3290 }, { "epoch": 0.24459308807134894, "grad_norm": 2.179137542398936, "learning_rate": 1.9861057255256656e-05, "loss": 0.8908, "step": 3291 }, { "epoch": 0.24466740988480118, "grad_norm": 2.2936233062427376, "learning_rate": 1.9860923934960816e-05, "loss": 1.0923, "step": 3292 }, { "epoch": 0.24474173169825345, "grad_norm": 2.4260824744166554, "learning_rate": 1.9860790551180857e-05, "loss": 0.8535, "step": 3293 }, { "epoch": 0.24481605351170568, "grad_norm": 1.9808049819564397, "learning_rate": 1.9860657103917638e-05, "loss": 0.8218, "step": 3294 }, { "epoch": 0.24489037532515792, "grad_norm": 2.3098794435978665, "learning_rate": 1.9860523593172023e-05, "loss": 0.8482, "step": 3295 }, { "epoch": 0.2449646971386102, "grad_norm": 2.003515422384715, "learning_rate": 1.9860390018944867e-05, "loss": 0.9163, "step": 3296 }, { "epoch": 0.24503901895206243, "grad_norm": 2.6325853832043435, "learning_rate": 1.9860256381237034e-05, "loss": 0.9375, "step": 3297 }, { "epoch": 0.2451133407655147, "grad_norm": 2.0624984363381254, "learning_rate": 1.9860122680049377e-05, "loss": 0.7783, "step": 3298 }, { "epoch": 0.24518766257896693, "grad_norm": 2.0375198687930154, "learning_rate": 1.9859988915382767e-05, "loss": 0.919, "step": 3299 }, { "epoch": 0.24526198439241917, "grad_norm": 2.182890417693869, "learning_rate": 1.9859855087238057e-05, "loss": 0.824, "step": 3300 }, { "epoch": 0.24533630620587144, "grad_norm": 3.012195907890721, "learning_rate": 1.9859721195616113e-05, "loss": 0.8925, "step": 3301 }, { "epoch": 0.24541062801932367, "grad_norm": 2.775477899182722, "learning_rate": 1.9859587240517797e-05, "loss": 1.1295, "step": 3302 }, { "epoch": 0.2454849498327759, "grad_norm": 2.5119896665954373, "learning_rate": 1.9859453221943967e-05, "loss": 0.895, "step": 3303 }, { "epoch": 0.24555927164622818, "grad_norm": 2.2308172122457135, "learning_rate": 1.985931913989549e-05, "loss": 0.9705, "step": 3304 }, { "epoch": 0.24563359345968042, "grad_norm": 4.047431736594619, "learning_rate": 1.9859184994373225e-05, "loss": 0.761, "step": 3305 }, { "epoch": 0.24570791527313265, "grad_norm": 2.8444041507885247, "learning_rate": 1.9859050785378043e-05, "loss": 0.7068, "step": 3306 }, { "epoch": 0.24578223708658492, "grad_norm": 2.2331600506760907, "learning_rate": 1.9858916512910804e-05, "loss": 0.829, "step": 3307 }, { "epoch": 0.24585655890003716, "grad_norm": 2.6946001134540283, "learning_rate": 1.985878217697237e-05, "loss": 0.8816, "step": 3308 }, { "epoch": 0.2459308807134894, "grad_norm": 2.916709984068792, "learning_rate": 1.9858647777563606e-05, "loss": 0.854, "step": 3309 }, { "epoch": 0.24600520252694166, "grad_norm": 2.133316940885652, "learning_rate": 1.9858513314685383e-05, "loss": 0.7995, "step": 3310 }, { "epoch": 0.2460795243403939, "grad_norm": 2.188893809570219, "learning_rate": 1.985837878833856e-05, "loss": 0.8612, "step": 3311 }, { "epoch": 0.24615384615384617, "grad_norm": 2.5157718369604143, "learning_rate": 1.9858244198524007e-05, "loss": 0.9238, "step": 3312 }, { "epoch": 0.2462281679672984, "grad_norm": 2.5141186129278115, "learning_rate": 1.985810954524259e-05, "loss": 1.0273, "step": 3313 }, { "epoch": 0.24630248978075064, "grad_norm": 2.784528224304442, "learning_rate": 1.985797482849517e-05, "loss": 0.8528, "step": 3314 }, { "epoch": 0.2463768115942029, "grad_norm": 2.792310244965799, "learning_rate": 1.9857840048282627e-05, "loss": 0.8679, "step": 3315 }, { "epoch": 0.24645113340765515, "grad_norm": 2.4308924067790363, "learning_rate": 1.9857705204605814e-05, "loss": 0.9045, "step": 3316 }, { "epoch": 0.2465254552211074, "grad_norm": 2.1258869670697598, "learning_rate": 1.9857570297465614e-05, "loss": 0.9418, "step": 3317 }, { "epoch": 0.24659977703455965, "grad_norm": 2.771291874768093, "learning_rate": 1.9857435326862885e-05, "loss": 0.9877, "step": 3318 }, { "epoch": 0.2466740988480119, "grad_norm": 2.614761748238394, "learning_rate": 1.9857300292798497e-05, "loss": 0.9436, "step": 3319 }, { "epoch": 0.24674842066146413, "grad_norm": 2.048423919399582, "learning_rate": 1.985716519527332e-05, "loss": 0.7751, "step": 3320 }, { "epoch": 0.2468227424749164, "grad_norm": 2.6993323723017744, "learning_rate": 1.985703003428823e-05, "loss": 0.8288, "step": 3321 }, { "epoch": 0.24689706428836863, "grad_norm": 2.3819522559156767, "learning_rate": 1.9856894809844088e-05, "loss": 0.9992, "step": 3322 }, { "epoch": 0.24697138610182087, "grad_norm": 2.9469244212364964, "learning_rate": 1.985675952194177e-05, "loss": 0.9517, "step": 3323 }, { "epoch": 0.24704570791527314, "grad_norm": 2.1690212175916512, "learning_rate": 1.9856624170582147e-05, "loss": 0.8926, "step": 3324 }, { "epoch": 0.24712002972872538, "grad_norm": 2.2130238348937072, "learning_rate": 1.9856488755766085e-05, "loss": 0.8044, "step": 3325 }, { "epoch": 0.24719435154217764, "grad_norm": 2.454770864301038, "learning_rate": 1.985635327749446e-05, "loss": 0.9699, "step": 3326 }, { "epoch": 0.24726867335562988, "grad_norm": 2.5081220911188247, "learning_rate": 1.985621773576815e-05, "loss": 0.836, "step": 3327 }, { "epoch": 0.24734299516908212, "grad_norm": 2.08770411599676, "learning_rate": 1.985608213058802e-05, "loss": 0.9357, "step": 3328 }, { "epoch": 0.24741731698253439, "grad_norm": 2.6422984942125654, "learning_rate": 1.985594646195494e-05, "loss": 0.9628, "step": 3329 }, { "epoch": 0.24749163879598662, "grad_norm": 2.158704171962463, "learning_rate": 1.9855810729869793e-05, "loss": 0.8975, "step": 3330 }, { "epoch": 0.24756596060943886, "grad_norm": 3.05093821681622, "learning_rate": 1.9855674934333444e-05, "loss": 0.8506, "step": 3331 }, { "epoch": 0.24764028242289113, "grad_norm": 2.4206959070949616, "learning_rate": 1.9855539075346772e-05, "loss": 0.9463, "step": 3332 }, { "epoch": 0.24771460423634337, "grad_norm": 2.631491486210109, "learning_rate": 1.985540315291065e-05, "loss": 0.8722, "step": 3333 }, { "epoch": 0.2477889260497956, "grad_norm": 2.4860737641219233, "learning_rate": 1.985526716702596e-05, "loss": 0.8314, "step": 3334 }, { "epoch": 0.24786324786324787, "grad_norm": 2.467510112354699, "learning_rate": 1.985513111769357e-05, "loss": 0.7608, "step": 3335 }, { "epoch": 0.2479375696767001, "grad_norm": 2.646447004578355, "learning_rate": 1.985499500491435e-05, "loss": 0.8965, "step": 3336 }, { "epoch": 0.24801189149015235, "grad_norm": 1.9611878128920726, "learning_rate": 1.985485882868919e-05, "loss": 0.6925, "step": 3337 }, { "epoch": 0.2480862133036046, "grad_norm": 2.830188050843367, "learning_rate": 1.9854722589018954e-05, "loss": 1.2039, "step": 3338 }, { "epoch": 0.24816053511705685, "grad_norm": 3.7354199896626366, "learning_rate": 1.985458628590453e-05, "loss": 0.9862, "step": 3339 }, { "epoch": 0.24823485693050912, "grad_norm": 2.4554933967058665, "learning_rate": 1.985444991934679e-05, "loss": 1.0118, "step": 3340 }, { "epoch": 0.24830917874396136, "grad_norm": 2.0023514692020488, "learning_rate": 1.985431348934661e-05, "loss": 0.8274, "step": 3341 }, { "epoch": 0.2483835005574136, "grad_norm": 2.3620963842920077, "learning_rate": 1.9854176995904874e-05, "loss": 0.8224, "step": 3342 }, { "epoch": 0.24845782237086586, "grad_norm": 2.526967963911846, "learning_rate": 1.9854040439022458e-05, "loss": 0.7796, "step": 3343 }, { "epoch": 0.2485321441843181, "grad_norm": 2.471972680778831, "learning_rate": 1.9853903818700242e-05, "loss": 0.9684, "step": 3344 }, { "epoch": 0.24860646599777034, "grad_norm": 2.6161600987662075, "learning_rate": 1.9853767134939104e-05, "loss": 0.8734, "step": 3345 }, { "epoch": 0.2486807878112226, "grad_norm": 3.1631975214595744, "learning_rate": 1.985363038773992e-05, "loss": 0.9857, "step": 3346 }, { "epoch": 0.24875510962467484, "grad_norm": 2.0409497433705273, "learning_rate": 1.985349357710358e-05, "loss": 0.8858, "step": 3347 }, { "epoch": 0.24882943143812708, "grad_norm": 2.374662997291712, "learning_rate": 1.9853356703030958e-05, "loss": 0.9963, "step": 3348 }, { "epoch": 0.24890375325157935, "grad_norm": 2.668958815220206, "learning_rate": 1.9853219765522936e-05, "loss": 1.1378, "step": 3349 }, { "epoch": 0.24897807506503158, "grad_norm": 3.194482845588468, "learning_rate": 1.9853082764580397e-05, "loss": 0.9832, "step": 3350 }, { "epoch": 0.24905239687848382, "grad_norm": 3.438818118698613, "learning_rate": 1.985294570020422e-05, "loss": 1.1433, "step": 3351 }, { "epoch": 0.2491267186919361, "grad_norm": 2.133010079897927, "learning_rate": 1.9852808572395293e-05, "loss": 1.0022, "step": 3352 }, { "epoch": 0.24920104050538833, "grad_norm": 3.8696071514739336, "learning_rate": 1.9852671381154497e-05, "loss": 0.6851, "step": 3353 }, { "epoch": 0.2492753623188406, "grad_norm": 2.098800881724487, "learning_rate": 1.985253412648271e-05, "loss": 0.9798, "step": 3354 }, { "epoch": 0.24934968413229283, "grad_norm": 3.925911249124257, "learning_rate": 1.9852396808380824e-05, "loss": 1.2236, "step": 3355 }, { "epoch": 0.24942400594574507, "grad_norm": 2.115247969469884, "learning_rate": 1.9852259426849715e-05, "loss": 0.7439, "step": 3356 }, { "epoch": 0.24949832775919734, "grad_norm": 2.4846773827824173, "learning_rate": 1.9852121981890272e-05, "loss": 0.9885, "step": 3357 }, { "epoch": 0.24957264957264957, "grad_norm": 2.5699163876369515, "learning_rate": 1.985198447350338e-05, "loss": 0.9135, "step": 3358 }, { "epoch": 0.2496469713861018, "grad_norm": 2.165254632063663, "learning_rate": 1.985184690168992e-05, "loss": 0.9887, "step": 3359 }, { "epoch": 0.24972129319955408, "grad_norm": 2.7422584030349477, "learning_rate": 1.985170926645078e-05, "loss": 1.0773, "step": 3360 }, { "epoch": 0.24979561501300632, "grad_norm": 2.4898080326349903, "learning_rate": 1.985157156778685e-05, "loss": 0.9809, "step": 3361 }, { "epoch": 0.24986993682645856, "grad_norm": 2.599063560317757, "learning_rate": 1.9851433805699015e-05, "loss": 0.8704, "step": 3362 }, { "epoch": 0.24994425863991082, "grad_norm": 2.1687586793616274, "learning_rate": 1.9851295980188157e-05, "loss": 0.9931, "step": 3363 }, { "epoch": 0.25001858045336306, "grad_norm": 3.3331764695459056, "learning_rate": 1.9851158091255168e-05, "loss": 1.0369, "step": 3364 }, { "epoch": 0.2500929022668153, "grad_norm": 2.187229926543836, "learning_rate": 1.9851020138900937e-05, "loss": 0.9593, "step": 3365 }, { "epoch": 0.25016722408026754, "grad_norm": 2.2253533172529063, "learning_rate": 1.9850882123126346e-05, "loss": 0.8406, "step": 3366 }, { "epoch": 0.25024154589371983, "grad_norm": 2.749851784515678, "learning_rate": 1.9850744043932285e-05, "loss": 0.8635, "step": 3367 }, { "epoch": 0.25031586770717207, "grad_norm": 2.288580140202469, "learning_rate": 1.985060590131965e-05, "loss": 0.9027, "step": 3368 }, { "epoch": 0.2503901895206243, "grad_norm": 2.6174114253523895, "learning_rate": 1.9850467695289325e-05, "loss": 0.8503, "step": 3369 }, { "epoch": 0.25046451133407654, "grad_norm": 2.3741834788188116, "learning_rate": 1.9850329425842197e-05, "loss": 0.8676, "step": 3370 }, { "epoch": 0.2505388331475288, "grad_norm": 2.471124063497637, "learning_rate": 1.985019109297916e-05, "loss": 0.9311, "step": 3371 }, { "epoch": 0.250613154960981, "grad_norm": 2.3878955187272664, "learning_rate": 1.9850052696701103e-05, "loss": 0.878, "step": 3372 }, { "epoch": 0.2506874767744333, "grad_norm": 2.402350383686461, "learning_rate": 1.984991423700892e-05, "loss": 1.0061, "step": 3373 }, { "epoch": 0.25076179858788555, "grad_norm": 2.046603069069123, "learning_rate": 1.9849775713903497e-05, "loss": 0.9838, "step": 3374 }, { "epoch": 0.2508361204013378, "grad_norm": 2.3537292174309807, "learning_rate": 1.9849637127385734e-05, "loss": 0.9135, "step": 3375 }, { "epoch": 0.25091044221479003, "grad_norm": 2.657096025196751, "learning_rate": 1.9849498477456516e-05, "loss": 0.8573, "step": 3376 }, { "epoch": 0.25098476402824227, "grad_norm": 1.8602334966607241, "learning_rate": 1.9849359764116734e-05, "loss": 0.8087, "step": 3377 }, { "epoch": 0.25105908584169456, "grad_norm": 2.1981543553059013, "learning_rate": 1.9849220987367287e-05, "loss": 0.7686, "step": 3378 }, { "epoch": 0.2511334076551468, "grad_norm": 2.3283113607591543, "learning_rate": 1.9849082147209068e-05, "loss": 0.7131, "step": 3379 }, { "epoch": 0.25120772946859904, "grad_norm": 2.3851679828213124, "learning_rate": 1.984894324364297e-05, "loss": 1.0026, "step": 3380 }, { "epoch": 0.2512820512820513, "grad_norm": 6.549614397753059, "learning_rate": 1.9848804276669885e-05, "loss": 1.0058, "step": 3381 }, { "epoch": 0.2513563730955035, "grad_norm": 2.1766095381015558, "learning_rate": 1.984866524629071e-05, "loss": 0.7742, "step": 3382 }, { "epoch": 0.25143069490895575, "grad_norm": 2.9745512810339476, "learning_rate": 1.9848526152506337e-05, "loss": 1.0402, "step": 3383 }, { "epoch": 0.25150501672240805, "grad_norm": 2.3637528177795377, "learning_rate": 1.984838699531767e-05, "loss": 0.9067, "step": 3384 }, { "epoch": 0.2515793385358603, "grad_norm": 2.477987080210656, "learning_rate": 1.984824777472559e-05, "loss": 0.8886, "step": 3385 }, { "epoch": 0.2516536603493125, "grad_norm": 3.194118067323453, "learning_rate": 1.9848108490731007e-05, "loss": 0.8429, "step": 3386 }, { "epoch": 0.25172798216276476, "grad_norm": 3.005893175723266, "learning_rate": 1.984796914333481e-05, "loss": 0.9703, "step": 3387 }, { "epoch": 0.251802303976217, "grad_norm": 3.3135453145142977, "learning_rate": 1.9847829732537897e-05, "loss": 0.7943, "step": 3388 }, { "epoch": 0.2518766257896693, "grad_norm": 2.0256686993013684, "learning_rate": 1.9847690258341172e-05, "loss": 0.9454, "step": 3389 }, { "epoch": 0.25195094760312153, "grad_norm": 2.4652687456192544, "learning_rate": 1.9847550720745525e-05, "loss": 0.9529, "step": 3390 }, { "epoch": 0.25202526941657377, "grad_norm": 2.301888842264015, "learning_rate": 1.9847411119751857e-05, "loss": 1.0239, "step": 3391 }, { "epoch": 0.252099591230026, "grad_norm": 2.965713615985953, "learning_rate": 1.984727145536107e-05, "loss": 0.982, "step": 3392 }, { "epoch": 0.25217391304347825, "grad_norm": 2.3012848583902135, "learning_rate": 1.984713172757406e-05, "loss": 0.6913, "step": 3393 }, { "epoch": 0.2522482348569305, "grad_norm": 2.0730032223822286, "learning_rate": 1.9846991936391725e-05, "loss": 0.8193, "step": 3394 }, { "epoch": 0.2523225566703828, "grad_norm": 2.3912644065777564, "learning_rate": 1.9846852081814967e-05, "loss": 0.875, "step": 3395 }, { "epoch": 0.252396878483835, "grad_norm": 2.5312047551753287, "learning_rate": 1.9846712163844687e-05, "loss": 0.9136, "step": 3396 }, { "epoch": 0.25247120029728726, "grad_norm": 1.905132384177034, "learning_rate": 1.9846572182481784e-05, "loss": 0.9217, "step": 3397 }, { "epoch": 0.2525455221107395, "grad_norm": 2.464943025871077, "learning_rate": 1.9846432137727162e-05, "loss": 0.8673, "step": 3398 }, { "epoch": 0.25261984392419173, "grad_norm": 2.503508975974662, "learning_rate": 1.9846292029581716e-05, "loss": 0.9461, "step": 3399 }, { "epoch": 0.25269416573764397, "grad_norm": 2.2810257251643034, "learning_rate": 1.984615185804636e-05, "loss": 0.9238, "step": 3400 }, { "epoch": 0.25276848755109627, "grad_norm": 2.195480927595998, "learning_rate": 1.9846011623121986e-05, "loss": 0.8755, "step": 3401 }, { "epoch": 0.2528428093645485, "grad_norm": 2.3755292010331015, "learning_rate": 1.9845871324809498e-05, "loss": 0.9647, "step": 3402 }, { "epoch": 0.25291713117800074, "grad_norm": 1.9478109316608874, "learning_rate": 1.9845730963109804e-05, "loss": 0.8439, "step": 3403 }, { "epoch": 0.252991452991453, "grad_norm": 2.371502361950036, "learning_rate": 1.9845590538023803e-05, "loss": 1.0361, "step": 3404 }, { "epoch": 0.2530657748049052, "grad_norm": 2.357723094665604, "learning_rate": 1.98454500495524e-05, "loss": 0.9531, "step": 3405 }, { "epoch": 0.2531400966183575, "grad_norm": 2.4596002867741054, "learning_rate": 1.9845309497696505e-05, "loss": 1.0967, "step": 3406 }, { "epoch": 0.25321441843180975, "grad_norm": 2.0162205474190165, "learning_rate": 1.9845168882457014e-05, "loss": 0.9407, "step": 3407 }, { "epoch": 0.253288740245262, "grad_norm": 2.725841904937174, "learning_rate": 1.984502820383484e-05, "loss": 0.8087, "step": 3408 }, { "epoch": 0.25336306205871423, "grad_norm": 1.8297223857227523, "learning_rate": 1.9844887461830882e-05, "loss": 0.8323, "step": 3409 }, { "epoch": 0.25343738387216647, "grad_norm": 2.2518065391386513, "learning_rate": 1.984474665644605e-05, "loss": 0.6225, "step": 3410 }, { "epoch": 0.2535117056856187, "grad_norm": 2.8797746217170173, "learning_rate": 1.984460578768125e-05, "loss": 1.0225, "step": 3411 }, { "epoch": 0.253586027499071, "grad_norm": 2.5096850197090963, "learning_rate": 1.984446485553739e-05, "loss": 0.9037, "step": 3412 }, { "epoch": 0.25366034931252324, "grad_norm": 1.9965587299058287, "learning_rate": 1.9844323860015376e-05, "loss": 0.7258, "step": 3413 }, { "epoch": 0.2537346711259755, "grad_norm": 2.211444803246759, "learning_rate": 1.9844182801116114e-05, "loss": 0.9169, "step": 3414 }, { "epoch": 0.2538089929394277, "grad_norm": 4.2993050314639545, "learning_rate": 1.9844041678840516e-05, "loss": 0.9638, "step": 3415 }, { "epoch": 0.25388331475287995, "grad_norm": 2.3981494734154345, "learning_rate": 1.9843900493189485e-05, "loss": 0.9566, "step": 3416 }, { "epoch": 0.25395763656633225, "grad_norm": 2.1492078675805186, "learning_rate": 1.9843759244163938e-05, "loss": 1.1345, "step": 3417 }, { "epoch": 0.2540319583797845, "grad_norm": 2.4370709057073374, "learning_rate": 1.9843617931764778e-05, "loss": 1.0982, "step": 3418 }, { "epoch": 0.2541062801932367, "grad_norm": 2.4325824315333113, "learning_rate": 1.9843476555992916e-05, "loss": 0.9932, "step": 3419 }, { "epoch": 0.25418060200668896, "grad_norm": 2.237890614041397, "learning_rate": 1.984333511684926e-05, "loss": 0.8975, "step": 3420 }, { "epoch": 0.2542549238201412, "grad_norm": 2.2173402124559947, "learning_rate": 1.984319361433473e-05, "loss": 0.8906, "step": 3421 }, { "epoch": 0.25432924563359344, "grad_norm": 2.3495728132061053, "learning_rate": 1.9843052048450225e-05, "loss": 0.892, "step": 3422 }, { "epoch": 0.25440356744704573, "grad_norm": 2.4216352706386544, "learning_rate": 1.984291041919666e-05, "loss": 0.995, "step": 3423 }, { "epoch": 0.25447788926049797, "grad_norm": 2.2886463269758752, "learning_rate": 1.9842768726574954e-05, "loss": 0.9169, "step": 3424 }, { "epoch": 0.2545522110739502, "grad_norm": 1.8985267470400606, "learning_rate": 1.984262697058601e-05, "loss": 0.8616, "step": 3425 }, { "epoch": 0.25462653288740245, "grad_norm": 2.3749235701935145, "learning_rate": 1.9842485151230747e-05, "loss": 1.0257, "step": 3426 }, { "epoch": 0.2547008547008547, "grad_norm": 2.518892127593748, "learning_rate": 1.984234326851007e-05, "loss": 0.8559, "step": 3427 }, { "epoch": 0.2547751765143069, "grad_norm": 2.1288583692065255, "learning_rate": 1.9842201322424905e-05, "loss": 0.7522, "step": 3428 }, { "epoch": 0.2548494983277592, "grad_norm": 1.8567441061085197, "learning_rate": 1.9842059312976155e-05, "loss": 0.8499, "step": 3429 }, { "epoch": 0.25492382014121145, "grad_norm": 2.1848870042253528, "learning_rate": 1.9841917240164737e-05, "loss": 0.761, "step": 3430 }, { "epoch": 0.2549981419546637, "grad_norm": 2.7491968664029063, "learning_rate": 1.9841775103991565e-05, "loss": 1.0425, "step": 3431 }, { "epoch": 0.25507246376811593, "grad_norm": 2.591117567423093, "learning_rate": 1.9841632904457557e-05, "loss": 1.194, "step": 3432 }, { "epoch": 0.25514678558156817, "grad_norm": 2.2252032270352076, "learning_rate": 1.984149064156363e-05, "loss": 0.9855, "step": 3433 }, { "epoch": 0.25522110739502046, "grad_norm": 2.283558273432547, "learning_rate": 1.9841348315310692e-05, "loss": 0.8357, "step": 3434 }, { "epoch": 0.2552954292084727, "grad_norm": 2.6761855673125976, "learning_rate": 1.9841205925699668e-05, "loss": 0.7959, "step": 3435 }, { "epoch": 0.25536975102192494, "grad_norm": 2.136383197093943, "learning_rate": 1.984106347273147e-05, "loss": 1.0876, "step": 3436 }, { "epoch": 0.2554440728353772, "grad_norm": 2.275536537637354, "learning_rate": 1.9840920956407015e-05, "loss": 0.788, "step": 3437 }, { "epoch": 0.2555183946488294, "grad_norm": 3.0526249045726104, "learning_rate": 1.9840778376727222e-05, "loss": 0.9111, "step": 3438 }, { "epoch": 0.25559271646228165, "grad_norm": 2.7977800472226724, "learning_rate": 1.984063573369301e-05, "loss": 0.8511, "step": 3439 }, { "epoch": 0.25566703827573395, "grad_norm": 2.3578824364961366, "learning_rate": 1.9840493027305295e-05, "loss": 0.8909, "step": 3440 }, { "epoch": 0.2557413600891862, "grad_norm": 2.5654335984731875, "learning_rate": 1.9840350257564993e-05, "loss": 0.9283, "step": 3441 }, { "epoch": 0.2558156819026384, "grad_norm": 1.9925052690600882, "learning_rate": 1.984020742447303e-05, "loss": 0.9666, "step": 3442 }, { "epoch": 0.25589000371609066, "grad_norm": 3.9208562696736156, "learning_rate": 1.9840064528030322e-05, "loss": 0.775, "step": 3443 }, { "epoch": 0.2559643255295429, "grad_norm": 1.9699858432161974, "learning_rate": 1.983992156823779e-05, "loss": 0.8326, "step": 3444 }, { "epoch": 0.2560386473429952, "grad_norm": 2.2250339116258373, "learning_rate": 1.983977854509635e-05, "loss": 0.932, "step": 3445 }, { "epoch": 0.25611296915644743, "grad_norm": 2.5923705025031882, "learning_rate": 1.983963545860693e-05, "loss": 0.9343, "step": 3446 }, { "epoch": 0.2561872909698997, "grad_norm": 2.1956760363444485, "learning_rate": 1.983949230877044e-05, "loss": 1.0172, "step": 3447 }, { "epoch": 0.2562616127833519, "grad_norm": 2.0440475767400152, "learning_rate": 1.983934909558782e-05, "loss": 0.9128, "step": 3448 }, { "epoch": 0.25633593459680415, "grad_norm": 2.3983128913522775, "learning_rate": 1.9839205819059974e-05, "loss": 0.903, "step": 3449 }, { "epoch": 0.2564102564102564, "grad_norm": 2.10869493118486, "learning_rate": 1.9839062479187833e-05, "loss": 0.9064, "step": 3450 }, { "epoch": 0.2564845782237087, "grad_norm": 2.4877430294689358, "learning_rate": 1.983891907597232e-05, "loss": 1.1035, "step": 3451 }, { "epoch": 0.2565589000371609, "grad_norm": 2.4893426925447764, "learning_rate": 1.9838775609414356e-05, "loss": 1.0651, "step": 3452 }, { "epoch": 0.25663322185061316, "grad_norm": 2.5621178417711725, "learning_rate": 1.9838632079514863e-05, "loss": 0.9965, "step": 3453 }, { "epoch": 0.2567075436640654, "grad_norm": 3.2140565562707852, "learning_rate": 1.9838488486274768e-05, "loss": 0.8594, "step": 3454 }, { "epoch": 0.25678186547751763, "grad_norm": 1.9877092682956687, "learning_rate": 1.9838344829694996e-05, "loss": 0.8353, "step": 3455 }, { "epoch": 0.2568561872909699, "grad_norm": 2.905531909718822, "learning_rate": 1.9838201109776468e-05, "loss": 0.9817, "step": 3456 }, { "epoch": 0.25693050910442217, "grad_norm": 2.448134007940165, "learning_rate": 1.9838057326520113e-05, "loss": 0.7297, "step": 3457 }, { "epoch": 0.2570048309178744, "grad_norm": 2.558618844360054, "learning_rate": 1.983791347992686e-05, "loss": 0.8599, "step": 3458 }, { "epoch": 0.25707915273132664, "grad_norm": 2.66191676432728, "learning_rate": 1.9837769569997625e-05, "loss": 0.9041, "step": 3459 }, { "epoch": 0.2571534745447789, "grad_norm": 2.4872014842914227, "learning_rate": 1.9837625596733342e-05, "loss": 1.0249, "step": 3460 }, { "epoch": 0.2572277963582311, "grad_norm": 2.624638771186836, "learning_rate": 1.9837481560134932e-05, "loss": 0.9341, "step": 3461 }, { "epoch": 0.2573021181716834, "grad_norm": 2.25012679362721, "learning_rate": 1.983733746020333e-05, "loss": 1.1107, "step": 3462 }, { "epoch": 0.25737643998513565, "grad_norm": 2.217922573363433, "learning_rate": 1.983719329693946e-05, "loss": 0.9519, "step": 3463 }, { "epoch": 0.2574507617985879, "grad_norm": 2.506137753697705, "learning_rate": 1.9837049070344247e-05, "loss": 0.7777, "step": 3464 }, { "epoch": 0.25752508361204013, "grad_norm": 2.329306804571379, "learning_rate": 1.9836904780418625e-05, "loss": 0.77, "step": 3465 }, { "epoch": 0.25759940542549237, "grad_norm": 2.415896966996718, "learning_rate": 1.983676042716352e-05, "loss": 1.0055, "step": 3466 }, { "epoch": 0.2576737272389446, "grad_norm": 2.416988494326499, "learning_rate": 1.983661601057986e-05, "loss": 0.9021, "step": 3467 }, { "epoch": 0.2577480490523969, "grad_norm": 2.076619848675143, "learning_rate": 1.983647153066858e-05, "loss": 0.7663, "step": 3468 }, { "epoch": 0.25782237086584914, "grad_norm": 2.012733331554492, "learning_rate": 1.9836326987430606e-05, "loss": 0.7556, "step": 3469 }, { "epoch": 0.2578966926793014, "grad_norm": 3.6700369822474426, "learning_rate": 1.9836182380866866e-05, "loss": 0.7634, "step": 3470 }, { "epoch": 0.2579710144927536, "grad_norm": 2.8675693492800103, "learning_rate": 1.9836037710978296e-05, "loss": 0.9786, "step": 3471 }, { "epoch": 0.25804533630620585, "grad_norm": 2.2715525271633754, "learning_rate": 1.9835892977765824e-05, "loss": 0.8484, "step": 3472 }, { "epoch": 0.25811965811965815, "grad_norm": 2.4702183472838324, "learning_rate": 1.983574818123039e-05, "loss": 0.8999, "step": 3473 }, { "epoch": 0.2581939799331104, "grad_norm": 2.3786060343947955, "learning_rate": 1.983560332137291e-05, "loss": 0.8854, "step": 3474 }, { "epoch": 0.2582683017465626, "grad_norm": 2.5297155971533516, "learning_rate": 1.983545839819433e-05, "loss": 0.8596, "step": 3475 }, { "epoch": 0.25834262356001486, "grad_norm": 3.15257672035071, "learning_rate": 1.9835313411695582e-05, "loss": 0.9973, "step": 3476 }, { "epoch": 0.2584169453734671, "grad_norm": 2.494709175419882, "learning_rate": 1.9835168361877593e-05, "loss": 0.8946, "step": 3477 }, { "epoch": 0.25849126718691934, "grad_norm": 2.6842339590313653, "learning_rate": 1.9835023248741307e-05, "loss": 1.0435, "step": 3478 }, { "epoch": 0.25856558900037163, "grad_norm": 2.272927611586313, "learning_rate": 1.9834878072287644e-05, "loss": 1.0123, "step": 3479 }, { "epoch": 0.25863991081382387, "grad_norm": 3.738114035731749, "learning_rate": 1.983473283251755e-05, "loss": 0.859, "step": 3480 }, { "epoch": 0.2587142326272761, "grad_norm": 2.4550538235117116, "learning_rate": 1.9834587529431953e-05, "loss": 0.8999, "step": 3481 }, { "epoch": 0.25878855444072835, "grad_norm": 5.276003757322605, "learning_rate": 1.9834442163031795e-05, "loss": 0.9329, "step": 3482 }, { "epoch": 0.2588628762541806, "grad_norm": 3.0320915919802265, "learning_rate": 1.9834296733318008e-05, "loss": 0.7627, "step": 3483 }, { "epoch": 0.2589371980676328, "grad_norm": 2.6450928495943806, "learning_rate": 1.9834151240291527e-05, "loss": 0.7875, "step": 3484 }, { "epoch": 0.2590115198810851, "grad_norm": 2.9417637840827395, "learning_rate": 1.983400568395329e-05, "loss": 1.1386, "step": 3485 }, { "epoch": 0.25908584169453736, "grad_norm": 2.1927409512251304, "learning_rate": 1.983386006430424e-05, "loss": 0.8917, "step": 3486 }, { "epoch": 0.2591601635079896, "grad_norm": 2.335556236723271, "learning_rate": 1.9833714381345306e-05, "loss": 1.0243, "step": 3487 }, { "epoch": 0.25923448532144183, "grad_norm": 2.472754481518483, "learning_rate": 1.9833568635077428e-05, "loss": 0.8175, "step": 3488 }, { "epoch": 0.25930880713489407, "grad_norm": 2.3855544302787584, "learning_rate": 1.983342282550155e-05, "loss": 1.0285, "step": 3489 }, { "epoch": 0.25938312894834636, "grad_norm": 2.466546540479104, "learning_rate": 1.98332769526186e-05, "loss": 0.8973, "step": 3490 }, { "epoch": 0.2594574507617986, "grad_norm": 2.210335137648134, "learning_rate": 1.983313101642952e-05, "loss": 0.8816, "step": 3491 }, { "epoch": 0.25953177257525084, "grad_norm": 3.007524561272228, "learning_rate": 1.9832985016935262e-05, "loss": 0.9046, "step": 3492 }, { "epoch": 0.2596060943887031, "grad_norm": 2.441870504950118, "learning_rate": 1.9832838954136755e-05, "loss": 0.967, "step": 3493 }, { "epoch": 0.2596804162021553, "grad_norm": 2.335028127222438, "learning_rate": 1.9832692828034936e-05, "loss": 0.8345, "step": 3494 }, { "epoch": 0.25975473801560756, "grad_norm": 2.5958765967840143, "learning_rate": 1.9832546638630753e-05, "loss": 1.1088, "step": 3495 }, { "epoch": 0.25982905982905985, "grad_norm": 2.098431575282772, "learning_rate": 1.9832400385925148e-05, "loss": 0.8624, "step": 3496 }, { "epoch": 0.2599033816425121, "grad_norm": 2.672066192404937, "learning_rate": 1.9832254069919057e-05, "loss": 1.0015, "step": 3497 }, { "epoch": 0.2599777034559643, "grad_norm": 2.5376334184098597, "learning_rate": 1.9832107690613425e-05, "loss": 1.1009, "step": 3498 }, { "epoch": 0.26005202526941656, "grad_norm": 2.9165355629051954, "learning_rate": 1.9831961248009193e-05, "loss": 0.671, "step": 3499 }, { "epoch": 0.2601263470828688, "grad_norm": 2.9467475091992537, "learning_rate": 1.9831814742107304e-05, "loss": 0.859, "step": 3500 }, { "epoch": 0.2602006688963211, "grad_norm": 2.444431498468175, "learning_rate": 1.9831668172908706e-05, "loss": 1.1123, "step": 3501 }, { "epoch": 0.26027499070977334, "grad_norm": 2.2233618264322716, "learning_rate": 1.983152154041433e-05, "loss": 0.8, "step": 3502 }, { "epoch": 0.2603493125232256, "grad_norm": 3.2726875124174235, "learning_rate": 1.9831374844625134e-05, "loss": 1.0719, "step": 3503 }, { "epoch": 0.2604236343366778, "grad_norm": 2.4431483782123853, "learning_rate": 1.9831228085542058e-05, "loss": 0.7981, "step": 3504 }, { "epoch": 0.26049795615013005, "grad_norm": 2.242351252114032, "learning_rate": 1.9831081263166044e-05, "loss": 0.915, "step": 3505 }, { "epoch": 0.2605722779635823, "grad_norm": 2.0442230960008185, "learning_rate": 1.9830934377498037e-05, "loss": 0.8395, "step": 3506 }, { "epoch": 0.2606465997770346, "grad_norm": 2.5267273500949967, "learning_rate": 1.983078742853899e-05, "loss": 1.2018, "step": 3507 }, { "epoch": 0.2607209215904868, "grad_norm": 2.1874820812191045, "learning_rate": 1.983064041628984e-05, "loss": 1.0129, "step": 3508 }, { "epoch": 0.26079524340393906, "grad_norm": 2.4189464569762054, "learning_rate": 1.9830493340751536e-05, "loss": 1.0316, "step": 3509 }, { "epoch": 0.2608695652173913, "grad_norm": 2.729685220084415, "learning_rate": 1.983034620192503e-05, "loss": 0.9403, "step": 3510 }, { "epoch": 0.26094388703084354, "grad_norm": 2.479966340109212, "learning_rate": 1.9830198999811263e-05, "loss": 1.0229, "step": 3511 }, { "epoch": 0.2610182088442958, "grad_norm": 2.5700741010924744, "learning_rate": 1.9830051734411183e-05, "loss": 0.92, "step": 3512 }, { "epoch": 0.26109253065774807, "grad_norm": 2.038873215806035, "learning_rate": 1.982990440572574e-05, "loss": 0.9157, "step": 3513 }, { "epoch": 0.2611668524712003, "grad_norm": 2.439902023704833, "learning_rate": 1.9829757013755886e-05, "loss": 0.7086, "step": 3514 }, { "epoch": 0.26124117428465254, "grad_norm": 2.2886344453836323, "learning_rate": 1.9829609558502563e-05, "loss": 0.9124, "step": 3515 }, { "epoch": 0.2613154960981048, "grad_norm": 2.3337855273005697, "learning_rate": 1.9829462039966728e-05, "loss": 0.761, "step": 3516 }, { "epoch": 0.261389817911557, "grad_norm": 1.789347606328576, "learning_rate": 1.982931445814932e-05, "loss": 0.8519, "step": 3517 }, { "epoch": 0.2614641397250093, "grad_norm": 2.1898477339615825, "learning_rate": 1.98291668130513e-05, "loss": 1.0937, "step": 3518 }, { "epoch": 0.26153846153846155, "grad_norm": 2.1644480892734737, "learning_rate": 1.9829019104673612e-05, "loss": 1.0036, "step": 3519 }, { "epoch": 0.2616127833519138, "grad_norm": 2.462257858110188, "learning_rate": 1.9828871333017213e-05, "loss": 1.0058, "step": 3520 }, { "epoch": 0.26168710516536603, "grad_norm": 2.912065936890358, "learning_rate": 1.9828723498083046e-05, "loss": 1.0206, "step": 3521 }, { "epoch": 0.26176142697881827, "grad_norm": 2.520255543699775, "learning_rate": 1.9828575599872067e-05, "loss": 0.8949, "step": 3522 }, { "epoch": 0.2618357487922705, "grad_norm": 2.4214753769763933, "learning_rate": 1.982842763838523e-05, "loss": 0.7498, "step": 3523 }, { "epoch": 0.2619100706057228, "grad_norm": 2.4969330000433, "learning_rate": 1.9828279613623488e-05, "loss": 0.9434, "step": 3524 }, { "epoch": 0.26198439241917504, "grad_norm": 3.0426017953279074, "learning_rate": 1.9828131525587788e-05, "loss": 1.0196, "step": 3525 }, { "epoch": 0.2620587142326273, "grad_norm": 2.6297062523346892, "learning_rate": 1.9827983374279086e-05, "loss": 0.9934, "step": 3526 }, { "epoch": 0.2621330360460795, "grad_norm": 2.1161950146154522, "learning_rate": 1.982783515969834e-05, "loss": 0.8504, "step": 3527 }, { "epoch": 0.26220735785953175, "grad_norm": 2.9062974855494272, "learning_rate": 1.9827686881846504e-05, "loss": 0.816, "step": 3528 }, { "epoch": 0.26228167967298405, "grad_norm": 2.4299766533161655, "learning_rate": 1.9827538540724526e-05, "loss": 0.8123, "step": 3529 }, { "epoch": 0.2623560014864363, "grad_norm": 1.7903484322631296, "learning_rate": 1.9827390136333363e-05, "loss": 0.7349, "step": 3530 }, { "epoch": 0.2624303232998885, "grad_norm": 2.1354505664847183, "learning_rate": 1.9827241668673976e-05, "loss": 0.8346, "step": 3531 }, { "epoch": 0.26250464511334076, "grad_norm": 2.135398774200703, "learning_rate": 1.9827093137747316e-05, "loss": 0.926, "step": 3532 }, { "epoch": 0.262578966926793, "grad_norm": 2.880188074725037, "learning_rate": 1.982694454355434e-05, "loss": 1.0126, "step": 3533 }, { "epoch": 0.26265328874024524, "grad_norm": 2.437504817412391, "learning_rate": 1.9826795886096004e-05, "loss": 0.8968, "step": 3534 }, { "epoch": 0.26272761055369753, "grad_norm": 1.7656640411103843, "learning_rate": 1.982664716537327e-05, "loss": 0.7013, "step": 3535 }, { "epoch": 0.26280193236714977, "grad_norm": 2.2579166071046135, "learning_rate": 1.9826498381387086e-05, "loss": 0.8865, "step": 3536 }, { "epoch": 0.262876254180602, "grad_norm": 2.5952127252548998, "learning_rate": 1.9826349534138415e-05, "loss": 1.028, "step": 3537 }, { "epoch": 0.26295057599405425, "grad_norm": 2.345539537826845, "learning_rate": 1.982620062362822e-05, "loss": 0.7547, "step": 3538 }, { "epoch": 0.2630248978075065, "grad_norm": 2.5247926533009317, "learning_rate": 1.9826051649857452e-05, "loss": 1.0457, "step": 3539 }, { "epoch": 0.2630992196209587, "grad_norm": 2.2739325424989114, "learning_rate": 1.9825902612827075e-05, "loss": 1.0213, "step": 3540 }, { "epoch": 0.263173541434411, "grad_norm": 2.6759836044116674, "learning_rate": 1.9825753512538048e-05, "loss": 0.7722, "step": 3541 }, { "epoch": 0.26324786324786326, "grad_norm": 2.362663303764873, "learning_rate": 1.9825604348991326e-05, "loss": 0.9208, "step": 3542 }, { "epoch": 0.2633221850613155, "grad_norm": 2.165222391584521, "learning_rate": 1.9825455122187876e-05, "loss": 0.8496, "step": 3543 }, { "epoch": 0.26339650687476773, "grad_norm": 2.4057829210980706, "learning_rate": 1.9825305832128656e-05, "loss": 1.0398, "step": 3544 }, { "epoch": 0.26347082868821997, "grad_norm": 3.7112481749270247, "learning_rate": 1.982515647881462e-05, "loss": 0.9079, "step": 3545 }, { "epoch": 0.26354515050167227, "grad_norm": 3.9184631412452022, "learning_rate": 1.9825007062246744e-05, "loss": 1.0406, "step": 3546 }, { "epoch": 0.2636194723151245, "grad_norm": 2.950755840716172, "learning_rate": 1.9824857582425983e-05, "loss": 1.0242, "step": 3547 }, { "epoch": 0.26369379412857674, "grad_norm": 2.3254690921729813, "learning_rate": 1.982470803935329e-05, "loss": 1.1199, "step": 3548 }, { "epoch": 0.263768115942029, "grad_norm": 2.1180433382615416, "learning_rate": 1.9824558433029645e-05, "loss": 0.9796, "step": 3549 }, { "epoch": 0.2638424377554812, "grad_norm": 2.4035210418313366, "learning_rate": 1.9824408763456e-05, "loss": 0.7571, "step": 3550 }, { "epoch": 0.26391675956893346, "grad_norm": 2.0723581305915055, "learning_rate": 1.9824259030633323e-05, "loss": 0.7086, "step": 3551 }, { "epoch": 0.26399108138238575, "grad_norm": 2.7834523431350764, "learning_rate": 1.9824109234562573e-05, "loss": 1.0661, "step": 3552 }, { "epoch": 0.264065403195838, "grad_norm": 8.366703207729035, "learning_rate": 1.982395937524472e-05, "loss": 0.67, "step": 3553 }, { "epoch": 0.2641397250092902, "grad_norm": 2.652917709666211, "learning_rate": 1.9823809452680725e-05, "loss": 0.9644, "step": 3554 }, { "epoch": 0.26421404682274247, "grad_norm": 2.37894759564633, "learning_rate": 1.9823659466871553e-05, "loss": 0.896, "step": 3555 }, { "epoch": 0.2642883686361947, "grad_norm": 3.039047846176816, "learning_rate": 1.9823509417818173e-05, "loss": 1.0464, "step": 3556 }, { "epoch": 0.264362690449647, "grad_norm": 2.0671239556493672, "learning_rate": 1.9823359305521547e-05, "loss": 0.7965, "step": 3557 }, { "epoch": 0.26443701226309924, "grad_norm": 2.1908462047151764, "learning_rate": 1.982320912998265e-05, "loss": 1.0217, "step": 3558 }, { "epoch": 0.2645113340765515, "grad_norm": 2.165643233132923, "learning_rate": 1.9823058891202434e-05, "loss": 0.8628, "step": 3559 }, { "epoch": 0.2645856558900037, "grad_norm": 2.0514697656757104, "learning_rate": 1.9822908589181876e-05, "loss": 0.7279, "step": 3560 }, { "epoch": 0.26465997770345595, "grad_norm": 2.1869275084955775, "learning_rate": 1.9822758223921946e-05, "loss": 1.0354, "step": 3561 }, { "epoch": 0.2647342995169082, "grad_norm": 2.1208455643218636, "learning_rate": 1.9822607795423607e-05, "loss": 1.0686, "step": 3562 }, { "epoch": 0.2648086213303605, "grad_norm": 2.1869435702736917, "learning_rate": 1.9822457303687828e-05, "loss": 0.9164, "step": 3563 }, { "epoch": 0.2648829431438127, "grad_norm": 2.0461457856624095, "learning_rate": 1.9822306748715578e-05, "loss": 0.8825, "step": 3564 }, { "epoch": 0.26495726495726496, "grad_norm": 2.3293433995980535, "learning_rate": 1.982215613050783e-05, "loss": 0.7805, "step": 3565 }, { "epoch": 0.2650315867707172, "grad_norm": 2.080039812204621, "learning_rate": 1.9822005449065543e-05, "loss": 0.8047, "step": 3566 }, { "epoch": 0.26510590858416944, "grad_norm": 2.7186083493326128, "learning_rate": 1.98218547043897e-05, "loss": 1.0196, "step": 3567 }, { "epoch": 0.2651802303976217, "grad_norm": 2.1407815236351695, "learning_rate": 1.9821703896481266e-05, "loss": 0.9876, "step": 3568 }, { "epoch": 0.26525455221107397, "grad_norm": 2.081658956264216, "learning_rate": 1.982155302534121e-05, "loss": 0.8631, "step": 3569 }, { "epoch": 0.2653288740245262, "grad_norm": 2.841899170670992, "learning_rate": 1.9821402090970507e-05, "loss": 1.0279, "step": 3570 }, { "epoch": 0.26540319583797845, "grad_norm": 2.8443578030127457, "learning_rate": 1.9821251093370128e-05, "loss": 0.9566, "step": 3571 }, { "epoch": 0.2654775176514307, "grad_norm": 2.7793524161726753, "learning_rate": 1.9821100032541043e-05, "loss": 1.0143, "step": 3572 }, { "epoch": 0.2655518394648829, "grad_norm": 2.0645426818570134, "learning_rate": 1.9820948908484222e-05, "loss": 0.8685, "step": 3573 }, { "epoch": 0.2656261612783352, "grad_norm": 2.4098226390710615, "learning_rate": 1.9820797721200647e-05, "loss": 1.0201, "step": 3574 }, { "epoch": 0.26570048309178745, "grad_norm": 4.261243507701963, "learning_rate": 1.9820646470691283e-05, "loss": 0.8256, "step": 3575 }, { "epoch": 0.2657748049052397, "grad_norm": 2.055863043780091, "learning_rate": 1.9820495156957107e-05, "loss": 0.8107, "step": 3576 }, { "epoch": 0.26584912671869193, "grad_norm": 2.4244071745604288, "learning_rate": 1.982034377999909e-05, "loss": 0.9033, "step": 3577 }, { "epoch": 0.26592344853214417, "grad_norm": 2.113255353245588, "learning_rate": 1.9820192339818212e-05, "loss": 0.7835, "step": 3578 }, { "epoch": 0.2659977703455964, "grad_norm": 2.545876418465327, "learning_rate": 1.982004083641545e-05, "loss": 0.8312, "step": 3579 }, { "epoch": 0.2660720921590487, "grad_norm": 2.7450387498926747, "learning_rate": 1.9819889269791767e-05, "loss": 0.9684, "step": 3580 }, { "epoch": 0.26614641397250094, "grad_norm": 2.255998978700415, "learning_rate": 1.9819737639948148e-05, "loss": 0.8884, "step": 3581 }, { "epoch": 0.2662207357859532, "grad_norm": 2.253918723419396, "learning_rate": 1.981958594688557e-05, "loss": 1.086, "step": 3582 }, { "epoch": 0.2662950575994054, "grad_norm": 2.5762116725750865, "learning_rate": 1.9819434190605004e-05, "loss": 0.9025, "step": 3583 }, { "epoch": 0.26636937941285765, "grad_norm": 2.8655731042961774, "learning_rate": 1.9819282371107434e-05, "loss": 0.959, "step": 3584 }, { "epoch": 0.26644370122630995, "grad_norm": 2.1405182833173724, "learning_rate": 1.981913048839383e-05, "loss": 0.8947, "step": 3585 }, { "epoch": 0.2665180230397622, "grad_norm": 2.4276501060715217, "learning_rate": 1.9818978542465176e-05, "loss": 1.0584, "step": 3586 }, { "epoch": 0.2665923448532144, "grad_norm": 2.313823778600624, "learning_rate": 1.981882653332245e-05, "loss": 1.0412, "step": 3587 }, { "epoch": 0.26666666666666666, "grad_norm": 1.772738543223676, "learning_rate": 1.9818674460966622e-05, "loss": 0.674, "step": 3588 }, { "epoch": 0.2667409884801189, "grad_norm": 3.266047455127242, "learning_rate": 1.981852232539868e-05, "loss": 0.8307, "step": 3589 }, { "epoch": 0.26681531029357114, "grad_norm": 2.213018382400724, "learning_rate": 1.9818370126619602e-05, "loss": 0.8517, "step": 3590 }, { "epoch": 0.26688963210702343, "grad_norm": 2.1679969319421315, "learning_rate": 1.9818217864630368e-05, "loss": 0.9566, "step": 3591 }, { "epoch": 0.26696395392047567, "grad_norm": 3.1558695460468065, "learning_rate": 1.9818065539431953e-05, "loss": 0.7424, "step": 3592 }, { "epoch": 0.2670382757339279, "grad_norm": 2.306440428655618, "learning_rate": 1.9817913151025342e-05, "loss": 0.8796, "step": 3593 }, { "epoch": 0.26711259754738015, "grad_norm": 2.7538677817634816, "learning_rate": 1.9817760699411516e-05, "loss": 0.9091, "step": 3594 }, { "epoch": 0.2671869193608324, "grad_norm": 1.9451016744350411, "learning_rate": 1.9817608184591458e-05, "loss": 0.9185, "step": 3595 }, { "epoch": 0.2672612411742846, "grad_norm": 2.1414333620341446, "learning_rate": 1.9817455606566146e-05, "loss": 0.8598, "step": 3596 }, { "epoch": 0.2673355629877369, "grad_norm": 1.8663111363394125, "learning_rate": 1.9817302965336565e-05, "loss": 0.9373, "step": 3597 }, { "epoch": 0.26740988480118916, "grad_norm": 2.249670800817168, "learning_rate": 1.9817150260903695e-05, "loss": 0.8845, "step": 3598 }, { "epoch": 0.2674842066146414, "grad_norm": 2.898698612793166, "learning_rate": 1.981699749326852e-05, "loss": 1.0082, "step": 3599 }, { "epoch": 0.26755852842809363, "grad_norm": 3.0503108938359618, "learning_rate": 1.981684466243203e-05, "loss": 0.9652, "step": 3600 }, { "epoch": 0.26763285024154587, "grad_norm": 2.4688857663100205, "learning_rate": 1.9816691768395203e-05, "loss": 1.0718, "step": 3601 }, { "epoch": 0.26770717205499817, "grad_norm": 2.619444171339125, "learning_rate": 1.9816538811159018e-05, "loss": 0.8988, "step": 3602 }, { "epoch": 0.2677814938684504, "grad_norm": 2.4168100817881233, "learning_rate": 1.9816385790724468e-05, "loss": 0.8156, "step": 3603 }, { "epoch": 0.26785581568190264, "grad_norm": 2.5407502327483016, "learning_rate": 1.9816232707092536e-05, "loss": 1.1263, "step": 3604 }, { "epoch": 0.2679301374953549, "grad_norm": 2.3780648378998133, "learning_rate": 1.9816079560264208e-05, "loss": 1.0219, "step": 3605 }, { "epoch": 0.2680044593088071, "grad_norm": 2.5250421091232247, "learning_rate": 1.981592635024047e-05, "loss": 0.941, "step": 3606 }, { "epoch": 0.26807878112225936, "grad_norm": 2.4957530684327827, "learning_rate": 1.9815773077022305e-05, "loss": 0.8509, "step": 3607 }, { "epoch": 0.26815310293571165, "grad_norm": 3.2225898912121704, "learning_rate": 1.9815619740610704e-05, "loss": 0.9276, "step": 3608 }, { "epoch": 0.2682274247491639, "grad_norm": 2.006703445342752, "learning_rate": 1.9815466341006653e-05, "loss": 0.8499, "step": 3609 }, { "epoch": 0.26830174656261613, "grad_norm": 3.4301218253535466, "learning_rate": 1.981531287821114e-05, "loss": 0.9669, "step": 3610 }, { "epoch": 0.26837606837606837, "grad_norm": 2.111964234212821, "learning_rate": 1.9815159352225147e-05, "loss": 0.9263, "step": 3611 }, { "epoch": 0.2684503901895206, "grad_norm": 2.738647406425185, "learning_rate": 1.9815005763049667e-05, "loss": 0.76, "step": 3612 }, { "epoch": 0.2685247120029729, "grad_norm": 2.665776894615679, "learning_rate": 1.9814852110685693e-05, "loss": 1.0529, "step": 3613 }, { "epoch": 0.26859903381642514, "grad_norm": 2.67999674426517, "learning_rate": 1.981469839513421e-05, "loss": 1.1158, "step": 3614 }, { "epoch": 0.2686733556298774, "grad_norm": 2.754133992637106, "learning_rate": 1.9814544616396208e-05, "loss": 0.8701, "step": 3615 }, { "epoch": 0.2687476774433296, "grad_norm": 2.446137745200873, "learning_rate": 1.9814390774472677e-05, "loss": 0.9806, "step": 3616 }, { "epoch": 0.26882199925678185, "grad_norm": 2.590525537269405, "learning_rate": 1.9814236869364608e-05, "loss": 0.8854, "step": 3617 }, { "epoch": 0.2688963210702341, "grad_norm": 3.0071150027001528, "learning_rate": 1.981408290107299e-05, "loss": 0.9088, "step": 3618 }, { "epoch": 0.2689706428836864, "grad_norm": 2.4392490907826208, "learning_rate": 1.9813928869598813e-05, "loss": 1.0182, "step": 3619 }, { "epoch": 0.2690449646971386, "grad_norm": 2.2847317538522587, "learning_rate": 1.9813774774943076e-05, "loss": 0.6723, "step": 3620 }, { "epoch": 0.26911928651059086, "grad_norm": 2.1405466790709395, "learning_rate": 1.9813620617106763e-05, "loss": 0.9538, "step": 3621 }, { "epoch": 0.2691936083240431, "grad_norm": 2.6950667078123725, "learning_rate": 1.981346639609087e-05, "loss": 1.1024, "step": 3622 }, { "epoch": 0.26926793013749534, "grad_norm": 2.6492326091444784, "learning_rate": 1.981331211189639e-05, "loss": 0.8249, "step": 3623 }, { "epoch": 0.2693422519509476, "grad_norm": 2.8480505129943636, "learning_rate": 1.981315776452432e-05, "loss": 1.1562, "step": 3624 }, { "epoch": 0.26941657376439987, "grad_norm": 2.4607583465948553, "learning_rate": 1.981300335397564e-05, "loss": 1.1675, "step": 3625 }, { "epoch": 0.2694908955778521, "grad_norm": 2.1758728078537173, "learning_rate": 1.981284888025136e-05, "loss": 0.9597, "step": 3626 }, { "epoch": 0.26956521739130435, "grad_norm": 3.6042661710951727, "learning_rate": 1.9812694343352467e-05, "loss": 0.7184, "step": 3627 }, { "epoch": 0.2696395392047566, "grad_norm": 2.0728958039265106, "learning_rate": 1.981253974327996e-05, "loss": 0.6813, "step": 3628 }, { "epoch": 0.2697138610182088, "grad_norm": 2.2697581235024535, "learning_rate": 1.9812385080034824e-05, "loss": 1.0895, "step": 3629 }, { "epoch": 0.2697881828316611, "grad_norm": 2.7898305364590184, "learning_rate": 1.981223035361807e-05, "loss": 0.9218, "step": 3630 }, { "epoch": 0.26986250464511335, "grad_norm": 2.2187527125580937, "learning_rate": 1.9812075564030682e-05, "loss": 1.0206, "step": 3631 }, { "epoch": 0.2699368264585656, "grad_norm": 3.2540992718254986, "learning_rate": 1.981192071127366e-05, "loss": 0.8409, "step": 3632 }, { "epoch": 0.27001114827201783, "grad_norm": 2.191369420608318, "learning_rate": 1.9811765795348003e-05, "loss": 0.6263, "step": 3633 }, { "epoch": 0.27008547008547007, "grad_norm": 2.5641308634387534, "learning_rate": 1.9811610816254707e-05, "loss": 0.6082, "step": 3634 }, { "epoch": 0.2701597918989223, "grad_norm": 2.5588386766291222, "learning_rate": 1.981145577399477e-05, "loss": 1.0729, "step": 3635 }, { "epoch": 0.2702341137123746, "grad_norm": 2.2230160931093397, "learning_rate": 1.9811300668569188e-05, "loss": 0.7855, "step": 3636 }, { "epoch": 0.27030843552582684, "grad_norm": 2.2034795546352206, "learning_rate": 1.981114549997896e-05, "loss": 0.9087, "step": 3637 }, { "epoch": 0.2703827573392791, "grad_norm": 2.519958315411228, "learning_rate": 1.9810990268225092e-05, "loss": 0.9704, "step": 3638 }, { "epoch": 0.2704570791527313, "grad_norm": 2.718633454799908, "learning_rate": 1.981083497330857e-05, "loss": 0.8994, "step": 3639 }, { "epoch": 0.27053140096618356, "grad_norm": 2.1412735860697283, "learning_rate": 1.9810679615230407e-05, "loss": 0.7844, "step": 3640 }, { "epoch": 0.27060572277963585, "grad_norm": 2.085058106524773, "learning_rate": 1.9810524193991597e-05, "loss": 0.8739, "step": 3641 }, { "epoch": 0.2706800445930881, "grad_norm": 2.583319550088129, "learning_rate": 1.9810368709593142e-05, "loss": 1.0573, "step": 3642 }, { "epoch": 0.2707543664065403, "grad_norm": 2.43978347390757, "learning_rate": 1.9810213162036042e-05, "loss": 0.8679, "step": 3643 }, { "epoch": 0.27082868821999256, "grad_norm": 2.1436095651914866, "learning_rate": 1.98100575513213e-05, "loss": 1.022, "step": 3644 }, { "epoch": 0.2709030100334448, "grad_norm": 2.3654102902973975, "learning_rate": 1.980990187744991e-05, "loss": 0.9438, "step": 3645 }, { "epoch": 0.27097733184689704, "grad_norm": 2.0741159122645954, "learning_rate": 1.9809746140422884e-05, "loss": 1.0189, "step": 3646 }, { "epoch": 0.27105165366034933, "grad_norm": 2.2038457706096484, "learning_rate": 1.9809590340241226e-05, "loss": 0.9858, "step": 3647 }, { "epoch": 0.2711259754738016, "grad_norm": 3.1556765002132963, "learning_rate": 1.980943447690593e-05, "loss": 1.2323, "step": 3648 }, { "epoch": 0.2712002972872538, "grad_norm": 2.437823284760812, "learning_rate": 1.9809278550418005e-05, "loss": 0.9061, "step": 3649 }, { "epoch": 0.27127461910070605, "grad_norm": 3.479591399618629, "learning_rate": 1.9809122560778454e-05, "loss": 0.9087, "step": 3650 }, { "epoch": 0.2713489409141583, "grad_norm": 5.93353722821801, "learning_rate": 1.9808966507988278e-05, "loss": 0.7305, "step": 3651 }, { "epoch": 0.2714232627276106, "grad_norm": 1.9327858046151967, "learning_rate": 1.9808810392048488e-05, "loss": 0.7872, "step": 3652 }, { "epoch": 0.2714975845410628, "grad_norm": 2.117768609850109, "learning_rate": 1.9808654212960085e-05, "loss": 0.8627, "step": 3653 }, { "epoch": 0.27157190635451506, "grad_norm": 2.5674104488664273, "learning_rate": 1.9808497970724074e-05, "loss": 0.8218, "step": 3654 }, { "epoch": 0.2716462281679673, "grad_norm": 2.031026314135469, "learning_rate": 1.9808341665341463e-05, "loss": 0.6482, "step": 3655 }, { "epoch": 0.27172054998141953, "grad_norm": 2.257085361016451, "learning_rate": 1.9808185296813257e-05, "loss": 0.8211, "step": 3656 }, { "epoch": 0.2717948717948718, "grad_norm": 2.3571591611012606, "learning_rate": 1.980802886514046e-05, "loss": 0.8735, "step": 3657 }, { "epoch": 0.27186919360832407, "grad_norm": 2.453548841417508, "learning_rate": 1.9807872370324086e-05, "loss": 1.1431, "step": 3658 }, { "epoch": 0.2719435154217763, "grad_norm": 2.482660775217416, "learning_rate": 1.9807715812365137e-05, "loss": 0.9496, "step": 3659 }, { "epoch": 0.27201783723522854, "grad_norm": 2.2443220556183134, "learning_rate": 1.980755919126462e-05, "loss": 0.9073, "step": 3660 }, { "epoch": 0.2720921590486808, "grad_norm": 1.9761951136261744, "learning_rate": 1.9807402507023553e-05, "loss": 0.827, "step": 3661 }, { "epoch": 0.272166480862133, "grad_norm": 2.4698136800807164, "learning_rate": 1.9807245759642934e-05, "loss": 1.0893, "step": 3662 }, { "epoch": 0.27224080267558526, "grad_norm": 2.1943381740551557, "learning_rate": 1.980708894912377e-05, "loss": 0.859, "step": 3663 }, { "epoch": 0.27231512448903755, "grad_norm": 2.348928524096041, "learning_rate": 1.9806932075467084e-05, "loss": 0.8241, "step": 3664 }, { "epoch": 0.2723894463024898, "grad_norm": 2.4662496641935716, "learning_rate": 1.9806775138673876e-05, "loss": 0.9043, "step": 3665 }, { "epoch": 0.27246376811594203, "grad_norm": 2.117559112557741, "learning_rate": 1.9806618138745156e-05, "loss": 0.8566, "step": 3666 }, { "epoch": 0.27253808992939427, "grad_norm": 2.155684559376333, "learning_rate": 1.980646107568194e-05, "loss": 0.7821, "step": 3667 }, { "epoch": 0.2726124117428465, "grad_norm": 1.9174554885576311, "learning_rate": 1.9806303949485237e-05, "loss": 0.9815, "step": 3668 }, { "epoch": 0.2726867335562988, "grad_norm": 2.0433713517074255, "learning_rate": 1.9806146760156057e-05, "loss": 0.8203, "step": 3669 }, { "epoch": 0.27276105536975104, "grad_norm": 2.0810778362596967, "learning_rate": 1.980598950769541e-05, "loss": 0.9223, "step": 3670 }, { "epoch": 0.2728353771832033, "grad_norm": 2.092219763929374, "learning_rate": 1.980583219210431e-05, "loss": 0.8646, "step": 3671 }, { "epoch": 0.2729096989966555, "grad_norm": 2.9658463788316967, "learning_rate": 1.980567481338378e-05, "loss": 0.9271, "step": 3672 }, { "epoch": 0.27298402081010775, "grad_norm": 2.9913071343843085, "learning_rate": 1.9805517371534816e-05, "loss": 1.1011, "step": 3673 }, { "epoch": 0.27305834262356, "grad_norm": 2.452802675126228, "learning_rate": 1.9805359866558445e-05, "loss": 0.862, "step": 3674 }, { "epoch": 0.2731326644370123, "grad_norm": 2.429396493036413, "learning_rate": 1.9805202298455674e-05, "loss": 0.9488, "step": 3675 }, { "epoch": 0.2732069862504645, "grad_norm": 2.7676716862741686, "learning_rate": 1.980504466722752e-05, "loss": 1.013, "step": 3676 }, { "epoch": 0.27328130806391676, "grad_norm": 3.143801097889533, "learning_rate": 1.9804886972874995e-05, "loss": 0.7717, "step": 3677 }, { "epoch": 0.273355629877369, "grad_norm": 2.8161226874341883, "learning_rate": 1.980472921539912e-05, "loss": 0.934, "step": 3678 }, { "epoch": 0.27342995169082124, "grad_norm": 2.438010109366951, "learning_rate": 1.9804571394800902e-05, "loss": 0.9505, "step": 3679 }, { "epoch": 0.27350427350427353, "grad_norm": 2.130428632186456, "learning_rate": 1.9804413511081366e-05, "loss": 0.8342, "step": 3680 }, { "epoch": 0.27357859531772577, "grad_norm": 2.199747244633981, "learning_rate": 1.9804255564241524e-05, "loss": 0.856, "step": 3681 }, { "epoch": 0.273652917131178, "grad_norm": 1.9690684298124985, "learning_rate": 1.9804097554282393e-05, "loss": 0.7984, "step": 3682 }, { "epoch": 0.27372723894463025, "grad_norm": 2.873247847912598, "learning_rate": 1.980393948120499e-05, "loss": 0.9804, "step": 3683 }, { "epoch": 0.2738015607580825, "grad_norm": 2.2753420754373024, "learning_rate": 1.9803781345010336e-05, "loss": 0.9481, "step": 3684 }, { "epoch": 0.2738758825715347, "grad_norm": 2.2170979790906835, "learning_rate": 1.9803623145699446e-05, "loss": 0.6433, "step": 3685 }, { "epoch": 0.273950204384987, "grad_norm": 2.347069723839261, "learning_rate": 1.9803464883273332e-05, "loss": 0.8032, "step": 3686 }, { "epoch": 0.27402452619843926, "grad_norm": 2.1587270284270392, "learning_rate": 1.9803306557733025e-05, "loss": 0.8788, "step": 3687 }, { "epoch": 0.2740988480118915, "grad_norm": 2.525985911533081, "learning_rate": 1.980314816907954e-05, "loss": 0.8175, "step": 3688 }, { "epoch": 0.27417316982534373, "grad_norm": 2.3938746904228987, "learning_rate": 1.9802989717313897e-05, "loss": 0.8304, "step": 3689 }, { "epoch": 0.27424749163879597, "grad_norm": 2.7096613505344425, "learning_rate": 1.980283120243711e-05, "loss": 0.93, "step": 3690 }, { "epoch": 0.2743218134522482, "grad_norm": 2.4385674469829373, "learning_rate": 1.9802672624450208e-05, "loss": 0.8201, "step": 3691 }, { "epoch": 0.2743961352657005, "grad_norm": 2.239306180625961, "learning_rate": 1.9802513983354207e-05, "loss": 0.7919, "step": 3692 }, { "epoch": 0.27447045707915274, "grad_norm": 2.620176549593067, "learning_rate": 1.9802355279150132e-05, "loss": 0.8445, "step": 3693 }, { "epoch": 0.274544778892605, "grad_norm": 2.577641770971144, "learning_rate": 1.9802196511839e-05, "loss": 0.9041, "step": 3694 }, { "epoch": 0.2746191007060572, "grad_norm": 2.5112708076967416, "learning_rate": 1.9802037681421834e-05, "loss": 0.9918, "step": 3695 }, { "epoch": 0.27469342251950946, "grad_norm": 4.043719080366098, "learning_rate": 1.980187878789966e-05, "loss": 1.0783, "step": 3696 }, { "epoch": 0.27476774433296175, "grad_norm": 1.8021855882882094, "learning_rate": 1.98017198312735e-05, "loss": 0.9184, "step": 3697 }, { "epoch": 0.274842066146414, "grad_norm": 2.2746185214638994, "learning_rate": 1.980156081154437e-05, "loss": 1.0086, "step": 3698 }, { "epoch": 0.2749163879598662, "grad_norm": 1.8387195080734806, "learning_rate": 1.9801401728713306e-05, "loss": 0.8502, "step": 3699 }, { "epoch": 0.27499070977331846, "grad_norm": 2.7225948861104996, "learning_rate": 1.980124258278133e-05, "loss": 1.0706, "step": 3700 }, { "epoch": 0.2750650315867707, "grad_norm": 2.750333921487827, "learning_rate": 1.9801083373749455e-05, "loss": 0.8105, "step": 3701 }, { "epoch": 0.27513935340022294, "grad_norm": 2.107846400051198, "learning_rate": 1.9800924101618717e-05, "loss": 0.8124, "step": 3702 }, { "epoch": 0.27521367521367524, "grad_norm": 3.191345678023044, "learning_rate": 1.9800764766390135e-05, "loss": 0.9965, "step": 3703 }, { "epoch": 0.2752879970271275, "grad_norm": 2.1380865567511145, "learning_rate": 1.9800605368064742e-05, "loss": 0.806, "step": 3704 }, { "epoch": 0.2753623188405797, "grad_norm": 2.935318290361879, "learning_rate": 1.980044590664356e-05, "loss": 0.7968, "step": 3705 }, { "epoch": 0.27543664065403195, "grad_norm": 3.3835256065511357, "learning_rate": 1.9800286382127614e-05, "loss": 1.1234, "step": 3706 }, { "epoch": 0.2755109624674842, "grad_norm": 2.337093339380914, "learning_rate": 1.9800126794517936e-05, "loss": 0.76, "step": 3707 }, { "epoch": 0.2755852842809365, "grad_norm": 3.3121755806727307, "learning_rate": 1.9799967143815545e-05, "loss": 0.9761, "step": 3708 }, { "epoch": 0.2756596060943887, "grad_norm": 2.332654661753353, "learning_rate": 1.979980743002148e-05, "loss": 0.7292, "step": 3709 }, { "epoch": 0.27573392790784096, "grad_norm": 2.075619129368329, "learning_rate": 1.9799647653136756e-05, "loss": 0.8684, "step": 3710 }, { "epoch": 0.2758082497212932, "grad_norm": 2.4310883875721534, "learning_rate": 1.9799487813162414e-05, "loss": 0.9861, "step": 3711 }, { "epoch": 0.27588257153474544, "grad_norm": 2.485212580141604, "learning_rate": 1.9799327910099476e-05, "loss": 1.1164, "step": 3712 }, { "epoch": 0.2759568933481977, "grad_norm": 2.279091061584934, "learning_rate": 1.9799167943948975e-05, "loss": 0.7148, "step": 3713 }, { "epoch": 0.27603121516164997, "grad_norm": 2.48565863397216, "learning_rate": 1.9799007914711937e-05, "loss": 0.747, "step": 3714 }, { "epoch": 0.2761055369751022, "grad_norm": 2.2667065966113578, "learning_rate": 1.9798847822389397e-05, "loss": 0.805, "step": 3715 }, { "epoch": 0.27617985878855444, "grad_norm": 2.52109583101583, "learning_rate": 1.979868766698238e-05, "loss": 0.942, "step": 3716 }, { "epoch": 0.2762541806020067, "grad_norm": 2.7702611878391132, "learning_rate": 1.9798527448491925e-05, "loss": 0.7148, "step": 3717 }, { "epoch": 0.2763285024154589, "grad_norm": 3.4431353390144483, "learning_rate": 1.9798367166919057e-05, "loss": 1.0607, "step": 3718 }, { "epoch": 0.27640282422891116, "grad_norm": 2.381979378022434, "learning_rate": 1.979820682226481e-05, "loss": 1.0398, "step": 3719 }, { "epoch": 0.27647714604236345, "grad_norm": 2.789739669756828, "learning_rate": 1.979804641453021e-05, "loss": 1.0277, "step": 3720 }, { "epoch": 0.2765514678558157, "grad_norm": 2.979857050655675, "learning_rate": 1.97978859437163e-05, "loss": 0.8334, "step": 3721 }, { "epoch": 0.27662578966926793, "grad_norm": 2.2599933668923646, "learning_rate": 1.9797725409824112e-05, "loss": 1.0281, "step": 3722 }, { "epoch": 0.27670011148272017, "grad_norm": 2.6913362603885203, "learning_rate": 1.9797564812854673e-05, "loss": 0.9173, "step": 3723 }, { "epoch": 0.2767744332961724, "grad_norm": 2.4006832028182212, "learning_rate": 1.979740415280902e-05, "loss": 1.0177, "step": 3724 }, { "epoch": 0.2768487551096247, "grad_norm": 2.640574398203843, "learning_rate": 1.9797243429688184e-05, "loss": 0.8472, "step": 3725 }, { "epoch": 0.27692307692307694, "grad_norm": 2.267877676145644, "learning_rate": 1.9797082643493208e-05, "loss": 0.8055, "step": 3726 }, { "epoch": 0.2769973987365292, "grad_norm": 2.410439462001089, "learning_rate": 1.9796921794225123e-05, "loss": 0.7357, "step": 3727 }, { "epoch": 0.2770717205499814, "grad_norm": 1.785902698334078, "learning_rate": 1.979676088188496e-05, "loss": 0.5877, "step": 3728 }, { "epoch": 0.27714604236343365, "grad_norm": 3.1202729854537545, "learning_rate": 1.979659990647376e-05, "loss": 0.9858, "step": 3729 }, { "epoch": 0.2772203641768859, "grad_norm": 2.4722626259727223, "learning_rate": 1.9796438867992557e-05, "loss": 0.9467, "step": 3730 }, { "epoch": 0.2772946859903382, "grad_norm": 2.0546292116517444, "learning_rate": 1.979627776644239e-05, "loss": 0.8067, "step": 3731 }, { "epoch": 0.2773690078037904, "grad_norm": 2.330777809417224, "learning_rate": 1.9796116601824294e-05, "loss": 0.9071, "step": 3732 }, { "epoch": 0.27744332961724266, "grad_norm": 3.0343599017982386, "learning_rate": 1.979595537413931e-05, "loss": 0.9568, "step": 3733 }, { "epoch": 0.2775176514306949, "grad_norm": 2.1640578336742275, "learning_rate": 1.9795794083388474e-05, "loss": 0.8848, "step": 3734 }, { "epoch": 0.27759197324414714, "grad_norm": 1.805468330600847, "learning_rate": 1.9795632729572823e-05, "loss": 0.7239, "step": 3735 }, { "epoch": 0.27766629505759943, "grad_norm": 2.688965266350346, "learning_rate": 1.9795471312693394e-05, "loss": 1.1139, "step": 3736 }, { "epoch": 0.27774061687105167, "grad_norm": 2.09159729253617, "learning_rate": 1.979530983275123e-05, "loss": 0.9412, "step": 3737 }, { "epoch": 0.2778149386845039, "grad_norm": 2.291789889604927, "learning_rate": 1.9795148289747373e-05, "loss": 1.0266, "step": 3738 }, { "epoch": 0.27788926049795615, "grad_norm": 2.8336345335957502, "learning_rate": 1.9794986683682858e-05, "loss": 1.1292, "step": 3739 }, { "epoch": 0.2779635823114084, "grad_norm": 3.740208769309923, "learning_rate": 1.9794825014558725e-05, "loss": 1.0455, "step": 3740 }, { "epoch": 0.2780379041248606, "grad_norm": 2.6777964205460183, "learning_rate": 1.9794663282376018e-05, "loss": 1.1686, "step": 3741 }, { "epoch": 0.2781122259383129, "grad_norm": 2.8781291651045247, "learning_rate": 1.9794501487135778e-05, "loss": 0.9946, "step": 3742 }, { "epoch": 0.27818654775176516, "grad_norm": 2.346482661394152, "learning_rate": 1.9794339628839047e-05, "loss": 0.9852, "step": 3743 }, { "epoch": 0.2782608695652174, "grad_norm": 2.6026294050498904, "learning_rate": 1.9794177707486863e-05, "loss": 0.8746, "step": 3744 }, { "epoch": 0.27833519137866963, "grad_norm": 3.0704716358963093, "learning_rate": 1.979401572308027e-05, "loss": 1.0471, "step": 3745 }, { "epoch": 0.27840951319212187, "grad_norm": 2.365966585103119, "learning_rate": 1.9793853675620315e-05, "loss": 0.9616, "step": 3746 }, { "epoch": 0.2784838350055741, "grad_norm": 4.326159620452052, "learning_rate": 1.9793691565108035e-05, "loss": 1.0719, "step": 3747 }, { "epoch": 0.2785581568190264, "grad_norm": 2.4103142990967537, "learning_rate": 1.979352939154448e-05, "loss": 0.9058, "step": 3748 }, { "epoch": 0.27863247863247864, "grad_norm": 2.470925814509547, "learning_rate": 1.9793367154930684e-05, "loss": 0.8542, "step": 3749 }, { "epoch": 0.2787068004459309, "grad_norm": 2.5902399807899807, "learning_rate": 1.9793204855267703e-05, "loss": 0.9513, "step": 3750 }, { "epoch": 0.2787811222593831, "grad_norm": 3.05124389290256, "learning_rate": 1.979304249255658e-05, "loss": 0.8608, "step": 3751 }, { "epoch": 0.27885544407283536, "grad_norm": 2.235390632153477, "learning_rate": 1.9792880066798357e-05, "loss": 0.8272, "step": 3752 }, { "epoch": 0.27892976588628765, "grad_norm": 1.9350529295854522, "learning_rate": 1.9792717577994078e-05, "loss": 0.7765, "step": 3753 }, { "epoch": 0.2790040876997399, "grad_norm": 2.639502317114701, "learning_rate": 1.9792555026144795e-05, "loss": 1.103, "step": 3754 }, { "epoch": 0.2790784095131921, "grad_norm": 2.5126919179335583, "learning_rate": 1.9792392411251543e-05, "loss": 1.1514, "step": 3755 }, { "epoch": 0.27915273132664437, "grad_norm": 3.1736493771194922, "learning_rate": 1.9792229733315385e-05, "loss": 1.0403, "step": 3756 }, { "epoch": 0.2792270531400966, "grad_norm": 2.3293409998274988, "learning_rate": 1.9792066992337357e-05, "loss": 0.8428, "step": 3757 }, { "epoch": 0.27930137495354884, "grad_norm": 2.386595214137499, "learning_rate": 1.9791904188318508e-05, "loss": 0.8942, "step": 3758 }, { "epoch": 0.27937569676700114, "grad_norm": 2.0837228224969278, "learning_rate": 1.979174132125989e-05, "loss": 0.6054, "step": 3759 }, { "epoch": 0.2794500185804534, "grad_norm": 2.056642660791994, "learning_rate": 1.9791578391162548e-05, "loss": 0.8791, "step": 3760 }, { "epoch": 0.2795243403939056, "grad_norm": 1.991791602455489, "learning_rate": 1.9791415398027537e-05, "loss": 0.9037, "step": 3761 }, { "epoch": 0.27959866220735785, "grad_norm": 2.263652075195858, "learning_rate": 1.9791252341855894e-05, "loss": 0.8609, "step": 3762 }, { "epoch": 0.2796729840208101, "grad_norm": 2.0613192663404583, "learning_rate": 1.9791089222648683e-05, "loss": 0.9922, "step": 3763 }, { "epoch": 0.2797473058342624, "grad_norm": 2.7138057609446578, "learning_rate": 1.9790926040406943e-05, "loss": 0.7159, "step": 3764 }, { "epoch": 0.2798216276477146, "grad_norm": 2.2813042226862654, "learning_rate": 1.979076279513173e-05, "loss": 0.6578, "step": 3765 }, { "epoch": 0.27989594946116686, "grad_norm": 2.807723959856956, "learning_rate": 1.9790599486824096e-05, "loss": 0.8691, "step": 3766 }, { "epoch": 0.2799702712746191, "grad_norm": 2.197789869561746, "learning_rate": 1.9790436115485093e-05, "loss": 0.9675, "step": 3767 }, { "epoch": 0.28004459308807134, "grad_norm": 2.192440669823423, "learning_rate": 1.9790272681115767e-05, "loss": 0.9149, "step": 3768 }, { "epoch": 0.2801189149015236, "grad_norm": 2.164013449546379, "learning_rate": 1.979010918371717e-05, "loss": 1.0636, "step": 3769 }, { "epoch": 0.28019323671497587, "grad_norm": 2.4537532107531836, "learning_rate": 1.9789945623290365e-05, "loss": 0.8924, "step": 3770 }, { "epoch": 0.2802675585284281, "grad_norm": 1.9156544474995838, "learning_rate": 1.9789781999836393e-05, "loss": 0.8545, "step": 3771 }, { "epoch": 0.28034188034188035, "grad_norm": 2.457995874613844, "learning_rate": 1.9789618313356315e-05, "loss": 0.9435, "step": 3772 }, { "epoch": 0.2804162021553326, "grad_norm": 2.5917466881890827, "learning_rate": 1.978945456385118e-05, "loss": 0.9467, "step": 3773 }, { "epoch": 0.2804905239687848, "grad_norm": 1.9363248485946896, "learning_rate": 1.9789290751322046e-05, "loss": 0.6033, "step": 3774 }, { "epoch": 0.28056484578223706, "grad_norm": 2.4115618634812463, "learning_rate": 1.9789126875769968e-05, "loss": 0.9597, "step": 3775 }, { "epoch": 0.28063916759568935, "grad_norm": 2.3737923918242942, "learning_rate": 1.9788962937195995e-05, "loss": 0.9628, "step": 3776 }, { "epoch": 0.2807134894091416, "grad_norm": 2.476006996101526, "learning_rate": 1.978879893560119e-05, "loss": 0.7268, "step": 3777 }, { "epoch": 0.28078781122259383, "grad_norm": 2.224540457583595, "learning_rate": 1.9788634870986604e-05, "loss": 0.7691, "step": 3778 }, { "epoch": 0.28086213303604607, "grad_norm": 2.8094539371960385, "learning_rate": 1.9788470743353292e-05, "loss": 0.8639, "step": 3779 }, { "epoch": 0.2809364548494983, "grad_norm": 2.095844698489936, "learning_rate": 1.9788306552702318e-05, "loss": 0.8804, "step": 3780 }, { "epoch": 0.2810107766629506, "grad_norm": 2.1787652272859623, "learning_rate": 1.9788142299034732e-05, "loss": 0.8766, "step": 3781 }, { "epoch": 0.28108509847640284, "grad_norm": 2.3376518081497792, "learning_rate": 1.978797798235159e-05, "loss": 0.9653, "step": 3782 }, { "epoch": 0.2811594202898551, "grad_norm": 3.018373125653334, "learning_rate": 1.978781360265396e-05, "loss": 1.058, "step": 3783 }, { "epoch": 0.2812337421033073, "grad_norm": 2.205789856592216, "learning_rate": 1.978764915994289e-05, "loss": 0.9889, "step": 3784 }, { "epoch": 0.28130806391675955, "grad_norm": 2.0458833453706737, "learning_rate": 1.9787484654219445e-05, "loss": 0.7119, "step": 3785 }, { "epoch": 0.2813823857302118, "grad_norm": 2.1261023156314747, "learning_rate": 1.9787320085484683e-05, "loss": 0.9026, "step": 3786 }, { "epoch": 0.2814567075436641, "grad_norm": 2.9612242476809687, "learning_rate": 1.9787155453739656e-05, "loss": 1.0928, "step": 3787 }, { "epoch": 0.2815310293571163, "grad_norm": 3.5264643615476583, "learning_rate": 1.9786990758985432e-05, "loss": 1.079, "step": 3788 }, { "epoch": 0.28160535117056856, "grad_norm": 2.453256012822848, "learning_rate": 1.9786826001223074e-05, "loss": 0.9996, "step": 3789 }, { "epoch": 0.2816796729840208, "grad_norm": 2.173751251181972, "learning_rate": 1.9786661180453635e-05, "loss": 0.9512, "step": 3790 }, { "epoch": 0.28175399479747304, "grad_norm": 2.654377040634656, "learning_rate": 1.978649629667818e-05, "loss": 0.9838, "step": 3791 }, { "epoch": 0.28182831661092533, "grad_norm": 1.8275050290947474, "learning_rate": 1.9786331349897766e-05, "loss": 0.8116, "step": 3792 }, { "epoch": 0.28190263842437757, "grad_norm": 2.101419108739294, "learning_rate": 1.9786166340113463e-05, "loss": 1.0295, "step": 3793 }, { "epoch": 0.2819769602378298, "grad_norm": 1.8290235582057484, "learning_rate": 1.978600126732633e-05, "loss": 0.7517, "step": 3794 }, { "epoch": 0.28205128205128205, "grad_norm": 2.0574931009146455, "learning_rate": 1.978583613153742e-05, "loss": 0.873, "step": 3795 }, { "epoch": 0.2821256038647343, "grad_norm": 2.404334183676129, "learning_rate": 1.9785670932747813e-05, "loss": 1.0661, "step": 3796 }, { "epoch": 0.2821999256781865, "grad_norm": 2.568091239111754, "learning_rate": 1.978550567095856e-05, "loss": 1.0838, "step": 3797 }, { "epoch": 0.2822742474916388, "grad_norm": 2.1507915009387273, "learning_rate": 1.9785340346170732e-05, "loss": 0.9731, "step": 3798 }, { "epoch": 0.28234856930509106, "grad_norm": 3.015738186721353, "learning_rate": 1.978517495838539e-05, "loss": 1.0798, "step": 3799 }, { "epoch": 0.2824228911185433, "grad_norm": 2.210255194602384, "learning_rate": 1.97850095076036e-05, "loss": 0.8693, "step": 3800 }, { "epoch": 0.28249721293199553, "grad_norm": 2.756985480089919, "learning_rate": 1.9784843993826425e-05, "loss": 0.8848, "step": 3801 }, { "epoch": 0.28257153474544777, "grad_norm": 2.2730130676728195, "learning_rate": 1.978467841705493e-05, "loss": 0.5458, "step": 3802 }, { "epoch": 0.2826458565589, "grad_norm": 2.6822826201851573, "learning_rate": 1.978451277729019e-05, "loss": 0.7889, "step": 3803 }, { "epoch": 0.2827201783723523, "grad_norm": 2.102074443565569, "learning_rate": 1.9784347074533256e-05, "loss": 0.9551, "step": 3804 }, { "epoch": 0.28279450018580454, "grad_norm": 2.6542302826499427, "learning_rate": 1.9784181308785208e-05, "loss": 1.043, "step": 3805 }, { "epoch": 0.2828688219992568, "grad_norm": 3.2045941553538357, "learning_rate": 1.9784015480047107e-05, "loss": 0.6799, "step": 3806 }, { "epoch": 0.282943143812709, "grad_norm": 2.259777886145937, "learning_rate": 1.9783849588320024e-05, "loss": 0.9384, "step": 3807 }, { "epoch": 0.28301746562616126, "grad_norm": 2.2045602214683155, "learning_rate": 1.978368363360502e-05, "loss": 0.9192, "step": 3808 }, { "epoch": 0.28309178743961355, "grad_norm": 2.39487525629814, "learning_rate": 1.9783517615903173e-05, "loss": 0.9102, "step": 3809 }, { "epoch": 0.2831661092530658, "grad_norm": 2.109751119741787, "learning_rate": 1.9783351535215546e-05, "loss": 0.9304, "step": 3810 }, { "epoch": 0.28324043106651803, "grad_norm": 2.0963590854399747, "learning_rate": 1.9783185391543212e-05, "loss": 0.8537, "step": 3811 }, { "epoch": 0.28331475287997027, "grad_norm": 2.3017741398714815, "learning_rate": 1.9783019184887237e-05, "loss": 0.8822, "step": 3812 }, { "epoch": 0.2833890746934225, "grad_norm": 2.389340323683387, "learning_rate": 1.978285291524869e-05, "loss": 1.0386, "step": 3813 }, { "epoch": 0.28346339650687474, "grad_norm": 2.7545659972100753, "learning_rate": 1.9782686582628643e-05, "loss": 1.1277, "step": 3814 }, { "epoch": 0.28353771832032704, "grad_norm": 2.261574090916027, "learning_rate": 1.978252018702817e-05, "loss": 1.0055, "step": 3815 }, { "epoch": 0.2836120401337793, "grad_norm": 2.7658972616592212, "learning_rate": 1.9782353728448335e-05, "loss": 0.8459, "step": 3816 }, { "epoch": 0.2836863619472315, "grad_norm": 1.8386340734016227, "learning_rate": 1.978218720689022e-05, "loss": 0.8901, "step": 3817 }, { "epoch": 0.28376068376068375, "grad_norm": 2.2496932918024997, "learning_rate": 1.9782020622354886e-05, "loss": 0.9393, "step": 3818 }, { "epoch": 0.283835005574136, "grad_norm": 2.0851387171005604, "learning_rate": 1.9781853974843415e-05, "loss": 0.9818, "step": 3819 }, { "epoch": 0.2839093273875883, "grad_norm": 2.0146380061989455, "learning_rate": 1.9781687264356874e-05, "loss": 0.8287, "step": 3820 }, { "epoch": 0.2839836492010405, "grad_norm": 2.151612502773008, "learning_rate": 1.978152049089634e-05, "loss": 0.9176, "step": 3821 }, { "epoch": 0.28405797101449276, "grad_norm": 1.9446291575103773, "learning_rate": 1.978135365446288e-05, "loss": 0.7865, "step": 3822 }, { "epoch": 0.284132292827945, "grad_norm": 1.7645586605789165, "learning_rate": 1.9781186755057574e-05, "loss": 0.6067, "step": 3823 }, { "epoch": 0.28420661464139724, "grad_norm": 2.278824759628006, "learning_rate": 1.97810197926815e-05, "loss": 0.9234, "step": 3824 }, { "epoch": 0.2842809364548495, "grad_norm": 2.809001635652098, "learning_rate": 1.9780852767335723e-05, "loss": 0.9172, "step": 3825 }, { "epoch": 0.28435525826830177, "grad_norm": 1.9493044210323647, "learning_rate": 1.9780685679021324e-05, "loss": 0.7682, "step": 3826 }, { "epoch": 0.284429580081754, "grad_norm": 3.232316742841247, "learning_rate": 1.9780518527739382e-05, "loss": 0.7941, "step": 3827 }, { "epoch": 0.28450390189520625, "grad_norm": 2.5171781138893956, "learning_rate": 1.9780351313490967e-05, "loss": 0.8276, "step": 3828 }, { "epoch": 0.2845782237086585, "grad_norm": 2.7286382449947957, "learning_rate": 1.9780184036277154e-05, "loss": 0.8409, "step": 3829 }, { "epoch": 0.2846525455221107, "grad_norm": 2.3100003846266666, "learning_rate": 1.978001669609903e-05, "loss": 0.8768, "step": 3830 }, { "epoch": 0.28472686733556296, "grad_norm": 2.104692239501846, "learning_rate": 1.9779849292957662e-05, "loss": 0.8441, "step": 3831 }, { "epoch": 0.28480118914901525, "grad_norm": 3.3034084399147994, "learning_rate": 1.9779681826854133e-05, "loss": 1.0538, "step": 3832 }, { "epoch": 0.2848755109624675, "grad_norm": 2.464487369268993, "learning_rate": 1.9779514297789517e-05, "loss": 0.9179, "step": 3833 }, { "epoch": 0.28494983277591973, "grad_norm": 2.4799962363292423, "learning_rate": 1.97793467057649e-05, "loss": 1.0826, "step": 3834 }, { "epoch": 0.28502415458937197, "grad_norm": 2.025262094313951, "learning_rate": 1.9779179050781354e-05, "loss": 0.8425, "step": 3835 }, { "epoch": 0.2850984764028242, "grad_norm": 2.451642654516082, "learning_rate": 1.977901133283996e-05, "loss": 1.0793, "step": 3836 }, { "epoch": 0.2851727982162765, "grad_norm": 14.346048591669842, "learning_rate": 1.97788435519418e-05, "loss": 1.0643, "step": 3837 }, { "epoch": 0.28524712002972874, "grad_norm": 2.500099953644401, "learning_rate": 1.977867570808795e-05, "loss": 0.9157, "step": 3838 }, { "epoch": 0.285321441843181, "grad_norm": 2.346834502347458, "learning_rate": 1.9778507801279497e-05, "loss": 0.8979, "step": 3839 }, { "epoch": 0.2853957636566332, "grad_norm": 1.938833532943667, "learning_rate": 1.9778339831517513e-05, "loss": 1.0816, "step": 3840 }, { "epoch": 0.28547008547008546, "grad_norm": 2.7160796854578306, "learning_rate": 1.9778171798803088e-05, "loss": 0.7316, "step": 3841 }, { "epoch": 0.2855444072835377, "grad_norm": 2.2438024028834476, "learning_rate": 1.9778003703137297e-05, "loss": 0.9824, "step": 3842 }, { "epoch": 0.28561872909699, "grad_norm": 2.4429762963772563, "learning_rate": 1.977783554452123e-05, "loss": 0.7742, "step": 3843 }, { "epoch": 0.2856930509104422, "grad_norm": 2.0385822316428017, "learning_rate": 1.9777667322955965e-05, "loss": 0.9659, "step": 3844 }, { "epoch": 0.28576737272389446, "grad_norm": 2.107129718134783, "learning_rate": 1.9777499038442586e-05, "loss": 0.88, "step": 3845 }, { "epoch": 0.2858416945373467, "grad_norm": 2.4112667786931494, "learning_rate": 1.977733069098217e-05, "loss": 1.0732, "step": 3846 }, { "epoch": 0.28591601635079894, "grad_norm": 2.1969056159034213, "learning_rate": 1.977716228057581e-05, "loss": 0.7531, "step": 3847 }, { "epoch": 0.28599033816425123, "grad_norm": 1.6910532855748481, "learning_rate": 1.9776993807224585e-05, "loss": 0.6883, "step": 3848 }, { "epoch": 0.2860646599777035, "grad_norm": 2.635652377877988, "learning_rate": 1.9776825270929582e-05, "loss": 1.1382, "step": 3849 }, { "epoch": 0.2861389817911557, "grad_norm": 2.0569827485238683, "learning_rate": 1.9776656671691885e-05, "loss": 0.8196, "step": 3850 }, { "epoch": 0.28621330360460795, "grad_norm": 2.7859267089928363, "learning_rate": 1.9776488009512583e-05, "loss": 0.9916, "step": 3851 }, { "epoch": 0.2862876254180602, "grad_norm": 2.686079133550842, "learning_rate": 1.9776319284392753e-05, "loss": 1.0561, "step": 3852 }, { "epoch": 0.2863619472315124, "grad_norm": 1.7452799912311325, "learning_rate": 1.977615049633349e-05, "loss": 0.6834, "step": 3853 }, { "epoch": 0.2864362690449647, "grad_norm": 2.7057790437086506, "learning_rate": 1.9775981645335876e-05, "loss": 0.9692, "step": 3854 }, { "epoch": 0.28651059085841696, "grad_norm": 2.8857959255546546, "learning_rate": 1.9775812731400997e-05, "loss": 1.1027, "step": 3855 }, { "epoch": 0.2865849126718692, "grad_norm": 2.7088794056779637, "learning_rate": 1.9775643754529948e-05, "loss": 0.8174, "step": 3856 }, { "epoch": 0.28665923448532143, "grad_norm": 3.251528839077001, "learning_rate": 1.977547471472381e-05, "loss": 1.1718, "step": 3857 }, { "epoch": 0.2867335562987737, "grad_norm": 3.058745953752301, "learning_rate": 1.9775305611983673e-05, "loss": 1.0371, "step": 3858 }, { "epoch": 0.2868078781122259, "grad_norm": 2.3240711806027843, "learning_rate": 1.9775136446310627e-05, "loss": 0.9135, "step": 3859 }, { "epoch": 0.2868821999256782, "grad_norm": 2.4269321128615666, "learning_rate": 1.9774967217705752e-05, "loss": 0.712, "step": 3860 }, { "epoch": 0.28695652173913044, "grad_norm": 1.8971650721623317, "learning_rate": 1.9774797926170156e-05, "loss": 0.7025, "step": 3861 }, { "epoch": 0.2870308435525827, "grad_norm": 1.7366351032269802, "learning_rate": 1.9774628571704913e-05, "loss": 0.7405, "step": 3862 }, { "epoch": 0.2871051653660349, "grad_norm": 2.4053148306218386, "learning_rate": 1.9774459154311117e-05, "loss": 0.9728, "step": 3863 }, { "epoch": 0.28717948717948716, "grad_norm": 2.1193310875285616, "learning_rate": 1.977428967398986e-05, "loss": 0.7528, "step": 3864 }, { "epoch": 0.28725380899293945, "grad_norm": 2.1608215405947577, "learning_rate": 1.9774120130742238e-05, "loss": 0.8994, "step": 3865 }, { "epoch": 0.2873281308063917, "grad_norm": 2.706787215565973, "learning_rate": 1.977395052456933e-05, "loss": 0.976, "step": 3866 }, { "epoch": 0.28740245261984393, "grad_norm": 2.1410634377071367, "learning_rate": 1.9773780855472244e-05, "loss": 0.965, "step": 3867 }, { "epoch": 0.28747677443329617, "grad_norm": 2.626785329703076, "learning_rate": 1.977361112345206e-05, "loss": 0.9412, "step": 3868 }, { "epoch": 0.2875510962467484, "grad_norm": 2.566019635990859, "learning_rate": 1.9773441328509875e-05, "loss": 0.8632, "step": 3869 }, { "epoch": 0.28762541806020064, "grad_norm": 2.642326094321487, "learning_rate": 1.9773271470646783e-05, "loss": 1.1074, "step": 3870 }, { "epoch": 0.28769973987365294, "grad_norm": 2.5356143143546737, "learning_rate": 1.9773101549863877e-05, "loss": 1.034, "step": 3871 }, { "epoch": 0.2877740616871052, "grad_norm": 2.432457185627659, "learning_rate": 1.9772931566162246e-05, "loss": 0.8639, "step": 3872 }, { "epoch": 0.2878483835005574, "grad_norm": 2.347653050068267, "learning_rate": 1.977276151954299e-05, "loss": 0.7349, "step": 3873 }, { "epoch": 0.28792270531400965, "grad_norm": 2.2343039511407277, "learning_rate": 1.9772591410007206e-05, "loss": 0.9328, "step": 3874 }, { "epoch": 0.2879970271274619, "grad_norm": 2.3325157645959975, "learning_rate": 1.9772421237555982e-05, "loss": 0.8287, "step": 3875 }, { "epoch": 0.2880713489409142, "grad_norm": 2.751586127340751, "learning_rate": 1.977225100219042e-05, "loss": 0.9026, "step": 3876 }, { "epoch": 0.2881456707543664, "grad_norm": 2.6056751362743125, "learning_rate": 1.977208070391161e-05, "loss": 0.8155, "step": 3877 }, { "epoch": 0.28821999256781866, "grad_norm": 3.343086989608497, "learning_rate": 1.9771910342720654e-05, "loss": 0.7568, "step": 3878 }, { "epoch": 0.2882943143812709, "grad_norm": 1.8471426637498944, "learning_rate": 1.9771739918618645e-05, "loss": 0.833, "step": 3879 }, { "epoch": 0.28836863619472314, "grad_norm": 2.196330275793642, "learning_rate": 1.977156943160668e-05, "loss": 0.9926, "step": 3880 }, { "epoch": 0.2884429580081754, "grad_norm": 2.022079159445545, "learning_rate": 1.977139888168586e-05, "loss": 0.8979, "step": 3881 }, { "epoch": 0.28851727982162767, "grad_norm": 2.350071894170297, "learning_rate": 1.977122826885728e-05, "loss": 0.854, "step": 3882 }, { "epoch": 0.2885916016350799, "grad_norm": 1.983573429864029, "learning_rate": 1.977105759312204e-05, "loss": 0.959, "step": 3883 }, { "epoch": 0.28866592344853215, "grad_norm": 2.0730783734319504, "learning_rate": 1.977088685448124e-05, "loss": 0.8107, "step": 3884 }, { "epoch": 0.2887402452619844, "grad_norm": 2.4009898857355436, "learning_rate": 1.9770716052935975e-05, "loss": 1.0171, "step": 3885 }, { "epoch": 0.2888145670754366, "grad_norm": 2.7727670545706413, "learning_rate": 1.9770545188487348e-05, "loss": 1.04, "step": 3886 }, { "epoch": 0.28888888888888886, "grad_norm": 2.4992696366160967, "learning_rate": 1.9770374261136455e-05, "loss": 0.896, "step": 3887 }, { "epoch": 0.28896321070234116, "grad_norm": 1.964574737030567, "learning_rate": 1.9770203270884403e-05, "loss": 0.8714, "step": 3888 }, { "epoch": 0.2890375325157934, "grad_norm": 3.1358106609849274, "learning_rate": 1.9770032217732292e-05, "loss": 1.1049, "step": 3889 }, { "epoch": 0.28911185432924563, "grad_norm": 2.988925666725314, "learning_rate": 1.9769861101681215e-05, "loss": 0.9324, "step": 3890 }, { "epoch": 0.28918617614269787, "grad_norm": 2.2368462650601284, "learning_rate": 1.976968992273228e-05, "loss": 0.8811, "step": 3891 }, { "epoch": 0.2892604979561501, "grad_norm": 2.331308488563037, "learning_rate": 1.976951868088659e-05, "loss": 0.6055, "step": 3892 }, { "epoch": 0.2893348197696024, "grad_norm": 1.7274801528248638, "learning_rate": 1.9769347376145244e-05, "loss": 0.7967, "step": 3893 }, { "epoch": 0.28940914158305464, "grad_norm": 2.32343426581676, "learning_rate": 1.976917600850935e-05, "loss": 1.0384, "step": 3894 }, { "epoch": 0.2894834633965069, "grad_norm": 2.777029528491773, "learning_rate": 1.9769004577980003e-05, "loss": 0.9836, "step": 3895 }, { "epoch": 0.2895577852099591, "grad_norm": 2.2070747634189405, "learning_rate": 1.9768833084558313e-05, "loss": 0.7008, "step": 3896 }, { "epoch": 0.28963210702341136, "grad_norm": 2.009419987322854, "learning_rate": 1.976866152824538e-05, "loss": 0.8111, "step": 3897 }, { "epoch": 0.2897064288368636, "grad_norm": 2.659923126961069, "learning_rate": 1.9768489909042317e-05, "loss": 1.1076, "step": 3898 }, { "epoch": 0.2897807506503159, "grad_norm": 1.997437826684231, "learning_rate": 1.976831822695022e-05, "loss": 0.6584, "step": 3899 }, { "epoch": 0.2898550724637681, "grad_norm": 2.1126557475627936, "learning_rate": 1.9768146481970198e-05, "loss": 0.9514, "step": 3900 }, { "epoch": 0.28992939427722036, "grad_norm": 2.0221121327275067, "learning_rate": 1.9767974674103355e-05, "loss": 0.9379, "step": 3901 }, { "epoch": 0.2900037160906726, "grad_norm": 2.3506086421157257, "learning_rate": 1.9767802803350794e-05, "loss": 0.9928, "step": 3902 }, { "epoch": 0.29007803790412484, "grad_norm": 2.8681907733665883, "learning_rate": 1.9767630869713632e-05, "loss": 0.9917, "step": 3903 }, { "epoch": 0.29015235971757714, "grad_norm": 2.264371205248879, "learning_rate": 1.9767458873192965e-05, "loss": 0.9298, "step": 3904 }, { "epoch": 0.2902266815310294, "grad_norm": 2.8301309355711304, "learning_rate": 1.9767286813789907e-05, "loss": 0.7919, "step": 3905 }, { "epoch": 0.2903010033444816, "grad_norm": 2.053341035603334, "learning_rate": 1.9767114691505563e-05, "loss": 0.9103, "step": 3906 }, { "epoch": 0.29037532515793385, "grad_norm": 2.7571948836652305, "learning_rate": 1.976694250634104e-05, "loss": 1.1236, "step": 3907 }, { "epoch": 0.2904496469713861, "grad_norm": 2.3836940824818447, "learning_rate": 1.976677025829745e-05, "loss": 0.9038, "step": 3908 }, { "epoch": 0.2905239687848383, "grad_norm": 2.5447819805467278, "learning_rate": 1.9766597947375895e-05, "loss": 0.9481, "step": 3909 }, { "epoch": 0.2905982905982906, "grad_norm": 2.2615750493965137, "learning_rate": 1.9766425573577494e-05, "loss": 0.7138, "step": 3910 }, { "epoch": 0.29067261241174286, "grad_norm": 1.8855219182327763, "learning_rate": 1.976625313690335e-05, "loss": 0.8343, "step": 3911 }, { "epoch": 0.2907469342251951, "grad_norm": 2.386256216215523, "learning_rate": 1.9766080637354575e-05, "loss": 0.8009, "step": 3912 }, { "epoch": 0.29082125603864734, "grad_norm": 2.6708315901001525, "learning_rate": 1.9765908074932277e-05, "loss": 0.9049, "step": 3913 }, { "epoch": 0.2908955778520996, "grad_norm": 2.3821268864131744, "learning_rate": 1.9765735449637576e-05, "loss": 0.8488, "step": 3914 }, { "epoch": 0.2909698996655518, "grad_norm": 2.188343921048522, "learning_rate": 1.9765562761471573e-05, "loss": 0.8473, "step": 3915 }, { "epoch": 0.2910442214790041, "grad_norm": 2.0092603145787105, "learning_rate": 1.9765390010435383e-05, "loss": 0.8713, "step": 3916 }, { "epoch": 0.29111854329245634, "grad_norm": 2.687692657848782, "learning_rate": 1.976521719653012e-05, "loss": 0.9314, "step": 3917 }, { "epoch": 0.2911928651059086, "grad_norm": 2.2559214350631005, "learning_rate": 1.9765044319756894e-05, "loss": 0.9787, "step": 3918 }, { "epoch": 0.2912671869193608, "grad_norm": 3.579873240085772, "learning_rate": 1.976487138011682e-05, "loss": 0.8805, "step": 3919 }, { "epoch": 0.29134150873281306, "grad_norm": 2.589694112176005, "learning_rate": 1.9764698377611013e-05, "loss": 0.9878, "step": 3920 }, { "epoch": 0.29141583054626535, "grad_norm": 2.230874843676394, "learning_rate": 1.976452531224058e-05, "loss": 0.89, "step": 3921 }, { "epoch": 0.2914901523597176, "grad_norm": 2.4580244960357085, "learning_rate": 1.9764352184006644e-05, "loss": 0.918, "step": 3922 }, { "epoch": 0.29156447417316983, "grad_norm": 2.2002855945211928, "learning_rate": 1.9764178992910316e-05, "loss": 0.9494, "step": 3923 }, { "epoch": 0.29163879598662207, "grad_norm": 1.9724057126640289, "learning_rate": 1.9764005738952705e-05, "loss": 0.8742, "step": 3924 }, { "epoch": 0.2917131178000743, "grad_norm": 2.2982620843527943, "learning_rate": 1.9763832422134935e-05, "loss": 1.2685, "step": 3925 }, { "epoch": 0.29178743961352654, "grad_norm": 1.8505738576950288, "learning_rate": 1.976365904245812e-05, "loss": 0.8001, "step": 3926 }, { "epoch": 0.29186176142697884, "grad_norm": 2.352058891025014, "learning_rate": 1.9763485599923374e-05, "loss": 0.9644, "step": 3927 }, { "epoch": 0.2919360832404311, "grad_norm": 2.010243622463523, "learning_rate": 1.976331209453181e-05, "loss": 0.9163, "step": 3928 }, { "epoch": 0.2920104050538833, "grad_norm": 2.5226286267471405, "learning_rate": 1.9763138526284553e-05, "loss": 1.0779, "step": 3929 }, { "epoch": 0.29208472686733555, "grad_norm": 2.3873186351506583, "learning_rate": 1.9762964895182717e-05, "loss": 0.9792, "step": 3930 }, { "epoch": 0.2921590486807878, "grad_norm": 2.5765453786616104, "learning_rate": 1.9762791201227417e-05, "loss": 0.9301, "step": 3931 }, { "epoch": 0.2922333704942401, "grad_norm": 2.226180167423644, "learning_rate": 1.9762617444419776e-05, "loss": 0.8223, "step": 3932 }, { "epoch": 0.2923076923076923, "grad_norm": 2.256651244750961, "learning_rate": 1.976244362476091e-05, "loss": 0.7381, "step": 3933 }, { "epoch": 0.29238201412114456, "grad_norm": 2.3000252063206386, "learning_rate": 1.976226974225194e-05, "loss": 0.7845, "step": 3934 }, { "epoch": 0.2924563359345968, "grad_norm": 2.516798622159625, "learning_rate": 1.9762095796893982e-05, "loss": 0.9684, "step": 3935 }, { "epoch": 0.29253065774804904, "grad_norm": 2.5667200566435047, "learning_rate": 1.9761921788688158e-05, "loss": 1.0127, "step": 3936 }, { "epoch": 0.2926049795615013, "grad_norm": 2.02410960386597, "learning_rate": 1.976174771763559e-05, "loss": 0.9255, "step": 3937 }, { "epoch": 0.29267930137495357, "grad_norm": 3.110271183790895, "learning_rate": 1.976157358373739e-05, "loss": 1.03, "step": 3938 }, { "epoch": 0.2927536231884058, "grad_norm": 1.965912447539257, "learning_rate": 1.9761399386994694e-05, "loss": 0.8132, "step": 3939 }, { "epoch": 0.29282794500185805, "grad_norm": 2.150536034715067, "learning_rate": 1.976122512740861e-05, "loss": 0.876, "step": 3940 }, { "epoch": 0.2929022668153103, "grad_norm": 2.2217520154743027, "learning_rate": 1.9761050804980265e-05, "loss": 0.9582, "step": 3941 }, { "epoch": 0.2929765886287625, "grad_norm": 2.1551307224203033, "learning_rate": 1.9760876419710785e-05, "loss": 0.945, "step": 3942 }, { "epoch": 0.29305091044221476, "grad_norm": 2.286077224397258, "learning_rate": 1.9760701971601287e-05, "loss": 0.796, "step": 3943 }, { "epoch": 0.29312523225566706, "grad_norm": 2.76785493279435, "learning_rate": 1.9760527460652897e-05, "loss": 0.9829, "step": 3944 }, { "epoch": 0.2931995540691193, "grad_norm": 3.494028759275811, "learning_rate": 1.976035288686674e-05, "loss": 0.719, "step": 3945 }, { "epoch": 0.29327387588257153, "grad_norm": 2.2204160014423553, "learning_rate": 1.9760178250243932e-05, "loss": 0.6061, "step": 3946 }, { "epoch": 0.29334819769602377, "grad_norm": 2.035454000410244, "learning_rate": 1.9760003550785605e-05, "loss": 0.8206, "step": 3947 }, { "epoch": 0.293422519509476, "grad_norm": 2.424744978371629, "learning_rate": 1.9759828788492884e-05, "loss": 0.6767, "step": 3948 }, { "epoch": 0.2934968413229283, "grad_norm": 2.5470395650534057, "learning_rate": 1.975965396336689e-05, "loss": 1.0442, "step": 3949 }, { "epoch": 0.29357116313638054, "grad_norm": 3.237923028123835, "learning_rate": 1.975947907540875e-05, "loss": 0.798, "step": 3950 }, { "epoch": 0.2936454849498328, "grad_norm": 2.1766862333975276, "learning_rate": 1.975930412461959e-05, "loss": 0.9856, "step": 3951 }, { "epoch": 0.293719806763285, "grad_norm": 2.2479553852832495, "learning_rate": 1.975912911100054e-05, "loss": 0.9999, "step": 3952 }, { "epoch": 0.29379412857673726, "grad_norm": 3.33389091994125, "learning_rate": 1.9758954034552716e-05, "loss": 0.9937, "step": 3953 }, { "epoch": 0.2938684503901895, "grad_norm": 2.0924034183030376, "learning_rate": 1.9758778895277255e-05, "loss": 0.9419, "step": 3954 }, { "epoch": 0.2939427722036418, "grad_norm": 2.2282107196715826, "learning_rate": 1.9758603693175283e-05, "loss": 0.8215, "step": 3955 }, { "epoch": 0.294017094017094, "grad_norm": 2.1873718407264806, "learning_rate": 1.975842842824793e-05, "loss": 0.8331, "step": 3956 }, { "epoch": 0.29409141583054627, "grad_norm": 2.633317632425526, "learning_rate": 1.9758253100496314e-05, "loss": 1.097, "step": 3957 }, { "epoch": 0.2941657376439985, "grad_norm": 2.235873154071272, "learning_rate": 1.9758077709921576e-05, "loss": 0.8724, "step": 3958 }, { "epoch": 0.29424005945745074, "grad_norm": 1.9141935767611438, "learning_rate": 1.9757902256524838e-05, "loss": 0.6544, "step": 3959 }, { "epoch": 0.29431438127090304, "grad_norm": 2.1603531136795064, "learning_rate": 1.9757726740307233e-05, "loss": 1.0338, "step": 3960 }, { "epoch": 0.2943887030843553, "grad_norm": 2.6146630250921348, "learning_rate": 1.9757551161269887e-05, "loss": 0.7818, "step": 3961 }, { "epoch": 0.2944630248978075, "grad_norm": 1.8322483640717966, "learning_rate": 1.9757375519413935e-05, "loss": 0.8066, "step": 3962 }, { "epoch": 0.29453734671125975, "grad_norm": 2.320878832518691, "learning_rate": 1.9757199814740506e-05, "loss": 1.0971, "step": 3963 }, { "epoch": 0.294611668524712, "grad_norm": 2.414895647090885, "learning_rate": 1.9757024047250726e-05, "loss": 0.7224, "step": 3964 }, { "epoch": 0.2946859903381642, "grad_norm": 2.3765515099213097, "learning_rate": 1.975684821694574e-05, "loss": 0.6289, "step": 3965 }, { "epoch": 0.2947603121516165, "grad_norm": 1.7095466328380489, "learning_rate": 1.9756672323826666e-05, "loss": 0.6465, "step": 3966 }, { "epoch": 0.29483463396506876, "grad_norm": 2.34797170356801, "learning_rate": 1.975649636789464e-05, "loss": 0.9165, "step": 3967 }, { "epoch": 0.294908955778521, "grad_norm": 1.7493879351657937, "learning_rate": 1.9756320349150803e-05, "loss": 0.8877, "step": 3968 }, { "epoch": 0.29498327759197324, "grad_norm": 2.135572144072241, "learning_rate": 1.9756144267596278e-05, "loss": 0.9946, "step": 3969 }, { "epoch": 0.2950575994054255, "grad_norm": 2.6210361121656165, "learning_rate": 1.9755968123232202e-05, "loss": 0.9521, "step": 3970 }, { "epoch": 0.2951319212188777, "grad_norm": 2.1140803643551114, "learning_rate": 1.975579191605971e-05, "loss": 0.7381, "step": 3971 }, { "epoch": 0.29520624303233, "grad_norm": 2.0021863765320007, "learning_rate": 1.9755615646079936e-05, "loss": 0.8368, "step": 3972 }, { "epoch": 0.29528056484578225, "grad_norm": 2.3192757226306684, "learning_rate": 1.9755439313294015e-05, "loss": 0.9239, "step": 3973 }, { "epoch": 0.2953548866592345, "grad_norm": 2.4554910322788674, "learning_rate": 1.975526291770308e-05, "loss": 0.8194, "step": 3974 }, { "epoch": 0.2954292084726867, "grad_norm": 1.8552188911594403, "learning_rate": 1.975508645930827e-05, "loss": 0.8447, "step": 3975 }, { "epoch": 0.29550353028613896, "grad_norm": 1.9797230762795197, "learning_rate": 1.975490993811072e-05, "loss": 0.8383, "step": 3976 }, { "epoch": 0.29557785209959125, "grad_norm": 2.9871492749044295, "learning_rate": 1.9754733354111566e-05, "loss": 1.0289, "step": 3977 }, { "epoch": 0.2956521739130435, "grad_norm": 2.2001072909132113, "learning_rate": 1.9754556707311944e-05, "loss": 0.8738, "step": 3978 }, { "epoch": 0.29572649572649573, "grad_norm": 2.1164244778336734, "learning_rate": 1.975437999771299e-05, "loss": 0.8428, "step": 3979 }, { "epoch": 0.29580081753994797, "grad_norm": 2.806878616703854, "learning_rate": 1.975420322531585e-05, "loss": 0.8877, "step": 3980 }, { "epoch": 0.2958751393534002, "grad_norm": 1.8574816183845382, "learning_rate": 1.975402639012165e-05, "loss": 0.5363, "step": 3981 }, { "epoch": 0.29594946116685245, "grad_norm": 2.395384100343058, "learning_rate": 1.9753849492131537e-05, "loss": 0.9677, "step": 3982 }, { "epoch": 0.29602378298030474, "grad_norm": 1.8620139645384628, "learning_rate": 1.9753672531346645e-05, "loss": 0.8008, "step": 3983 }, { "epoch": 0.296098104793757, "grad_norm": 7.557323385266044, "learning_rate": 1.9753495507768113e-05, "loss": 1.0102, "step": 3984 }, { "epoch": 0.2961724266072092, "grad_norm": 2.292683811307751, "learning_rate": 1.975331842139709e-05, "loss": 0.9386, "step": 3985 }, { "epoch": 0.29624674842066145, "grad_norm": 2.6354408743639572, "learning_rate": 1.9753141272234707e-05, "loss": 1.0753, "step": 3986 }, { "epoch": 0.2963210702341137, "grad_norm": 2.3156847002850864, "learning_rate": 1.9752964060282102e-05, "loss": 0.8164, "step": 3987 }, { "epoch": 0.296395392047566, "grad_norm": 2.3166436605059966, "learning_rate": 1.9752786785540422e-05, "loss": 0.7703, "step": 3988 }, { "epoch": 0.2964697138610182, "grad_norm": 3.2073938889144076, "learning_rate": 1.9752609448010812e-05, "loss": 1.0217, "step": 3989 }, { "epoch": 0.29654403567447046, "grad_norm": 1.968277239158488, "learning_rate": 1.97524320476944e-05, "loss": 0.9528, "step": 3990 }, { "epoch": 0.2966183574879227, "grad_norm": 1.9182501359632713, "learning_rate": 1.9752254584592343e-05, "loss": 1.0208, "step": 3991 }, { "epoch": 0.29669267930137494, "grad_norm": 2.2305268931674505, "learning_rate": 1.9752077058705774e-05, "loss": 0.9518, "step": 3992 }, { "epoch": 0.2967670011148272, "grad_norm": 1.9761512336866238, "learning_rate": 1.975189947003584e-05, "loss": 0.8698, "step": 3993 }, { "epoch": 0.29684132292827947, "grad_norm": 2.1692447381294255, "learning_rate": 1.9751721818583686e-05, "loss": 0.7833, "step": 3994 }, { "epoch": 0.2969156447417317, "grad_norm": 4.555607392209191, "learning_rate": 1.975154410435045e-05, "loss": 0.9575, "step": 3995 }, { "epoch": 0.29698996655518395, "grad_norm": 2.891481420427761, "learning_rate": 1.9751366327337277e-05, "loss": 0.9423, "step": 3996 }, { "epoch": 0.2970642883686362, "grad_norm": 1.7555976432384819, "learning_rate": 1.9751188487545316e-05, "loss": 0.9155, "step": 3997 }, { "epoch": 0.2971386101820884, "grad_norm": 1.9384363139172238, "learning_rate": 1.975101058497571e-05, "loss": 0.8684, "step": 3998 }, { "epoch": 0.29721293199554066, "grad_norm": 2.104212569525744, "learning_rate": 1.97508326196296e-05, "loss": 0.6667, "step": 3999 }, { "epoch": 0.29728725380899296, "grad_norm": 2.4029069637509255, "learning_rate": 1.975065459150814e-05, "loss": 0.8989, "step": 4000 }, { "epoch": 0.2973615756224452, "grad_norm": 2.078042217710599, "learning_rate": 1.9750476500612468e-05, "loss": 0.9304, "step": 4001 }, { "epoch": 0.29743589743589743, "grad_norm": 2.5464276278621325, "learning_rate": 1.975029834694374e-05, "loss": 0.8711, "step": 4002 }, { "epoch": 0.29751021924934967, "grad_norm": 3.346524600091209, "learning_rate": 1.975012013050309e-05, "loss": 0.8253, "step": 4003 }, { "epoch": 0.2975845410628019, "grad_norm": 2.4042832805609153, "learning_rate": 1.9749941851291677e-05, "loss": 0.9467, "step": 4004 }, { "epoch": 0.2976588628762542, "grad_norm": 1.9585774094966129, "learning_rate": 1.9749763509310642e-05, "loss": 0.7491, "step": 4005 }, { "epoch": 0.29773318468970644, "grad_norm": 2.234489941528262, "learning_rate": 1.974958510456114e-05, "loss": 0.7881, "step": 4006 }, { "epoch": 0.2978075065031587, "grad_norm": 2.2185098628146656, "learning_rate": 1.974940663704431e-05, "loss": 0.827, "step": 4007 }, { "epoch": 0.2978818283166109, "grad_norm": 2.337398211838252, "learning_rate": 1.9749228106761307e-05, "loss": 0.9618, "step": 4008 }, { "epoch": 0.29795615013006316, "grad_norm": 2.053867459808637, "learning_rate": 1.974904951371328e-05, "loss": 0.9983, "step": 4009 }, { "epoch": 0.2980304719435154, "grad_norm": 1.884109820522232, "learning_rate": 1.9748870857901377e-05, "loss": 0.9024, "step": 4010 }, { "epoch": 0.2981047937569677, "grad_norm": 2.4786541963255075, "learning_rate": 1.974869213932675e-05, "loss": 1.0692, "step": 4011 }, { "epoch": 0.29817911557041993, "grad_norm": 2.328636290578159, "learning_rate": 1.9748513357990548e-05, "loss": 0.8526, "step": 4012 }, { "epoch": 0.29825343738387217, "grad_norm": 2.1263437717715945, "learning_rate": 1.9748334513893925e-05, "loss": 0.8441, "step": 4013 }, { "epoch": 0.2983277591973244, "grad_norm": 2.113105348623419, "learning_rate": 1.974815560703803e-05, "loss": 1.0573, "step": 4014 }, { "epoch": 0.29840208101077664, "grad_norm": 3.1010116900640234, "learning_rate": 1.9747976637424015e-05, "loss": 0.8441, "step": 4015 }, { "epoch": 0.29847640282422894, "grad_norm": 1.8093963682904264, "learning_rate": 1.9747797605053028e-05, "loss": 0.7953, "step": 4016 }, { "epoch": 0.2985507246376812, "grad_norm": 2.965643015070169, "learning_rate": 1.974761850992623e-05, "loss": 0.7932, "step": 4017 }, { "epoch": 0.2986250464511334, "grad_norm": 2.4857886123847206, "learning_rate": 1.974743935204477e-05, "loss": 0.7477, "step": 4018 }, { "epoch": 0.29869936826458565, "grad_norm": 2.2027894169729523, "learning_rate": 1.9747260131409796e-05, "loss": 0.8509, "step": 4019 }, { "epoch": 0.2987736900780379, "grad_norm": 2.301277125889378, "learning_rate": 1.9747080848022473e-05, "loss": 0.9222, "step": 4020 }, { "epoch": 0.29884801189149013, "grad_norm": 3.0915232145760876, "learning_rate": 1.9746901501883947e-05, "loss": 1.1309, "step": 4021 }, { "epoch": 0.2989223337049424, "grad_norm": 2.074217071917418, "learning_rate": 1.9746722092995373e-05, "loss": 0.9331, "step": 4022 }, { "epoch": 0.29899665551839466, "grad_norm": 2.0031897562850474, "learning_rate": 1.974654262135791e-05, "loss": 0.9684, "step": 4023 }, { "epoch": 0.2990709773318469, "grad_norm": 11.202389135545069, "learning_rate": 1.9746363086972713e-05, "loss": 1.1923, "step": 4024 }, { "epoch": 0.29914529914529914, "grad_norm": 2.363759030092852, "learning_rate": 1.9746183489840933e-05, "loss": 0.8722, "step": 4025 }, { "epoch": 0.2992196209587514, "grad_norm": 2.4423225565475932, "learning_rate": 1.974600382996373e-05, "loss": 1.0695, "step": 4026 }, { "epoch": 0.29929394277220367, "grad_norm": 2.203133001059507, "learning_rate": 1.974582410734226e-05, "loss": 0.7046, "step": 4027 }, { "epoch": 0.2993682645856559, "grad_norm": 2.012228234956307, "learning_rate": 1.9745644321977684e-05, "loss": 0.7477, "step": 4028 }, { "epoch": 0.29944258639910815, "grad_norm": 2.1416268251777675, "learning_rate": 1.9745464473871153e-05, "loss": 1.137, "step": 4029 }, { "epoch": 0.2995169082125604, "grad_norm": 2.550843713170728, "learning_rate": 1.9745284563023825e-05, "loss": 0.8652, "step": 4030 }, { "epoch": 0.2995912300260126, "grad_norm": 1.762864228653558, "learning_rate": 1.9745104589436865e-05, "loss": 0.7221, "step": 4031 }, { "epoch": 0.29966555183946486, "grad_norm": 2.6557859007339504, "learning_rate": 1.9744924553111424e-05, "loss": 0.9681, "step": 4032 }, { "epoch": 0.29973987365291715, "grad_norm": 2.6600927894616415, "learning_rate": 1.9744744454048662e-05, "loss": 0.8803, "step": 4033 }, { "epoch": 0.2998141954663694, "grad_norm": 1.8571940151874742, "learning_rate": 1.9744564292249747e-05, "loss": 0.5975, "step": 4034 }, { "epoch": 0.29988851727982163, "grad_norm": 2.4671209590215035, "learning_rate": 1.974438406771583e-05, "loss": 1.0121, "step": 4035 }, { "epoch": 0.29996283909327387, "grad_norm": 2.5207558805308627, "learning_rate": 1.974420378044807e-05, "loss": 1.0032, "step": 4036 }, { "epoch": 0.3000371609067261, "grad_norm": 2.1619950517656132, "learning_rate": 1.9744023430447638e-05, "loss": 0.8671, "step": 4037 }, { "epoch": 0.30011148272017835, "grad_norm": 2.387936270531701, "learning_rate": 1.9743843017715684e-05, "loss": 1.0748, "step": 4038 }, { "epoch": 0.30018580453363064, "grad_norm": 2.5745284654342258, "learning_rate": 1.9743662542253377e-05, "loss": 1.0669, "step": 4039 }, { "epoch": 0.3002601263470829, "grad_norm": 3.4839612700920974, "learning_rate": 1.9743482004061878e-05, "loss": 0.9115, "step": 4040 }, { "epoch": 0.3003344481605351, "grad_norm": 2.121267741380209, "learning_rate": 1.9743301403142344e-05, "loss": 0.8899, "step": 4041 }, { "epoch": 0.30040876997398736, "grad_norm": 2.2340911671455252, "learning_rate": 1.974312073949594e-05, "loss": 0.9247, "step": 4042 }, { "epoch": 0.3004830917874396, "grad_norm": 2.487147751506389, "learning_rate": 1.9742940013123833e-05, "loss": 0.8583, "step": 4043 }, { "epoch": 0.3005574136008919, "grad_norm": 2.7833836213703544, "learning_rate": 1.9742759224027185e-05, "loss": 1.0958, "step": 4044 }, { "epoch": 0.3006317354143441, "grad_norm": 1.8854242512221053, "learning_rate": 1.9742578372207154e-05, "loss": 0.7463, "step": 4045 }, { "epoch": 0.30070605722779636, "grad_norm": 2.2202885566124198, "learning_rate": 1.9742397457664913e-05, "loss": 0.9291, "step": 4046 }, { "epoch": 0.3007803790412486, "grad_norm": 2.3453045344040415, "learning_rate": 1.974221648040162e-05, "loss": 0.9408, "step": 4047 }, { "epoch": 0.30085470085470084, "grad_norm": 2.2931589117682254, "learning_rate": 1.9742035440418444e-05, "loss": 0.9037, "step": 4048 }, { "epoch": 0.3009290226681531, "grad_norm": 2.1965266124111955, "learning_rate": 1.9741854337716548e-05, "loss": 0.9406, "step": 4049 }, { "epoch": 0.3010033444816054, "grad_norm": 2.0810793863070685, "learning_rate": 1.9741673172297103e-05, "loss": 0.8876, "step": 4050 }, { "epoch": 0.3010776662950576, "grad_norm": 2.284013851686728, "learning_rate": 1.9741491944161266e-05, "loss": 0.7528, "step": 4051 }, { "epoch": 0.30115198810850985, "grad_norm": 2.2694464000304606, "learning_rate": 1.974131065331021e-05, "loss": 0.913, "step": 4052 }, { "epoch": 0.3012263099219621, "grad_norm": 2.3771218221962043, "learning_rate": 1.9741129299745108e-05, "loss": 0.9055, "step": 4053 }, { "epoch": 0.3013006317354143, "grad_norm": 2.3990533684119426, "learning_rate": 1.9740947883467115e-05, "loss": 0.8304, "step": 4054 }, { "epoch": 0.3013749535488666, "grad_norm": 1.992382507789156, "learning_rate": 1.974076640447741e-05, "loss": 0.7219, "step": 4055 }, { "epoch": 0.30144927536231886, "grad_norm": 1.9773635453217422, "learning_rate": 1.974058486277715e-05, "loss": 0.8916, "step": 4056 }, { "epoch": 0.3015235971757711, "grad_norm": 1.8222397601236417, "learning_rate": 1.9740403258367512e-05, "loss": 0.9554, "step": 4057 }, { "epoch": 0.30159791898922333, "grad_norm": 2.4177615033793383, "learning_rate": 1.9740221591249663e-05, "loss": 0.9129, "step": 4058 }, { "epoch": 0.3016722408026756, "grad_norm": 2.3408217278389407, "learning_rate": 1.9740039861424776e-05, "loss": 0.8199, "step": 4059 }, { "epoch": 0.3017465626161278, "grad_norm": 2.354423415712892, "learning_rate": 1.9739858068894017e-05, "loss": 0.8369, "step": 4060 }, { "epoch": 0.3018208844295801, "grad_norm": 2.5887749540414395, "learning_rate": 1.9739676213658553e-05, "loss": 1.0715, "step": 4061 }, { "epoch": 0.30189520624303234, "grad_norm": 2.217676794485369, "learning_rate": 1.9739494295719564e-05, "loss": 0.8663, "step": 4062 }, { "epoch": 0.3019695280564846, "grad_norm": 2.401154550265754, "learning_rate": 1.9739312315078213e-05, "loss": 0.8616, "step": 4063 }, { "epoch": 0.3020438498699368, "grad_norm": 2.4189181175251475, "learning_rate": 1.9739130271735675e-05, "loss": 1.0033, "step": 4064 }, { "epoch": 0.30211817168338906, "grad_norm": 4.308287949269641, "learning_rate": 1.9738948165693122e-05, "loss": 1.093, "step": 4065 }, { "epoch": 0.3021924934968413, "grad_norm": 2.222934701596796, "learning_rate": 1.9738765996951727e-05, "loss": 0.9725, "step": 4066 }, { "epoch": 0.3022668153102936, "grad_norm": 2.3005352958810414, "learning_rate": 1.973858376551266e-05, "loss": 0.7482, "step": 4067 }, { "epoch": 0.30234113712374583, "grad_norm": 2.900285788299686, "learning_rate": 1.9738401471377098e-05, "loss": 0.7965, "step": 4068 }, { "epoch": 0.30241545893719807, "grad_norm": 2.1769832260521125, "learning_rate": 1.9738219114546212e-05, "loss": 1.0141, "step": 4069 }, { "epoch": 0.3024897807506503, "grad_norm": 2.4585178514671098, "learning_rate": 1.9738036695021177e-05, "loss": 0.8377, "step": 4070 }, { "epoch": 0.30256410256410254, "grad_norm": 2.3339885406627334, "learning_rate": 1.9737854212803164e-05, "loss": 0.9604, "step": 4071 }, { "epoch": 0.30263842437755484, "grad_norm": 2.3875079644417916, "learning_rate": 1.9737671667893356e-05, "loss": 0.9388, "step": 4072 }, { "epoch": 0.3027127461910071, "grad_norm": 2.509640865039587, "learning_rate": 1.973748906029292e-05, "loss": 0.9855, "step": 4073 }, { "epoch": 0.3027870680044593, "grad_norm": 1.8647419169782373, "learning_rate": 1.9737306390003034e-05, "loss": 0.9291, "step": 4074 }, { "epoch": 0.30286138981791155, "grad_norm": 1.923652130864619, "learning_rate": 1.9737123657024874e-05, "loss": 0.871, "step": 4075 }, { "epoch": 0.3029357116313638, "grad_norm": 2.732895087887143, "learning_rate": 1.973694086135962e-05, "loss": 1.0859, "step": 4076 }, { "epoch": 0.30301003344481603, "grad_norm": 7.931730346837985, "learning_rate": 1.9736758003008446e-05, "loss": 0.8106, "step": 4077 }, { "epoch": 0.3030843552582683, "grad_norm": 2.4240607540558026, "learning_rate": 1.9736575081972527e-05, "loss": 1.1611, "step": 4078 }, { "epoch": 0.30315867707172056, "grad_norm": 2.2330130750834782, "learning_rate": 1.9736392098253045e-05, "loss": 0.8965, "step": 4079 }, { "epoch": 0.3032329988851728, "grad_norm": 3.2899504062484106, "learning_rate": 1.9736209051851173e-05, "loss": 0.8978, "step": 4080 }, { "epoch": 0.30330732069862504, "grad_norm": 2.3009709952736443, "learning_rate": 1.9736025942768094e-05, "loss": 1.0079, "step": 4081 }, { "epoch": 0.3033816425120773, "grad_norm": 2.6217734867626947, "learning_rate": 1.9735842771004984e-05, "loss": 0.9641, "step": 4082 }, { "epoch": 0.30345596432552957, "grad_norm": 3.0190292606789786, "learning_rate": 1.9735659536563025e-05, "loss": 0.8085, "step": 4083 }, { "epoch": 0.3035302861389818, "grad_norm": 10.772406430146003, "learning_rate": 1.9735476239443396e-05, "loss": 1.0627, "step": 4084 }, { "epoch": 0.30360460795243405, "grad_norm": 2.313787942714199, "learning_rate": 1.9735292879647272e-05, "loss": 0.8851, "step": 4085 }, { "epoch": 0.3036789297658863, "grad_norm": 2.1970307666201445, "learning_rate": 1.973510945717584e-05, "loss": 1.0804, "step": 4086 }, { "epoch": 0.3037532515793385, "grad_norm": 2.355541385241761, "learning_rate": 1.973492597203028e-05, "loss": 0.8991, "step": 4087 }, { "epoch": 0.30382757339279076, "grad_norm": 2.7763566897852976, "learning_rate": 1.973474242421177e-05, "loss": 0.9353, "step": 4088 }, { "epoch": 0.30390189520624306, "grad_norm": 2.0545950299642675, "learning_rate": 1.973455881372149e-05, "loss": 0.8434, "step": 4089 }, { "epoch": 0.3039762170196953, "grad_norm": 1.9806133876681016, "learning_rate": 1.9734375140560632e-05, "loss": 0.8389, "step": 4090 }, { "epoch": 0.30405053883314753, "grad_norm": 2.3593124294153762, "learning_rate": 1.9734191404730367e-05, "loss": 1.007, "step": 4091 }, { "epoch": 0.30412486064659977, "grad_norm": 2.5224333421831773, "learning_rate": 1.9734007606231886e-05, "loss": 1.0401, "step": 4092 }, { "epoch": 0.304199182460052, "grad_norm": 3.896791383787279, "learning_rate": 1.973382374506637e-05, "loss": 0.9744, "step": 4093 }, { "epoch": 0.30427350427350425, "grad_norm": 2.2278183032386156, "learning_rate": 1.9733639821234997e-05, "loss": 0.9299, "step": 4094 }, { "epoch": 0.30434782608695654, "grad_norm": 2.5441953504154844, "learning_rate": 1.973345583473896e-05, "loss": 0.9753, "step": 4095 }, { "epoch": 0.3044221479004088, "grad_norm": 2.8829870373690953, "learning_rate": 1.9733271785579437e-05, "loss": 0.9919, "step": 4096 }, { "epoch": 0.304496469713861, "grad_norm": 1.9346432935738802, "learning_rate": 1.9733087673757616e-05, "loss": 0.7396, "step": 4097 }, { "epoch": 0.30457079152731326, "grad_norm": 2.208880901337444, "learning_rate": 1.973290349927468e-05, "loss": 1.0743, "step": 4098 }, { "epoch": 0.3046451133407655, "grad_norm": 2.268673244616461, "learning_rate": 1.973271926213182e-05, "loss": 0.9313, "step": 4099 }, { "epoch": 0.3047194351542178, "grad_norm": 3.1605419256435012, "learning_rate": 1.9732534962330216e-05, "loss": 0.7182, "step": 4100 }, { "epoch": 0.30479375696767, "grad_norm": 2.3564990870592775, "learning_rate": 1.973235059987106e-05, "loss": 0.8488, "step": 4101 }, { "epoch": 0.30486807878112226, "grad_norm": 2.376324278531817, "learning_rate": 1.973216617475553e-05, "loss": 0.8659, "step": 4102 }, { "epoch": 0.3049424005945745, "grad_norm": 2.183532032496262, "learning_rate": 1.9731981686984823e-05, "loss": 0.8459, "step": 4103 }, { "epoch": 0.30501672240802674, "grad_norm": 2.061298557342376, "learning_rate": 1.9731797136560124e-05, "loss": 0.7967, "step": 4104 }, { "epoch": 0.305091044221479, "grad_norm": 1.9341070614322984, "learning_rate": 1.9731612523482616e-05, "loss": 0.6976, "step": 4105 }, { "epoch": 0.3051653660349313, "grad_norm": 10.913824430449447, "learning_rate": 1.9731427847753494e-05, "loss": 0.9908, "step": 4106 }, { "epoch": 0.3052396878483835, "grad_norm": 2.053255216486787, "learning_rate": 1.9731243109373942e-05, "loss": 0.9264, "step": 4107 }, { "epoch": 0.30531400966183575, "grad_norm": 2.375696616364814, "learning_rate": 1.9731058308345153e-05, "loss": 1.0971, "step": 4108 }, { "epoch": 0.305388331475288, "grad_norm": 2.4956621132384638, "learning_rate": 1.973087344466832e-05, "loss": 0.7819, "step": 4109 }, { "epoch": 0.3054626532887402, "grad_norm": 2.618848532812826, "learning_rate": 1.9730688518344622e-05, "loss": 0.9195, "step": 4110 }, { "epoch": 0.3055369751021925, "grad_norm": 2.8853212581229206, "learning_rate": 1.973050352937526e-05, "loss": 0.9819, "step": 4111 }, { "epoch": 0.30561129691564476, "grad_norm": 2.5417594388457867, "learning_rate": 1.973031847776142e-05, "loss": 0.8401, "step": 4112 }, { "epoch": 0.305685618729097, "grad_norm": 2.8218225074245806, "learning_rate": 1.9730133363504297e-05, "loss": 1.1226, "step": 4113 }, { "epoch": 0.30575994054254924, "grad_norm": 3.0592778148179534, "learning_rate": 1.9729948186605075e-05, "loss": 0.8467, "step": 4114 }, { "epoch": 0.3058342623560015, "grad_norm": 2.1935220353661506, "learning_rate": 1.9729762947064958e-05, "loss": 1.0336, "step": 4115 }, { "epoch": 0.3059085841694537, "grad_norm": 2.5146385638563733, "learning_rate": 1.9729577644885128e-05, "loss": 1.1042, "step": 4116 }, { "epoch": 0.305982905982906, "grad_norm": 2.56843577288574, "learning_rate": 1.972939228006678e-05, "loss": 0.9089, "step": 4117 }, { "epoch": 0.30605722779635824, "grad_norm": 2.5792020455831173, "learning_rate": 1.9729206852611114e-05, "loss": 1.0614, "step": 4118 }, { "epoch": 0.3061315496098105, "grad_norm": 2.1247152342582347, "learning_rate": 1.9729021362519315e-05, "loss": 0.8827, "step": 4119 }, { "epoch": 0.3062058714232627, "grad_norm": 2.4263263329112195, "learning_rate": 1.9728835809792587e-05, "loss": 0.992, "step": 4120 }, { "epoch": 0.30628019323671496, "grad_norm": 2.1356797943197416, "learning_rate": 1.9728650194432113e-05, "loss": 0.8589, "step": 4121 }, { "epoch": 0.3063545150501672, "grad_norm": 2.620767515090408, "learning_rate": 1.9728464516439096e-05, "loss": 1.0625, "step": 4122 }, { "epoch": 0.3064288368636195, "grad_norm": 2.149299449489978, "learning_rate": 1.9728278775814727e-05, "loss": 0.9244, "step": 4123 }, { "epoch": 0.30650315867707173, "grad_norm": 2.0043656673262746, "learning_rate": 1.9728092972560208e-05, "loss": 0.8144, "step": 4124 }, { "epoch": 0.30657748049052397, "grad_norm": 2.2521954576116663, "learning_rate": 1.972790710667673e-05, "loss": 0.836, "step": 4125 }, { "epoch": 0.3066518023039762, "grad_norm": 2.1263435769917804, "learning_rate": 1.972772117816549e-05, "loss": 0.8177, "step": 4126 }, { "epoch": 0.30672612411742844, "grad_norm": 2.5211549796276693, "learning_rate": 1.9727535187027683e-05, "loss": 0.8611, "step": 4127 }, { "epoch": 0.30680044593088074, "grad_norm": 2.4113966159863756, "learning_rate": 1.9727349133264514e-05, "loss": 0.9063, "step": 4128 }, { "epoch": 0.306874767744333, "grad_norm": 2.0783012112649346, "learning_rate": 1.9727163016877174e-05, "loss": 0.6938, "step": 4129 }, { "epoch": 0.3069490895577852, "grad_norm": 1.9515546567407456, "learning_rate": 1.9726976837866862e-05, "loss": 0.8808, "step": 4130 }, { "epoch": 0.30702341137123745, "grad_norm": 2.795713590468359, "learning_rate": 1.972679059623478e-05, "loss": 1.1752, "step": 4131 }, { "epoch": 0.3070977331846897, "grad_norm": 1.989018721674295, "learning_rate": 1.9726604291982126e-05, "loss": 1.0919, "step": 4132 }, { "epoch": 0.30717205499814193, "grad_norm": 1.729033189864988, "learning_rate": 1.9726417925110094e-05, "loss": 0.8677, "step": 4133 }, { "epoch": 0.3072463768115942, "grad_norm": 2.283622804182579, "learning_rate": 1.972623149561989e-05, "loss": 0.9477, "step": 4134 }, { "epoch": 0.30732069862504646, "grad_norm": 2.578854236746498, "learning_rate": 1.9726045003512714e-05, "loss": 0.885, "step": 4135 }, { "epoch": 0.3073950204384987, "grad_norm": 2.4958648359159015, "learning_rate": 1.9725858448789764e-05, "loss": 1.0161, "step": 4136 }, { "epoch": 0.30746934225195094, "grad_norm": 3.977615123447276, "learning_rate": 1.972567183145224e-05, "loss": 0.8337, "step": 4137 }, { "epoch": 0.3075436640654032, "grad_norm": 2.331178179191182, "learning_rate": 1.9725485151501348e-05, "loss": 0.9999, "step": 4138 }, { "epoch": 0.30761798587885547, "grad_norm": 2.12612303364096, "learning_rate": 1.9725298408938288e-05, "loss": 0.7551, "step": 4139 }, { "epoch": 0.3076923076923077, "grad_norm": 2.3033456209900085, "learning_rate": 1.9725111603764257e-05, "loss": 1.0407, "step": 4140 }, { "epoch": 0.30776662950575995, "grad_norm": 2.3224527019967423, "learning_rate": 1.9724924735980467e-05, "loss": 0.9659, "step": 4141 }, { "epoch": 0.3078409513192122, "grad_norm": 2.245533200182718, "learning_rate": 1.9724737805588112e-05, "loss": 1.0037, "step": 4142 }, { "epoch": 0.3079152731326644, "grad_norm": 1.985643001148859, "learning_rate": 1.97245508125884e-05, "loss": 0.6045, "step": 4143 }, { "epoch": 0.30798959494611666, "grad_norm": 1.8092023569284006, "learning_rate": 1.9724363756982536e-05, "loss": 0.8198, "step": 4144 }, { "epoch": 0.30806391675956896, "grad_norm": 2.7297015042782657, "learning_rate": 1.9724176638771724e-05, "loss": 0.9162, "step": 4145 }, { "epoch": 0.3081382385730212, "grad_norm": 2.515619217965771, "learning_rate": 1.9723989457957168e-05, "loss": 0.9623, "step": 4146 }, { "epoch": 0.30821256038647343, "grad_norm": 2.6618142012093884, "learning_rate": 1.9723802214540067e-05, "loss": 0.8606, "step": 4147 }, { "epoch": 0.30828688219992567, "grad_norm": 2.245732732751103, "learning_rate": 1.9723614908521634e-05, "loss": 0.9053, "step": 4148 }, { "epoch": 0.3083612040133779, "grad_norm": 2.33098787126138, "learning_rate": 1.9723427539903075e-05, "loss": 1.1339, "step": 4149 }, { "epoch": 0.30843552582683015, "grad_norm": 3.01091134930467, "learning_rate": 1.9723240108685593e-05, "loss": 0.9769, "step": 4150 }, { "epoch": 0.30850984764028244, "grad_norm": 2.4362217383258034, "learning_rate": 1.9723052614870394e-05, "loss": 0.9664, "step": 4151 }, { "epoch": 0.3085841694537347, "grad_norm": 2.262252363707753, "learning_rate": 1.972286505845869e-05, "loss": 1.0363, "step": 4152 }, { "epoch": 0.3086584912671869, "grad_norm": 1.919917544720341, "learning_rate": 1.972267743945168e-05, "loss": 1.0087, "step": 4153 }, { "epoch": 0.30873281308063916, "grad_norm": 2.0715560655567353, "learning_rate": 1.9722489757850583e-05, "loss": 0.8908, "step": 4154 }, { "epoch": 0.3088071348940914, "grad_norm": 2.350405167249745, "learning_rate": 1.9722302013656602e-05, "loss": 1.194, "step": 4155 }, { "epoch": 0.3088814567075437, "grad_norm": 3.584661637911928, "learning_rate": 1.972211420687094e-05, "loss": 0.8935, "step": 4156 }, { "epoch": 0.3089557785209959, "grad_norm": 2.035248046856531, "learning_rate": 1.9721926337494812e-05, "loss": 0.6204, "step": 4157 }, { "epoch": 0.30903010033444817, "grad_norm": 2.064773765086997, "learning_rate": 1.972173840552943e-05, "loss": 0.7807, "step": 4158 }, { "epoch": 0.3091044221479004, "grad_norm": 2.194184370037218, "learning_rate": 1.9721550410976e-05, "loss": 0.7544, "step": 4159 }, { "epoch": 0.30917874396135264, "grad_norm": 2.176834549943683, "learning_rate": 1.972136235383573e-05, "loss": 1.0063, "step": 4160 }, { "epoch": 0.3092530657748049, "grad_norm": 2.0957161668282156, "learning_rate": 1.9721174234109836e-05, "loss": 0.7736, "step": 4161 }, { "epoch": 0.3093273875882572, "grad_norm": 2.4985442038869214, "learning_rate": 1.9720986051799525e-05, "loss": 1.0226, "step": 4162 }, { "epoch": 0.3094017094017094, "grad_norm": 2.046648919048811, "learning_rate": 1.9720797806906013e-05, "loss": 0.8943, "step": 4163 }, { "epoch": 0.30947603121516165, "grad_norm": 2.3974770946146267, "learning_rate": 1.9720609499430504e-05, "loss": 1.0047, "step": 4164 }, { "epoch": 0.3095503530286139, "grad_norm": 2.712812775300882, "learning_rate": 1.972042112937422e-05, "loss": 1.1087, "step": 4165 }, { "epoch": 0.3096246748420661, "grad_norm": 2.190733999334431, "learning_rate": 1.9720232696738368e-05, "loss": 1.0706, "step": 4166 }, { "epoch": 0.3096989966555184, "grad_norm": 2.7406662261129195, "learning_rate": 1.972004420152416e-05, "loss": 1.0899, "step": 4167 }, { "epoch": 0.30977331846897066, "grad_norm": 2.4584319351241573, "learning_rate": 1.9719855643732813e-05, "loss": 1.1053, "step": 4168 }, { "epoch": 0.3098476402824229, "grad_norm": 2.0052736195279914, "learning_rate": 1.971966702336554e-05, "loss": 0.7728, "step": 4169 }, { "epoch": 0.30992196209587514, "grad_norm": 2.787200067004521, "learning_rate": 1.9719478340423554e-05, "loss": 0.7077, "step": 4170 }, { "epoch": 0.3099962839093274, "grad_norm": 2.336617448698158, "learning_rate": 1.9719289594908074e-05, "loss": 0.8991, "step": 4171 }, { "epoch": 0.3100706057227796, "grad_norm": 2.7376455355286007, "learning_rate": 1.971910078682031e-05, "loss": 0.8717, "step": 4172 }, { "epoch": 0.3101449275362319, "grad_norm": 2.4156749081644637, "learning_rate": 1.971891191616148e-05, "loss": 0.9956, "step": 4173 }, { "epoch": 0.31021924934968415, "grad_norm": 1.877919568946049, "learning_rate": 1.97187229829328e-05, "loss": 0.6631, "step": 4174 }, { "epoch": 0.3102935711631364, "grad_norm": 2.493283171082362, "learning_rate": 1.971853398713548e-05, "loss": 0.9254, "step": 4175 }, { "epoch": 0.3103678929765886, "grad_norm": 2.8387329342691783, "learning_rate": 1.9718344928770747e-05, "loss": 0.9856, "step": 4176 }, { "epoch": 0.31044221479004086, "grad_norm": 1.9718520448455328, "learning_rate": 1.9718155807839815e-05, "loss": 0.8626, "step": 4177 }, { "epoch": 0.3105165366034931, "grad_norm": 2.7406340407345704, "learning_rate": 1.97179666243439e-05, "loss": 1.0865, "step": 4178 }, { "epoch": 0.3105908584169454, "grad_norm": 4.191092445269535, "learning_rate": 1.9717777378284216e-05, "loss": 0.9519, "step": 4179 }, { "epoch": 0.31066518023039763, "grad_norm": 2.4873746816806386, "learning_rate": 1.9717588069661988e-05, "loss": 0.8689, "step": 4180 }, { "epoch": 0.31073950204384987, "grad_norm": 2.0257456114476007, "learning_rate": 1.9717398698478435e-05, "loss": 0.8397, "step": 4181 }, { "epoch": 0.3108138238573021, "grad_norm": 2.0210886423172423, "learning_rate": 1.971720926473477e-05, "loss": 0.8339, "step": 4182 }, { "epoch": 0.31088814567075435, "grad_norm": 2.627542132721297, "learning_rate": 1.9717019768432216e-05, "loss": 0.9721, "step": 4183 }, { "epoch": 0.31096246748420664, "grad_norm": 2.4829700142038176, "learning_rate": 1.9716830209571993e-05, "loss": 0.9033, "step": 4184 }, { "epoch": 0.3110367892976589, "grad_norm": 2.228342509307886, "learning_rate": 1.971664058815532e-05, "loss": 0.9697, "step": 4185 }, { "epoch": 0.3111111111111111, "grad_norm": 2.436107899303438, "learning_rate": 1.9716450904183426e-05, "loss": 1.032, "step": 4186 }, { "epoch": 0.31118543292456335, "grad_norm": 2.293045595273827, "learning_rate": 1.971626115765752e-05, "loss": 0.9553, "step": 4187 }, { "epoch": 0.3112597547380156, "grad_norm": 2.297389081927411, "learning_rate": 1.9716071348578828e-05, "loss": 0.9761, "step": 4188 }, { "epoch": 0.31133407655146783, "grad_norm": 1.804755254825853, "learning_rate": 1.9715881476948572e-05, "loss": 0.7769, "step": 4189 }, { "epoch": 0.3114083983649201, "grad_norm": 1.897247438340174, "learning_rate": 1.971569154276798e-05, "loss": 0.8721, "step": 4190 }, { "epoch": 0.31148272017837236, "grad_norm": 2.1030851133379613, "learning_rate": 1.9715501546038265e-05, "loss": 0.6853, "step": 4191 }, { "epoch": 0.3115570419918246, "grad_norm": 2.7364650376178496, "learning_rate": 1.9715311486760657e-05, "loss": 1.0535, "step": 4192 }, { "epoch": 0.31163136380527684, "grad_norm": 2.1437587184113074, "learning_rate": 1.9715121364936377e-05, "loss": 0.9558, "step": 4193 }, { "epoch": 0.3117056856187291, "grad_norm": 2.005751218516655, "learning_rate": 1.9714931180566653e-05, "loss": 0.7115, "step": 4194 }, { "epoch": 0.31178000743218137, "grad_norm": 2.2998950148909905, "learning_rate": 1.97147409336527e-05, "loss": 0.7287, "step": 4195 }, { "epoch": 0.3118543292456336, "grad_norm": 2.1115756790953837, "learning_rate": 1.9714550624195752e-05, "loss": 0.7616, "step": 4196 }, { "epoch": 0.31192865105908585, "grad_norm": 2.1650342909785154, "learning_rate": 1.9714360252197036e-05, "loss": 0.7217, "step": 4197 }, { "epoch": 0.3120029728725381, "grad_norm": 2.0065453594849387, "learning_rate": 1.9714169817657767e-05, "loss": 1.0722, "step": 4198 }, { "epoch": 0.3120772946859903, "grad_norm": 3.1199485926290382, "learning_rate": 1.9713979320579176e-05, "loss": 0.8784, "step": 4199 }, { "epoch": 0.31215161649944256, "grad_norm": 2.5915086915704766, "learning_rate": 1.9713788760962495e-05, "loss": 0.9041, "step": 4200 }, { "epoch": 0.31222593831289486, "grad_norm": 2.8093207613447233, "learning_rate": 1.9713598138808942e-05, "loss": 0.8563, "step": 4201 }, { "epoch": 0.3123002601263471, "grad_norm": 2.4206200956249737, "learning_rate": 1.971340745411975e-05, "loss": 1.0793, "step": 4202 }, { "epoch": 0.31237458193979933, "grad_norm": 2.20970482556005, "learning_rate": 1.9713216706896144e-05, "loss": 0.983, "step": 4203 }, { "epoch": 0.3124489037532516, "grad_norm": 2.4126051866210716, "learning_rate": 1.971302589713935e-05, "loss": 0.9661, "step": 4204 }, { "epoch": 0.3125232255667038, "grad_norm": 2.347333835419824, "learning_rate": 1.9712835024850606e-05, "loss": 0.9208, "step": 4205 }, { "epoch": 0.31259754738015605, "grad_norm": 2.5361543286318295, "learning_rate": 1.971264409003113e-05, "loss": 0.9847, "step": 4206 }, { "epoch": 0.31267186919360834, "grad_norm": 2.3623939187535044, "learning_rate": 1.9712453092682158e-05, "loss": 1.0408, "step": 4207 }, { "epoch": 0.3127461910070606, "grad_norm": 2.948854731710728, "learning_rate": 1.9712262032804916e-05, "loss": 0.9889, "step": 4208 }, { "epoch": 0.3128205128205128, "grad_norm": 2.684674954322875, "learning_rate": 1.971207091040063e-05, "loss": 0.8683, "step": 4209 }, { "epoch": 0.31289483463396506, "grad_norm": 2.1797603309624347, "learning_rate": 1.971187972547054e-05, "loss": 1.056, "step": 4210 }, { "epoch": 0.3129691564474173, "grad_norm": 3.1778846501475213, "learning_rate": 1.971168847801587e-05, "loss": 0.9848, "step": 4211 }, { "epoch": 0.3130434782608696, "grad_norm": 1.8809399410466479, "learning_rate": 1.9711497168037856e-05, "loss": 0.7975, "step": 4212 }, { "epoch": 0.31311780007432183, "grad_norm": 2.242957761228493, "learning_rate": 1.9711305795537727e-05, "loss": 0.9416, "step": 4213 }, { "epoch": 0.31319212188777407, "grad_norm": 2.522415157772204, "learning_rate": 1.9711114360516714e-05, "loss": 1.0971, "step": 4214 }, { "epoch": 0.3132664437012263, "grad_norm": 2.597831170529195, "learning_rate": 1.971092286297605e-05, "loss": 0.8273, "step": 4215 }, { "epoch": 0.31334076551467854, "grad_norm": 1.8665342749583314, "learning_rate": 1.971073130291697e-05, "loss": 0.7095, "step": 4216 }, { "epoch": 0.3134150873281308, "grad_norm": 2.186006605410372, "learning_rate": 1.9710539680340702e-05, "loss": 0.902, "step": 4217 }, { "epoch": 0.3134894091415831, "grad_norm": 2.3576085711350885, "learning_rate": 1.971034799524849e-05, "loss": 0.8589, "step": 4218 }, { "epoch": 0.3135637309550353, "grad_norm": 2.251532659166663, "learning_rate": 1.9710156247641557e-05, "loss": 0.9023, "step": 4219 }, { "epoch": 0.31363805276848755, "grad_norm": 2.0863229501692717, "learning_rate": 1.9709964437521144e-05, "loss": 0.6582, "step": 4220 }, { "epoch": 0.3137123745819398, "grad_norm": 2.1404481382736864, "learning_rate": 1.970977256488848e-05, "loss": 0.7044, "step": 4221 }, { "epoch": 0.31378669639539203, "grad_norm": 2.6521789999001566, "learning_rate": 1.9709580629744805e-05, "loss": 1.2857, "step": 4222 }, { "epoch": 0.3138610182088443, "grad_norm": 2.945132341047444, "learning_rate": 1.9709388632091356e-05, "loss": 0.9783, "step": 4223 }, { "epoch": 0.31393534002229656, "grad_norm": 3.5295539366963213, "learning_rate": 1.9709196571929365e-05, "loss": 1.0877, "step": 4224 }, { "epoch": 0.3140096618357488, "grad_norm": 2.5013811639853674, "learning_rate": 1.9709004449260073e-05, "loss": 1.041, "step": 4225 }, { "epoch": 0.31408398364920104, "grad_norm": 2.03485702153351, "learning_rate": 1.970881226408471e-05, "loss": 0.9569, "step": 4226 }, { "epoch": 0.3141583054626533, "grad_norm": 2.2913819852767627, "learning_rate": 1.9708620016404522e-05, "loss": 0.6585, "step": 4227 }, { "epoch": 0.3142326272761055, "grad_norm": 7.132527554505013, "learning_rate": 1.970842770622074e-05, "loss": 0.6044, "step": 4228 }, { "epoch": 0.3143069490895578, "grad_norm": 2.2071956920216484, "learning_rate": 1.9708235333534603e-05, "loss": 1.156, "step": 4229 }, { "epoch": 0.31438127090301005, "grad_norm": 2.3778283041928074, "learning_rate": 1.970804289834735e-05, "loss": 1.0138, "step": 4230 }, { "epoch": 0.3144555927164623, "grad_norm": 1.954836330703186, "learning_rate": 1.9707850400660226e-05, "loss": 0.8691, "step": 4231 }, { "epoch": 0.3145299145299145, "grad_norm": 2.237007553657879, "learning_rate": 1.970765784047446e-05, "loss": 0.6749, "step": 4232 }, { "epoch": 0.31460423634336676, "grad_norm": 3.0251501316661713, "learning_rate": 1.97074652177913e-05, "loss": 1.0844, "step": 4233 }, { "epoch": 0.314678558156819, "grad_norm": 2.502372196025075, "learning_rate": 1.9707272532611976e-05, "loss": 0.9313, "step": 4234 }, { "epoch": 0.3147528799702713, "grad_norm": 7.4592900740964225, "learning_rate": 1.970707978493774e-05, "loss": 0.909, "step": 4235 }, { "epoch": 0.31482720178372353, "grad_norm": 2.2817727485058374, "learning_rate": 1.9706886974769826e-05, "loss": 1.0528, "step": 4236 }, { "epoch": 0.31490152359717577, "grad_norm": 2.046331440380861, "learning_rate": 1.9706694102109482e-05, "loss": 0.9003, "step": 4237 }, { "epoch": 0.314975845410628, "grad_norm": 2.2191420795073737, "learning_rate": 1.970650116695794e-05, "loss": 0.873, "step": 4238 }, { "epoch": 0.31505016722408025, "grad_norm": 2.5139100724737213, "learning_rate": 1.9706308169316448e-05, "loss": 0.7074, "step": 4239 }, { "epoch": 0.31512448903753254, "grad_norm": 1.952710458195379, "learning_rate": 1.970611510918625e-05, "loss": 0.8611, "step": 4240 }, { "epoch": 0.3151988108509848, "grad_norm": 2.2603983911001575, "learning_rate": 1.9705921986568582e-05, "loss": 1.1215, "step": 4241 }, { "epoch": 0.315273132664437, "grad_norm": 1.9676289657664068, "learning_rate": 1.9705728801464696e-05, "loss": 0.7705, "step": 4242 }, { "epoch": 0.31534745447788926, "grad_norm": 2.1321097588842868, "learning_rate": 1.970553555387583e-05, "loss": 0.8542, "step": 4243 }, { "epoch": 0.3154217762913415, "grad_norm": 2.1909279745616623, "learning_rate": 1.9705342243803232e-05, "loss": 0.9388, "step": 4244 }, { "epoch": 0.31549609810479373, "grad_norm": 2.3049089210971374, "learning_rate": 1.9705148871248142e-05, "loss": 0.9814, "step": 4245 }, { "epoch": 0.315570419918246, "grad_norm": 2.0346648679248167, "learning_rate": 1.970495543621181e-05, "loss": 0.6809, "step": 4246 }, { "epoch": 0.31564474173169826, "grad_norm": 2.859306982330757, "learning_rate": 1.9704761938695473e-05, "loss": 1.0059, "step": 4247 }, { "epoch": 0.3157190635451505, "grad_norm": 2.6331135056925192, "learning_rate": 1.9704568378700384e-05, "loss": 1.0618, "step": 4248 }, { "epoch": 0.31579338535860274, "grad_norm": 2.0691434007879708, "learning_rate": 1.970437475622779e-05, "loss": 0.9566, "step": 4249 }, { "epoch": 0.315867707172055, "grad_norm": 2.0702313250762083, "learning_rate": 1.9704181071278932e-05, "loss": 0.8626, "step": 4250 }, { "epoch": 0.3159420289855073, "grad_norm": 2.541186804673117, "learning_rate": 1.970398732385506e-05, "loss": 1.0641, "step": 4251 }, { "epoch": 0.3160163507989595, "grad_norm": 2.121198011011415, "learning_rate": 1.9703793513957424e-05, "loss": 0.9097, "step": 4252 }, { "epoch": 0.31609067261241175, "grad_norm": 2.1120494845251687, "learning_rate": 1.9703599641587267e-05, "loss": 0.8211, "step": 4253 }, { "epoch": 0.316164994425864, "grad_norm": 2.3156870073471705, "learning_rate": 1.9703405706745838e-05, "loss": 0.9956, "step": 4254 }, { "epoch": 0.3162393162393162, "grad_norm": 2.082394510958922, "learning_rate": 1.9703211709434388e-05, "loss": 0.911, "step": 4255 }, { "epoch": 0.31631363805276846, "grad_norm": 2.1908467924977195, "learning_rate": 1.9703017649654163e-05, "loss": 1.0049, "step": 4256 }, { "epoch": 0.31638795986622076, "grad_norm": 2.424470834879614, "learning_rate": 1.9702823527406413e-05, "loss": 1.058, "step": 4257 }, { "epoch": 0.316462281679673, "grad_norm": 2.096140422498602, "learning_rate": 1.9702629342692393e-05, "loss": 0.7231, "step": 4258 }, { "epoch": 0.31653660349312523, "grad_norm": 2.123928906412818, "learning_rate": 1.9702435095513345e-05, "loss": 0.7969, "step": 4259 }, { "epoch": 0.3166109253065775, "grad_norm": 2.1594087624958336, "learning_rate": 1.9702240785870523e-05, "loss": 0.8281, "step": 4260 }, { "epoch": 0.3166852471200297, "grad_norm": 1.7529305141061662, "learning_rate": 1.970204641376518e-05, "loss": 0.7918, "step": 4261 }, { "epoch": 0.31675956893348195, "grad_norm": 2.2712442893883433, "learning_rate": 1.9701851979198565e-05, "loss": 0.8687, "step": 4262 }, { "epoch": 0.31683389074693424, "grad_norm": 2.314443429833485, "learning_rate": 1.970165748217193e-05, "loss": 1.1773, "step": 4263 }, { "epoch": 0.3169082125603865, "grad_norm": 2.5234722811736816, "learning_rate": 1.970146292268653e-05, "loss": 0.9673, "step": 4264 }, { "epoch": 0.3169825343738387, "grad_norm": 2.1076740620802146, "learning_rate": 1.970126830074361e-05, "loss": 0.8851, "step": 4265 }, { "epoch": 0.31705685618729096, "grad_norm": 1.76803217225829, "learning_rate": 1.9701073616344432e-05, "loss": 0.8046, "step": 4266 }, { "epoch": 0.3171311780007432, "grad_norm": 2.5019713490045334, "learning_rate": 1.9700878869490247e-05, "loss": 0.8182, "step": 4267 }, { "epoch": 0.3172054998141955, "grad_norm": 1.9000352063468078, "learning_rate": 1.9700684060182303e-05, "loss": 0.761, "step": 4268 }, { "epoch": 0.31727982162764773, "grad_norm": 2.2922430795688364, "learning_rate": 1.970048918842186e-05, "loss": 0.9029, "step": 4269 }, { "epoch": 0.31735414344109997, "grad_norm": 2.5141593614545528, "learning_rate": 1.970029425421017e-05, "loss": 0.919, "step": 4270 }, { "epoch": 0.3174284652545522, "grad_norm": 2.1876686687042435, "learning_rate": 1.970009925754849e-05, "loss": 0.7391, "step": 4271 }, { "epoch": 0.31750278706800444, "grad_norm": 2.0029020302742078, "learning_rate": 1.9699904198438076e-05, "loss": 1.022, "step": 4272 }, { "epoch": 0.3175771088814567, "grad_norm": 2.866772345517876, "learning_rate": 1.969970907688018e-05, "loss": 1.1172, "step": 4273 }, { "epoch": 0.317651430694909, "grad_norm": 2.156518405410833, "learning_rate": 1.969951389287606e-05, "loss": 0.7125, "step": 4274 }, { "epoch": 0.3177257525083612, "grad_norm": 2.871389884063706, "learning_rate": 1.9699318646426975e-05, "loss": 0.7904, "step": 4275 }, { "epoch": 0.31780007432181345, "grad_norm": 4.845749149518337, "learning_rate": 1.9699123337534175e-05, "loss": 0.8206, "step": 4276 }, { "epoch": 0.3178743961352657, "grad_norm": 2.44338442216781, "learning_rate": 1.9698927966198925e-05, "loss": 1.0071, "step": 4277 }, { "epoch": 0.31794871794871793, "grad_norm": 3.067353826928814, "learning_rate": 1.9698732532422484e-05, "loss": 1.1588, "step": 4278 }, { "epoch": 0.3180230397621702, "grad_norm": 2.367875014164279, "learning_rate": 1.96985370362061e-05, "loss": 1.105, "step": 4279 }, { "epoch": 0.31809736157562246, "grad_norm": 1.905158907761552, "learning_rate": 1.969834147755104e-05, "loss": 0.8703, "step": 4280 }, { "epoch": 0.3181716833890747, "grad_norm": 2.2586431930591346, "learning_rate": 1.9698145856458564e-05, "loss": 0.7953, "step": 4281 }, { "epoch": 0.31824600520252694, "grad_norm": 2.003938356985693, "learning_rate": 1.9697950172929925e-05, "loss": 0.941, "step": 4282 }, { "epoch": 0.3183203270159792, "grad_norm": 1.9434804087284974, "learning_rate": 1.9697754426966385e-05, "loss": 0.7659, "step": 4283 }, { "epoch": 0.3183946488294314, "grad_norm": 2.367126922545855, "learning_rate": 1.9697558618569206e-05, "loss": 0.7815, "step": 4284 }, { "epoch": 0.3184689706428837, "grad_norm": 2.632615077050777, "learning_rate": 1.9697362747739647e-05, "loss": 1.0132, "step": 4285 }, { "epoch": 0.31854329245633595, "grad_norm": 2.4547132761485884, "learning_rate": 1.9697166814478973e-05, "loss": 1.0662, "step": 4286 }, { "epoch": 0.3186176142697882, "grad_norm": 2.033199836667488, "learning_rate": 1.969697081878844e-05, "loss": 0.8326, "step": 4287 }, { "epoch": 0.3186919360832404, "grad_norm": 2.27371459040612, "learning_rate": 1.9696774760669312e-05, "loss": 1.0089, "step": 4288 }, { "epoch": 0.31876625789669266, "grad_norm": 2.1870268336073604, "learning_rate": 1.9696578640122853e-05, "loss": 0.8326, "step": 4289 }, { "epoch": 0.3188405797101449, "grad_norm": 2.2639658471982798, "learning_rate": 1.969638245715032e-05, "loss": 0.8555, "step": 4290 }, { "epoch": 0.3189149015235972, "grad_norm": 2.619363679467391, "learning_rate": 1.969618621175298e-05, "loss": 0.9456, "step": 4291 }, { "epoch": 0.31898922333704943, "grad_norm": 2.149487880721537, "learning_rate": 1.9695989903932103e-05, "loss": 0.8147, "step": 4292 }, { "epoch": 0.31906354515050167, "grad_norm": 2.2158103067813526, "learning_rate": 1.9695793533688937e-05, "loss": 0.8502, "step": 4293 }, { "epoch": 0.3191378669639539, "grad_norm": 1.8792925742076267, "learning_rate": 1.9695597101024762e-05, "loss": 0.9887, "step": 4294 }, { "epoch": 0.31921218877740615, "grad_norm": 3.165124605933162, "learning_rate": 1.9695400605940836e-05, "loss": 0.9465, "step": 4295 }, { "epoch": 0.31928651059085844, "grad_norm": 2.03263786005776, "learning_rate": 1.9695204048438418e-05, "loss": 0.8267, "step": 4296 }, { "epoch": 0.3193608324043107, "grad_norm": 1.7818710399349103, "learning_rate": 1.9695007428518785e-05, "loss": 0.7945, "step": 4297 }, { "epoch": 0.3194351542177629, "grad_norm": 2.133238998791925, "learning_rate": 1.9694810746183196e-05, "loss": 0.8103, "step": 4298 }, { "epoch": 0.31950947603121516, "grad_norm": 2.695264607303954, "learning_rate": 1.9694614001432917e-05, "loss": 0.9963, "step": 4299 }, { "epoch": 0.3195837978446674, "grad_norm": 2.170258432340017, "learning_rate": 1.9694417194269214e-05, "loss": 1.0194, "step": 4300 }, { "epoch": 0.31965811965811963, "grad_norm": 2.5186224912234896, "learning_rate": 1.969422032469336e-05, "loss": 0.7745, "step": 4301 }, { "epoch": 0.3197324414715719, "grad_norm": 3.0956862131337455, "learning_rate": 1.969402339270662e-05, "loss": 0.7946, "step": 4302 }, { "epoch": 0.31980676328502416, "grad_norm": 2.3238031553260305, "learning_rate": 1.9693826398310256e-05, "loss": 0.851, "step": 4303 }, { "epoch": 0.3198810850984764, "grad_norm": 2.1733595288259977, "learning_rate": 1.969362934150554e-05, "loss": 0.8653, "step": 4304 }, { "epoch": 0.31995540691192864, "grad_norm": 2.2150623054189067, "learning_rate": 1.969343222229375e-05, "loss": 1.0237, "step": 4305 }, { "epoch": 0.3200297287253809, "grad_norm": 1.9555296789764978, "learning_rate": 1.9693235040676137e-05, "loss": 0.7171, "step": 4306 }, { "epoch": 0.3201040505388332, "grad_norm": 2.354216304203162, "learning_rate": 1.9693037796653983e-05, "loss": 0.8375, "step": 4307 }, { "epoch": 0.3201783723522854, "grad_norm": 2.226891636671364, "learning_rate": 1.9692840490228553e-05, "loss": 0.9973, "step": 4308 }, { "epoch": 0.32025269416573765, "grad_norm": 2.1216251314691115, "learning_rate": 1.9692643121401122e-05, "loss": 0.9867, "step": 4309 }, { "epoch": 0.3203270159791899, "grad_norm": 1.7821841516089525, "learning_rate": 1.9692445690172953e-05, "loss": 0.7317, "step": 4310 }, { "epoch": 0.3204013377926421, "grad_norm": 2.736490562544485, "learning_rate": 1.9692248196545327e-05, "loss": 0.893, "step": 4311 }, { "epoch": 0.32047565960609437, "grad_norm": 2.186286647083411, "learning_rate": 1.9692050640519505e-05, "loss": 0.9625, "step": 4312 }, { "epoch": 0.32054998141954666, "grad_norm": 2.493846994826538, "learning_rate": 1.969185302209677e-05, "loss": 1.0634, "step": 4313 }, { "epoch": 0.3206243032329989, "grad_norm": 2.0409826634624335, "learning_rate": 1.9691655341278384e-05, "loss": 0.8898, "step": 4314 }, { "epoch": 0.32069862504645114, "grad_norm": 2.37773956372046, "learning_rate": 1.9691457598065626e-05, "loss": 0.9523, "step": 4315 }, { "epoch": 0.3207729468599034, "grad_norm": 2.182869029000098, "learning_rate": 1.9691259792459763e-05, "loss": 1.0043, "step": 4316 }, { "epoch": 0.3208472686733556, "grad_norm": 1.996484293796893, "learning_rate": 1.9691061924462074e-05, "loss": 0.8036, "step": 4317 }, { "epoch": 0.32092159048680785, "grad_norm": 2.343819725087056, "learning_rate": 1.9690863994073832e-05, "loss": 0.8989, "step": 4318 }, { "epoch": 0.32099591230026014, "grad_norm": 2.1211809889488724, "learning_rate": 1.9690666001296307e-05, "loss": 1.0068, "step": 4319 }, { "epoch": 0.3210702341137124, "grad_norm": 2.1306481239883635, "learning_rate": 1.969046794613078e-05, "loss": 0.8, "step": 4320 }, { "epoch": 0.3211445559271646, "grad_norm": 1.8411225865414997, "learning_rate": 1.9690269828578524e-05, "loss": 1.0201, "step": 4321 }, { "epoch": 0.32121887774061686, "grad_norm": 2.179175099565713, "learning_rate": 1.9690071648640813e-05, "loss": 0.698, "step": 4322 }, { "epoch": 0.3212931995540691, "grad_norm": 2.4587748666539593, "learning_rate": 1.9689873406318925e-05, "loss": 0.8523, "step": 4323 }, { "epoch": 0.3213675213675214, "grad_norm": 2.1492203428969083, "learning_rate": 1.9689675101614132e-05, "loss": 0.9055, "step": 4324 }, { "epoch": 0.32144184318097363, "grad_norm": 2.229235964095525, "learning_rate": 1.9689476734527717e-05, "loss": 0.9031, "step": 4325 }, { "epoch": 0.32151616499442587, "grad_norm": 1.9110682148811302, "learning_rate": 1.9689278305060947e-05, "loss": 0.688, "step": 4326 }, { "epoch": 0.3215904868078781, "grad_norm": 2.017863329693555, "learning_rate": 1.968907981321511e-05, "loss": 0.8518, "step": 4327 }, { "epoch": 0.32166480862133034, "grad_norm": 1.9848957541393506, "learning_rate": 1.9688881258991483e-05, "loss": 0.9102, "step": 4328 }, { "epoch": 0.3217391304347826, "grad_norm": 2.5647820751032637, "learning_rate": 1.9688682642391336e-05, "loss": 0.7589, "step": 4329 }, { "epoch": 0.3218134522482349, "grad_norm": 2.0437815652012437, "learning_rate": 1.9688483963415955e-05, "loss": 0.7009, "step": 4330 }, { "epoch": 0.3218877740616871, "grad_norm": 2.220583344060176, "learning_rate": 1.968828522206662e-05, "loss": 0.8141, "step": 4331 }, { "epoch": 0.32196209587513935, "grad_norm": 2.2412558388203765, "learning_rate": 1.96880864183446e-05, "loss": 0.8795, "step": 4332 }, { "epoch": 0.3220364176885916, "grad_norm": 2.2951527051108447, "learning_rate": 1.9687887552251187e-05, "loss": 0.9098, "step": 4333 }, { "epoch": 0.32211073950204383, "grad_norm": 1.7492560900907805, "learning_rate": 1.9687688623787658e-05, "loss": 0.746, "step": 4334 }, { "epoch": 0.3221850613154961, "grad_norm": 2.542827021168504, "learning_rate": 1.968748963295529e-05, "loss": 0.8237, "step": 4335 }, { "epoch": 0.32225938312894836, "grad_norm": 1.9395853131078096, "learning_rate": 1.9687290579755366e-05, "loss": 0.7321, "step": 4336 }, { "epoch": 0.3223337049424006, "grad_norm": 2.38586364267536, "learning_rate": 1.9687091464189168e-05, "loss": 1.0851, "step": 4337 }, { "epoch": 0.32240802675585284, "grad_norm": 2.171060201633813, "learning_rate": 1.968689228625798e-05, "loss": 0.9364, "step": 4338 }, { "epoch": 0.3224823485693051, "grad_norm": 1.9045629202000427, "learning_rate": 1.968669304596308e-05, "loss": 0.8727, "step": 4339 }, { "epoch": 0.3225566703827573, "grad_norm": 2.342710867239577, "learning_rate": 1.9686493743305756e-05, "loss": 1.0219, "step": 4340 }, { "epoch": 0.3226309921962096, "grad_norm": 2.441164666083297, "learning_rate": 1.9686294378287284e-05, "loss": 0.9555, "step": 4341 }, { "epoch": 0.32270531400966185, "grad_norm": 2.041876005163611, "learning_rate": 1.9686094950908953e-05, "loss": 0.9122, "step": 4342 }, { "epoch": 0.3227796358231141, "grad_norm": 2.7935285077443393, "learning_rate": 1.9685895461172045e-05, "loss": 1.0294, "step": 4343 }, { "epoch": 0.3228539576365663, "grad_norm": 2.5196941414202283, "learning_rate": 1.9685695909077843e-05, "loss": 1.042, "step": 4344 }, { "epoch": 0.32292827945001856, "grad_norm": 2.1989097738885035, "learning_rate": 1.9685496294627636e-05, "loss": 1.0436, "step": 4345 }, { "epoch": 0.3230026012634708, "grad_norm": 2.302314319869803, "learning_rate": 1.9685296617822706e-05, "loss": 0.7852, "step": 4346 }, { "epoch": 0.3230769230769231, "grad_norm": 2.5080492087766033, "learning_rate": 1.9685096878664337e-05, "loss": 0.9479, "step": 4347 }, { "epoch": 0.32315124489037533, "grad_norm": 2.1626182482663237, "learning_rate": 1.968489707715382e-05, "loss": 0.892, "step": 4348 }, { "epoch": 0.32322556670382757, "grad_norm": 3.057688530743322, "learning_rate": 1.9684697213292434e-05, "loss": 0.7434, "step": 4349 }, { "epoch": 0.3232998885172798, "grad_norm": 2.4147658693755996, "learning_rate": 1.9684497287081475e-05, "loss": 1.12, "step": 4350 }, { "epoch": 0.32337421033073205, "grad_norm": 2.45350290116164, "learning_rate": 1.968429729852222e-05, "loss": 0.939, "step": 4351 }, { "epoch": 0.32344853214418434, "grad_norm": 1.9738706429718138, "learning_rate": 1.9684097247615966e-05, "loss": 0.8302, "step": 4352 }, { "epoch": 0.3235228539576366, "grad_norm": 3.6794590428405987, "learning_rate": 1.968389713436399e-05, "loss": 0.9139, "step": 4353 }, { "epoch": 0.3235971757710888, "grad_norm": 2.0915829926633296, "learning_rate": 1.968369695876759e-05, "loss": 0.9081, "step": 4354 }, { "epoch": 0.32367149758454106, "grad_norm": 2.8908645141239435, "learning_rate": 1.9683496720828055e-05, "loss": 1.1251, "step": 4355 }, { "epoch": 0.3237458193979933, "grad_norm": 2.0185741970205187, "learning_rate": 1.968329642054667e-05, "loss": 0.8184, "step": 4356 }, { "epoch": 0.32382014121144553, "grad_norm": 1.7769089508787903, "learning_rate": 1.968309605792472e-05, "loss": 0.7579, "step": 4357 }, { "epoch": 0.3238944630248978, "grad_norm": 2.529605879913481, "learning_rate": 1.96828956329635e-05, "loss": 0.8839, "step": 4358 }, { "epoch": 0.32396878483835007, "grad_norm": 2.5035408477471988, "learning_rate": 1.9682695145664307e-05, "loss": 0.763, "step": 4359 }, { "epoch": 0.3240431066518023, "grad_norm": 2.2323608584901073, "learning_rate": 1.9682494596028418e-05, "loss": 0.8206, "step": 4360 }, { "epoch": 0.32411742846525454, "grad_norm": 3.821171703677521, "learning_rate": 1.9682293984057136e-05, "loss": 0.8794, "step": 4361 }, { "epoch": 0.3241917502787068, "grad_norm": 2.26085159267419, "learning_rate": 1.9682093309751743e-05, "loss": 0.8905, "step": 4362 }, { "epoch": 0.3242660720921591, "grad_norm": 1.976933302923889, "learning_rate": 1.968189257311354e-05, "loss": 0.7776, "step": 4363 }, { "epoch": 0.3243403939056113, "grad_norm": 1.6420805287665086, "learning_rate": 1.9681691774143814e-05, "loss": 0.7714, "step": 4364 }, { "epoch": 0.32441471571906355, "grad_norm": 2.1300588565790193, "learning_rate": 1.968149091284386e-05, "loss": 0.9679, "step": 4365 }, { "epoch": 0.3244890375325158, "grad_norm": 2.3066841651541314, "learning_rate": 1.9681289989214967e-05, "loss": 0.9549, "step": 4366 }, { "epoch": 0.324563359345968, "grad_norm": 3.6039466755081704, "learning_rate": 1.9681089003258434e-05, "loss": 0.7074, "step": 4367 }, { "epoch": 0.32463768115942027, "grad_norm": 2.120017719577645, "learning_rate": 1.968088795497555e-05, "loss": 0.8768, "step": 4368 }, { "epoch": 0.32471200297287256, "grad_norm": 2.143287938181428, "learning_rate": 1.968068684436761e-05, "loss": 0.8804, "step": 4369 }, { "epoch": 0.3247863247863248, "grad_norm": 2.590120953100039, "learning_rate": 1.9680485671435914e-05, "loss": 0.8155, "step": 4370 }, { "epoch": 0.32486064659977704, "grad_norm": 3.9014127916583408, "learning_rate": 1.9680284436181752e-05, "loss": 0.9403, "step": 4371 }, { "epoch": 0.3249349684132293, "grad_norm": 1.9423801192246641, "learning_rate": 1.9680083138606423e-05, "loss": 0.6397, "step": 4372 }, { "epoch": 0.3250092902266815, "grad_norm": 2.6263498397005187, "learning_rate": 1.9679881778711218e-05, "loss": 0.9211, "step": 4373 }, { "epoch": 0.3250836120401338, "grad_norm": 2.179870559092577, "learning_rate": 1.9679680356497437e-05, "loss": 1.0157, "step": 4374 }, { "epoch": 0.32515793385358605, "grad_norm": 2.3358317525118477, "learning_rate": 1.967947887196638e-05, "loss": 0.9198, "step": 4375 }, { "epoch": 0.3252322556670383, "grad_norm": 2.1151145012722155, "learning_rate": 1.9679277325119336e-05, "loss": 0.8624, "step": 4376 }, { "epoch": 0.3253065774804905, "grad_norm": 2.476333143888995, "learning_rate": 1.9679075715957608e-05, "loss": 1.0259, "step": 4377 }, { "epoch": 0.32538089929394276, "grad_norm": 1.8843132555953042, "learning_rate": 1.9678874044482492e-05, "loss": 0.9203, "step": 4378 }, { "epoch": 0.325455221107395, "grad_norm": 2.1172653408633435, "learning_rate": 1.967867231069529e-05, "loss": 0.9617, "step": 4379 }, { "epoch": 0.3255295429208473, "grad_norm": 2.20998414396906, "learning_rate": 1.9678470514597296e-05, "loss": 1.1239, "step": 4380 }, { "epoch": 0.32560386473429953, "grad_norm": 3.5827736026965544, "learning_rate": 1.9678268656189807e-05, "loss": 0.6902, "step": 4381 }, { "epoch": 0.32567818654775177, "grad_norm": 2.530551491030466, "learning_rate": 1.967806673547413e-05, "loss": 0.9498, "step": 4382 }, { "epoch": 0.325752508361204, "grad_norm": 3.3187479024536564, "learning_rate": 1.9677864752451563e-05, "loss": 0.8147, "step": 4383 }, { "epoch": 0.32582683017465625, "grad_norm": 3.3330149444730597, "learning_rate": 1.9677662707123405e-05, "loss": 0.833, "step": 4384 }, { "epoch": 0.3259011519881085, "grad_norm": 2.491010905901585, "learning_rate": 1.9677460599490953e-05, "loss": 0.8445, "step": 4385 }, { "epoch": 0.3259754738015608, "grad_norm": 1.9806700789577292, "learning_rate": 1.9677258429555514e-05, "loss": 0.7957, "step": 4386 }, { "epoch": 0.326049795615013, "grad_norm": 1.8462579104439196, "learning_rate": 1.967705619731839e-05, "loss": 0.8547, "step": 4387 }, { "epoch": 0.32612411742846525, "grad_norm": 2.1452599338902307, "learning_rate": 1.9676853902780877e-05, "loss": 0.9828, "step": 4388 }, { "epoch": 0.3261984392419175, "grad_norm": 2.475003669751965, "learning_rate": 1.967665154594428e-05, "loss": 0.7794, "step": 4389 }, { "epoch": 0.32627276105536973, "grad_norm": 2.451354838488815, "learning_rate": 1.9676449126809903e-05, "loss": 1.0237, "step": 4390 }, { "epoch": 0.326347082868822, "grad_norm": 2.1725594548034617, "learning_rate": 1.967624664537905e-05, "loss": 0.9664, "step": 4391 }, { "epoch": 0.32642140468227426, "grad_norm": 1.6998815098216735, "learning_rate": 1.9676044101653023e-05, "loss": 0.8153, "step": 4392 }, { "epoch": 0.3264957264957265, "grad_norm": 1.9415637993202666, "learning_rate": 1.9675841495633124e-05, "loss": 0.7016, "step": 4393 }, { "epoch": 0.32657004830917874, "grad_norm": 2.1501586536961352, "learning_rate": 1.9675638827320663e-05, "loss": 0.7698, "step": 4394 }, { "epoch": 0.326644370122631, "grad_norm": 2.8222036541694955, "learning_rate": 1.967543609671694e-05, "loss": 1.1017, "step": 4395 }, { "epoch": 0.3267186919360832, "grad_norm": 2.0410885434548103, "learning_rate": 1.967523330382326e-05, "loss": 0.8468, "step": 4396 }, { "epoch": 0.3267930137495355, "grad_norm": 2.5153913773390677, "learning_rate": 1.9675030448640934e-05, "loss": 0.9443, "step": 4397 }, { "epoch": 0.32686733556298775, "grad_norm": 2.242416745158364, "learning_rate": 1.9674827531171258e-05, "loss": 0.8345, "step": 4398 }, { "epoch": 0.32694165737644, "grad_norm": 1.9646848593191377, "learning_rate": 1.967462455141555e-05, "loss": 0.8606, "step": 4399 }, { "epoch": 0.3270159791898922, "grad_norm": 2.4714531139375393, "learning_rate": 1.9674421509375108e-05, "loss": 0.8727, "step": 4400 }, { "epoch": 0.32709030100334446, "grad_norm": 1.9823657497932674, "learning_rate": 1.967421840505124e-05, "loss": 0.9591, "step": 4401 }, { "epoch": 0.32716462281679676, "grad_norm": 1.9715952820657685, "learning_rate": 1.967401523844526e-05, "loss": 0.8965, "step": 4402 }, { "epoch": 0.327238944630249, "grad_norm": 1.8461587222909404, "learning_rate": 1.967381200955847e-05, "loss": 0.8703, "step": 4403 }, { "epoch": 0.32731326644370123, "grad_norm": 2.222282794783189, "learning_rate": 1.967360871839218e-05, "loss": 0.936, "step": 4404 }, { "epoch": 0.3273875882571535, "grad_norm": 2.1552679189256794, "learning_rate": 1.9673405364947704e-05, "loss": 1.0125, "step": 4405 }, { "epoch": 0.3274619100706057, "grad_norm": 3.0130957209695906, "learning_rate": 1.9673201949226337e-05, "loss": 0.9642, "step": 4406 }, { "epoch": 0.32753623188405795, "grad_norm": 2.3800099454936205, "learning_rate": 1.9672998471229407e-05, "loss": 0.8418, "step": 4407 }, { "epoch": 0.32761055369751024, "grad_norm": 2.7887297937016173, "learning_rate": 1.9672794930958208e-05, "loss": 1.0158, "step": 4408 }, { "epoch": 0.3276848755109625, "grad_norm": 2.02515307023688, "learning_rate": 1.9672591328414058e-05, "loss": 0.9014, "step": 4409 }, { "epoch": 0.3277591973244147, "grad_norm": 2.0933019074579997, "learning_rate": 1.967238766359827e-05, "loss": 0.8231, "step": 4410 }, { "epoch": 0.32783351913786696, "grad_norm": 2.077420112043497, "learning_rate": 1.9672183936512147e-05, "loss": 0.8894, "step": 4411 }, { "epoch": 0.3279078409513192, "grad_norm": 2.2552364698895317, "learning_rate": 1.9671980147157012e-05, "loss": 0.9008, "step": 4412 }, { "epoch": 0.32798216276477143, "grad_norm": 1.9754574759352328, "learning_rate": 1.9671776295534167e-05, "loss": 0.713, "step": 4413 }, { "epoch": 0.32805648457822373, "grad_norm": 1.7301384733999063, "learning_rate": 1.967157238164493e-05, "loss": 0.5808, "step": 4414 }, { "epoch": 0.32813080639167597, "grad_norm": 2.9324785565001097, "learning_rate": 1.9671368405490608e-05, "loss": 0.8315, "step": 4415 }, { "epoch": 0.3282051282051282, "grad_norm": 2.1538953661497744, "learning_rate": 1.9671164367072523e-05, "loss": 0.9827, "step": 4416 }, { "epoch": 0.32827945001858044, "grad_norm": 2.460919372239774, "learning_rate": 1.967096026639198e-05, "loss": 0.8013, "step": 4417 }, { "epoch": 0.3283537718320327, "grad_norm": 2.805553809289826, "learning_rate": 1.9670756103450297e-05, "loss": 0.9271, "step": 4418 }, { "epoch": 0.328428093645485, "grad_norm": 3.3338038593157386, "learning_rate": 1.967055187824879e-05, "loss": 0.8971, "step": 4419 }, { "epoch": 0.3285024154589372, "grad_norm": 2.806836742153041, "learning_rate": 1.9670347590788773e-05, "loss": 1.0125, "step": 4420 }, { "epoch": 0.32857673727238945, "grad_norm": 3.6608265264666344, "learning_rate": 1.9670143241071554e-05, "loss": 0.9775, "step": 4421 }, { "epoch": 0.3286510590858417, "grad_norm": 2.21475342700255, "learning_rate": 1.9669938829098458e-05, "loss": 0.8914, "step": 4422 }, { "epoch": 0.32872538089929393, "grad_norm": 2.253481364023302, "learning_rate": 1.9669734354870798e-05, "loss": 0.8606, "step": 4423 }, { "epoch": 0.32879970271274617, "grad_norm": 1.8438989931950478, "learning_rate": 1.966952981838989e-05, "loss": 0.6024, "step": 4424 }, { "epoch": 0.32887402452619846, "grad_norm": 3.4312748954373817, "learning_rate": 1.966932521965705e-05, "loss": 1.0544, "step": 4425 }, { "epoch": 0.3289483463396507, "grad_norm": 1.9994582851303622, "learning_rate": 1.9669120558673597e-05, "loss": 0.8297, "step": 4426 }, { "epoch": 0.32902266815310294, "grad_norm": 2.0517745448943256, "learning_rate": 1.9668915835440846e-05, "loss": 0.8043, "step": 4427 }, { "epoch": 0.3290969899665552, "grad_norm": 2.079022505439979, "learning_rate": 1.9668711049960118e-05, "loss": 1.0522, "step": 4428 }, { "epoch": 0.3291713117800074, "grad_norm": 5.3781911548468395, "learning_rate": 1.9668506202232727e-05, "loss": 0.8602, "step": 4429 }, { "epoch": 0.3292456335934597, "grad_norm": 2.625587732183939, "learning_rate": 1.9668301292260003e-05, "loss": 0.5959, "step": 4430 }, { "epoch": 0.32931995540691195, "grad_norm": 2.5439052771143764, "learning_rate": 1.9668096320043245e-05, "loss": 0.9849, "step": 4431 }, { "epoch": 0.3293942772203642, "grad_norm": 2.505026313186998, "learning_rate": 1.9667891285583793e-05, "loss": 1.0962, "step": 4432 }, { "epoch": 0.3294685990338164, "grad_norm": 2.2460378246331847, "learning_rate": 1.9667686188882957e-05, "loss": 1.0879, "step": 4433 }, { "epoch": 0.32954292084726866, "grad_norm": 2.643417130532382, "learning_rate": 1.9667481029942058e-05, "loss": 0.8868, "step": 4434 }, { "epoch": 0.3296172426607209, "grad_norm": 2.094435118108416, "learning_rate": 1.9667275808762415e-05, "loss": 0.6903, "step": 4435 }, { "epoch": 0.3296915644741732, "grad_norm": 1.96442191217333, "learning_rate": 1.9667070525345356e-05, "loss": 0.9302, "step": 4436 }, { "epoch": 0.32976588628762543, "grad_norm": 2.111772442468077, "learning_rate": 1.96668651796922e-05, "loss": 0.8624, "step": 4437 }, { "epoch": 0.32984020810107767, "grad_norm": 1.6830305754873864, "learning_rate": 1.9666659771804265e-05, "loss": 0.6175, "step": 4438 }, { "epoch": 0.3299145299145299, "grad_norm": 2.0233904898808968, "learning_rate": 1.9666454301682873e-05, "loss": 0.8738, "step": 4439 }, { "epoch": 0.32998885172798215, "grad_norm": 2.0788628763860872, "learning_rate": 1.9666248769329353e-05, "loss": 0.9977, "step": 4440 }, { "epoch": 0.3300631735414344, "grad_norm": 2.4984070728407346, "learning_rate": 1.9666043174745023e-05, "loss": 0.9445, "step": 4441 }, { "epoch": 0.3301374953548867, "grad_norm": 2.4928363175679134, "learning_rate": 1.9665837517931213e-05, "loss": 0.7684, "step": 4442 }, { "epoch": 0.3302118171683389, "grad_norm": 2.583760950280332, "learning_rate": 1.9665631798889236e-05, "loss": 0.6788, "step": 4443 }, { "epoch": 0.33028613898179116, "grad_norm": 2.058033644650947, "learning_rate": 1.966542601762043e-05, "loss": 0.6867, "step": 4444 }, { "epoch": 0.3303604607952434, "grad_norm": 1.770285696458966, "learning_rate": 1.966522017412611e-05, "loss": 0.7764, "step": 4445 }, { "epoch": 0.33043478260869563, "grad_norm": 2.7187144578594826, "learning_rate": 1.9665014268407604e-05, "loss": 0.9487, "step": 4446 }, { "epoch": 0.3305091044221479, "grad_norm": 1.9369926615220732, "learning_rate": 1.9664808300466233e-05, "loss": 0.7007, "step": 4447 }, { "epoch": 0.33058342623560016, "grad_norm": 2.0463673392942128, "learning_rate": 1.9664602270303335e-05, "loss": 0.894, "step": 4448 }, { "epoch": 0.3306577480490524, "grad_norm": 3.2160147659074045, "learning_rate": 1.9664396177920223e-05, "loss": 1.1046, "step": 4449 }, { "epoch": 0.33073206986250464, "grad_norm": 2.2153882951302473, "learning_rate": 1.9664190023318234e-05, "loss": 0.8128, "step": 4450 }, { "epoch": 0.3308063916759569, "grad_norm": 2.696065360863292, "learning_rate": 1.966398380649869e-05, "loss": 1.0335, "step": 4451 }, { "epoch": 0.3308807134894091, "grad_norm": 3.091051760003069, "learning_rate": 1.966377752746292e-05, "loss": 0.9447, "step": 4452 }, { "epoch": 0.3309550353028614, "grad_norm": 2.7077486726573956, "learning_rate": 1.9663571186212248e-05, "loss": 0.8471, "step": 4453 }, { "epoch": 0.33102935711631365, "grad_norm": 1.9839483768956323, "learning_rate": 1.9663364782748012e-05, "loss": 0.8657, "step": 4454 }, { "epoch": 0.3311036789297659, "grad_norm": 2.138308908415277, "learning_rate": 1.966315831707153e-05, "loss": 0.642, "step": 4455 }, { "epoch": 0.3311780007432181, "grad_norm": 2.815179900225431, "learning_rate": 1.966295178918414e-05, "loss": 0.8692, "step": 4456 }, { "epoch": 0.33125232255667036, "grad_norm": 2.5003484257804445, "learning_rate": 1.9662745199087166e-05, "loss": 0.9372, "step": 4457 }, { "epoch": 0.33132664437012266, "grad_norm": 2.471378747576696, "learning_rate": 1.966253854678194e-05, "loss": 0.9906, "step": 4458 }, { "epoch": 0.3314009661835749, "grad_norm": 2.2360803220274805, "learning_rate": 1.966233183226979e-05, "loss": 0.9215, "step": 4459 }, { "epoch": 0.33147528799702713, "grad_norm": 5.26557354435341, "learning_rate": 1.9662125055552054e-05, "loss": 0.9022, "step": 4460 }, { "epoch": 0.3315496098104794, "grad_norm": 2.7341140852414303, "learning_rate": 1.9661918216630053e-05, "loss": 0.9686, "step": 4461 }, { "epoch": 0.3316239316239316, "grad_norm": 2.053168587711442, "learning_rate": 1.9661711315505125e-05, "loss": 1.0477, "step": 4462 }, { "epoch": 0.33169825343738385, "grad_norm": 2.236234449654846, "learning_rate": 1.9661504352178606e-05, "loss": 0.842, "step": 4463 }, { "epoch": 0.33177257525083614, "grad_norm": 2.1729131142459996, "learning_rate": 1.9661297326651815e-05, "loss": 0.8935, "step": 4464 }, { "epoch": 0.3318468970642884, "grad_norm": 2.4039902937476874, "learning_rate": 1.96610902389261e-05, "loss": 0.8277, "step": 4465 }, { "epoch": 0.3319212188777406, "grad_norm": 2.507469687160043, "learning_rate": 1.9660883089002783e-05, "loss": 0.8874, "step": 4466 }, { "epoch": 0.33199554069119286, "grad_norm": 2.1802626953324746, "learning_rate": 1.9660675876883206e-05, "loss": 0.8192, "step": 4467 }, { "epoch": 0.3320698625046451, "grad_norm": 1.9213823150444433, "learning_rate": 1.9660468602568693e-05, "loss": 0.8289, "step": 4468 }, { "epoch": 0.33214418431809734, "grad_norm": 2.05900265785713, "learning_rate": 1.966026126606059e-05, "loss": 1.0094, "step": 4469 }, { "epoch": 0.33221850613154963, "grad_norm": 2.911633276985434, "learning_rate": 1.9660053867360223e-05, "loss": 0.8758, "step": 4470 }, { "epoch": 0.33229282794500187, "grad_norm": 2.270691418214574, "learning_rate": 1.9659846406468933e-05, "loss": 0.9434, "step": 4471 }, { "epoch": 0.3323671497584541, "grad_norm": 2.5988320185239164, "learning_rate": 1.9659638883388053e-05, "loss": 0.9579, "step": 4472 }, { "epoch": 0.33244147157190634, "grad_norm": 1.8949639003358205, "learning_rate": 1.9659431298118916e-05, "loss": 0.7005, "step": 4473 }, { "epoch": 0.3325157933853586, "grad_norm": 2.6637607772545486, "learning_rate": 1.9659223650662862e-05, "loss": 0.7713, "step": 4474 }, { "epoch": 0.3325901151988109, "grad_norm": 2.1560453885512123, "learning_rate": 1.9659015941021228e-05, "loss": 0.8286, "step": 4475 }, { "epoch": 0.3326644370122631, "grad_norm": 2.0158725784157814, "learning_rate": 1.965880816919535e-05, "loss": 0.9239, "step": 4476 }, { "epoch": 0.33273875882571535, "grad_norm": 2.29756184909519, "learning_rate": 1.9658600335186572e-05, "loss": 1.0467, "step": 4477 }, { "epoch": 0.3328130806391676, "grad_norm": 2.1386387690108415, "learning_rate": 1.965839243899622e-05, "loss": 0.7644, "step": 4478 }, { "epoch": 0.33288740245261983, "grad_norm": 2.0811121534287014, "learning_rate": 1.965818448062564e-05, "loss": 0.8118, "step": 4479 }, { "epoch": 0.33296172426607207, "grad_norm": 1.6560773352129063, "learning_rate": 1.965797646007617e-05, "loss": 0.7712, "step": 4480 }, { "epoch": 0.33303604607952436, "grad_norm": 2.3142629953556555, "learning_rate": 1.9657768377349145e-05, "loss": 0.8501, "step": 4481 }, { "epoch": 0.3331103678929766, "grad_norm": 2.690067700433865, "learning_rate": 1.9657560232445912e-05, "loss": 0.9126, "step": 4482 }, { "epoch": 0.33318468970642884, "grad_norm": 1.9638704237843654, "learning_rate": 1.9657352025367808e-05, "loss": 0.8039, "step": 4483 }, { "epoch": 0.3332590115198811, "grad_norm": 2.374411873721217, "learning_rate": 1.9657143756116174e-05, "loss": 1.0013, "step": 4484 }, { "epoch": 0.3333333333333333, "grad_norm": 2.5078277575477714, "learning_rate": 1.9656935424692346e-05, "loss": 0.8963, "step": 4485 }, { "epoch": 0.3334076551467856, "grad_norm": 2.709806085570329, "learning_rate": 1.9656727031097668e-05, "loss": 1.1362, "step": 4486 }, { "epoch": 0.33348197696023785, "grad_norm": 2.2626382771622167, "learning_rate": 1.9656518575333485e-05, "loss": 0.9876, "step": 4487 }, { "epoch": 0.3335562987736901, "grad_norm": 2.3990772031328627, "learning_rate": 1.9656310057401137e-05, "loss": 0.9907, "step": 4488 }, { "epoch": 0.3336306205871423, "grad_norm": 2.194856485068922, "learning_rate": 1.9656101477301964e-05, "loss": 0.7647, "step": 4489 }, { "epoch": 0.33370494240059456, "grad_norm": 1.9794045314684265, "learning_rate": 1.9655892835037308e-05, "loss": 0.7218, "step": 4490 }, { "epoch": 0.3337792642140468, "grad_norm": 2.1402169910228395, "learning_rate": 1.965568413060852e-05, "loss": 0.8444, "step": 4491 }, { "epoch": 0.3338535860274991, "grad_norm": 1.983019169256435, "learning_rate": 1.965547536401694e-05, "loss": 0.7335, "step": 4492 }, { "epoch": 0.33392790784095133, "grad_norm": 2.1193398258331286, "learning_rate": 1.9655266535263906e-05, "loss": 0.9006, "step": 4493 }, { "epoch": 0.33400222965440357, "grad_norm": 2.098870023238764, "learning_rate": 1.9655057644350767e-05, "loss": 1.14, "step": 4494 }, { "epoch": 0.3340765514678558, "grad_norm": 2.580039073179136, "learning_rate": 1.965484869127887e-05, "loss": 1.0441, "step": 4495 }, { "epoch": 0.33415087328130805, "grad_norm": 2.53591478737437, "learning_rate": 1.9654639676049555e-05, "loss": 1.0254, "step": 4496 }, { "epoch": 0.3342251950947603, "grad_norm": 1.7470800987666808, "learning_rate": 1.9654430598664173e-05, "loss": 0.8145, "step": 4497 }, { "epoch": 0.3342995169082126, "grad_norm": 2.08403512397392, "learning_rate": 1.965422145912407e-05, "loss": 0.8622, "step": 4498 }, { "epoch": 0.3343738387216648, "grad_norm": 2.3334622973542687, "learning_rate": 1.9654012257430587e-05, "loss": 0.9603, "step": 4499 }, { "epoch": 0.33444816053511706, "grad_norm": 3.078631589546779, "learning_rate": 1.9653802993585077e-05, "loss": 1.0724, "step": 4500 }, { "epoch": 0.3345224823485693, "grad_norm": 2.372368204479593, "learning_rate": 1.965359366758888e-05, "loss": 0.8965, "step": 4501 }, { "epoch": 0.33459680416202153, "grad_norm": 1.700345420222983, "learning_rate": 1.9653384279443352e-05, "loss": 0.7124, "step": 4502 }, { "epoch": 0.3346711259754738, "grad_norm": 2.558928736589733, "learning_rate": 1.9653174829149834e-05, "loss": 1.1306, "step": 4503 }, { "epoch": 0.33474544778892606, "grad_norm": 2.0420552797095586, "learning_rate": 1.965296531670968e-05, "loss": 0.9271, "step": 4504 }, { "epoch": 0.3348197696023783, "grad_norm": 1.9080455483805205, "learning_rate": 1.9652755742124233e-05, "loss": 0.666, "step": 4505 }, { "epoch": 0.33489409141583054, "grad_norm": 2.31832627216738, "learning_rate": 1.9652546105394847e-05, "loss": 0.6719, "step": 4506 }, { "epoch": 0.3349684132292828, "grad_norm": 2.431717466132714, "learning_rate": 1.965233640652287e-05, "loss": 0.7348, "step": 4507 }, { "epoch": 0.335042735042735, "grad_norm": 1.8174512317107643, "learning_rate": 1.9652126645509652e-05, "loss": 0.8456, "step": 4508 }, { "epoch": 0.3351170568561873, "grad_norm": 2.4096002363351094, "learning_rate": 1.9651916822356544e-05, "loss": 0.9439, "step": 4509 }, { "epoch": 0.33519137866963955, "grad_norm": 2.3972432916554003, "learning_rate": 1.9651706937064892e-05, "loss": 0.8258, "step": 4510 }, { "epoch": 0.3352657004830918, "grad_norm": 2.3597376656443054, "learning_rate": 1.9651496989636055e-05, "loss": 0.9498, "step": 4511 }, { "epoch": 0.335340022296544, "grad_norm": 2.3270220103905417, "learning_rate": 1.9651286980071385e-05, "loss": 0.7932, "step": 4512 }, { "epoch": 0.33541434410999627, "grad_norm": 2.371608107889797, "learning_rate": 1.9651076908372227e-05, "loss": 0.8896, "step": 4513 }, { "epoch": 0.33548866592344856, "grad_norm": 2.2572022305804094, "learning_rate": 1.9650866774539933e-05, "loss": 1.0737, "step": 4514 }, { "epoch": 0.3355629877369008, "grad_norm": 2.1967456370375618, "learning_rate": 1.9650656578575864e-05, "loss": 0.9175, "step": 4515 }, { "epoch": 0.33563730955035304, "grad_norm": 2.17714624874017, "learning_rate": 1.965044632048137e-05, "loss": 0.7021, "step": 4516 }, { "epoch": 0.3357116313638053, "grad_norm": 2.455096132660869, "learning_rate": 1.96502360002578e-05, "loss": 0.8682, "step": 4517 }, { "epoch": 0.3357859531772575, "grad_norm": 2.188001395005615, "learning_rate": 1.965002561790651e-05, "loss": 0.8318, "step": 4518 }, { "epoch": 0.33586027499070975, "grad_norm": 2.5379242161927826, "learning_rate": 1.9649815173428858e-05, "loss": 0.8576, "step": 4519 }, { "epoch": 0.33593459680416204, "grad_norm": 2.2241015159987914, "learning_rate": 1.9649604666826196e-05, "loss": 1.008, "step": 4520 }, { "epoch": 0.3360089186176143, "grad_norm": 1.9280561120710011, "learning_rate": 1.9649394098099882e-05, "loss": 0.7484, "step": 4521 }, { "epoch": 0.3360832404310665, "grad_norm": 1.885553797337589, "learning_rate": 1.9649183467251267e-05, "loss": 0.7265, "step": 4522 }, { "epoch": 0.33615756224451876, "grad_norm": 2.392934243779046, "learning_rate": 1.964897277428171e-05, "loss": 0.9069, "step": 4523 }, { "epoch": 0.336231884057971, "grad_norm": 2.287091941749542, "learning_rate": 1.9648762019192567e-05, "loss": 0.8875, "step": 4524 }, { "epoch": 0.33630620587142324, "grad_norm": 2.219218807086659, "learning_rate": 1.9648551201985194e-05, "loss": 0.9388, "step": 4525 }, { "epoch": 0.33638052768487553, "grad_norm": 2.1990170665966455, "learning_rate": 1.964834032266095e-05, "loss": 0.9099, "step": 4526 }, { "epoch": 0.33645484949832777, "grad_norm": 3.0732642282675684, "learning_rate": 1.9648129381221193e-05, "loss": 0.9854, "step": 4527 }, { "epoch": 0.33652917131178, "grad_norm": 2.296051382285332, "learning_rate": 1.9647918377667276e-05, "loss": 1.0292, "step": 4528 }, { "epoch": 0.33660349312523224, "grad_norm": 2.4423918286987494, "learning_rate": 1.9647707312000562e-05, "loss": 0.9488, "step": 4529 }, { "epoch": 0.3366778149386845, "grad_norm": 2.523851576130055, "learning_rate": 1.9647496184222412e-05, "loss": 0.7425, "step": 4530 }, { "epoch": 0.3367521367521368, "grad_norm": 2.7928527873205455, "learning_rate": 1.964728499433418e-05, "loss": 1.1228, "step": 4531 }, { "epoch": 0.336826458565589, "grad_norm": 2.348026615647761, "learning_rate": 1.9647073742337232e-05, "loss": 0.9886, "step": 4532 }, { "epoch": 0.33690078037904125, "grad_norm": 3.1033040873997244, "learning_rate": 1.964686242823292e-05, "loss": 1.1618, "step": 4533 }, { "epoch": 0.3369751021924935, "grad_norm": 1.905257422990227, "learning_rate": 1.9646651052022604e-05, "loss": 0.841, "step": 4534 }, { "epoch": 0.33704942400594573, "grad_norm": 2.600165592716497, "learning_rate": 1.9646439613707655e-05, "loss": 0.9339, "step": 4535 }, { "epoch": 0.33712374581939797, "grad_norm": 2.109613736823123, "learning_rate": 1.9646228113289425e-05, "loss": 0.8436, "step": 4536 }, { "epoch": 0.33719806763285026, "grad_norm": 1.9768608619004286, "learning_rate": 1.964601655076928e-05, "loss": 0.8798, "step": 4537 }, { "epoch": 0.3372723894463025, "grad_norm": 2.1295440288560847, "learning_rate": 1.964580492614858e-05, "loss": 0.978, "step": 4538 }, { "epoch": 0.33734671125975474, "grad_norm": 2.2432528503980853, "learning_rate": 1.964559323942869e-05, "loss": 0.7503, "step": 4539 }, { "epoch": 0.337421033073207, "grad_norm": 2.116444518123895, "learning_rate": 1.9645381490610975e-05, "loss": 0.9201, "step": 4540 }, { "epoch": 0.3374953548866592, "grad_norm": 1.9569982194003965, "learning_rate": 1.9645169679696786e-05, "loss": 0.8482, "step": 4541 }, { "epoch": 0.3375696767001115, "grad_norm": 2.3428715258187975, "learning_rate": 1.9644957806687502e-05, "loss": 0.7966, "step": 4542 }, { "epoch": 0.33764399851356375, "grad_norm": 2.140743435155725, "learning_rate": 1.9644745871584477e-05, "loss": 0.8521, "step": 4543 }, { "epoch": 0.337718320327016, "grad_norm": 2.6039220439039465, "learning_rate": 1.9644533874389077e-05, "loss": 0.9512, "step": 4544 }, { "epoch": 0.3377926421404682, "grad_norm": 2.436669988731774, "learning_rate": 1.964432181510267e-05, "loss": 1.0319, "step": 4545 }, { "epoch": 0.33786696395392046, "grad_norm": 3.4461883592107827, "learning_rate": 1.9644109693726623e-05, "loss": 0.9415, "step": 4546 }, { "epoch": 0.3379412857673727, "grad_norm": 2.3002577300313143, "learning_rate": 1.9643897510262292e-05, "loss": 1.0082, "step": 4547 }, { "epoch": 0.338015607580825, "grad_norm": 2.1832247891323213, "learning_rate": 1.9643685264711056e-05, "loss": 0.9798, "step": 4548 }, { "epoch": 0.33808992939427723, "grad_norm": 1.9512888013466647, "learning_rate": 1.9643472957074266e-05, "loss": 0.7556, "step": 4549 }, { "epoch": 0.33816425120772947, "grad_norm": 2.0475552113017828, "learning_rate": 1.9643260587353302e-05, "loss": 0.905, "step": 4550 }, { "epoch": 0.3382385730211817, "grad_norm": 2.001013417327594, "learning_rate": 1.964304815554953e-05, "loss": 0.7973, "step": 4551 }, { "epoch": 0.33831289483463395, "grad_norm": 2.082404023075128, "learning_rate": 1.9642835661664313e-05, "loss": 0.8304, "step": 4552 }, { "epoch": 0.3383872166480862, "grad_norm": 2.0607298523507818, "learning_rate": 1.9642623105699016e-05, "loss": 0.5122, "step": 4553 }, { "epoch": 0.3384615384615385, "grad_norm": 3.2544877960762486, "learning_rate": 1.9642410487655015e-05, "loss": 0.8422, "step": 4554 }, { "epoch": 0.3385358602749907, "grad_norm": 1.999148911714419, "learning_rate": 1.9642197807533675e-05, "loss": 0.7634, "step": 4555 }, { "epoch": 0.33861018208844296, "grad_norm": 3.283801376439876, "learning_rate": 1.9641985065336366e-05, "loss": 0.7587, "step": 4556 }, { "epoch": 0.3386845039018952, "grad_norm": 2.5489440254652544, "learning_rate": 1.9641772261064457e-05, "loss": 0.8866, "step": 4557 }, { "epoch": 0.33875882571534743, "grad_norm": 1.8835260826427191, "learning_rate": 1.9641559394719318e-05, "loss": 0.8341, "step": 4558 }, { "epoch": 0.3388331475287997, "grad_norm": 2.3070724812874057, "learning_rate": 1.9641346466302324e-05, "loss": 1.0605, "step": 4559 }, { "epoch": 0.33890746934225197, "grad_norm": 2.0748928975517806, "learning_rate": 1.9641133475814837e-05, "loss": 0.8051, "step": 4560 }, { "epoch": 0.3389817911557042, "grad_norm": 2.3658404384512464, "learning_rate": 1.9640920423258235e-05, "loss": 0.8271, "step": 4561 }, { "epoch": 0.33905611296915644, "grad_norm": 2.4052426032084036, "learning_rate": 1.9640707308633885e-05, "loss": 0.8542, "step": 4562 }, { "epoch": 0.3391304347826087, "grad_norm": 2.883571783009841, "learning_rate": 1.9640494131943165e-05, "loss": 1.0428, "step": 4563 }, { "epoch": 0.3392047565960609, "grad_norm": 1.654533615874197, "learning_rate": 1.964028089318744e-05, "loss": 0.8316, "step": 4564 }, { "epoch": 0.3392790784095132, "grad_norm": 2.2053696437164434, "learning_rate": 1.9640067592368092e-05, "loss": 0.9786, "step": 4565 }, { "epoch": 0.33935340022296545, "grad_norm": 2.1089104052163035, "learning_rate": 1.9639854229486483e-05, "loss": 0.9892, "step": 4566 }, { "epoch": 0.3394277220364177, "grad_norm": 1.9067540461062764, "learning_rate": 1.9639640804543996e-05, "loss": 0.8535, "step": 4567 }, { "epoch": 0.33950204384986993, "grad_norm": 2.5183249560697094, "learning_rate": 1.9639427317542004e-05, "loss": 0.9921, "step": 4568 }, { "epoch": 0.33957636566332217, "grad_norm": 2.330756481440806, "learning_rate": 1.9639213768481875e-05, "loss": 0.8156, "step": 4569 }, { "epoch": 0.33965068747677446, "grad_norm": 2.572745218419527, "learning_rate": 1.9639000157364992e-05, "loss": 1.0482, "step": 4570 }, { "epoch": 0.3397250092902267, "grad_norm": 2.016741483085358, "learning_rate": 1.963878648419272e-05, "loss": 0.8436, "step": 4571 }, { "epoch": 0.33979933110367894, "grad_norm": 9.683022452167078, "learning_rate": 1.9638572748966446e-05, "loss": 0.9075, "step": 4572 }, { "epoch": 0.3398736529171312, "grad_norm": 2.069141790313588, "learning_rate": 1.963835895168754e-05, "loss": 0.9802, "step": 4573 }, { "epoch": 0.3399479747305834, "grad_norm": 1.7678446185250891, "learning_rate": 1.963814509235738e-05, "loss": 0.6882, "step": 4574 }, { "epoch": 0.34002229654403565, "grad_norm": 2.1840211592881453, "learning_rate": 1.963793117097734e-05, "loss": 0.8317, "step": 4575 }, { "epoch": 0.34009661835748795, "grad_norm": 1.9150755558615862, "learning_rate": 1.96377171875488e-05, "loss": 0.6835, "step": 4576 }, { "epoch": 0.3401709401709402, "grad_norm": 1.7926233405263108, "learning_rate": 1.9637503142073132e-05, "loss": 0.6952, "step": 4577 }, { "epoch": 0.3402452619843924, "grad_norm": 2.0463292726036313, "learning_rate": 1.9637289034551723e-05, "loss": 1.1351, "step": 4578 }, { "epoch": 0.34031958379784466, "grad_norm": 2.0478662758613946, "learning_rate": 1.9637074864985947e-05, "loss": 0.8204, "step": 4579 }, { "epoch": 0.3403939056112969, "grad_norm": 3.2429887077848805, "learning_rate": 1.9636860633377183e-05, "loss": 0.7571, "step": 4580 }, { "epoch": 0.34046822742474914, "grad_norm": 2.3126511407583363, "learning_rate": 1.9636646339726813e-05, "loss": 0.8778, "step": 4581 }, { "epoch": 0.34054254923820143, "grad_norm": 2.0487169245619903, "learning_rate": 1.9636431984036212e-05, "loss": 0.9257, "step": 4582 }, { "epoch": 0.34061687105165367, "grad_norm": 2.495380919955084, "learning_rate": 1.963621756630676e-05, "loss": 0.8593, "step": 4583 }, { "epoch": 0.3406911928651059, "grad_norm": 2.2005241704738303, "learning_rate": 1.9636003086539843e-05, "loss": 0.8826, "step": 4584 }, { "epoch": 0.34076551467855815, "grad_norm": 1.9227323350189975, "learning_rate": 1.9635788544736836e-05, "loss": 0.7661, "step": 4585 }, { "epoch": 0.3408398364920104, "grad_norm": 2.3537133647568504, "learning_rate": 1.963557394089912e-05, "loss": 0.718, "step": 4586 }, { "epoch": 0.3409141583054627, "grad_norm": 2.3090043465560663, "learning_rate": 1.9635359275028084e-05, "loss": 0.9515, "step": 4587 }, { "epoch": 0.3409884801189149, "grad_norm": 2.882061980104425, "learning_rate": 1.96351445471251e-05, "loss": 0.9675, "step": 4588 }, { "epoch": 0.34106280193236715, "grad_norm": 2.3477243681940547, "learning_rate": 1.9634929757191557e-05, "loss": 1.1004, "step": 4589 }, { "epoch": 0.3411371237458194, "grad_norm": 2.2793924333977333, "learning_rate": 1.9634714905228836e-05, "loss": 0.9375, "step": 4590 }, { "epoch": 0.34121144555927163, "grad_norm": 3.0805966253923733, "learning_rate": 1.963449999123832e-05, "loss": 0.9563, "step": 4591 }, { "epoch": 0.34128576737272387, "grad_norm": 2.3466277268406692, "learning_rate": 1.9634285015221394e-05, "loss": 0.9362, "step": 4592 }, { "epoch": 0.34136008918617616, "grad_norm": 2.255364253924726, "learning_rate": 1.9634069977179445e-05, "loss": 1.0732, "step": 4593 }, { "epoch": 0.3414344109996284, "grad_norm": 2.0354220729207695, "learning_rate": 1.9633854877113848e-05, "loss": 0.9126, "step": 4594 }, { "epoch": 0.34150873281308064, "grad_norm": 2.2731828120511937, "learning_rate": 1.963363971502599e-05, "loss": 0.982, "step": 4595 }, { "epoch": 0.3415830546265329, "grad_norm": 2.2548807763285725, "learning_rate": 1.9633424490917267e-05, "loss": 0.9459, "step": 4596 }, { "epoch": 0.3416573764399851, "grad_norm": 2.5386958314377828, "learning_rate": 1.963320920478905e-05, "loss": 0.7972, "step": 4597 }, { "epoch": 0.3417316982534374, "grad_norm": 3.07433738896344, "learning_rate": 1.963299385664274e-05, "loss": 0.9005, "step": 4598 }, { "epoch": 0.34180602006688965, "grad_norm": 2.518236096389047, "learning_rate": 1.963277844647971e-05, "loss": 0.7134, "step": 4599 }, { "epoch": 0.3418803418803419, "grad_norm": 1.945319202337445, "learning_rate": 1.9632562974301354e-05, "loss": 0.9298, "step": 4600 }, { "epoch": 0.3419546636937941, "grad_norm": 1.8845670298036385, "learning_rate": 1.9632347440109053e-05, "loss": 0.8825, "step": 4601 }, { "epoch": 0.34202898550724636, "grad_norm": 2.6655832183728947, "learning_rate": 1.9632131843904207e-05, "loss": 1.0995, "step": 4602 }, { "epoch": 0.3421033073206986, "grad_norm": 2.3009074039026784, "learning_rate": 1.963191618568819e-05, "loss": 0.8712, "step": 4603 }, { "epoch": 0.3421776291341509, "grad_norm": 2.850949279159159, "learning_rate": 1.9631700465462396e-05, "loss": 0.833, "step": 4604 }, { "epoch": 0.34225195094760313, "grad_norm": 2.226157047557202, "learning_rate": 1.963148468322822e-05, "loss": 0.6205, "step": 4605 }, { "epoch": 0.3423262727610554, "grad_norm": 2.1303418792521316, "learning_rate": 1.9631268838987038e-05, "loss": 0.9205, "step": 4606 }, { "epoch": 0.3424005945745076, "grad_norm": 2.3355858907980545, "learning_rate": 1.963105293274025e-05, "loss": 0.9471, "step": 4607 }, { "epoch": 0.34247491638795985, "grad_norm": 2.2824737936751665, "learning_rate": 1.9630836964489245e-05, "loss": 1.0017, "step": 4608 }, { "epoch": 0.3425492382014121, "grad_norm": 11.32816107953785, "learning_rate": 1.963062093423541e-05, "loss": 0.8845, "step": 4609 }, { "epoch": 0.3426235600148644, "grad_norm": 2.046551882329639, "learning_rate": 1.9630404841980137e-05, "loss": 0.8786, "step": 4610 }, { "epoch": 0.3426978818283166, "grad_norm": 3.1860977019993433, "learning_rate": 1.9630188687724814e-05, "loss": 0.7579, "step": 4611 }, { "epoch": 0.34277220364176886, "grad_norm": 2.199997787263279, "learning_rate": 1.962997247147084e-05, "loss": 0.8464, "step": 4612 }, { "epoch": 0.3428465254552211, "grad_norm": 2.5803597390286024, "learning_rate": 1.9629756193219602e-05, "loss": 1.0261, "step": 4613 }, { "epoch": 0.34292084726867333, "grad_norm": 2.453517100695168, "learning_rate": 1.9629539852972494e-05, "loss": 0.9369, "step": 4614 }, { "epoch": 0.34299516908212563, "grad_norm": 3.1971345120523735, "learning_rate": 1.9629323450730905e-05, "loss": 1.2432, "step": 4615 }, { "epoch": 0.34306949089557787, "grad_norm": 3.263001450621055, "learning_rate": 1.9629106986496235e-05, "loss": 1.0424, "step": 4616 }, { "epoch": 0.3431438127090301, "grad_norm": 2.2353556591418924, "learning_rate": 1.9628890460269874e-05, "loss": 0.9444, "step": 4617 }, { "epoch": 0.34321813452248234, "grad_norm": 2.3422946578469066, "learning_rate": 1.9628673872053213e-05, "loss": 0.685, "step": 4618 }, { "epoch": 0.3432924563359346, "grad_norm": 2.291396110245552, "learning_rate": 1.962845722184765e-05, "loss": 0.9925, "step": 4619 }, { "epoch": 0.3433667781493868, "grad_norm": 1.9629737647397958, "learning_rate": 1.962824050965458e-05, "loss": 0.8776, "step": 4620 }, { "epoch": 0.3434410999628391, "grad_norm": 2.0164646410095575, "learning_rate": 1.9628023735475397e-05, "loss": 0.9396, "step": 4621 }, { "epoch": 0.34351542177629135, "grad_norm": 2.6105114775796587, "learning_rate": 1.9627806899311494e-05, "loss": 0.7327, "step": 4622 }, { "epoch": 0.3435897435897436, "grad_norm": 2.19053067868968, "learning_rate": 1.962759000116427e-05, "loss": 1.0942, "step": 4623 }, { "epoch": 0.34366406540319583, "grad_norm": 3.8834767053919723, "learning_rate": 1.9627373041035125e-05, "loss": 0.8618, "step": 4624 }, { "epoch": 0.34373838721664807, "grad_norm": 2.4212240756697367, "learning_rate": 1.962715601892545e-05, "loss": 0.9912, "step": 4625 }, { "epoch": 0.34381270903010036, "grad_norm": 3.0767822427002716, "learning_rate": 1.9626938934836644e-05, "loss": 0.8589, "step": 4626 }, { "epoch": 0.3438870308435526, "grad_norm": 2.813546864399551, "learning_rate": 1.9626721788770108e-05, "loss": 0.7383, "step": 4627 }, { "epoch": 0.34396135265700484, "grad_norm": 2.801183002223715, "learning_rate": 1.962650458072723e-05, "loss": 1.025, "step": 4628 }, { "epoch": 0.3440356744704571, "grad_norm": 2.0418281699809793, "learning_rate": 1.962628731070942e-05, "loss": 0.7845, "step": 4629 }, { "epoch": 0.3441099962839093, "grad_norm": 2.0271690936353455, "learning_rate": 1.9626069978718066e-05, "loss": 0.7909, "step": 4630 }, { "epoch": 0.34418431809736155, "grad_norm": 2.3755555872777285, "learning_rate": 1.962585258475458e-05, "loss": 0.8224, "step": 4631 }, { "epoch": 0.34425863991081385, "grad_norm": 2.11490779172553, "learning_rate": 1.962563512882035e-05, "loss": 0.8354, "step": 4632 }, { "epoch": 0.3443329617242661, "grad_norm": 2.490241578079228, "learning_rate": 1.9625417610916784e-05, "loss": 1.177, "step": 4633 }, { "epoch": 0.3444072835377183, "grad_norm": 4.068300952727587, "learning_rate": 1.9625200031045275e-05, "loss": 0.6611, "step": 4634 }, { "epoch": 0.34448160535117056, "grad_norm": 2.063632462863558, "learning_rate": 1.962498238920723e-05, "loss": 0.9913, "step": 4635 }, { "epoch": 0.3445559271646228, "grad_norm": 2.3701954629895274, "learning_rate": 1.962476468540405e-05, "loss": 0.8911, "step": 4636 }, { "epoch": 0.34463024897807504, "grad_norm": 5.730696439987484, "learning_rate": 1.962454691963713e-05, "loss": 1.0305, "step": 4637 }, { "epoch": 0.34470457079152733, "grad_norm": 2.4322624100368895, "learning_rate": 1.962432909190788e-05, "loss": 0.9918, "step": 4638 }, { "epoch": 0.34477889260497957, "grad_norm": 2.6868065605117244, "learning_rate": 1.9624111202217696e-05, "loss": 1.0939, "step": 4639 }, { "epoch": 0.3448532144184318, "grad_norm": 2.6428323126649786, "learning_rate": 1.9623893250567983e-05, "loss": 0.9044, "step": 4640 }, { "epoch": 0.34492753623188405, "grad_norm": 2.0309771313831284, "learning_rate": 1.9623675236960145e-05, "loss": 0.8203, "step": 4641 }, { "epoch": 0.3450018580453363, "grad_norm": 2.2711845068054126, "learning_rate": 1.9623457161395584e-05, "loss": 1.0887, "step": 4642 }, { "epoch": 0.3450761798587886, "grad_norm": 2.0660836770511626, "learning_rate": 1.962323902387571e-05, "loss": 0.7031, "step": 4643 }, { "epoch": 0.3451505016722408, "grad_norm": 2.6346672185297058, "learning_rate": 1.962302082440192e-05, "loss": 0.991, "step": 4644 }, { "epoch": 0.34522482348569306, "grad_norm": 2.2442052975906237, "learning_rate": 1.9622802562975617e-05, "loss": 0.8105, "step": 4645 }, { "epoch": 0.3452991452991453, "grad_norm": 2.561371256216669, "learning_rate": 1.9622584239598215e-05, "loss": 1.0913, "step": 4646 }, { "epoch": 0.34537346711259753, "grad_norm": 2.3803076399701655, "learning_rate": 1.9622365854271116e-05, "loss": 0.9525, "step": 4647 }, { "epoch": 0.34544778892604977, "grad_norm": 2.3346248330063797, "learning_rate": 1.962214740699572e-05, "loss": 0.7721, "step": 4648 }, { "epoch": 0.34552211073950206, "grad_norm": 2.171708520062946, "learning_rate": 1.9621928897773442e-05, "loss": 0.8579, "step": 4649 }, { "epoch": 0.3455964325529543, "grad_norm": 2.505922701303647, "learning_rate": 1.9621710326605685e-05, "loss": 0.9223, "step": 4650 }, { "epoch": 0.34567075436640654, "grad_norm": 2.53786991898941, "learning_rate": 1.9621491693493854e-05, "loss": 0.8642, "step": 4651 }, { "epoch": 0.3457450761798588, "grad_norm": 2.374255096452724, "learning_rate": 1.9621272998439363e-05, "loss": 0.9173, "step": 4652 }, { "epoch": 0.345819397993311, "grad_norm": 2.015849963875846, "learning_rate": 1.9621054241443613e-05, "loss": 0.8176, "step": 4653 }, { "epoch": 0.3458937198067633, "grad_norm": 2.647729882678171, "learning_rate": 1.9620835422508012e-05, "loss": 0.939, "step": 4654 }, { "epoch": 0.34596804162021555, "grad_norm": 3.398676770515263, "learning_rate": 1.9620616541633975e-05, "loss": 1.1225, "step": 4655 }, { "epoch": 0.3460423634336678, "grad_norm": 1.6213010942937711, "learning_rate": 1.9620397598822906e-05, "loss": 0.7846, "step": 4656 }, { "epoch": 0.34611668524712, "grad_norm": 2.279300050058933, "learning_rate": 1.962017859407622e-05, "loss": 0.9094, "step": 4657 }, { "epoch": 0.34619100706057226, "grad_norm": 3.1457842381781065, "learning_rate": 1.961995952739532e-05, "loss": 0.8755, "step": 4658 }, { "epoch": 0.3462653288740245, "grad_norm": 2.636846859958232, "learning_rate": 1.9619740398781618e-05, "loss": 1.066, "step": 4659 }, { "epoch": 0.3463396506874768, "grad_norm": 1.9146379267532327, "learning_rate": 1.9619521208236533e-05, "loss": 0.9634, "step": 4660 }, { "epoch": 0.34641397250092903, "grad_norm": 1.9556222280972098, "learning_rate": 1.9619301955761464e-05, "loss": 0.8489, "step": 4661 }, { "epoch": 0.3464882943143813, "grad_norm": 2.391141348150996, "learning_rate": 1.961908264135783e-05, "loss": 0.9231, "step": 4662 }, { "epoch": 0.3465626161278335, "grad_norm": 1.9311446776797756, "learning_rate": 1.9618863265027043e-05, "loss": 0.8095, "step": 4663 }, { "epoch": 0.34663693794128575, "grad_norm": 2.038080241231913, "learning_rate": 1.961864382677051e-05, "loss": 0.7858, "step": 4664 }, { "epoch": 0.346711259754738, "grad_norm": 2.588264552477446, "learning_rate": 1.9618424326589647e-05, "loss": 1.0995, "step": 4665 }, { "epoch": 0.3467855815681903, "grad_norm": 2.125682168375114, "learning_rate": 1.961820476448587e-05, "loss": 1.0505, "step": 4666 }, { "epoch": 0.3468599033816425, "grad_norm": 2.1368589421174167, "learning_rate": 1.9617985140460587e-05, "loss": 0.8107, "step": 4667 }, { "epoch": 0.34693422519509476, "grad_norm": 2.0792661757975357, "learning_rate": 1.9617765454515218e-05, "loss": 0.8153, "step": 4668 }, { "epoch": 0.347008547008547, "grad_norm": 2.1448782139244336, "learning_rate": 1.961754570665117e-05, "loss": 0.7766, "step": 4669 }, { "epoch": 0.34708286882199924, "grad_norm": 2.2270042504115004, "learning_rate": 1.9617325896869867e-05, "loss": 0.9549, "step": 4670 }, { "epoch": 0.34715719063545153, "grad_norm": 2.1159778489804193, "learning_rate": 1.9617106025172713e-05, "loss": 0.6987, "step": 4671 }, { "epoch": 0.34723151244890377, "grad_norm": 1.9477535557820709, "learning_rate": 1.9616886091561135e-05, "loss": 0.7582, "step": 4672 }, { "epoch": 0.347305834262356, "grad_norm": 2.2356502961453555, "learning_rate": 1.961666609603654e-05, "loss": 1.0454, "step": 4673 }, { "epoch": 0.34738015607580824, "grad_norm": 2.036538032268675, "learning_rate": 1.961644603860035e-05, "loss": 0.9731, "step": 4674 }, { "epoch": 0.3474544778892605, "grad_norm": 2.0971241005507175, "learning_rate": 1.9616225919253974e-05, "loss": 0.809, "step": 4675 }, { "epoch": 0.3475287997027127, "grad_norm": 2.2667694582089917, "learning_rate": 1.961600573799884e-05, "loss": 0.8811, "step": 4676 }, { "epoch": 0.347603121516165, "grad_norm": 1.7981047931179466, "learning_rate": 1.9615785494836358e-05, "loss": 0.644, "step": 4677 }, { "epoch": 0.34767744332961725, "grad_norm": 1.8233575716444357, "learning_rate": 1.961556518976795e-05, "loss": 0.6555, "step": 4678 }, { "epoch": 0.3477517651430695, "grad_norm": 2.1005529208296254, "learning_rate": 1.9615344822795027e-05, "loss": 0.8658, "step": 4679 }, { "epoch": 0.34782608695652173, "grad_norm": 2.0257436707109475, "learning_rate": 1.9615124393919015e-05, "loss": 0.8984, "step": 4680 }, { "epoch": 0.34790040876997397, "grad_norm": 1.7493159544831374, "learning_rate": 1.9614903903141337e-05, "loss": 0.7584, "step": 4681 }, { "epoch": 0.34797473058342626, "grad_norm": 2.360983850712496, "learning_rate": 1.96146833504634e-05, "loss": 0.9079, "step": 4682 }, { "epoch": 0.3480490523968785, "grad_norm": 2.759152328176901, "learning_rate": 1.9614462735886632e-05, "loss": 1.1136, "step": 4683 }, { "epoch": 0.34812337421033074, "grad_norm": 1.9821968952578484, "learning_rate": 1.961424205941245e-05, "loss": 0.8493, "step": 4684 }, { "epoch": 0.348197696023783, "grad_norm": 2.823859306308053, "learning_rate": 1.961402132104228e-05, "loss": 0.7394, "step": 4685 }, { "epoch": 0.3482720178372352, "grad_norm": 2.2879469931111514, "learning_rate": 1.961380052077754e-05, "loss": 0.9071, "step": 4686 }, { "epoch": 0.34834633965068745, "grad_norm": 2.044434070607403, "learning_rate": 1.9613579658619647e-05, "loss": 1.0405, "step": 4687 }, { "epoch": 0.34842066146413975, "grad_norm": 2.3364926033026645, "learning_rate": 1.961335873457003e-05, "loss": 0.6676, "step": 4688 }, { "epoch": 0.348494983277592, "grad_norm": 2.253325833195991, "learning_rate": 1.961313774863011e-05, "loss": 1.0745, "step": 4689 }, { "epoch": 0.3485693050910442, "grad_norm": 2.273112326429409, "learning_rate": 1.9612916700801305e-05, "loss": 1.0597, "step": 4690 }, { "epoch": 0.34864362690449646, "grad_norm": 2.756701812209291, "learning_rate": 1.9612695591085038e-05, "loss": 1.0228, "step": 4691 }, { "epoch": 0.3487179487179487, "grad_norm": 5.42240013479342, "learning_rate": 1.9612474419482738e-05, "loss": 0.9266, "step": 4692 }, { "epoch": 0.34879227053140094, "grad_norm": 2.195922710435357, "learning_rate": 1.961225318599583e-05, "loss": 0.7817, "step": 4693 }, { "epoch": 0.34886659234485323, "grad_norm": 2.1091594363466464, "learning_rate": 1.961203189062573e-05, "loss": 0.74, "step": 4694 }, { "epoch": 0.34894091415830547, "grad_norm": 2.4557655101405365, "learning_rate": 1.961181053337387e-05, "loss": 1.0244, "step": 4695 }, { "epoch": 0.3490152359717577, "grad_norm": 2.3999179148395458, "learning_rate": 1.9611589114241675e-05, "loss": 0.9109, "step": 4696 }, { "epoch": 0.34908955778520995, "grad_norm": 2.0685516818766647, "learning_rate": 1.9611367633230562e-05, "loss": 1.048, "step": 4697 }, { "epoch": 0.3491638795986622, "grad_norm": 2.1176892186900367, "learning_rate": 1.9611146090341968e-05, "loss": 1.0497, "step": 4698 }, { "epoch": 0.3492382014121145, "grad_norm": 1.9739648893329613, "learning_rate": 1.9610924485577312e-05, "loss": 0.8144, "step": 4699 }, { "epoch": 0.3493125232255667, "grad_norm": 2.300199939687456, "learning_rate": 1.961070281893802e-05, "loss": 1.1941, "step": 4700 }, { "epoch": 0.34938684503901896, "grad_norm": 2.2710005120240924, "learning_rate": 1.9610481090425527e-05, "loss": 0.9975, "step": 4701 }, { "epoch": 0.3494611668524712, "grad_norm": 2.792482808750765, "learning_rate": 1.9610259300041254e-05, "loss": 0.8663, "step": 4702 }, { "epoch": 0.34953548866592343, "grad_norm": 1.9545105091847408, "learning_rate": 1.961003744778663e-05, "loss": 0.8543, "step": 4703 }, { "epoch": 0.34960981047937567, "grad_norm": 2.031969303713782, "learning_rate": 1.9609815533663084e-05, "loss": 0.7652, "step": 4704 }, { "epoch": 0.34968413229282796, "grad_norm": 2.394910065012903, "learning_rate": 1.9609593557672042e-05, "loss": 0.9127, "step": 4705 }, { "epoch": 0.3497584541062802, "grad_norm": 1.8407217856815519, "learning_rate": 1.9609371519814938e-05, "loss": 0.782, "step": 4706 }, { "epoch": 0.34983277591973244, "grad_norm": 3.1390671060955673, "learning_rate": 1.9609149420093196e-05, "loss": 1.0661, "step": 4707 }, { "epoch": 0.3499070977331847, "grad_norm": 2.0312939993879713, "learning_rate": 1.960892725850825e-05, "loss": 1.0715, "step": 4708 }, { "epoch": 0.3499814195466369, "grad_norm": 1.9640469879487972, "learning_rate": 1.9608705035061527e-05, "loss": 0.9897, "step": 4709 }, { "epoch": 0.3500557413600892, "grad_norm": 1.9647673900449067, "learning_rate": 1.9608482749754464e-05, "loss": 0.9181, "step": 4710 }, { "epoch": 0.35013006317354145, "grad_norm": 2.432798701152932, "learning_rate": 1.9608260402588482e-05, "loss": 1.125, "step": 4711 }, { "epoch": 0.3502043849869937, "grad_norm": 2.4470625408238864, "learning_rate": 1.9608037993565023e-05, "loss": 0.7714, "step": 4712 }, { "epoch": 0.3502787068004459, "grad_norm": 2.094377691743243, "learning_rate": 1.9607815522685512e-05, "loss": 0.986, "step": 4713 }, { "epoch": 0.35035302861389817, "grad_norm": 2.062027404239036, "learning_rate": 1.9607592989951383e-05, "loss": 0.7621, "step": 4714 }, { "epoch": 0.3504273504273504, "grad_norm": 2.4298824385588143, "learning_rate": 1.960737039536407e-05, "loss": 0.7571, "step": 4715 }, { "epoch": 0.3505016722408027, "grad_norm": 2.096261322323073, "learning_rate": 1.9607147738925003e-05, "loss": 0.9487, "step": 4716 }, { "epoch": 0.35057599405425494, "grad_norm": 2.9235596269405892, "learning_rate": 1.960692502063562e-05, "loss": 0.8827, "step": 4717 }, { "epoch": 0.3506503158677072, "grad_norm": 2.6073191550209236, "learning_rate": 1.960670224049735e-05, "loss": 0.8907, "step": 4718 }, { "epoch": 0.3507246376811594, "grad_norm": 2.070147353943864, "learning_rate": 1.960647939851163e-05, "loss": 0.908, "step": 4719 }, { "epoch": 0.35079895949461165, "grad_norm": 2.303766859410775, "learning_rate": 1.960625649467989e-05, "loss": 1.0167, "step": 4720 }, { "epoch": 0.3508732813080639, "grad_norm": 2.954278094979792, "learning_rate": 1.9606033529003574e-05, "loss": 0.9683, "step": 4721 }, { "epoch": 0.3509476031215162, "grad_norm": 2.4018724607955857, "learning_rate": 1.9605810501484113e-05, "loss": 0.973, "step": 4722 }, { "epoch": 0.3510219249349684, "grad_norm": 2.710959276559703, "learning_rate": 1.960558741212294e-05, "loss": 1.0617, "step": 4723 }, { "epoch": 0.35109624674842066, "grad_norm": 2.4726427650487945, "learning_rate": 1.9605364260921495e-05, "loss": 0.933, "step": 4724 }, { "epoch": 0.3511705685618729, "grad_norm": 2.3330067661778195, "learning_rate": 1.9605141047881212e-05, "loss": 0.8967, "step": 4725 }, { "epoch": 0.35124489037532514, "grad_norm": 2.1650752848096753, "learning_rate": 1.9604917773003534e-05, "loss": 0.9763, "step": 4726 }, { "epoch": 0.35131921218877743, "grad_norm": 2.102642638047347, "learning_rate": 1.960469443628989e-05, "loss": 0.863, "step": 4727 }, { "epoch": 0.35139353400222967, "grad_norm": 1.9329639719295078, "learning_rate": 1.960447103774172e-05, "loss": 0.8686, "step": 4728 }, { "epoch": 0.3514678558156819, "grad_norm": 2.7957021473668413, "learning_rate": 1.9604247577360465e-05, "loss": 1.0647, "step": 4729 }, { "epoch": 0.35154217762913414, "grad_norm": 2.0518550344504214, "learning_rate": 1.9604024055147564e-05, "loss": 0.8289, "step": 4730 }, { "epoch": 0.3516164994425864, "grad_norm": 2.3037060023790747, "learning_rate": 1.9603800471104452e-05, "loss": 0.8368, "step": 4731 }, { "epoch": 0.3516908212560386, "grad_norm": 2.581427546073006, "learning_rate": 1.9603576825232573e-05, "loss": 0.8677, "step": 4732 }, { "epoch": 0.3517651430694909, "grad_norm": 2.869512166861772, "learning_rate": 1.9603353117533364e-05, "loss": 1.1427, "step": 4733 }, { "epoch": 0.35183946488294315, "grad_norm": 3.245638669451951, "learning_rate": 1.960312934800826e-05, "loss": 0.9775, "step": 4734 }, { "epoch": 0.3519137866963954, "grad_norm": 2.5433034889823447, "learning_rate": 1.960290551665872e-05, "loss": 0.8909, "step": 4735 }, { "epoch": 0.35198810850984763, "grad_norm": 2.8434571273744123, "learning_rate": 1.9602681623486162e-05, "loss": 1.0099, "step": 4736 }, { "epoch": 0.35206243032329987, "grad_norm": 2.033482874916766, "learning_rate": 1.9602457668492042e-05, "loss": 0.8835, "step": 4737 }, { "epoch": 0.35213675213675216, "grad_norm": 3.8701834572894445, "learning_rate": 1.96022336516778e-05, "loss": 0.8759, "step": 4738 }, { "epoch": 0.3522110739502044, "grad_norm": 2.389058323092328, "learning_rate": 1.9602009573044875e-05, "loss": 1.073, "step": 4739 }, { "epoch": 0.35228539576365664, "grad_norm": 1.9890920289647505, "learning_rate": 1.9601785432594707e-05, "loss": 0.8664, "step": 4740 }, { "epoch": 0.3523597175771089, "grad_norm": 2.5797116135391023, "learning_rate": 1.9601561230328742e-05, "loss": 0.636, "step": 4741 }, { "epoch": 0.3524340393905611, "grad_norm": 1.9567818742712015, "learning_rate": 1.9601336966248427e-05, "loss": 0.8847, "step": 4742 }, { "epoch": 0.35250836120401335, "grad_norm": 3.239534811470972, "learning_rate": 1.9601112640355204e-05, "loss": 0.7795, "step": 4743 }, { "epoch": 0.35258268301746565, "grad_norm": 1.9499548076305027, "learning_rate": 1.9600888252650515e-05, "loss": 0.9217, "step": 4744 }, { "epoch": 0.3526570048309179, "grad_norm": 2.0278339677426622, "learning_rate": 1.9600663803135806e-05, "loss": 0.7252, "step": 4745 }, { "epoch": 0.3527313266443701, "grad_norm": 2.6435235923522598, "learning_rate": 1.960043929181252e-05, "loss": 0.9159, "step": 4746 }, { "epoch": 0.35280564845782236, "grad_norm": 2.200041358777751, "learning_rate": 1.9600214718682103e-05, "loss": 0.9475, "step": 4747 }, { "epoch": 0.3528799702712746, "grad_norm": 2.1321094652883006, "learning_rate": 1.9599990083746007e-05, "loss": 0.9042, "step": 4748 }, { "epoch": 0.3529542920847269, "grad_norm": 1.9778736700776458, "learning_rate": 1.9599765387005667e-05, "loss": 0.7199, "step": 4749 }, { "epoch": 0.35302861389817913, "grad_norm": 1.906035710458298, "learning_rate": 1.959954062846254e-05, "loss": 0.795, "step": 4750 }, { "epoch": 0.35310293571163137, "grad_norm": 2.0628600745963546, "learning_rate": 1.9599315808118063e-05, "loss": 0.9094, "step": 4751 }, { "epoch": 0.3531772575250836, "grad_norm": 2.1006050513506196, "learning_rate": 1.9599090925973693e-05, "loss": 1.0374, "step": 4752 }, { "epoch": 0.35325157933853585, "grad_norm": 1.8250854077689898, "learning_rate": 1.9598865982030874e-05, "loss": 0.8894, "step": 4753 }, { "epoch": 0.3533259011519881, "grad_norm": 1.8941004063065339, "learning_rate": 1.959864097629105e-05, "loss": 0.7098, "step": 4754 }, { "epoch": 0.3534002229654404, "grad_norm": 2.073462358368265, "learning_rate": 1.9598415908755675e-05, "loss": 0.9089, "step": 4755 }, { "epoch": 0.3534745447788926, "grad_norm": 2.1261324675689295, "learning_rate": 1.95981907794262e-05, "loss": 0.9174, "step": 4756 }, { "epoch": 0.35354886659234486, "grad_norm": 2.024581748336293, "learning_rate": 1.959796558830407e-05, "loss": 1.0045, "step": 4757 }, { "epoch": 0.3536231884057971, "grad_norm": 2.154738726399343, "learning_rate": 1.9597740335390732e-05, "loss": 1.0912, "step": 4758 }, { "epoch": 0.35369751021924933, "grad_norm": 2.0001416529850693, "learning_rate": 1.9597515020687642e-05, "loss": 1.0209, "step": 4759 }, { "epoch": 0.35377183203270157, "grad_norm": 2.506172700175234, "learning_rate": 1.959728964419625e-05, "loss": 1.1056, "step": 4760 }, { "epoch": 0.35384615384615387, "grad_norm": 2.616849916086969, "learning_rate": 1.9597064205918003e-05, "loss": 0.9858, "step": 4761 }, { "epoch": 0.3539204756596061, "grad_norm": 1.9199692005848636, "learning_rate": 1.9596838705854353e-05, "loss": 0.8562, "step": 4762 }, { "epoch": 0.35399479747305834, "grad_norm": 3.242245896691382, "learning_rate": 1.9596613144006755e-05, "loss": 0.9354, "step": 4763 }, { "epoch": 0.3540691192865106, "grad_norm": 1.9649623548864812, "learning_rate": 1.9596387520376662e-05, "loss": 0.8394, "step": 4764 }, { "epoch": 0.3541434410999628, "grad_norm": 3.072773151882788, "learning_rate": 1.959616183496552e-05, "loss": 0.9174, "step": 4765 }, { "epoch": 0.3542177629134151, "grad_norm": 2.095235479281242, "learning_rate": 1.9595936087774787e-05, "loss": 0.9584, "step": 4766 }, { "epoch": 0.35429208472686735, "grad_norm": 2.107972015167445, "learning_rate": 1.9595710278805916e-05, "loss": 1.0294, "step": 4767 }, { "epoch": 0.3543664065403196, "grad_norm": 2.027725521695629, "learning_rate": 1.959548440806036e-05, "loss": 0.8951, "step": 4768 }, { "epoch": 0.35444072835377183, "grad_norm": 2.0987678269485865, "learning_rate": 1.9595258475539577e-05, "loss": 0.9683, "step": 4769 }, { "epoch": 0.35451505016722407, "grad_norm": 2.0799264879754005, "learning_rate": 1.9595032481245014e-05, "loss": 0.8284, "step": 4770 }, { "epoch": 0.3545893719806763, "grad_norm": 2.3637420643949163, "learning_rate": 1.959480642517813e-05, "loss": 1.0448, "step": 4771 }, { "epoch": 0.3546636937941286, "grad_norm": 2.1206586486357564, "learning_rate": 1.9594580307340378e-05, "loss": 0.8565, "step": 4772 }, { "epoch": 0.35473801560758084, "grad_norm": 3.797324720318724, "learning_rate": 1.959435412773322e-05, "loss": 0.7981, "step": 4773 }, { "epoch": 0.3548123374210331, "grad_norm": 2.1009356197769495, "learning_rate": 1.9594127886358105e-05, "loss": 0.7651, "step": 4774 }, { "epoch": 0.3548866592344853, "grad_norm": 2.4331454463155944, "learning_rate": 1.9593901583216494e-05, "loss": 0.8237, "step": 4775 }, { "epoch": 0.35496098104793755, "grad_norm": 2.3206657804728015, "learning_rate": 1.959367521830984e-05, "loss": 0.9606, "step": 4776 }, { "epoch": 0.35503530286138985, "grad_norm": 2.1872367361060263, "learning_rate": 1.9593448791639604e-05, "loss": 1.1533, "step": 4777 }, { "epoch": 0.3551096246748421, "grad_norm": 2.323860924464855, "learning_rate": 1.9593222303207242e-05, "loss": 0.8346, "step": 4778 }, { "epoch": 0.3551839464882943, "grad_norm": 2.298704028237061, "learning_rate": 1.9592995753014214e-05, "loss": 1.1238, "step": 4779 }, { "epoch": 0.35525826830174656, "grad_norm": 4.199116510912091, "learning_rate": 1.9592769141061975e-05, "loss": 0.9177, "step": 4780 }, { "epoch": 0.3553325901151988, "grad_norm": 2.627749672574687, "learning_rate": 1.9592542467351988e-05, "loss": 0.7676, "step": 4781 }, { "epoch": 0.35540691192865104, "grad_norm": 2.250514305000337, "learning_rate": 1.9592315731885713e-05, "loss": 0.9538, "step": 4782 }, { "epoch": 0.35548123374210333, "grad_norm": 2.670317072027983, "learning_rate": 1.9592088934664603e-05, "loss": 0.9723, "step": 4783 }, { "epoch": 0.35555555555555557, "grad_norm": 2.392295502400296, "learning_rate": 1.9591862075690123e-05, "loss": 1.1043, "step": 4784 }, { "epoch": 0.3556298773690078, "grad_norm": 2.0362223186217885, "learning_rate": 1.9591635154963732e-05, "loss": 0.9077, "step": 4785 }, { "epoch": 0.35570419918246005, "grad_norm": 1.9726079867676114, "learning_rate": 1.959140817248689e-05, "loss": 0.7385, "step": 4786 }, { "epoch": 0.3557785209959123, "grad_norm": 1.991007190017394, "learning_rate": 1.9591181128261064e-05, "loss": 0.7533, "step": 4787 }, { "epoch": 0.3558528428093645, "grad_norm": 2.2583211164248076, "learning_rate": 1.959095402228771e-05, "loss": 0.9967, "step": 4788 }, { "epoch": 0.3559271646228168, "grad_norm": 1.7598544853590232, "learning_rate": 1.959072685456829e-05, "loss": 0.7043, "step": 4789 }, { "epoch": 0.35600148643626905, "grad_norm": 2.274254747924878, "learning_rate": 1.9590499625104273e-05, "loss": 0.8852, "step": 4790 }, { "epoch": 0.3560758082497213, "grad_norm": 1.8729059713968585, "learning_rate": 1.9590272333897113e-05, "loss": 0.8135, "step": 4791 }, { "epoch": 0.35615013006317353, "grad_norm": 2.409283311238864, "learning_rate": 1.9590044980948274e-05, "loss": 0.9177, "step": 4792 }, { "epoch": 0.35622445187662577, "grad_norm": 2.5017513385087593, "learning_rate": 1.958981756625923e-05, "loss": 0.8059, "step": 4793 }, { "epoch": 0.35629877369007806, "grad_norm": 1.9656880381892576, "learning_rate": 1.9589590089831434e-05, "loss": 0.9877, "step": 4794 }, { "epoch": 0.3563730955035303, "grad_norm": 2.1841357440834406, "learning_rate": 1.9589362551666357e-05, "loss": 0.8393, "step": 4795 }, { "epoch": 0.35644741731698254, "grad_norm": 2.3272004760015927, "learning_rate": 1.958913495176546e-05, "loss": 1.0898, "step": 4796 }, { "epoch": 0.3565217391304348, "grad_norm": 2.314490391292195, "learning_rate": 1.9588907290130208e-05, "loss": 0.9973, "step": 4797 }, { "epoch": 0.356596060943887, "grad_norm": 2.134009586064259, "learning_rate": 1.958867956676207e-05, "loss": 1.0385, "step": 4798 }, { "epoch": 0.35667038275733925, "grad_norm": 2.510201826598352, "learning_rate": 1.958845178166251e-05, "loss": 1.0726, "step": 4799 }, { "epoch": 0.35674470457079155, "grad_norm": 2.378922454374103, "learning_rate": 1.9588223934832994e-05, "loss": 0.9738, "step": 4800 }, { "epoch": 0.3568190263842438, "grad_norm": 1.926133673784245, "learning_rate": 1.958799602627499e-05, "loss": 0.7857, "step": 4801 }, { "epoch": 0.356893348197696, "grad_norm": 2.0771144005142315, "learning_rate": 1.9587768055989967e-05, "loss": 0.9465, "step": 4802 }, { "epoch": 0.35696767001114826, "grad_norm": 1.9980347302117236, "learning_rate": 1.9587540023979392e-05, "loss": 0.9802, "step": 4803 }, { "epoch": 0.3570419918246005, "grad_norm": 2.467721460093629, "learning_rate": 1.958731193024473e-05, "loss": 0.9208, "step": 4804 }, { "epoch": 0.3571163136380528, "grad_norm": 2.2015906491504076, "learning_rate": 1.9587083774787448e-05, "loss": 1.0775, "step": 4805 }, { "epoch": 0.35719063545150503, "grad_norm": 2.9261985137726505, "learning_rate": 1.9586855557609024e-05, "loss": 1.0908, "step": 4806 }, { "epoch": 0.3572649572649573, "grad_norm": 2.277414174654716, "learning_rate": 1.958662727871092e-05, "loss": 1.0139, "step": 4807 }, { "epoch": 0.3573392790784095, "grad_norm": 2.729567528673585, "learning_rate": 1.9586398938094604e-05, "loss": 0.799, "step": 4808 }, { "epoch": 0.35741360089186175, "grad_norm": 1.63328070723597, "learning_rate": 1.958617053576155e-05, "loss": 0.7178, "step": 4809 }, { "epoch": 0.357487922705314, "grad_norm": 2.353470164957903, "learning_rate": 1.9585942071713228e-05, "loss": 1.081, "step": 4810 }, { "epoch": 0.3575622445187663, "grad_norm": 2.149474602495894, "learning_rate": 1.958571354595111e-05, "loss": 1.1914, "step": 4811 }, { "epoch": 0.3576365663322185, "grad_norm": 2.02934539731011, "learning_rate": 1.9585484958476662e-05, "loss": 0.7657, "step": 4812 }, { "epoch": 0.35771088814567076, "grad_norm": 2.743694867981307, "learning_rate": 1.958525630929136e-05, "loss": 0.978, "step": 4813 }, { "epoch": 0.357785209959123, "grad_norm": 2.17742567294887, "learning_rate": 1.9585027598396678e-05, "loss": 0.748, "step": 4814 }, { "epoch": 0.35785953177257523, "grad_norm": 1.8422734208792395, "learning_rate": 1.9584798825794084e-05, "loss": 0.9329, "step": 4815 }, { "epoch": 0.3579338535860275, "grad_norm": 2.8737113299810892, "learning_rate": 1.9584569991485052e-05, "loss": 1.1055, "step": 4816 }, { "epoch": 0.35800817539947977, "grad_norm": 1.9226623672377907, "learning_rate": 1.958434109547106e-05, "loss": 0.9236, "step": 4817 }, { "epoch": 0.358082497212932, "grad_norm": 2.3605002004987083, "learning_rate": 1.9584112137753572e-05, "loss": 1.0138, "step": 4818 }, { "epoch": 0.35815681902638424, "grad_norm": 2.42453513958114, "learning_rate": 1.9583883118334064e-05, "loss": 1.1406, "step": 4819 }, { "epoch": 0.3582311408398365, "grad_norm": 2.5457759269753724, "learning_rate": 1.958365403721402e-05, "loss": 0.8343, "step": 4820 }, { "epoch": 0.3583054626532887, "grad_norm": 1.9874344230225023, "learning_rate": 1.9583424894394908e-05, "loss": 0.9981, "step": 4821 }, { "epoch": 0.358379784466741, "grad_norm": 2.273630356873284, "learning_rate": 1.9583195689878204e-05, "loss": 0.8281, "step": 4822 }, { "epoch": 0.35845410628019325, "grad_norm": 2.2480331238590106, "learning_rate": 1.958296642366538e-05, "loss": 0.9749, "step": 4823 }, { "epoch": 0.3585284280936455, "grad_norm": 2.434407009824203, "learning_rate": 1.958273709575792e-05, "loss": 0.9973, "step": 4824 }, { "epoch": 0.35860274990709773, "grad_norm": 1.9678054350200602, "learning_rate": 1.958250770615729e-05, "loss": 0.7573, "step": 4825 }, { "epoch": 0.35867707172054997, "grad_norm": 2.078829420990078, "learning_rate": 1.958227825486498e-05, "loss": 1.0493, "step": 4826 }, { "epoch": 0.3587513935340022, "grad_norm": 2.2512793903909682, "learning_rate": 1.9582048741882455e-05, "loss": 0.9188, "step": 4827 }, { "epoch": 0.3588257153474545, "grad_norm": 1.7605185874637876, "learning_rate": 1.9581819167211197e-05, "loss": 0.9294, "step": 4828 }, { "epoch": 0.35890003716090674, "grad_norm": 2.1910480860558312, "learning_rate": 1.9581589530852688e-05, "loss": 0.8682, "step": 4829 }, { "epoch": 0.358974358974359, "grad_norm": 7.915079666779051, "learning_rate": 1.95813598328084e-05, "loss": 0.7689, "step": 4830 }, { "epoch": 0.3590486807878112, "grad_norm": 2.319416747167144, "learning_rate": 1.9581130073079814e-05, "loss": 1.0692, "step": 4831 }, { "epoch": 0.35912300260126345, "grad_norm": 2.1723906370765595, "learning_rate": 1.958090025166841e-05, "loss": 0.8305, "step": 4832 }, { "epoch": 0.35919732441471575, "grad_norm": 1.8989786021231336, "learning_rate": 1.958067036857567e-05, "loss": 0.7147, "step": 4833 }, { "epoch": 0.359271646228168, "grad_norm": 2.2003139117013832, "learning_rate": 1.9580440423803068e-05, "loss": 1.0414, "step": 4834 }, { "epoch": 0.3593459680416202, "grad_norm": 2.3018749252366812, "learning_rate": 1.958021041735209e-05, "loss": 1.0197, "step": 4835 }, { "epoch": 0.35942028985507246, "grad_norm": 2.201772212957382, "learning_rate": 1.9579980349224214e-05, "loss": 0.9627, "step": 4836 }, { "epoch": 0.3594946116685247, "grad_norm": 2.3631025540518924, "learning_rate": 1.9579750219420922e-05, "loss": 0.8433, "step": 4837 }, { "epoch": 0.35956893348197694, "grad_norm": 2.072722121448486, "learning_rate": 1.9579520027943694e-05, "loss": 0.9349, "step": 4838 }, { "epoch": 0.35964325529542923, "grad_norm": 2.2521441691696666, "learning_rate": 1.957928977479401e-05, "loss": 0.8776, "step": 4839 }, { "epoch": 0.35971757710888147, "grad_norm": 2.0537238883909827, "learning_rate": 1.957905945997336e-05, "loss": 0.7685, "step": 4840 }, { "epoch": 0.3597918989223337, "grad_norm": 2.660978889838988, "learning_rate": 1.957882908348322e-05, "loss": 1.0337, "step": 4841 }, { "epoch": 0.35986622073578595, "grad_norm": 5.8460240307567615, "learning_rate": 1.9578598645325075e-05, "loss": 0.9964, "step": 4842 }, { "epoch": 0.3599405425492382, "grad_norm": 2.5462695928524144, "learning_rate": 1.9578368145500414e-05, "loss": 1.0563, "step": 4843 }, { "epoch": 0.3600148643626904, "grad_norm": 2.7398913816907493, "learning_rate": 1.957813758401071e-05, "loss": 1.0588, "step": 4844 }, { "epoch": 0.3600891861761427, "grad_norm": 2.374644374132936, "learning_rate": 1.9577906960857456e-05, "loss": 0.9854, "step": 4845 }, { "epoch": 0.36016350798959496, "grad_norm": 3.5096701574225433, "learning_rate": 1.9577676276042134e-05, "loss": 0.8598, "step": 4846 }, { "epoch": 0.3602378298030472, "grad_norm": 2.3389192077660907, "learning_rate": 1.9577445529566228e-05, "loss": 0.8689, "step": 4847 }, { "epoch": 0.36031215161649943, "grad_norm": 2.4283702449860973, "learning_rate": 1.9577214721431223e-05, "loss": 0.822, "step": 4848 }, { "epoch": 0.36038647342995167, "grad_norm": 2.234318647010122, "learning_rate": 1.957698385163861e-05, "loss": 0.9728, "step": 4849 }, { "epoch": 0.36046079524340396, "grad_norm": 3.3228101015570113, "learning_rate": 1.957675292018987e-05, "loss": 1.1418, "step": 4850 }, { "epoch": 0.3605351170568562, "grad_norm": 1.8563071741978505, "learning_rate": 1.957652192708649e-05, "loss": 0.8539, "step": 4851 }, { "epoch": 0.36060943887030844, "grad_norm": 2.241319507643308, "learning_rate": 1.9576290872329963e-05, "loss": 0.739, "step": 4852 }, { "epoch": 0.3606837606837607, "grad_norm": 1.8659534039447176, "learning_rate": 1.957605975592177e-05, "loss": 0.8972, "step": 4853 }, { "epoch": 0.3607580824972129, "grad_norm": 1.8003890259013937, "learning_rate": 1.95758285778634e-05, "loss": 0.6542, "step": 4854 }, { "epoch": 0.36083240431066516, "grad_norm": 2.2985465646806826, "learning_rate": 1.9575597338156343e-05, "loss": 0.9357, "step": 4855 }, { "epoch": 0.36090672612411745, "grad_norm": 2.0282918664864447, "learning_rate": 1.9575366036802087e-05, "loss": 1.0333, "step": 4856 }, { "epoch": 0.3609810479375697, "grad_norm": 1.9951024627287017, "learning_rate": 1.957513467380212e-05, "loss": 1.0363, "step": 4857 }, { "epoch": 0.3610553697510219, "grad_norm": 1.7183859440686973, "learning_rate": 1.9574903249157936e-05, "loss": 0.7062, "step": 4858 }, { "epoch": 0.36112969156447416, "grad_norm": 2.2431176191918087, "learning_rate": 1.9574671762871023e-05, "loss": 0.9218, "step": 4859 }, { "epoch": 0.3612040133779264, "grad_norm": 5.105123648668366, "learning_rate": 1.9574440214942867e-05, "loss": 0.7742, "step": 4860 }, { "epoch": 0.3612783351913787, "grad_norm": 1.9929082612256122, "learning_rate": 1.957420860537496e-05, "loss": 1.1146, "step": 4861 }, { "epoch": 0.36135265700483093, "grad_norm": 1.7408039120117398, "learning_rate": 1.95739769341688e-05, "loss": 0.7415, "step": 4862 }, { "epoch": 0.3614269788182832, "grad_norm": 2.155036607674375, "learning_rate": 1.9573745201325866e-05, "loss": 0.7174, "step": 4863 }, { "epoch": 0.3615013006317354, "grad_norm": 2.0969642447668484, "learning_rate": 1.9573513406847665e-05, "loss": 0.9802, "step": 4864 }, { "epoch": 0.36157562244518765, "grad_norm": 2.118714887409234, "learning_rate": 1.9573281550735678e-05, "loss": 0.7928, "step": 4865 }, { "epoch": 0.3616499442586399, "grad_norm": 2.2643139010401225, "learning_rate": 1.95730496329914e-05, "loss": 0.9144, "step": 4866 }, { "epoch": 0.3617242660720922, "grad_norm": 2.4641184668729053, "learning_rate": 1.9572817653616324e-05, "loss": 1.0856, "step": 4867 }, { "epoch": 0.3617985878855444, "grad_norm": 1.9312390942429851, "learning_rate": 1.9572585612611946e-05, "loss": 0.9085, "step": 4868 }, { "epoch": 0.36187290969899666, "grad_norm": 1.9082690825272692, "learning_rate": 1.957235350997976e-05, "loss": 0.7848, "step": 4869 }, { "epoch": 0.3619472315124489, "grad_norm": 1.908312288868207, "learning_rate": 1.9572121345721257e-05, "loss": 0.6232, "step": 4870 }, { "epoch": 0.36202155332590114, "grad_norm": 2.593283940361416, "learning_rate": 1.9571889119837935e-05, "loss": 0.8884, "step": 4871 }, { "epoch": 0.3620958751393534, "grad_norm": 2.914697427070413, "learning_rate": 1.957165683233129e-05, "loss": 0.8769, "step": 4872 }, { "epoch": 0.36217019695280567, "grad_norm": 2.131586014134929, "learning_rate": 1.957142448320281e-05, "loss": 1.0299, "step": 4873 }, { "epoch": 0.3622445187662579, "grad_norm": 2.3103366359641337, "learning_rate": 1.9571192072453997e-05, "loss": 1.0353, "step": 4874 }, { "epoch": 0.36231884057971014, "grad_norm": 2.0433632862016973, "learning_rate": 1.9570959600086347e-05, "loss": 1.1569, "step": 4875 }, { "epoch": 0.3623931623931624, "grad_norm": 2.450576629051348, "learning_rate": 1.9570727066101356e-05, "loss": 1.0139, "step": 4876 }, { "epoch": 0.3624674842066146, "grad_norm": 2.494617825794005, "learning_rate": 1.957049447050052e-05, "loss": 1.1106, "step": 4877 }, { "epoch": 0.3625418060200669, "grad_norm": 2.175671083980355, "learning_rate": 1.9570261813285337e-05, "loss": 0.9311, "step": 4878 }, { "epoch": 0.36261612783351915, "grad_norm": 1.9342223774793823, "learning_rate": 1.9570029094457306e-05, "loss": 0.8502, "step": 4879 }, { "epoch": 0.3626904496469714, "grad_norm": 2.279731592954084, "learning_rate": 1.9569796314017925e-05, "loss": 0.7856, "step": 4880 }, { "epoch": 0.36276477146042363, "grad_norm": 2.4587202885820187, "learning_rate": 1.9569563471968688e-05, "loss": 0.9304, "step": 4881 }, { "epoch": 0.36283909327387587, "grad_norm": 2.2335598701242616, "learning_rate": 1.95693305683111e-05, "loss": 0.7159, "step": 4882 }, { "epoch": 0.3629134150873281, "grad_norm": 2.0631951344875294, "learning_rate": 1.956909760304666e-05, "loss": 0.9336, "step": 4883 }, { "epoch": 0.3629877369007804, "grad_norm": 1.8628443751620254, "learning_rate": 1.9568864576176866e-05, "loss": 0.8614, "step": 4884 }, { "epoch": 0.36306205871423264, "grad_norm": 1.9736586264346363, "learning_rate": 1.9568631487703213e-05, "loss": 0.7877, "step": 4885 }, { "epoch": 0.3631363805276849, "grad_norm": 2.349422011465292, "learning_rate": 1.9568398337627213e-05, "loss": 0.6445, "step": 4886 }, { "epoch": 0.3632107023411371, "grad_norm": 2.2820142319739136, "learning_rate": 1.9568165125950358e-05, "loss": 0.9206, "step": 4887 }, { "epoch": 0.36328502415458935, "grad_norm": 2.329530107190278, "learning_rate": 1.9567931852674154e-05, "loss": 0.8314, "step": 4888 }, { "epoch": 0.36335934596804165, "grad_norm": 2.0420519045761103, "learning_rate": 1.95676985178001e-05, "loss": 0.8875, "step": 4889 }, { "epoch": 0.3634336677814939, "grad_norm": 2.101104858984552, "learning_rate": 1.95674651213297e-05, "loss": 1.0674, "step": 4890 }, { "epoch": 0.3635079895949461, "grad_norm": 2.3746936007888175, "learning_rate": 1.9567231663264456e-05, "loss": 0.8769, "step": 4891 }, { "epoch": 0.36358231140839836, "grad_norm": 2.2745123424434244, "learning_rate": 1.956699814360587e-05, "loss": 0.8264, "step": 4892 }, { "epoch": 0.3636566332218506, "grad_norm": 2.3095957266304765, "learning_rate": 1.956676456235545e-05, "loss": 0.9392, "step": 4893 }, { "epoch": 0.36373095503530284, "grad_norm": 2.3517114009612468, "learning_rate": 1.9566530919514693e-05, "loss": 1.2048, "step": 4894 }, { "epoch": 0.36380527684875513, "grad_norm": 2.083891977930371, "learning_rate": 1.9566297215085106e-05, "loss": 0.9269, "step": 4895 }, { "epoch": 0.36387959866220737, "grad_norm": 3.332922227524293, "learning_rate": 1.9566063449068196e-05, "loss": 0.7366, "step": 4896 }, { "epoch": 0.3639539204756596, "grad_norm": 2.1583796851680828, "learning_rate": 1.9565829621465462e-05, "loss": 1.0143, "step": 4897 }, { "epoch": 0.36402824228911185, "grad_norm": 1.9022962129354783, "learning_rate": 1.956559573227842e-05, "loss": 0.717, "step": 4898 }, { "epoch": 0.3641025641025641, "grad_norm": 2.2083261955785685, "learning_rate": 1.9565361781508563e-05, "loss": 0.7945, "step": 4899 }, { "epoch": 0.3641768859160163, "grad_norm": 4.367890462851207, "learning_rate": 1.9565127769157403e-05, "loss": 0.935, "step": 4900 }, { "epoch": 0.3642512077294686, "grad_norm": 2.1468515512894166, "learning_rate": 1.956489369522645e-05, "loss": 1.0964, "step": 4901 }, { "epoch": 0.36432552954292086, "grad_norm": 2.0411383054001884, "learning_rate": 1.9564659559717204e-05, "loss": 0.8582, "step": 4902 }, { "epoch": 0.3643998513563731, "grad_norm": 2.091402625527423, "learning_rate": 1.956442536263118e-05, "loss": 0.9679, "step": 4903 }, { "epoch": 0.36447417316982533, "grad_norm": 2.3460120565300535, "learning_rate": 1.956419110396988e-05, "loss": 0.8789, "step": 4904 }, { "epoch": 0.36454849498327757, "grad_norm": 2.022781610744658, "learning_rate": 1.9563956783734813e-05, "loss": 0.7933, "step": 4905 }, { "epoch": 0.36462281679672987, "grad_norm": 2.333636268849299, "learning_rate": 1.9563722401927488e-05, "loss": 1.0548, "step": 4906 }, { "epoch": 0.3646971386101821, "grad_norm": 2.1867542749821425, "learning_rate": 1.956348795854942e-05, "loss": 1.053, "step": 4907 }, { "epoch": 0.36477146042363434, "grad_norm": 2.1354398927116276, "learning_rate": 1.9563253453602106e-05, "loss": 0.8759, "step": 4908 }, { "epoch": 0.3648457822370866, "grad_norm": 2.3068561773470977, "learning_rate": 1.9563018887087067e-05, "loss": 0.7251, "step": 4909 }, { "epoch": 0.3649201040505388, "grad_norm": 2.10126423790797, "learning_rate": 1.9562784259005804e-05, "loss": 0.9438, "step": 4910 }, { "epoch": 0.36499442586399106, "grad_norm": 2.037312228622355, "learning_rate": 1.9562549569359835e-05, "loss": 0.964, "step": 4911 }, { "epoch": 0.36506874767744335, "grad_norm": 2.291591309705079, "learning_rate": 1.956231481815067e-05, "loss": 0.9951, "step": 4912 }, { "epoch": 0.3651430694908956, "grad_norm": 2.4562369199253187, "learning_rate": 1.9562080005379817e-05, "loss": 0.6893, "step": 4913 }, { "epoch": 0.3652173913043478, "grad_norm": 2.3320404756062, "learning_rate": 1.9561845131048787e-05, "loss": 0.9979, "step": 4914 }, { "epoch": 0.36529171311780007, "grad_norm": 1.868119254635548, "learning_rate": 1.9561610195159095e-05, "loss": 0.9416, "step": 4915 }, { "epoch": 0.3653660349312523, "grad_norm": 2.051274605567523, "learning_rate": 1.9561375197712252e-05, "loss": 0.7871, "step": 4916 }, { "epoch": 0.3654403567447046, "grad_norm": 2.0754841400747552, "learning_rate": 1.9561140138709774e-05, "loss": 0.9898, "step": 4917 }, { "epoch": 0.36551467855815684, "grad_norm": 1.9808406573444026, "learning_rate": 1.956090501815317e-05, "loss": 0.6911, "step": 4918 }, { "epoch": 0.3655890003716091, "grad_norm": 1.9743505996024955, "learning_rate": 1.9560669836043958e-05, "loss": 0.8294, "step": 4919 }, { "epoch": 0.3656633221850613, "grad_norm": 2.9080842898167982, "learning_rate": 1.9560434592383645e-05, "loss": 0.9662, "step": 4920 }, { "epoch": 0.36573764399851355, "grad_norm": 2.2312098394378963, "learning_rate": 1.9560199287173754e-05, "loss": 1.0207, "step": 4921 }, { "epoch": 0.3658119658119658, "grad_norm": 2.5564568371236014, "learning_rate": 1.9559963920415794e-05, "loss": 0.7299, "step": 4922 }, { "epoch": 0.3658862876254181, "grad_norm": 2.2476470797665233, "learning_rate": 1.9559728492111284e-05, "loss": 1.0347, "step": 4923 }, { "epoch": 0.3659606094388703, "grad_norm": 2.3137985557737926, "learning_rate": 1.955949300226174e-05, "loss": 0.7882, "step": 4924 }, { "epoch": 0.36603493125232256, "grad_norm": 2.6779300907361807, "learning_rate": 1.9559257450868672e-05, "loss": 0.9917, "step": 4925 }, { "epoch": 0.3661092530657748, "grad_norm": 1.9164446703426699, "learning_rate": 1.9559021837933602e-05, "loss": 0.7038, "step": 4926 }, { "epoch": 0.36618357487922704, "grad_norm": 1.8067976176218015, "learning_rate": 1.9558786163458045e-05, "loss": 0.8005, "step": 4927 }, { "epoch": 0.3662578966926793, "grad_norm": 2.97414176938903, "learning_rate": 1.9558550427443518e-05, "loss": 1.0604, "step": 4928 }, { "epoch": 0.36633221850613157, "grad_norm": 2.0142353252160183, "learning_rate": 1.9558314629891542e-05, "loss": 1.0523, "step": 4929 }, { "epoch": 0.3664065403195838, "grad_norm": 2.418137112670188, "learning_rate": 1.955807877080363e-05, "loss": 0.9295, "step": 4930 }, { "epoch": 0.36648086213303604, "grad_norm": 2.7340353911452713, "learning_rate": 1.95578428501813e-05, "loss": 1.0536, "step": 4931 }, { "epoch": 0.3665551839464883, "grad_norm": 2.167368769174195, "learning_rate": 1.955760686802608e-05, "loss": 0.9284, "step": 4932 }, { "epoch": 0.3666295057599405, "grad_norm": 1.7566469149775716, "learning_rate": 1.955737082433948e-05, "loss": 0.8315, "step": 4933 }, { "epoch": 0.3667038275733928, "grad_norm": 2.1232040610690524, "learning_rate": 1.955713471912302e-05, "loss": 0.9833, "step": 4934 }, { "epoch": 0.36677814938684505, "grad_norm": 2.1932820881421757, "learning_rate": 1.9556898552378225e-05, "loss": 0.788, "step": 4935 }, { "epoch": 0.3668524712002973, "grad_norm": 2.3546376340890856, "learning_rate": 1.9556662324106615e-05, "loss": 0.9116, "step": 4936 }, { "epoch": 0.36692679301374953, "grad_norm": 2.897978041056734, "learning_rate": 1.955642603430971e-05, "loss": 0.9951, "step": 4937 }, { "epoch": 0.36700111482720177, "grad_norm": 1.746126549357166, "learning_rate": 1.9556189682989022e-05, "loss": 0.9049, "step": 4938 }, { "epoch": 0.367075436640654, "grad_norm": 2.341032728932929, "learning_rate": 1.9555953270146087e-05, "loss": 0.9273, "step": 4939 }, { "epoch": 0.3671497584541063, "grad_norm": 2.02302003951731, "learning_rate": 1.955571679578242e-05, "loss": 0.74, "step": 4940 }, { "epoch": 0.36722408026755854, "grad_norm": 2.0853134489428853, "learning_rate": 1.9555480259899542e-05, "loss": 0.7182, "step": 4941 }, { "epoch": 0.3672984020810108, "grad_norm": 2.0633314517067607, "learning_rate": 1.955524366249898e-05, "loss": 0.9285, "step": 4942 }, { "epoch": 0.367372723894463, "grad_norm": 2.040859285578191, "learning_rate": 1.9555007003582252e-05, "loss": 0.8637, "step": 4943 }, { "epoch": 0.36744704570791525, "grad_norm": 2.013217261200141, "learning_rate": 1.955477028315089e-05, "loss": 0.9216, "step": 4944 }, { "epoch": 0.36752136752136755, "grad_norm": 2.2721242439629705, "learning_rate": 1.955453350120641e-05, "loss": 0.958, "step": 4945 }, { "epoch": 0.3675956893348198, "grad_norm": 1.8801176661507655, "learning_rate": 1.955429665775034e-05, "loss": 0.8286, "step": 4946 }, { "epoch": 0.367670011148272, "grad_norm": 2.264353478354366, "learning_rate": 1.95540597527842e-05, "loss": 1.0088, "step": 4947 }, { "epoch": 0.36774433296172426, "grad_norm": 2.5384012124416317, "learning_rate": 1.955382278630952e-05, "loss": 0.9268, "step": 4948 }, { "epoch": 0.3678186547751765, "grad_norm": 1.7938379312464614, "learning_rate": 1.955358575832783e-05, "loss": 0.9171, "step": 4949 }, { "epoch": 0.36789297658862874, "grad_norm": 2.194304931644391, "learning_rate": 1.9553348668840643e-05, "loss": 0.8712, "step": 4950 }, { "epoch": 0.36796729840208103, "grad_norm": 2.481402415136531, "learning_rate": 1.95531115178495e-05, "loss": 0.9386, "step": 4951 }, { "epoch": 0.36804162021553327, "grad_norm": 4.191184729930706, "learning_rate": 1.9552874305355916e-05, "loss": 0.8099, "step": 4952 }, { "epoch": 0.3681159420289855, "grad_norm": 2.5815347670312803, "learning_rate": 1.9552637031361423e-05, "loss": 0.8876, "step": 4953 }, { "epoch": 0.36819026384243775, "grad_norm": 2.0623551769325212, "learning_rate": 1.955239969586755e-05, "loss": 0.8308, "step": 4954 }, { "epoch": 0.36826458565589, "grad_norm": 1.947003574660017, "learning_rate": 1.9552162298875824e-05, "loss": 0.769, "step": 4955 }, { "epoch": 0.3683389074693422, "grad_norm": 1.738260682672815, "learning_rate": 1.955192484038777e-05, "loss": 0.7872, "step": 4956 }, { "epoch": 0.3684132292827945, "grad_norm": 2.338908296822095, "learning_rate": 1.9551687320404923e-05, "loss": 0.901, "step": 4957 }, { "epoch": 0.36848755109624676, "grad_norm": 2.18112502242134, "learning_rate": 1.955144973892881e-05, "loss": 1.0399, "step": 4958 }, { "epoch": 0.368561872909699, "grad_norm": 2.1034525612285218, "learning_rate": 1.9551212095960955e-05, "loss": 0.8111, "step": 4959 }, { "epoch": 0.36863619472315123, "grad_norm": 2.2088999655995742, "learning_rate": 1.9550974391502892e-05, "loss": 0.8126, "step": 4960 }, { "epoch": 0.36871051653660347, "grad_norm": 1.9045748040537982, "learning_rate": 1.9550736625556153e-05, "loss": 0.7914, "step": 4961 }, { "epoch": 0.36878483835005577, "grad_norm": 2.483611423450792, "learning_rate": 1.9550498798122266e-05, "loss": 0.7056, "step": 4962 }, { "epoch": 0.368859160163508, "grad_norm": 2.4012094320540984, "learning_rate": 1.9550260909202766e-05, "loss": 1.0251, "step": 4963 }, { "epoch": 0.36893348197696024, "grad_norm": 17.691473880390703, "learning_rate": 1.955002295879918e-05, "loss": 1.1801, "step": 4964 }, { "epoch": 0.3690078037904125, "grad_norm": 2.647687362697998, "learning_rate": 1.9549784946913043e-05, "loss": 1.1499, "step": 4965 }, { "epoch": 0.3690821256038647, "grad_norm": 2.307454203813398, "learning_rate": 1.9549546873545884e-05, "loss": 0.9831, "step": 4966 }, { "epoch": 0.36915644741731696, "grad_norm": 1.7643305783625889, "learning_rate": 1.9549308738699237e-05, "loss": 0.9133, "step": 4967 }, { "epoch": 0.36923076923076925, "grad_norm": 2.4408567260993608, "learning_rate": 1.954907054237464e-05, "loss": 1.0351, "step": 4968 }, { "epoch": 0.3693050910442215, "grad_norm": 2.1861386716683415, "learning_rate": 1.9548832284573616e-05, "loss": 1.0077, "step": 4969 }, { "epoch": 0.36937941285767373, "grad_norm": 2.7559885928139116, "learning_rate": 1.9548593965297712e-05, "loss": 0.833, "step": 4970 }, { "epoch": 0.36945373467112597, "grad_norm": 2.017525497980199, "learning_rate": 1.954835558454845e-05, "loss": 0.7215, "step": 4971 }, { "epoch": 0.3695280564845782, "grad_norm": 2.204357755045555, "learning_rate": 1.9548117142327373e-05, "loss": 1.0226, "step": 4972 }, { "epoch": 0.3696023782980305, "grad_norm": 2.3674568453929754, "learning_rate": 1.9547878638636014e-05, "loss": 0.9979, "step": 4973 }, { "epoch": 0.36967670011148274, "grad_norm": 2.18137270066026, "learning_rate": 1.9547640073475905e-05, "loss": 0.9224, "step": 4974 }, { "epoch": 0.369751021924935, "grad_norm": 2.2825723072856077, "learning_rate": 1.9547401446848587e-05, "loss": 0.7208, "step": 4975 }, { "epoch": 0.3698253437383872, "grad_norm": 3.1000527874678228, "learning_rate": 1.9547162758755594e-05, "loss": 1.0627, "step": 4976 }, { "epoch": 0.36989966555183945, "grad_norm": 2.245093901682594, "learning_rate": 1.954692400919846e-05, "loss": 1.0673, "step": 4977 }, { "epoch": 0.3699739873652917, "grad_norm": 2.6405213931614697, "learning_rate": 1.9546685198178726e-05, "loss": 0.875, "step": 4978 }, { "epoch": 0.370048309178744, "grad_norm": 2.666231120325187, "learning_rate": 1.9546446325697926e-05, "loss": 0.8788, "step": 4979 }, { "epoch": 0.3701226309921962, "grad_norm": 2.0855314899988757, "learning_rate": 1.9546207391757605e-05, "loss": 0.6706, "step": 4980 }, { "epoch": 0.37019695280564846, "grad_norm": 5.259411499180443, "learning_rate": 1.9545968396359294e-05, "loss": 1.0169, "step": 4981 }, { "epoch": 0.3702712746191007, "grad_norm": 2.2555906885013832, "learning_rate": 1.9545729339504535e-05, "loss": 0.7001, "step": 4982 }, { "epoch": 0.37034559643255294, "grad_norm": 2.8343841120043547, "learning_rate": 1.9545490221194862e-05, "loss": 1.066, "step": 4983 }, { "epoch": 0.3704199182460052, "grad_norm": 2.4671689854327314, "learning_rate": 1.9545251041431817e-05, "loss": 1.1979, "step": 4984 }, { "epoch": 0.37049424005945747, "grad_norm": 2.297183180809405, "learning_rate": 1.9545011800216945e-05, "loss": 0.8701, "step": 4985 }, { "epoch": 0.3705685618729097, "grad_norm": 2.079928989561087, "learning_rate": 1.954477249755178e-05, "loss": 1.0708, "step": 4986 }, { "epoch": 0.37064288368636195, "grad_norm": 2.480555540998136, "learning_rate": 1.954453313343787e-05, "loss": 0.9609, "step": 4987 }, { "epoch": 0.3707172054998142, "grad_norm": 2.906067480011748, "learning_rate": 1.9544293707876746e-05, "loss": 1.0094, "step": 4988 }, { "epoch": 0.3707915273132664, "grad_norm": 2.6117856595397173, "learning_rate": 1.9544054220869956e-05, "loss": 0.9261, "step": 4989 }, { "epoch": 0.3708658491267187, "grad_norm": 1.8579829219839588, "learning_rate": 1.9543814672419036e-05, "loss": 0.8708, "step": 4990 }, { "epoch": 0.37094017094017095, "grad_norm": 1.9567988582737894, "learning_rate": 1.9543575062525538e-05, "loss": 0.9471, "step": 4991 }, { "epoch": 0.3710144927536232, "grad_norm": 2.819595523702406, "learning_rate": 1.9543335391190998e-05, "loss": 0.8301, "step": 4992 }, { "epoch": 0.37108881456707543, "grad_norm": 3.196298376209473, "learning_rate": 1.9543095658416957e-05, "loss": 0.917, "step": 4993 }, { "epoch": 0.37116313638052767, "grad_norm": 2.6334313209682305, "learning_rate": 1.9542855864204964e-05, "loss": 1.0398, "step": 4994 }, { "epoch": 0.3712374581939799, "grad_norm": 2.2682061588521, "learning_rate": 1.954261600855656e-05, "loss": 0.8893, "step": 4995 }, { "epoch": 0.3713117800074322, "grad_norm": 3.0262483181088706, "learning_rate": 1.954237609147329e-05, "loss": 1.0221, "step": 4996 }, { "epoch": 0.37138610182088444, "grad_norm": 2.373407056860489, "learning_rate": 1.9542136112956694e-05, "loss": 1.0188, "step": 4997 }, { "epoch": 0.3714604236343367, "grad_norm": 1.9496911794012237, "learning_rate": 1.9541896073008323e-05, "loss": 0.9043, "step": 4998 }, { "epoch": 0.3715347454477889, "grad_norm": 1.83578460117601, "learning_rate": 1.954165597162972e-05, "loss": 0.9911, "step": 4999 }, { "epoch": 0.37160906726124115, "grad_norm": 2.194430781358541, "learning_rate": 1.9541415808822434e-05, "loss": 0.9689, "step": 5000 }, { "epoch": 0.37168338907469345, "grad_norm": 2.252392612545437, "learning_rate": 1.9541175584588005e-05, "loss": 0.7467, "step": 5001 }, { "epoch": 0.3717577108881457, "grad_norm": 2.019392562402287, "learning_rate": 1.954093529892798e-05, "loss": 1.0256, "step": 5002 }, { "epoch": 0.3718320327015979, "grad_norm": 1.771261034185119, "learning_rate": 1.9540694951843914e-05, "loss": 0.8483, "step": 5003 }, { "epoch": 0.37190635451505016, "grad_norm": 2.08003097184628, "learning_rate": 1.9540454543337346e-05, "loss": 0.998, "step": 5004 }, { "epoch": 0.3719806763285024, "grad_norm": 2.8098707005029526, "learning_rate": 1.9540214073409826e-05, "loss": 1.0472, "step": 5005 }, { "epoch": 0.37205499814195464, "grad_norm": 2.2751651987263326, "learning_rate": 1.9539973542062904e-05, "loss": 0.9347, "step": 5006 }, { "epoch": 0.37212931995540693, "grad_norm": 2.3549106219242804, "learning_rate": 1.9539732949298128e-05, "loss": 1.1709, "step": 5007 }, { "epoch": 0.3722036417688592, "grad_norm": 2.174597250256609, "learning_rate": 1.9539492295117046e-05, "loss": 0.8261, "step": 5008 }, { "epoch": 0.3722779635823114, "grad_norm": 1.946206747812073, "learning_rate": 1.9539251579521207e-05, "loss": 0.8295, "step": 5009 }, { "epoch": 0.37235228539576365, "grad_norm": 2.2535585520877586, "learning_rate": 1.953901080251216e-05, "loss": 1.0482, "step": 5010 }, { "epoch": 0.3724266072092159, "grad_norm": 2.872605782176276, "learning_rate": 1.9538769964091456e-05, "loss": 1.1774, "step": 5011 }, { "epoch": 0.3725009290226681, "grad_norm": 4.240285085721873, "learning_rate": 1.9538529064260648e-05, "loss": 0.9418, "step": 5012 }, { "epoch": 0.3725752508361204, "grad_norm": 2.7445087001404236, "learning_rate": 1.9538288103021286e-05, "loss": 0.7376, "step": 5013 }, { "epoch": 0.37264957264957266, "grad_norm": 2.087955956368206, "learning_rate": 1.953804708037492e-05, "loss": 0.7512, "step": 5014 }, { "epoch": 0.3727238944630249, "grad_norm": 2.5252770832272478, "learning_rate": 1.95378059963231e-05, "loss": 1.0168, "step": 5015 }, { "epoch": 0.37279821627647713, "grad_norm": 2.167676972897465, "learning_rate": 1.953756485086738e-05, "loss": 0.8805, "step": 5016 }, { "epoch": 0.3728725380899294, "grad_norm": 1.9067634212311761, "learning_rate": 1.9537323644009314e-05, "loss": 0.7786, "step": 5017 }, { "epoch": 0.37294685990338167, "grad_norm": 1.9016923793849485, "learning_rate": 1.953708237575045e-05, "loss": 0.6877, "step": 5018 }, { "epoch": 0.3730211817168339, "grad_norm": 1.9908269294363163, "learning_rate": 1.9536841046092346e-05, "loss": 0.7733, "step": 5019 }, { "epoch": 0.37309550353028614, "grad_norm": 2.022480234980999, "learning_rate": 1.9536599655036554e-05, "loss": 0.9164, "step": 5020 }, { "epoch": 0.3731698253437384, "grad_norm": 2.2512816524924366, "learning_rate": 1.953635820258463e-05, "loss": 0.8676, "step": 5021 }, { "epoch": 0.3732441471571906, "grad_norm": 2.5852139713675135, "learning_rate": 1.9536116688738124e-05, "loss": 1.0269, "step": 5022 }, { "epoch": 0.37331846897064286, "grad_norm": 2.9646711465032145, "learning_rate": 1.9535875113498595e-05, "loss": 1.0147, "step": 5023 }, { "epoch": 0.37339279078409515, "grad_norm": 2.5887241393264717, "learning_rate": 1.95356334768676e-05, "loss": 0.9424, "step": 5024 }, { "epoch": 0.3734671125975474, "grad_norm": 3.3306365896847128, "learning_rate": 1.9535391778846685e-05, "loss": 0.8286, "step": 5025 }, { "epoch": 0.37354143441099963, "grad_norm": 2.2261707323976196, "learning_rate": 1.9535150019437416e-05, "loss": 1.0668, "step": 5026 }, { "epoch": 0.37361575622445187, "grad_norm": 2.0945859896303958, "learning_rate": 1.9534908198641346e-05, "loss": 0.9393, "step": 5027 }, { "epoch": 0.3736900780379041, "grad_norm": 2.626184860622062, "learning_rate": 1.9534666316460035e-05, "loss": 0.7876, "step": 5028 }, { "epoch": 0.3737643998513564, "grad_norm": 2.524195918769191, "learning_rate": 1.953442437289503e-05, "loss": 0.8445, "step": 5029 }, { "epoch": 0.37383872166480864, "grad_norm": 2.8702654437042145, "learning_rate": 1.95341823679479e-05, "loss": 1.2247, "step": 5030 }, { "epoch": 0.3739130434782609, "grad_norm": 2.295295470099451, "learning_rate": 1.9533940301620198e-05, "loss": 0.8007, "step": 5031 }, { "epoch": 0.3739873652917131, "grad_norm": 1.9485496088230145, "learning_rate": 1.9533698173913483e-05, "loss": 0.8299, "step": 5032 }, { "epoch": 0.37406168710516535, "grad_norm": 1.9626554569133388, "learning_rate": 1.9533455984829316e-05, "loss": 0.8183, "step": 5033 }, { "epoch": 0.3741360089186176, "grad_norm": 2.0878272507748927, "learning_rate": 1.953321373436925e-05, "loss": 0.8339, "step": 5034 }, { "epoch": 0.3742103307320699, "grad_norm": 2.0835091555328087, "learning_rate": 1.953297142253485e-05, "loss": 0.8439, "step": 5035 }, { "epoch": 0.3742846525455221, "grad_norm": 2.0826088721372282, "learning_rate": 1.953272904932767e-05, "loss": 0.9222, "step": 5036 }, { "epoch": 0.37435897435897436, "grad_norm": 2.0262370331708275, "learning_rate": 1.953248661474928e-05, "loss": 0.9642, "step": 5037 }, { "epoch": 0.3744332961724266, "grad_norm": 1.9980053888409257, "learning_rate": 1.953224411880124e-05, "loss": 0.8736, "step": 5038 }, { "epoch": 0.37450761798587884, "grad_norm": 2.3747785720466528, "learning_rate": 1.95320015614851e-05, "loss": 1.0287, "step": 5039 }, { "epoch": 0.3745819397993311, "grad_norm": 2.4878771728338824, "learning_rate": 1.9531758942802433e-05, "loss": 0.9392, "step": 5040 }, { "epoch": 0.37465626161278337, "grad_norm": 2.458373525555631, "learning_rate": 1.9531516262754792e-05, "loss": 0.8838, "step": 5041 }, { "epoch": 0.3747305834262356, "grad_norm": 2.3881895535894064, "learning_rate": 1.953127352134375e-05, "loss": 1.028, "step": 5042 }, { "epoch": 0.37480490523968785, "grad_norm": 2.521424330863599, "learning_rate": 1.953103071857086e-05, "loss": 0.9883, "step": 5043 }, { "epoch": 0.3748792270531401, "grad_norm": 2.319858486204007, "learning_rate": 1.9530787854437686e-05, "loss": 1.0622, "step": 5044 }, { "epoch": 0.3749535488665923, "grad_norm": 2.3800481441888848, "learning_rate": 1.95305449289458e-05, "loss": 0.9425, "step": 5045 }, { "epoch": 0.3750278706800446, "grad_norm": 2.2328681173872025, "learning_rate": 1.9530301942096757e-05, "loss": 0.8872, "step": 5046 }, { "epoch": 0.37510219249349686, "grad_norm": 2.2472610582980783, "learning_rate": 1.9530058893892124e-05, "loss": 0.7813, "step": 5047 }, { "epoch": 0.3751765143069491, "grad_norm": 2.1172513103375965, "learning_rate": 1.9529815784333466e-05, "loss": 1.0066, "step": 5048 }, { "epoch": 0.37525083612040133, "grad_norm": 1.8594315453954904, "learning_rate": 1.952957261342235e-05, "loss": 0.8111, "step": 5049 }, { "epoch": 0.37532515793385357, "grad_norm": 1.9657549870263369, "learning_rate": 1.952932938116034e-05, "loss": 0.8447, "step": 5050 }, { "epoch": 0.3753994797473058, "grad_norm": 1.8445063846105219, "learning_rate": 1.9529086087549004e-05, "loss": 0.7666, "step": 5051 }, { "epoch": 0.3754738015607581, "grad_norm": 2.42530038419144, "learning_rate": 1.9528842732589902e-05, "loss": 1.0045, "step": 5052 }, { "epoch": 0.37554812337421034, "grad_norm": 1.905445814132835, "learning_rate": 1.9528599316284605e-05, "loss": 0.8328, "step": 5053 }, { "epoch": 0.3756224451876626, "grad_norm": 1.9314824607728318, "learning_rate": 1.952835583863468e-05, "loss": 0.8948, "step": 5054 }, { "epoch": 0.3756967670011148, "grad_norm": 2.375509910234356, "learning_rate": 1.95281122996417e-05, "loss": 0.9908, "step": 5055 }, { "epoch": 0.37577108881456706, "grad_norm": 2.6311319865583176, "learning_rate": 1.9527868699307226e-05, "loss": 0.866, "step": 5056 }, { "epoch": 0.37584541062801935, "grad_norm": 1.7877804012500549, "learning_rate": 1.952762503763282e-05, "loss": 0.8495, "step": 5057 }, { "epoch": 0.3759197324414716, "grad_norm": 3.2689127496856862, "learning_rate": 1.9527381314620065e-05, "loss": 1.0704, "step": 5058 }, { "epoch": 0.3759940542549238, "grad_norm": 2.598211328217694, "learning_rate": 1.9527137530270522e-05, "loss": 0.9468, "step": 5059 }, { "epoch": 0.37606837606837606, "grad_norm": 2.2559700328912577, "learning_rate": 1.9526893684585762e-05, "loss": 0.8988, "step": 5060 }, { "epoch": 0.3761426978818283, "grad_norm": 1.8760653263553848, "learning_rate": 1.9526649777567356e-05, "loss": 0.7083, "step": 5061 }, { "epoch": 0.37621701969528054, "grad_norm": 1.9735265264036694, "learning_rate": 1.952640580921687e-05, "loss": 0.7417, "step": 5062 }, { "epoch": 0.37629134150873284, "grad_norm": 2.479121471845676, "learning_rate": 1.952616177953588e-05, "loss": 0.9912, "step": 5063 }, { "epoch": 0.3763656633221851, "grad_norm": 2.0327561903721043, "learning_rate": 1.9525917688525956e-05, "loss": 0.8473, "step": 5064 }, { "epoch": 0.3764399851356373, "grad_norm": 2.4248694615523316, "learning_rate": 1.9525673536188665e-05, "loss": 1.0737, "step": 5065 }, { "epoch": 0.37651430694908955, "grad_norm": 3.3477189994307475, "learning_rate": 1.9525429322525583e-05, "loss": 0.9252, "step": 5066 }, { "epoch": 0.3765886287625418, "grad_norm": 2.36932385055152, "learning_rate": 1.952518504753828e-05, "loss": 0.8924, "step": 5067 }, { "epoch": 0.376662950575994, "grad_norm": 7.0635997521339675, "learning_rate": 1.9524940711228334e-05, "loss": 0.7611, "step": 5068 }, { "epoch": 0.3767372723894463, "grad_norm": 2.196778835542625, "learning_rate": 1.952469631359731e-05, "loss": 0.9729, "step": 5069 }, { "epoch": 0.37681159420289856, "grad_norm": 2.3647312051754588, "learning_rate": 1.952445185464679e-05, "loss": 1.0127, "step": 5070 }, { "epoch": 0.3768859160163508, "grad_norm": 2.8445765507899443, "learning_rate": 1.952420733437834e-05, "loss": 0.956, "step": 5071 }, { "epoch": 0.37696023782980304, "grad_norm": 2.9386577202086905, "learning_rate": 1.9523962752793536e-05, "loss": 1.074, "step": 5072 }, { "epoch": 0.3770345596432553, "grad_norm": 2.484473135166428, "learning_rate": 1.9523718109893953e-05, "loss": 0.9037, "step": 5073 }, { "epoch": 0.37710888145670757, "grad_norm": 2.398457717781883, "learning_rate": 1.952347340568117e-05, "loss": 1.036, "step": 5074 }, { "epoch": 0.3771832032701598, "grad_norm": 2.5241681643482874, "learning_rate": 1.952322864015676e-05, "loss": 0.9215, "step": 5075 }, { "epoch": 0.37725752508361204, "grad_norm": 2.017588843303456, "learning_rate": 1.9522983813322296e-05, "loss": 0.8481, "step": 5076 }, { "epoch": 0.3773318468970643, "grad_norm": 2.173134045627794, "learning_rate": 1.9522738925179356e-05, "loss": 0.9474, "step": 5077 }, { "epoch": 0.3774061687105165, "grad_norm": 1.780371387804167, "learning_rate": 1.952249397572952e-05, "loss": 0.7708, "step": 5078 }, { "epoch": 0.37748049052396876, "grad_norm": 2.207993459807467, "learning_rate": 1.9522248964974356e-05, "loss": 0.6903, "step": 5079 }, { "epoch": 0.37755481233742105, "grad_norm": 2.0208432739586186, "learning_rate": 1.9522003892915453e-05, "loss": 0.8515, "step": 5080 }, { "epoch": 0.3776291341508733, "grad_norm": 2.258202966954268, "learning_rate": 1.952175875955438e-05, "loss": 0.9099, "step": 5081 }, { "epoch": 0.37770345596432553, "grad_norm": 2.1992691955445913, "learning_rate": 1.9521513564892722e-05, "loss": 1.0064, "step": 5082 }, { "epoch": 0.37777777777777777, "grad_norm": 1.9384452448961937, "learning_rate": 1.952126830893205e-05, "loss": 0.7324, "step": 5083 }, { "epoch": 0.37785209959123, "grad_norm": 2.1765735788817824, "learning_rate": 1.952102299167395e-05, "loss": 0.8062, "step": 5084 }, { "epoch": 0.3779264214046823, "grad_norm": 2.146519485610506, "learning_rate": 1.9520777613119993e-05, "loss": 0.9385, "step": 5085 }, { "epoch": 0.37800074321813454, "grad_norm": 2.154819595080008, "learning_rate": 1.9520532173271768e-05, "loss": 0.9445, "step": 5086 }, { "epoch": 0.3780750650315868, "grad_norm": 2.0654131855255833, "learning_rate": 1.9520286672130847e-05, "loss": 1.0203, "step": 5087 }, { "epoch": 0.378149386845039, "grad_norm": 3.0104546930409555, "learning_rate": 1.952004110969882e-05, "loss": 1.1468, "step": 5088 }, { "epoch": 0.37822370865849125, "grad_norm": 2.2688620162235362, "learning_rate": 1.9519795485977258e-05, "loss": 1.0119, "step": 5089 }, { "epoch": 0.3782980304719435, "grad_norm": 2.208351378163126, "learning_rate": 1.9519549800967747e-05, "loss": 0.9272, "step": 5090 }, { "epoch": 0.3783723522853958, "grad_norm": 2.6103774446907253, "learning_rate": 1.951930405467187e-05, "loss": 0.9408, "step": 5091 }, { "epoch": 0.378446674098848, "grad_norm": 1.8443573397895532, "learning_rate": 1.9519058247091202e-05, "loss": 0.8293, "step": 5092 }, { "epoch": 0.37852099591230026, "grad_norm": 1.8275522298231397, "learning_rate": 1.9518812378227336e-05, "loss": 0.7813, "step": 5093 }, { "epoch": 0.3785953177257525, "grad_norm": 2.001611301747081, "learning_rate": 1.951856644808185e-05, "loss": 0.7809, "step": 5094 }, { "epoch": 0.37866963953920474, "grad_norm": 2.1601394585156775, "learning_rate": 1.9518320456656326e-05, "loss": 0.6904, "step": 5095 }, { "epoch": 0.37874396135265703, "grad_norm": 2.289098313067051, "learning_rate": 1.9518074403952348e-05, "loss": 1.0224, "step": 5096 }, { "epoch": 0.37881828316610927, "grad_norm": 6.285646719991413, "learning_rate": 1.9517828289971502e-05, "loss": 1.0001, "step": 5097 }, { "epoch": 0.3788926049795615, "grad_norm": 3.063523284447113, "learning_rate": 1.951758211471537e-05, "loss": 0.7931, "step": 5098 }, { "epoch": 0.37896692679301375, "grad_norm": 2.2380254503032506, "learning_rate": 1.9517335878185538e-05, "loss": 1.0526, "step": 5099 }, { "epoch": 0.379041248606466, "grad_norm": 2.273077107710304, "learning_rate": 1.9517089580383594e-05, "loss": 0.9343, "step": 5100 }, { "epoch": 0.3791155704199182, "grad_norm": 1.786876071365129, "learning_rate": 1.951684322131112e-05, "loss": 0.8566, "step": 5101 }, { "epoch": 0.3791898922333705, "grad_norm": 2.3370444555060796, "learning_rate": 1.95165968009697e-05, "loss": 1.0675, "step": 5102 }, { "epoch": 0.37926421404682276, "grad_norm": 2.01548581524604, "learning_rate": 1.951635031936093e-05, "loss": 0.8363, "step": 5103 }, { "epoch": 0.379338535860275, "grad_norm": 1.9508105653834025, "learning_rate": 1.9516103776486388e-05, "loss": 1.0039, "step": 5104 }, { "epoch": 0.37941285767372723, "grad_norm": 2.000313887936075, "learning_rate": 1.951585717234766e-05, "loss": 0.95, "step": 5105 }, { "epoch": 0.37948717948717947, "grad_norm": 2.5278059477991883, "learning_rate": 1.951561050694634e-05, "loss": 1.0517, "step": 5106 }, { "epoch": 0.3795615013006317, "grad_norm": 2.005813999654966, "learning_rate": 1.9515363780284015e-05, "loss": 0.9633, "step": 5107 }, { "epoch": 0.379635823114084, "grad_norm": 2.0207846110231173, "learning_rate": 1.951511699236227e-05, "loss": 0.9397, "step": 5108 }, { "epoch": 0.37971014492753624, "grad_norm": 2.445301741874003, "learning_rate": 1.9514870143182693e-05, "loss": 1.0574, "step": 5109 }, { "epoch": 0.3797844667409885, "grad_norm": 3.2267148144848683, "learning_rate": 1.9514623232746877e-05, "loss": 0.7296, "step": 5110 }, { "epoch": 0.3798587885544407, "grad_norm": 3.0845642924025887, "learning_rate": 1.951437626105641e-05, "loss": 1.1198, "step": 5111 }, { "epoch": 0.37993311036789296, "grad_norm": 2.391403058216484, "learning_rate": 1.9514129228112887e-05, "loss": 0.7833, "step": 5112 }, { "epoch": 0.38000743218134525, "grad_norm": 2.79383150279429, "learning_rate": 1.9513882133917888e-05, "loss": 0.8804, "step": 5113 }, { "epoch": 0.3800817539947975, "grad_norm": 2.47070620663148, "learning_rate": 1.951363497847301e-05, "loss": 0.8949, "step": 5114 }, { "epoch": 0.3801560758082497, "grad_norm": 2.030685110963272, "learning_rate": 1.951338776177985e-05, "loss": 0.8594, "step": 5115 }, { "epoch": 0.38023039762170197, "grad_norm": 2.404648967379119, "learning_rate": 1.951314048383999e-05, "loss": 0.9295, "step": 5116 }, { "epoch": 0.3803047194351542, "grad_norm": 2.115757881089927, "learning_rate": 1.951289314465502e-05, "loss": 0.833, "step": 5117 }, { "epoch": 0.38037904124860644, "grad_norm": 2.1676733148234426, "learning_rate": 1.9512645744226542e-05, "loss": 0.8371, "step": 5118 }, { "epoch": 0.38045336306205874, "grad_norm": 2.338154961848516, "learning_rate": 1.951239828255614e-05, "loss": 0.8757, "step": 5119 }, { "epoch": 0.380527684875511, "grad_norm": 2.1286066699476036, "learning_rate": 1.9512150759645418e-05, "loss": 0.8904, "step": 5120 }, { "epoch": 0.3806020066889632, "grad_norm": 2.9727736503715203, "learning_rate": 1.951190317549596e-05, "loss": 0.966, "step": 5121 }, { "epoch": 0.38067632850241545, "grad_norm": 2.3353270217353916, "learning_rate": 1.9511655530109365e-05, "loss": 0.9716, "step": 5122 }, { "epoch": 0.3807506503158677, "grad_norm": 2.4256654154291595, "learning_rate": 1.951140782348722e-05, "loss": 1.0664, "step": 5123 }, { "epoch": 0.38082497212932, "grad_norm": 2.7770754307154233, "learning_rate": 1.951116005563113e-05, "loss": 1.138, "step": 5124 }, { "epoch": 0.3808992939427722, "grad_norm": 2.3939309375195315, "learning_rate": 1.951091222654268e-05, "loss": 0.729, "step": 5125 }, { "epoch": 0.38097361575622446, "grad_norm": 2.675707126383271, "learning_rate": 1.9510664336223475e-05, "loss": 0.9092, "step": 5126 }, { "epoch": 0.3810479375696767, "grad_norm": 2.3277992659577857, "learning_rate": 1.9510416384675106e-05, "loss": 1.0395, "step": 5127 }, { "epoch": 0.38112225938312894, "grad_norm": 2.3617873754069207, "learning_rate": 1.9510168371899168e-05, "loss": 0.9617, "step": 5128 }, { "epoch": 0.3811965811965812, "grad_norm": 2.3016088647869286, "learning_rate": 1.950992029789726e-05, "loss": 1.1049, "step": 5129 }, { "epoch": 0.38127090301003347, "grad_norm": 2.206455167933326, "learning_rate": 1.9509672162670977e-05, "loss": 0.92, "step": 5130 }, { "epoch": 0.3813452248234857, "grad_norm": 2.1409051425638514, "learning_rate": 1.950942396622192e-05, "loss": 0.8921, "step": 5131 }, { "epoch": 0.38141954663693795, "grad_norm": 2.470633517665203, "learning_rate": 1.9509175708551688e-05, "loss": 0.7489, "step": 5132 }, { "epoch": 0.3814938684503902, "grad_norm": 2.1042687932111197, "learning_rate": 1.950892738966187e-05, "loss": 0.7843, "step": 5133 }, { "epoch": 0.3815681902638424, "grad_norm": 2.1764881916675334, "learning_rate": 1.9508679009554075e-05, "loss": 0.9864, "step": 5134 }, { "epoch": 0.38164251207729466, "grad_norm": 2.0759223621491847, "learning_rate": 1.9508430568229898e-05, "loss": 0.9555, "step": 5135 }, { "epoch": 0.38171683389074695, "grad_norm": 2.1710893417103487, "learning_rate": 1.9508182065690934e-05, "loss": 0.9047, "step": 5136 }, { "epoch": 0.3817911557041992, "grad_norm": 1.8932137703201588, "learning_rate": 1.9507933501938792e-05, "loss": 0.7526, "step": 5137 }, { "epoch": 0.38186547751765143, "grad_norm": 1.8119905928377302, "learning_rate": 1.9507684876975066e-05, "loss": 0.9274, "step": 5138 }, { "epoch": 0.38193979933110367, "grad_norm": 6.725433056505254, "learning_rate": 1.950743619080136e-05, "loss": 0.8422, "step": 5139 }, { "epoch": 0.3820141211445559, "grad_norm": 2.5120541327770605, "learning_rate": 1.9507187443419267e-05, "loss": 1.1168, "step": 5140 }, { "epoch": 0.3820884429580082, "grad_norm": 2.012073296943739, "learning_rate": 1.9506938634830403e-05, "loss": 0.9116, "step": 5141 }, { "epoch": 0.38216276477146044, "grad_norm": 2.3686369128004072, "learning_rate": 1.9506689765036354e-05, "loss": 0.885, "step": 5142 }, { "epoch": 0.3822370865849127, "grad_norm": 1.9885517781084707, "learning_rate": 1.9506440834038733e-05, "loss": 0.6904, "step": 5143 }, { "epoch": 0.3823114083983649, "grad_norm": 2.306513247836102, "learning_rate": 1.9506191841839136e-05, "loss": 1.0138, "step": 5144 }, { "epoch": 0.38238573021181715, "grad_norm": 2.3651684828783903, "learning_rate": 1.9505942788439173e-05, "loss": 0.8813, "step": 5145 }, { "epoch": 0.3824600520252694, "grad_norm": 2.2881451017234014, "learning_rate": 1.9505693673840445e-05, "loss": 0.9385, "step": 5146 }, { "epoch": 0.3825343738387217, "grad_norm": 2.2267754347851274, "learning_rate": 1.950544449804455e-05, "loss": 1.0538, "step": 5147 }, { "epoch": 0.3826086956521739, "grad_norm": 2.3811550695489103, "learning_rate": 1.95051952610531e-05, "loss": 0.8953, "step": 5148 }, { "epoch": 0.38268301746562616, "grad_norm": 2.213213870179543, "learning_rate": 1.9504945962867694e-05, "loss": 0.7872, "step": 5149 }, { "epoch": 0.3827573392790784, "grad_norm": 2.704751382213284, "learning_rate": 1.950469660348994e-05, "loss": 1.1256, "step": 5150 }, { "epoch": 0.38283166109253064, "grad_norm": 2.2730434953030634, "learning_rate": 1.9504447182921438e-05, "loss": 0.8921, "step": 5151 }, { "epoch": 0.38290598290598293, "grad_norm": 2.3268199762050985, "learning_rate": 1.95041977011638e-05, "loss": 0.8158, "step": 5152 }, { "epoch": 0.38298030471943517, "grad_norm": 2.1431331818508883, "learning_rate": 1.950394815821864e-05, "loss": 0.7874, "step": 5153 }, { "epoch": 0.3830546265328874, "grad_norm": 2.3618238002318135, "learning_rate": 1.9503698554087543e-05, "loss": 1.0732, "step": 5154 }, { "epoch": 0.38312894834633965, "grad_norm": 2.172517927451281, "learning_rate": 1.950344888877213e-05, "loss": 0.9152, "step": 5155 }, { "epoch": 0.3832032701597919, "grad_norm": 2.43857044800919, "learning_rate": 1.950319916227401e-05, "loss": 0.8307, "step": 5156 }, { "epoch": 0.3832775919732441, "grad_norm": 2.256098998181951, "learning_rate": 1.9502949374594785e-05, "loss": 0.929, "step": 5157 }, { "epoch": 0.3833519137866964, "grad_norm": 2.010277607709049, "learning_rate": 1.9502699525736064e-05, "loss": 0.8249, "step": 5158 }, { "epoch": 0.38342623560014866, "grad_norm": 2.62735007423678, "learning_rate": 1.950244961569946e-05, "loss": 0.9548, "step": 5159 }, { "epoch": 0.3835005574136009, "grad_norm": 2.5583323830242977, "learning_rate": 1.9502199644486575e-05, "loss": 1.0415, "step": 5160 }, { "epoch": 0.38357487922705313, "grad_norm": 2.177264071387282, "learning_rate": 1.9501949612099023e-05, "loss": 0.7844, "step": 5161 }, { "epoch": 0.38364920104050537, "grad_norm": 3.0653521655661597, "learning_rate": 1.950169951853841e-05, "loss": 0.8575, "step": 5162 }, { "epoch": 0.3837235228539576, "grad_norm": 2.3009414250322573, "learning_rate": 1.9501449363806353e-05, "loss": 0.9993, "step": 5163 }, { "epoch": 0.3837978446674099, "grad_norm": 1.8853563438690892, "learning_rate": 1.9501199147904454e-05, "loss": 0.8493, "step": 5164 }, { "epoch": 0.38387216648086214, "grad_norm": 2.142839359640104, "learning_rate": 1.9500948870834332e-05, "loss": 0.9281, "step": 5165 }, { "epoch": 0.3839464882943144, "grad_norm": 1.8233673362037013, "learning_rate": 1.950069853259759e-05, "loss": 0.6945, "step": 5166 }, { "epoch": 0.3840208101077666, "grad_norm": 2.732059267443505, "learning_rate": 1.9500448133195845e-05, "loss": 0.7586, "step": 5167 }, { "epoch": 0.38409513192121886, "grad_norm": 2.280765640352299, "learning_rate": 1.950019767263071e-05, "loss": 0.9833, "step": 5168 }, { "epoch": 0.38416945373467115, "grad_norm": 2.3636858267685557, "learning_rate": 1.9499947150903793e-05, "loss": 1.1015, "step": 5169 }, { "epoch": 0.3842437755481234, "grad_norm": 2.2559900714131658, "learning_rate": 1.9499696568016708e-05, "loss": 1.041, "step": 5170 }, { "epoch": 0.38431809736157563, "grad_norm": 2.839985063560706, "learning_rate": 1.9499445923971072e-05, "loss": 1.2414, "step": 5171 }, { "epoch": 0.38439241917502787, "grad_norm": 2.8506986367561713, "learning_rate": 1.9499195218768492e-05, "loss": 0.8884, "step": 5172 }, { "epoch": 0.3844667409884801, "grad_norm": 2.122155055275475, "learning_rate": 1.9498944452410595e-05, "loss": 0.9341, "step": 5173 }, { "epoch": 0.38454106280193234, "grad_norm": 2.3100399230098527, "learning_rate": 1.9498693624898978e-05, "loss": 0.8466, "step": 5174 }, { "epoch": 0.38461538461538464, "grad_norm": 2.544917686405413, "learning_rate": 1.949844273623527e-05, "loss": 1.1977, "step": 5175 }, { "epoch": 0.3846897064288369, "grad_norm": 2.525656687980234, "learning_rate": 1.9498191786421073e-05, "loss": 1.0402, "step": 5176 }, { "epoch": 0.3847640282422891, "grad_norm": 2.04147532170119, "learning_rate": 1.9497940775458017e-05, "loss": 0.9099, "step": 5177 }, { "epoch": 0.38483835005574135, "grad_norm": 1.97863627028213, "learning_rate": 1.9497689703347707e-05, "loss": 0.7521, "step": 5178 }, { "epoch": 0.3849126718691936, "grad_norm": 2.469808295301353, "learning_rate": 1.9497438570091766e-05, "loss": 0.9717, "step": 5179 }, { "epoch": 0.3849869936826459, "grad_norm": 2.338735248624655, "learning_rate": 1.949718737569181e-05, "loss": 1.0052, "step": 5180 }, { "epoch": 0.3850613154960981, "grad_norm": 2.503594880644482, "learning_rate": 1.949693612014945e-05, "loss": 0.9513, "step": 5181 }, { "epoch": 0.38513563730955036, "grad_norm": 1.654832899715974, "learning_rate": 1.949668480346631e-05, "loss": 0.8648, "step": 5182 }, { "epoch": 0.3852099591230026, "grad_norm": 2.3340483852642464, "learning_rate": 1.9496433425644006e-05, "loss": 0.9938, "step": 5183 }, { "epoch": 0.38528428093645484, "grad_norm": 1.5751344571804744, "learning_rate": 1.9496181986684156e-05, "loss": 0.5876, "step": 5184 }, { "epoch": 0.3853586027499071, "grad_norm": 2.2856846535874746, "learning_rate": 1.949593048658838e-05, "loss": 1.1406, "step": 5185 }, { "epoch": 0.38543292456335937, "grad_norm": 2.0930866132594836, "learning_rate": 1.9495678925358294e-05, "loss": 0.971, "step": 5186 }, { "epoch": 0.3855072463768116, "grad_norm": 1.8202771466079697, "learning_rate": 1.9495427302995524e-05, "loss": 0.8223, "step": 5187 }, { "epoch": 0.38558156819026385, "grad_norm": 2.632671011642805, "learning_rate": 1.949517561950168e-05, "loss": 0.8482, "step": 5188 }, { "epoch": 0.3856558900037161, "grad_norm": 1.999926781030602, "learning_rate": 1.9494923874878395e-05, "loss": 0.7139, "step": 5189 }, { "epoch": 0.3857302118171683, "grad_norm": 2.155755070851014, "learning_rate": 1.949467206912728e-05, "loss": 0.9907, "step": 5190 }, { "epoch": 0.38580453363062056, "grad_norm": 1.8847725122660042, "learning_rate": 1.9494420202249955e-05, "loss": 0.8105, "step": 5191 }, { "epoch": 0.38587885544407285, "grad_norm": 2.4699006784262836, "learning_rate": 1.9494168274248048e-05, "loss": 0.8517, "step": 5192 }, { "epoch": 0.3859531772575251, "grad_norm": 2.4633443815206784, "learning_rate": 1.949391628512318e-05, "loss": 0.9286, "step": 5193 }, { "epoch": 0.38602749907097733, "grad_norm": 1.8646750386263387, "learning_rate": 1.949366423487697e-05, "loss": 0.5823, "step": 5194 }, { "epoch": 0.38610182088442957, "grad_norm": 2.4546627686944906, "learning_rate": 1.949341212351104e-05, "loss": 0.8979, "step": 5195 }, { "epoch": 0.3861761426978818, "grad_norm": 5.350812776140006, "learning_rate": 1.9493159951027017e-05, "loss": 0.8886, "step": 5196 }, { "epoch": 0.3862504645113341, "grad_norm": 2.0757522230751446, "learning_rate": 1.9492907717426525e-05, "loss": 0.9467, "step": 5197 }, { "epoch": 0.38632478632478634, "grad_norm": 2.1537808626649504, "learning_rate": 1.9492655422711183e-05, "loss": 1.0798, "step": 5198 }, { "epoch": 0.3863991081382386, "grad_norm": 2.4671639378905534, "learning_rate": 1.949240306688262e-05, "loss": 0.9963, "step": 5199 }, { "epoch": 0.3864734299516908, "grad_norm": 2.6939184593903827, "learning_rate": 1.9492150649942457e-05, "loss": 0.7986, "step": 5200 }, { "epoch": 0.38654775176514306, "grad_norm": 2.2855643415694904, "learning_rate": 1.949189817189232e-05, "loss": 0.9203, "step": 5201 }, { "epoch": 0.3866220735785953, "grad_norm": 2.035599374507511, "learning_rate": 1.9491645632733837e-05, "loss": 0.9093, "step": 5202 }, { "epoch": 0.3866963953920476, "grad_norm": 2.3080112430718747, "learning_rate": 1.949139303246863e-05, "loss": 0.9542, "step": 5203 }, { "epoch": 0.3867707172054998, "grad_norm": 2.0990396025048175, "learning_rate": 1.949114037109833e-05, "loss": 0.8958, "step": 5204 }, { "epoch": 0.38684503901895206, "grad_norm": 1.9309079206291273, "learning_rate": 1.949088764862456e-05, "loss": 0.9755, "step": 5205 }, { "epoch": 0.3869193608324043, "grad_norm": 2.597150529473665, "learning_rate": 1.9490634865048944e-05, "loss": 1.0331, "step": 5206 }, { "epoch": 0.38699368264585654, "grad_norm": 2.29718946803912, "learning_rate": 1.9490382020373117e-05, "loss": 0.692, "step": 5207 }, { "epoch": 0.38706800445930883, "grad_norm": 2.167479079502582, "learning_rate": 1.94901291145987e-05, "loss": 0.9618, "step": 5208 }, { "epoch": 0.3871423262727611, "grad_norm": 2.260736334869773, "learning_rate": 1.9489876147727326e-05, "loss": 1.1285, "step": 5209 }, { "epoch": 0.3872166480862133, "grad_norm": 2.1034778382568686, "learning_rate": 1.9489623119760623e-05, "loss": 0.8635, "step": 5210 }, { "epoch": 0.38729096989966555, "grad_norm": 2.454408816218546, "learning_rate": 1.948937003070022e-05, "loss": 0.6828, "step": 5211 }, { "epoch": 0.3873652917131178, "grad_norm": 2.685155648310013, "learning_rate": 1.948911688054774e-05, "loss": 1.0497, "step": 5212 }, { "epoch": 0.38743961352657, "grad_norm": 2.0619281512371366, "learning_rate": 1.9488863669304823e-05, "loss": 0.6736, "step": 5213 }, { "epoch": 0.3875139353400223, "grad_norm": 2.326768924215766, "learning_rate": 1.9488610396973093e-05, "loss": 0.984, "step": 5214 }, { "epoch": 0.38758825715347456, "grad_norm": 2.298099852287056, "learning_rate": 1.9488357063554177e-05, "loss": 1.1445, "step": 5215 }, { "epoch": 0.3876625789669268, "grad_norm": 2.515719606492809, "learning_rate": 1.9488103669049714e-05, "loss": 0.9435, "step": 5216 }, { "epoch": 0.38773690078037903, "grad_norm": 3.0986429265110473, "learning_rate": 1.9487850213461332e-05, "loss": 1.0661, "step": 5217 }, { "epoch": 0.3878112225938313, "grad_norm": 2.303610349270821, "learning_rate": 1.9487596696790663e-05, "loss": 0.7782, "step": 5218 }, { "epoch": 0.3878855444072835, "grad_norm": 2.420626555145299, "learning_rate": 1.948734311903934e-05, "loss": 0.7767, "step": 5219 }, { "epoch": 0.3879598662207358, "grad_norm": 2.2463229060828644, "learning_rate": 1.948708948020899e-05, "loss": 1.0113, "step": 5220 }, { "epoch": 0.38803418803418804, "grad_norm": 2.080287134147884, "learning_rate": 1.9486835780301252e-05, "loss": 0.9149, "step": 5221 }, { "epoch": 0.3881085098476403, "grad_norm": 3.2380123210589047, "learning_rate": 1.948658201931776e-05, "loss": 0.9184, "step": 5222 }, { "epoch": 0.3881828316610925, "grad_norm": 1.7809351913554332, "learning_rate": 1.9486328197260143e-05, "loss": 0.8992, "step": 5223 }, { "epoch": 0.38825715347454476, "grad_norm": 1.733506522971957, "learning_rate": 1.9486074314130038e-05, "loss": 0.6583, "step": 5224 }, { "epoch": 0.38833147528799705, "grad_norm": 2.4255334669524014, "learning_rate": 1.9485820369929077e-05, "loss": 1.171, "step": 5225 }, { "epoch": 0.3884057971014493, "grad_norm": 2.6904177096884, "learning_rate": 1.9485566364658897e-05, "loss": 0.8216, "step": 5226 }, { "epoch": 0.38848011891490153, "grad_norm": 1.998335922152889, "learning_rate": 1.9485312298321136e-05, "loss": 1.0185, "step": 5227 }, { "epoch": 0.38855444072835377, "grad_norm": 2.061706396186329, "learning_rate": 1.9485058170917424e-05, "loss": 1.0936, "step": 5228 }, { "epoch": 0.388628762541806, "grad_norm": 2.2046724705510004, "learning_rate": 1.9484803982449397e-05, "loss": 0.9149, "step": 5229 }, { "epoch": 0.38870308435525824, "grad_norm": 2.098605691701794, "learning_rate": 1.9484549732918697e-05, "loss": 0.9526, "step": 5230 }, { "epoch": 0.38877740616871054, "grad_norm": 2.1097247860807014, "learning_rate": 1.9484295422326957e-05, "loss": 0.9569, "step": 5231 }, { "epoch": 0.3888517279821628, "grad_norm": 2.1508030290970406, "learning_rate": 1.948404105067582e-05, "loss": 1.0435, "step": 5232 }, { "epoch": 0.388926049795615, "grad_norm": 2.249194613535465, "learning_rate": 1.9483786617966913e-05, "loss": 1.0, "step": 5233 }, { "epoch": 0.38900037160906725, "grad_norm": 2.1035452192941357, "learning_rate": 1.9483532124201883e-05, "loss": 1.0066, "step": 5234 }, { "epoch": 0.3890746934225195, "grad_norm": 2.5050803717392056, "learning_rate": 1.948327756938236e-05, "loss": 0.8279, "step": 5235 }, { "epoch": 0.3891490152359718, "grad_norm": 1.6904548546358125, "learning_rate": 1.9483022953509996e-05, "loss": 0.6651, "step": 5236 }, { "epoch": 0.389223337049424, "grad_norm": 2.026541122824743, "learning_rate": 1.9482768276586417e-05, "loss": 0.86, "step": 5237 }, { "epoch": 0.38929765886287626, "grad_norm": 2.352804221589974, "learning_rate": 1.948251353861327e-05, "loss": 0.8182, "step": 5238 }, { "epoch": 0.3893719806763285, "grad_norm": 2.570195902515513, "learning_rate": 1.948225873959219e-05, "loss": 1.0731, "step": 5239 }, { "epoch": 0.38944630248978074, "grad_norm": 2.414916558984371, "learning_rate": 1.9482003879524825e-05, "loss": 0.9738, "step": 5240 }, { "epoch": 0.389520624303233, "grad_norm": 1.7467840525857454, "learning_rate": 1.9481748958412808e-05, "loss": 0.679, "step": 5241 }, { "epoch": 0.38959494611668527, "grad_norm": 2.210083896717338, "learning_rate": 1.9481493976257785e-05, "loss": 0.9221, "step": 5242 }, { "epoch": 0.3896692679301375, "grad_norm": 2.0842531336143177, "learning_rate": 1.9481238933061394e-05, "loss": 1.1044, "step": 5243 }, { "epoch": 0.38974358974358975, "grad_norm": 2.1775999989615236, "learning_rate": 1.9480983828825278e-05, "loss": 0.8142, "step": 5244 }, { "epoch": 0.389817911557042, "grad_norm": 2.0213501658706887, "learning_rate": 1.948072866355108e-05, "loss": 0.8355, "step": 5245 }, { "epoch": 0.3898922333704942, "grad_norm": 2.815450492341179, "learning_rate": 1.9480473437240444e-05, "loss": 0.8485, "step": 5246 }, { "epoch": 0.38996655518394646, "grad_norm": 2.0037603258065015, "learning_rate": 1.9480218149895012e-05, "loss": 0.7996, "step": 5247 }, { "epoch": 0.39004087699739876, "grad_norm": 2.038817433838632, "learning_rate": 1.9479962801516428e-05, "loss": 0.9236, "step": 5248 }, { "epoch": 0.390115198810851, "grad_norm": 2.1022394844094254, "learning_rate": 1.9479707392106335e-05, "loss": 1.0508, "step": 5249 }, { "epoch": 0.39018952062430323, "grad_norm": 2.1702718219729342, "learning_rate": 1.9479451921666377e-05, "loss": 0.9826, "step": 5250 }, { "epoch": 0.39026384243775547, "grad_norm": 2.1433095990906286, "learning_rate": 1.94791963901982e-05, "loss": 0.9613, "step": 5251 }, { "epoch": 0.3903381642512077, "grad_norm": 2.0717066274405957, "learning_rate": 1.9478940797703446e-05, "loss": 0.8811, "step": 5252 }, { "epoch": 0.39041248606466, "grad_norm": 3.511125728015711, "learning_rate": 1.9478685144183767e-05, "loss": 0.9622, "step": 5253 }, { "epoch": 0.39048680787811224, "grad_norm": 1.8999004032470064, "learning_rate": 1.9478429429640802e-05, "loss": 0.6319, "step": 5254 }, { "epoch": 0.3905611296915645, "grad_norm": 2.2222262481527104, "learning_rate": 1.94781736540762e-05, "loss": 0.9745, "step": 5255 }, { "epoch": 0.3906354515050167, "grad_norm": 2.551464326953833, "learning_rate": 1.9477917817491608e-05, "loss": 0.9109, "step": 5256 }, { "epoch": 0.39070977331846896, "grad_norm": 2.100229877124226, "learning_rate": 1.9477661919888675e-05, "loss": 1.0073, "step": 5257 }, { "epoch": 0.3907840951319212, "grad_norm": 2.819150585664223, "learning_rate": 1.9477405961269043e-05, "loss": 1.0414, "step": 5258 }, { "epoch": 0.3908584169453735, "grad_norm": 3.3761633301111935, "learning_rate": 1.947714994163437e-05, "loss": 1.1785, "step": 5259 }, { "epoch": 0.3909327387588257, "grad_norm": 1.9560323964058959, "learning_rate": 1.947689386098629e-05, "loss": 0.8233, "step": 5260 }, { "epoch": 0.39100706057227796, "grad_norm": 2.6144473498673957, "learning_rate": 1.9476637719326464e-05, "loss": 0.9715, "step": 5261 }, { "epoch": 0.3910813823857302, "grad_norm": 2.151500370656266, "learning_rate": 1.9476381516656532e-05, "loss": 0.9997, "step": 5262 }, { "epoch": 0.39115570419918244, "grad_norm": 1.7486504699682688, "learning_rate": 1.947612525297815e-05, "loss": 0.7829, "step": 5263 }, { "epoch": 0.39123002601263474, "grad_norm": 2.085048633215544, "learning_rate": 1.9475868928292962e-05, "loss": 0.903, "step": 5264 }, { "epoch": 0.391304347826087, "grad_norm": 9.140058085341844, "learning_rate": 1.9475612542602628e-05, "loss": 1.1769, "step": 5265 }, { "epoch": 0.3913786696395392, "grad_norm": 2.221024845402399, "learning_rate": 1.9475356095908785e-05, "loss": 0.9985, "step": 5266 }, { "epoch": 0.39145299145299145, "grad_norm": 1.9127514583649239, "learning_rate": 1.9475099588213092e-05, "loss": 0.8061, "step": 5267 }, { "epoch": 0.3915273132664437, "grad_norm": 2.2208523297818705, "learning_rate": 1.9474843019517204e-05, "loss": 0.9582, "step": 5268 }, { "epoch": 0.3916016350798959, "grad_norm": 2.2774054686407377, "learning_rate": 1.9474586389822767e-05, "loss": 0.9777, "step": 5269 }, { "epoch": 0.3916759568933482, "grad_norm": 2.611107248803359, "learning_rate": 1.9474329699131434e-05, "loss": 0.7041, "step": 5270 }, { "epoch": 0.39175027870680046, "grad_norm": 2.3915883749929434, "learning_rate": 1.9474072947444855e-05, "loss": 0.9647, "step": 5271 }, { "epoch": 0.3918246005202527, "grad_norm": 2.2212479896604735, "learning_rate": 1.947381613476469e-05, "loss": 0.7531, "step": 5272 }, { "epoch": 0.39189892233370494, "grad_norm": 2.1788164851428893, "learning_rate": 1.9473559261092587e-05, "loss": 0.9155, "step": 5273 }, { "epoch": 0.3919732441471572, "grad_norm": 2.600457788094657, "learning_rate": 1.9473302326430202e-05, "loss": 0.711, "step": 5274 }, { "epoch": 0.3920475659606094, "grad_norm": 2.278703695798361, "learning_rate": 1.9473045330779184e-05, "loss": 0.7674, "step": 5275 }, { "epoch": 0.3921218877740617, "grad_norm": 3.473225164453993, "learning_rate": 1.9472788274141195e-05, "loss": 1.1191, "step": 5276 }, { "epoch": 0.39219620958751394, "grad_norm": 2.425498080690353, "learning_rate": 1.9472531156517887e-05, "loss": 0.8009, "step": 5277 }, { "epoch": 0.3922705314009662, "grad_norm": 2.5064561151566416, "learning_rate": 1.9472273977910914e-05, "loss": 0.9191, "step": 5278 }, { "epoch": 0.3923448532144184, "grad_norm": 2.196340302491958, "learning_rate": 1.9472016738321933e-05, "loss": 0.9783, "step": 5279 }, { "epoch": 0.39241917502787066, "grad_norm": 1.5835375876519413, "learning_rate": 1.94717594377526e-05, "loss": 0.5879, "step": 5280 }, { "epoch": 0.39249349684132295, "grad_norm": 2.4567403152983367, "learning_rate": 1.947150207620457e-05, "loss": 0.9603, "step": 5281 }, { "epoch": 0.3925678186547752, "grad_norm": 2.310117713970996, "learning_rate": 1.94712446536795e-05, "loss": 0.9399, "step": 5282 }, { "epoch": 0.39264214046822743, "grad_norm": 2.0228198642194277, "learning_rate": 1.947098717017905e-05, "loss": 0.7509, "step": 5283 }, { "epoch": 0.39271646228167967, "grad_norm": 2.545044754922092, "learning_rate": 1.9470729625704877e-05, "loss": 0.7708, "step": 5284 }, { "epoch": 0.3927907840951319, "grad_norm": 2.117677931505377, "learning_rate": 1.947047202025864e-05, "loss": 0.9131, "step": 5285 }, { "epoch": 0.39286510590858414, "grad_norm": 2.441823772682311, "learning_rate": 1.947021435384199e-05, "loss": 1.0407, "step": 5286 }, { "epoch": 0.39293942772203644, "grad_norm": 1.9525566982849967, "learning_rate": 1.946995662645659e-05, "loss": 0.9652, "step": 5287 }, { "epoch": 0.3930137495354887, "grad_norm": 1.955684664555629, "learning_rate": 1.9469698838104107e-05, "loss": 1.0655, "step": 5288 }, { "epoch": 0.3930880713489409, "grad_norm": 2.507580474996823, "learning_rate": 1.946944098878619e-05, "loss": 0.9304, "step": 5289 }, { "epoch": 0.39316239316239315, "grad_norm": 1.9232998547312652, "learning_rate": 1.9469183078504508e-05, "loss": 0.7229, "step": 5290 }, { "epoch": 0.3932367149758454, "grad_norm": 3.7594743259297907, "learning_rate": 1.9468925107260715e-05, "loss": 1.0715, "step": 5291 }, { "epoch": 0.3933110367892977, "grad_norm": 2.597805762937843, "learning_rate": 1.9468667075056472e-05, "loss": 1.0537, "step": 5292 }, { "epoch": 0.3933853586027499, "grad_norm": 2.1290862353246496, "learning_rate": 1.9468408981893437e-05, "loss": 1.0269, "step": 5293 }, { "epoch": 0.39345968041620216, "grad_norm": 2.717052422133515, "learning_rate": 1.9468150827773282e-05, "loss": 0.8204, "step": 5294 }, { "epoch": 0.3935340022296544, "grad_norm": 2.1628559294799263, "learning_rate": 1.9467892612697662e-05, "loss": 0.7184, "step": 5295 }, { "epoch": 0.39360832404310664, "grad_norm": 1.6363634747674145, "learning_rate": 1.946763433666824e-05, "loss": 0.8513, "step": 5296 }, { "epoch": 0.3936826458565589, "grad_norm": 2.214226282164464, "learning_rate": 1.9467375999686683e-05, "loss": 0.9466, "step": 5297 }, { "epoch": 0.39375696767001117, "grad_norm": 1.8681534645696123, "learning_rate": 1.9467117601754647e-05, "loss": 0.7891, "step": 5298 }, { "epoch": 0.3938312894834634, "grad_norm": 2.470025043464866, "learning_rate": 1.94668591428738e-05, "loss": 0.8444, "step": 5299 }, { "epoch": 0.39390561129691565, "grad_norm": 2.080184334180349, "learning_rate": 1.9466600623045805e-05, "loss": 0.9183, "step": 5300 }, { "epoch": 0.3939799331103679, "grad_norm": 2.436228823293206, "learning_rate": 1.9466342042272326e-05, "loss": 0.8902, "step": 5301 }, { "epoch": 0.3940542549238201, "grad_norm": 1.9669207989458142, "learning_rate": 1.946608340055503e-05, "loss": 0.837, "step": 5302 }, { "epoch": 0.39412857673727236, "grad_norm": 2.0733893952879154, "learning_rate": 1.946582469789558e-05, "loss": 0.9601, "step": 5303 }, { "epoch": 0.39420289855072466, "grad_norm": 3.1317821708216216, "learning_rate": 1.946556593429564e-05, "loss": 1.0107, "step": 5304 }, { "epoch": 0.3942772203641769, "grad_norm": 3.4036030753329936, "learning_rate": 1.9465307109756877e-05, "loss": 1.1288, "step": 5305 }, { "epoch": 0.39435154217762913, "grad_norm": 2.4210947647078758, "learning_rate": 1.946504822428096e-05, "loss": 1.1178, "step": 5306 }, { "epoch": 0.39442586399108137, "grad_norm": 1.928272254130866, "learning_rate": 1.9464789277869555e-05, "loss": 1.0289, "step": 5307 }, { "epoch": 0.3945001858045336, "grad_norm": 1.7591293074883914, "learning_rate": 1.946453027052432e-05, "loss": 0.7778, "step": 5308 }, { "epoch": 0.3945745076179859, "grad_norm": 1.912913610340075, "learning_rate": 1.9464271202246938e-05, "loss": 0.7202, "step": 5309 }, { "epoch": 0.39464882943143814, "grad_norm": 2.4134097372050425, "learning_rate": 1.9464012073039066e-05, "loss": 0.8933, "step": 5310 }, { "epoch": 0.3947231512448904, "grad_norm": 2.321040149043276, "learning_rate": 1.9463752882902377e-05, "loss": 0.818, "step": 5311 }, { "epoch": 0.3947974730583426, "grad_norm": 1.6786622108863047, "learning_rate": 1.9463493631838537e-05, "loss": 0.7411, "step": 5312 }, { "epoch": 0.39487179487179486, "grad_norm": 1.9313082607127159, "learning_rate": 1.9463234319849216e-05, "loss": 0.8113, "step": 5313 }, { "epoch": 0.3949461166852471, "grad_norm": 2.148899295280926, "learning_rate": 1.9462974946936084e-05, "loss": 0.9632, "step": 5314 }, { "epoch": 0.3950204384986994, "grad_norm": 2.502310559165257, "learning_rate": 1.9462715513100806e-05, "loss": 1.1084, "step": 5315 }, { "epoch": 0.3950947603121516, "grad_norm": 1.9835454169497018, "learning_rate": 1.946245601834506e-05, "loss": 0.7648, "step": 5316 }, { "epoch": 0.39516908212560387, "grad_norm": 1.9163952523368888, "learning_rate": 1.9462196462670518e-05, "loss": 0.8287, "step": 5317 }, { "epoch": 0.3952434039390561, "grad_norm": 2.0230568267923936, "learning_rate": 1.946193684607884e-05, "loss": 0.9877, "step": 5318 }, { "epoch": 0.39531772575250834, "grad_norm": 3.826876851432793, "learning_rate": 1.9461677168571707e-05, "loss": 0.9259, "step": 5319 }, { "epoch": 0.39539204756596064, "grad_norm": 2.0478832330852472, "learning_rate": 1.9461417430150784e-05, "loss": 1.0639, "step": 5320 }, { "epoch": 0.3954663693794129, "grad_norm": 2.121787764605457, "learning_rate": 1.9461157630817747e-05, "loss": 0.8386, "step": 5321 }, { "epoch": 0.3955406911928651, "grad_norm": 2.030859465455592, "learning_rate": 1.946089777057427e-05, "loss": 0.9157, "step": 5322 }, { "epoch": 0.39561501300631735, "grad_norm": 2.185691717780106, "learning_rate": 1.9460637849422023e-05, "loss": 0.9253, "step": 5323 }, { "epoch": 0.3956893348197696, "grad_norm": 2.269262017431202, "learning_rate": 1.9460377867362678e-05, "loss": 1.0109, "step": 5324 }, { "epoch": 0.3957636566332218, "grad_norm": 2.0663252013730733, "learning_rate": 1.9460117824397916e-05, "loss": 0.9431, "step": 5325 }, { "epoch": 0.3958379784466741, "grad_norm": 1.7508913165040783, "learning_rate": 1.9459857720529402e-05, "loss": 0.6183, "step": 5326 }, { "epoch": 0.39591230026012636, "grad_norm": 2.012923452508684, "learning_rate": 1.945959755575882e-05, "loss": 0.9265, "step": 5327 }, { "epoch": 0.3959866220735786, "grad_norm": 2.355171755211928, "learning_rate": 1.9459337330087835e-05, "loss": 0.9167, "step": 5328 }, { "epoch": 0.39606094388703084, "grad_norm": 1.792125136774461, "learning_rate": 1.9459077043518125e-05, "loss": 0.7532, "step": 5329 }, { "epoch": 0.3961352657004831, "grad_norm": 2.1016081498247523, "learning_rate": 1.9458816696051372e-05, "loss": 0.8844, "step": 5330 }, { "epoch": 0.3962095875139353, "grad_norm": 2.285578540604685, "learning_rate": 1.945855628768925e-05, "loss": 0.9137, "step": 5331 }, { "epoch": 0.3962839093273876, "grad_norm": 2.22139109173519, "learning_rate": 1.945829581843343e-05, "loss": 0.9606, "step": 5332 }, { "epoch": 0.39635823114083985, "grad_norm": 2.2008503309260172, "learning_rate": 1.945803528828559e-05, "loss": 0.9935, "step": 5333 }, { "epoch": 0.3964325529542921, "grad_norm": 2.3150001586716993, "learning_rate": 1.9457774697247416e-05, "loss": 1.0392, "step": 5334 }, { "epoch": 0.3965068747677443, "grad_norm": 1.7421051987929246, "learning_rate": 1.9457514045320572e-05, "loss": 0.8164, "step": 5335 }, { "epoch": 0.39658119658119656, "grad_norm": 2.194438112648083, "learning_rate": 1.945725333250675e-05, "loss": 1.109, "step": 5336 }, { "epoch": 0.39665551839464885, "grad_norm": 2.0665380380354597, "learning_rate": 1.945699255880762e-05, "loss": 0.8576, "step": 5337 }, { "epoch": 0.3967298402081011, "grad_norm": 1.8877452760387161, "learning_rate": 1.945673172422486e-05, "loss": 0.6317, "step": 5338 }, { "epoch": 0.39680416202155333, "grad_norm": 1.8835056385581594, "learning_rate": 1.9456470828760153e-05, "loss": 0.8337, "step": 5339 }, { "epoch": 0.39687848383500557, "grad_norm": 2.464065015709909, "learning_rate": 1.945620987241518e-05, "loss": 1.1305, "step": 5340 }, { "epoch": 0.3969528056484578, "grad_norm": 1.9728490354322368, "learning_rate": 1.9455948855191615e-05, "loss": 0.9894, "step": 5341 }, { "epoch": 0.39702712746191005, "grad_norm": 3.123515808037912, "learning_rate": 1.9455687777091145e-05, "loss": 0.9938, "step": 5342 }, { "epoch": 0.39710144927536234, "grad_norm": 2.025667078149218, "learning_rate": 1.9455426638115443e-05, "loss": 1.0471, "step": 5343 }, { "epoch": 0.3971757710888146, "grad_norm": 2.0011252591737096, "learning_rate": 1.9455165438266198e-05, "loss": 0.7706, "step": 5344 }, { "epoch": 0.3972500929022668, "grad_norm": 3.247163884413122, "learning_rate": 1.945490417754509e-05, "loss": 0.9466, "step": 5345 }, { "epoch": 0.39732441471571905, "grad_norm": 2.5486903087406394, "learning_rate": 1.9454642855953793e-05, "loss": 0.9345, "step": 5346 }, { "epoch": 0.3973987365291713, "grad_norm": 1.8945989864368533, "learning_rate": 1.9454381473494002e-05, "loss": 0.7703, "step": 5347 }, { "epoch": 0.3974730583426236, "grad_norm": 3.0626312509495865, "learning_rate": 1.9454120030167392e-05, "loss": 0.8997, "step": 5348 }, { "epoch": 0.3975473801560758, "grad_norm": 2.760421892994876, "learning_rate": 1.9453858525975648e-05, "loss": 1.1936, "step": 5349 }, { "epoch": 0.39762170196952806, "grad_norm": 2.8562244068450866, "learning_rate": 1.9453596960920452e-05, "loss": 0.9572, "step": 5350 }, { "epoch": 0.3976960237829803, "grad_norm": 1.902752970155236, "learning_rate": 1.945333533500349e-05, "loss": 0.7707, "step": 5351 }, { "epoch": 0.39777034559643254, "grad_norm": 2.131472344184176, "learning_rate": 1.945307364822645e-05, "loss": 0.9721, "step": 5352 }, { "epoch": 0.3978446674098848, "grad_norm": 2.2685440318590677, "learning_rate": 1.9452811900591008e-05, "loss": 1.0043, "step": 5353 }, { "epoch": 0.39791898922333707, "grad_norm": 2.7974202453623715, "learning_rate": 1.9452550092098855e-05, "loss": 0.8748, "step": 5354 }, { "epoch": 0.3979933110367893, "grad_norm": 2.4207375710544397, "learning_rate": 1.945228822275167e-05, "loss": 0.5384, "step": 5355 }, { "epoch": 0.39806763285024155, "grad_norm": 2.651800050599213, "learning_rate": 1.945202629255115e-05, "loss": 0.8584, "step": 5356 }, { "epoch": 0.3981419546636938, "grad_norm": 2.7885679430800554, "learning_rate": 1.9451764301498973e-05, "loss": 0.8809, "step": 5357 }, { "epoch": 0.398216276477146, "grad_norm": 2.2907530729403787, "learning_rate": 1.945150224959683e-05, "loss": 0.9959, "step": 5358 }, { "epoch": 0.39829059829059826, "grad_norm": 1.7719326212298825, "learning_rate": 1.94512401368464e-05, "loss": 0.7825, "step": 5359 }, { "epoch": 0.39836492010405056, "grad_norm": 2.3191805715690452, "learning_rate": 1.9450977963249382e-05, "loss": 1.0757, "step": 5360 }, { "epoch": 0.3984392419175028, "grad_norm": 2.267888836327689, "learning_rate": 1.9450715728807457e-05, "loss": 1.0694, "step": 5361 }, { "epoch": 0.39851356373095503, "grad_norm": 2.2209530753484317, "learning_rate": 1.9450453433522312e-05, "loss": 0.9425, "step": 5362 }, { "epoch": 0.39858788554440727, "grad_norm": 1.9252137347240885, "learning_rate": 1.945019107739564e-05, "loss": 0.9032, "step": 5363 }, { "epoch": 0.3986622073578595, "grad_norm": 1.838575143679494, "learning_rate": 1.9449928660429127e-05, "loss": 0.7351, "step": 5364 }, { "epoch": 0.3987365291713118, "grad_norm": 2.3293332976402743, "learning_rate": 1.9449666182624466e-05, "loss": 1.0299, "step": 5365 }, { "epoch": 0.39881085098476404, "grad_norm": 2.2330570958411693, "learning_rate": 1.9449403643983343e-05, "loss": 0.9299, "step": 5366 }, { "epoch": 0.3988851727982163, "grad_norm": 1.7784731408617063, "learning_rate": 1.944914104450745e-05, "loss": 0.7937, "step": 5367 }, { "epoch": 0.3989594946116685, "grad_norm": 1.8862497531160465, "learning_rate": 1.9448878384198477e-05, "loss": 0.9198, "step": 5368 }, { "epoch": 0.39903381642512076, "grad_norm": 2.218453970324206, "learning_rate": 1.9448615663058116e-05, "loss": 0.9725, "step": 5369 }, { "epoch": 0.399108138238573, "grad_norm": 2.2579433849142196, "learning_rate": 1.9448352881088056e-05, "loss": 0.973, "step": 5370 }, { "epoch": 0.3991824600520253, "grad_norm": 2.0049542555056545, "learning_rate": 1.9448090038289993e-05, "loss": 1.0187, "step": 5371 }, { "epoch": 0.39925678186547753, "grad_norm": 2.2186296308648235, "learning_rate": 1.9447827134665615e-05, "loss": 0.9755, "step": 5372 }, { "epoch": 0.39933110367892977, "grad_norm": 8.275492774936621, "learning_rate": 1.9447564170216615e-05, "loss": 0.7924, "step": 5373 }, { "epoch": 0.399405425492382, "grad_norm": 3.130096813479493, "learning_rate": 1.9447301144944687e-05, "loss": 0.9873, "step": 5374 }, { "epoch": 0.39947974730583424, "grad_norm": 2.443239661325146, "learning_rate": 1.9447038058851527e-05, "loss": 0.7849, "step": 5375 }, { "epoch": 0.39955406911928654, "grad_norm": 2.1404776392874045, "learning_rate": 1.9446774911938823e-05, "loss": 0.8102, "step": 5376 }, { "epoch": 0.3996283909327388, "grad_norm": 2.3391097230059077, "learning_rate": 1.9446511704208275e-05, "loss": 1.005, "step": 5377 }, { "epoch": 0.399702712746191, "grad_norm": 1.7600217420371338, "learning_rate": 1.9446248435661576e-05, "loss": 0.7229, "step": 5378 }, { "epoch": 0.39977703455964325, "grad_norm": 2.740092538934726, "learning_rate": 1.9445985106300414e-05, "loss": 1.1068, "step": 5379 }, { "epoch": 0.3998513563730955, "grad_norm": 2.3803624149096017, "learning_rate": 1.9445721716126495e-05, "loss": 1.1809, "step": 5380 }, { "epoch": 0.39992567818654773, "grad_norm": 2.0789105917027473, "learning_rate": 1.944545826514151e-05, "loss": 0.9037, "step": 5381 }, { "epoch": 0.4, "grad_norm": 4.451955100825634, "learning_rate": 1.944519475334715e-05, "loss": 1.0002, "step": 5382 }, { "epoch": 0.40007432181345226, "grad_norm": 2.346583155550199, "learning_rate": 1.944493118074512e-05, "loss": 1.0543, "step": 5383 }, { "epoch": 0.4001486436269045, "grad_norm": 1.9500057390150374, "learning_rate": 1.9444667547337112e-05, "loss": 0.9014, "step": 5384 }, { "epoch": 0.40022296544035674, "grad_norm": 2.3931393235122203, "learning_rate": 1.9444403853124823e-05, "loss": 0.8787, "step": 5385 }, { "epoch": 0.400297287253809, "grad_norm": 2.5744682801781558, "learning_rate": 1.9444140098109954e-05, "loss": 0.9413, "step": 5386 }, { "epoch": 0.4003716090672612, "grad_norm": 2.7060087096899696, "learning_rate": 1.94438762822942e-05, "loss": 0.9585, "step": 5387 }, { "epoch": 0.4004459308807135, "grad_norm": 1.7155977237361508, "learning_rate": 1.9443612405679265e-05, "loss": 0.8913, "step": 5388 }, { "epoch": 0.40052025269416575, "grad_norm": 6.365903788481026, "learning_rate": 1.944334846826684e-05, "loss": 0.8385, "step": 5389 }, { "epoch": 0.400594574507618, "grad_norm": 2.186497144593163, "learning_rate": 1.9443084470058627e-05, "loss": 0.8941, "step": 5390 }, { "epoch": 0.4006688963210702, "grad_norm": 2.1479945106110523, "learning_rate": 1.9442820411056325e-05, "loss": 0.9685, "step": 5391 }, { "epoch": 0.40074321813452246, "grad_norm": 2.188464386439882, "learning_rate": 1.9442556291261636e-05, "loss": 1.0823, "step": 5392 }, { "epoch": 0.40081753994797475, "grad_norm": 2.311464638234019, "learning_rate": 1.944229211067626e-05, "loss": 0.9067, "step": 5393 }, { "epoch": 0.400891861761427, "grad_norm": 1.9978214321923105, "learning_rate": 1.94420278693019e-05, "loss": 0.8196, "step": 5394 }, { "epoch": 0.40096618357487923, "grad_norm": 2.3555456839557087, "learning_rate": 1.9441763567140247e-05, "loss": 0.9035, "step": 5395 }, { "epoch": 0.40104050538833147, "grad_norm": 3.209010430112366, "learning_rate": 1.9441499204193017e-05, "loss": 0.7717, "step": 5396 }, { "epoch": 0.4011148272017837, "grad_norm": 3.068023571578246, "learning_rate": 1.9441234780461903e-05, "loss": 1.0454, "step": 5397 }, { "epoch": 0.40118914901523595, "grad_norm": 1.994121815117316, "learning_rate": 1.944097029594861e-05, "loss": 0.981, "step": 5398 }, { "epoch": 0.40126347082868824, "grad_norm": 2.6224106969629895, "learning_rate": 1.9440705750654837e-05, "loss": 0.9897, "step": 5399 }, { "epoch": 0.4013377926421405, "grad_norm": 1.9703275222443577, "learning_rate": 1.9440441144582293e-05, "loss": 0.7508, "step": 5400 }, { "epoch": 0.4014121144555927, "grad_norm": 2.0943369170963977, "learning_rate": 1.9440176477732676e-05, "loss": 0.8015, "step": 5401 }, { "epoch": 0.40148643626904496, "grad_norm": 2.619470620243269, "learning_rate": 1.94399117501077e-05, "loss": 0.9646, "step": 5402 }, { "epoch": 0.4015607580824972, "grad_norm": 2.4475316190139473, "learning_rate": 1.9439646961709054e-05, "loss": 0.9205, "step": 5403 }, { "epoch": 0.4016350798959495, "grad_norm": 2.491970107575959, "learning_rate": 1.9439382112538454e-05, "loss": 0.9692, "step": 5404 }, { "epoch": 0.4017094017094017, "grad_norm": 2.375044366373191, "learning_rate": 1.94391172025976e-05, "loss": 0.8767, "step": 5405 }, { "epoch": 0.40178372352285396, "grad_norm": 2.2568052392525897, "learning_rate": 1.94388522318882e-05, "loss": 1.1716, "step": 5406 }, { "epoch": 0.4018580453363062, "grad_norm": 2.254699594535526, "learning_rate": 1.943858720041196e-05, "loss": 0.9337, "step": 5407 }, { "epoch": 0.40193236714975844, "grad_norm": 2.0005038189390163, "learning_rate": 1.9438322108170587e-05, "loss": 0.9525, "step": 5408 }, { "epoch": 0.4020066889632107, "grad_norm": 2.6431394754400928, "learning_rate": 1.9438056955165786e-05, "loss": 0.948, "step": 5409 }, { "epoch": 0.402081010776663, "grad_norm": 2.4004944660525664, "learning_rate": 1.943779174139926e-05, "loss": 0.9792, "step": 5410 }, { "epoch": 0.4021553325901152, "grad_norm": 2.5628304676515223, "learning_rate": 1.943752646687272e-05, "loss": 1.0223, "step": 5411 }, { "epoch": 0.40222965440356745, "grad_norm": 2.100583682311196, "learning_rate": 1.9437261131587877e-05, "loss": 0.8923, "step": 5412 }, { "epoch": 0.4023039762170197, "grad_norm": 2.0701359204556837, "learning_rate": 1.9436995735546437e-05, "loss": 0.7772, "step": 5413 }, { "epoch": 0.4023782980304719, "grad_norm": 2.2635133677968082, "learning_rate": 1.943673027875011e-05, "loss": 0.9708, "step": 5414 }, { "epoch": 0.40245261984392416, "grad_norm": 2.3143031977359807, "learning_rate": 1.9436464761200596e-05, "loss": 0.8663, "step": 5415 }, { "epoch": 0.40252694165737646, "grad_norm": 2.080078548929304, "learning_rate": 1.9436199182899617e-05, "loss": 0.7097, "step": 5416 }, { "epoch": 0.4026012634708287, "grad_norm": 2.1471404026244625, "learning_rate": 1.9435933543848876e-05, "loss": 0.9988, "step": 5417 }, { "epoch": 0.40267558528428093, "grad_norm": 2.479213502051494, "learning_rate": 1.9435667844050084e-05, "loss": 0.9675, "step": 5418 }, { "epoch": 0.4027499070977332, "grad_norm": 1.945197913645975, "learning_rate": 1.9435402083504952e-05, "loss": 0.8979, "step": 5419 }, { "epoch": 0.4028242289111854, "grad_norm": 2.1393713184191663, "learning_rate": 1.9435136262215195e-05, "loss": 0.9112, "step": 5420 }, { "epoch": 0.4028985507246377, "grad_norm": 2.6045637562795974, "learning_rate": 1.9434870380182516e-05, "loss": 1.1259, "step": 5421 }, { "epoch": 0.40297287253808994, "grad_norm": 2.3473491445896335, "learning_rate": 1.9434604437408633e-05, "loss": 0.7602, "step": 5422 }, { "epoch": 0.4030471943515422, "grad_norm": 2.4277002366992386, "learning_rate": 1.943433843389525e-05, "loss": 0.9339, "step": 5423 }, { "epoch": 0.4031215161649944, "grad_norm": 2.2740601303998287, "learning_rate": 1.9434072369644095e-05, "loss": 0.9112, "step": 5424 }, { "epoch": 0.40319583797844666, "grad_norm": 1.9891889941906413, "learning_rate": 1.9433806244656868e-05, "loss": 0.7534, "step": 5425 }, { "epoch": 0.4032701597918989, "grad_norm": 2.71876067119387, "learning_rate": 1.9433540058935284e-05, "loss": 0.9908, "step": 5426 }, { "epoch": 0.4033444816053512, "grad_norm": 1.9414898531492848, "learning_rate": 1.943327381248106e-05, "loss": 0.7207, "step": 5427 }, { "epoch": 0.40341880341880343, "grad_norm": 2.011664160606336, "learning_rate": 1.943300750529591e-05, "loss": 0.9131, "step": 5428 }, { "epoch": 0.40349312523225567, "grad_norm": 2.59481514142445, "learning_rate": 1.9432741137381544e-05, "loss": 0.8583, "step": 5429 }, { "epoch": 0.4035674470457079, "grad_norm": 2.3864459867065264, "learning_rate": 1.943247470873968e-05, "loss": 1.0087, "step": 5430 }, { "epoch": 0.40364176885916014, "grad_norm": 2.0125148148751526, "learning_rate": 1.9432208219372035e-05, "loss": 1.0228, "step": 5431 }, { "epoch": 0.40371609067261244, "grad_norm": 1.9824001412722017, "learning_rate": 1.9431941669280324e-05, "loss": 0.8767, "step": 5432 }, { "epoch": 0.4037904124860647, "grad_norm": 2.3283604953739894, "learning_rate": 1.943167505846626e-05, "loss": 1.0674, "step": 5433 }, { "epoch": 0.4038647342995169, "grad_norm": 3.046476973120564, "learning_rate": 1.943140838693156e-05, "loss": 0.655, "step": 5434 }, { "epoch": 0.40393905611296915, "grad_norm": 4.8262177608199375, "learning_rate": 1.9431141654677948e-05, "loss": 0.9166, "step": 5435 }, { "epoch": 0.4040133779264214, "grad_norm": 1.738163352978682, "learning_rate": 1.943087486170713e-05, "loss": 0.7969, "step": 5436 }, { "epoch": 0.40408769973987363, "grad_norm": 1.9761736201161297, "learning_rate": 1.943060800802083e-05, "loss": 0.9077, "step": 5437 }, { "epoch": 0.4041620215533259, "grad_norm": 1.8669271197718127, "learning_rate": 1.9430341093620764e-05, "loss": 0.7007, "step": 5438 }, { "epoch": 0.40423634336677816, "grad_norm": 1.867332329518326, "learning_rate": 1.9430074118508652e-05, "loss": 0.82, "step": 5439 }, { "epoch": 0.4043106651802304, "grad_norm": 2.203273828861461, "learning_rate": 1.9429807082686215e-05, "loss": 1.036, "step": 5440 }, { "epoch": 0.40438498699368264, "grad_norm": 2.459441562309526, "learning_rate": 1.942953998615517e-05, "loss": 1.0445, "step": 5441 }, { "epoch": 0.4044593088071349, "grad_norm": 2.2538706857400905, "learning_rate": 1.942927282891723e-05, "loss": 0.613, "step": 5442 }, { "epoch": 0.4045336306205871, "grad_norm": 2.653409058389616, "learning_rate": 1.9429005610974125e-05, "loss": 0.966, "step": 5443 }, { "epoch": 0.4046079524340394, "grad_norm": 2.3247523078348142, "learning_rate": 1.942873833232757e-05, "loss": 0.9303, "step": 5444 }, { "epoch": 0.40468227424749165, "grad_norm": 1.8169955619282898, "learning_rate": 1.9428470992979286e-05, "loss": 0.6437, "step": 5445 }, { "epoch": 0.4047565960609439, "grad_norm": 2.313574702626412, "learning_rate": 1.9428203592930998e-05, "loss": 0.8511, "step": 5446 }, { "epoch": 0.4048309178743961, "grad_norm": 1.886521015328811, "learning_rate": 1.942793613218442e-05, "loss": 0.8149, "step": 5447 }, { "epoch": 0.40490523968784836, "grad_norm": 2.189835246535876, "learning_rate": 1.9427668610741278e-05, "loss": 0.8697, "step": 5448 }, { "epoch": 0.40497956150130066, "grad_norm": 2.1130392608225397, "learning_rate": 1.94274010286033e-05, "loss": 0.8619, "step": 5449 }, { "epoch": 0.4050538833147529, "grad_norm": 2.0496432034618985, "learning_rate": 1.94271333857722e-05, "loss": 1.0487, "step": 5450 }, { "epoch": 0.40512820512820513, "grad_norm": 1.8574209100807044, "learning_rate": 1.9426865682249704e-05, "loss": 0.9414, "step": 5451 }, { "epoch": 0.40520252694165737, "grad_norm": 2.30564559441394, "learning_rate": 1.9426597918037537e-05, "loss": 1.0159, "step": 5452 }, { "epoch": 0.4052768487551096, "grad_norm": 1.755933451680635, "learning_rate": 1.942633009313742e-05, "loss": 0.6762, "step": 5453 }, { "epoch": 0.40535117056856185, "grad_norm": 2.2947903589491734, "learning_rate": 1.942606220755108e-05, "loss": 0.9037, "step": 5454 }, { "epoch": 0.40542549238201414, "grad_norm": 2.0474802494743845, "learning_rate": 1.9425794261280245e-05, "loss": 0.8837, "step": 5455 }, { "epoch": 0.4054998141954664, "grad_norm": 2.2765442309256727, "learning_rate": 1.942552625432663e-05, "loss": 0.8849, "step": 5456 }, { "epoch": 0.4055741360089186, "grad_norm": 2.3058759573178547, "learning_rate": 1.9425258186691967e-05, "loss": 0.7218, "step": 5457 }, { "epoch": 0.40564845782237086, "grad_norm": 2.3089647480623254, "learning_rate": 1.9424990058377982e-05, "loss": 1.0313, "step": 5458 }, { "epoch": 0.4057227796358231, "grad_norm": 2.2452387017662385, "learning_rate": 1.94247218693864e-05, "loss": 0.9946, "step": 5459 }, { "epoch": 0.4057971014492754, "grad_norm": 3.012888646078083, "learning_rate": 1.9424453619718945e-05, "loss": 1.0791, "step": 5460 }, { "epoch": 0.4058714232627276, "grad_norm": 2.314452560820378, "learning_rate": 1.942418530937735e-05, "loss": 0.8715, "step": 5461 }, { "epoch": 0.40594574507617986, "grad_norm": 5.323777827686028, "learning_rate": 1.9423916938363336e-05, "loss": 1.0205, "step": 5462 }, { "epoch": 0.4060200668896321, "grad_norm": 2.2455877400301008, "learning_rate": 1.942364850667864e-05, "loss": 0.9035, "step": 5463 }, { "epoch": 0.40609438870308434, "grad_norm": 2.18337300111563, "learning_rate": 1.9423380014324977e-05, "loss": 0.8721, "step": 5464 }, { "epoch": 0.4061687105165366, "grad_norm": 2.3639436793464528, "learning_rate": 1.9423111461304084e-05, "loss": 0.9107, "step": 5465 }, { "epoch": 0.4062430323299889, "grad_norm": 2.5020769325102337, "learning_rate": 1.942284284761769e-05, "loss": 0.9587, "step": 5466 }, { "epoch": 0.4063173541434411, "grad_norm": 1.9048054305479178, "learning_rate": 1.942257417326752e-05, "loss": 0.9203, "step": 5467 }, { "epoch": 0.40639167595689335, "grad_norm": 2.342367269507569, "learning_rate": 1.942230543825531e-05, "loss": 0.9105, "step": 5468 }, { "epoch": 0.4064659977703456, "grad_norm": 1.9324436052853586, "learning_rate": 1.9422036642582785e-05, "loss": 0.6477, "step": 5469 }, { "epoch": 0.4065403195837978, "grad_norm": 2.135046139955571, "learning_rate": 1.9421767786251676e-05, "loss": 0.6806, "step": 5470 }, { "epoch": 0.4066146413972501, "grad_norm": 2.3885848763016164, "learning_rate": 1.9421498869263715e-05, "loss": 0.7782, "step": 5471 }, { "epoch": 0.40668896321070236, "grad_norm": 1.7496460903674282, "learning_rate": 1.9421229891620635e-05, "loss": 0.6669, "step": 5472 }, { "epoch": 0.4067632850241546, "grad_norm": 3.083412471552971, "learning_rate": 1.9420960853324162e-05, "loss": 0.9721, "step": 5473 }, { "epoch": 0.40683760683760684, "grad_norm": 1.933522724384964, "learning_rate": 1.9420691754376034e-05, "loss": 0.8663, "step": 5474 }, { "epoch": 0.4069119286510591, "grad_norm": 2.3267150874508036, "learning_rate": 1.9420422594777982e-05, "loss": 1.0353, "step": 5475 }, { "epoch": 0.4069862504645113, "grad_norm": 1.7507828881735865, "learning_rate": 1.9420153374531737e-05, "loss": 0.6815, "step": 5476 }, { "epoch": 0.4070605722779636, "grad_norm": 2.4084966829941354, "learning_rate": 1.9419884093639038e-05, "loss": 0.949, "step": 5477 }, { "epoch": 0.40713489409141584, "grad_norm": 2.047973143029097, "learning_rate": 1.941961475210161e-05, "loss": 0.9369, "step": 5478 }, { "epoch": 0.4072092159048681, "grad_norm": 2.2524032574308324, "learning_rate": 1.9419345349921188e-05, "loss": 1.1089, "step": 5479 }, { "epoch": 0.4072835377183203, "grad_norm": 1.7682786863234354, "learning_rate": 1.9419075887099513e-05, "loss": 0.8146, "step": 5480 }, { "epoch": 0.40735785953177256, "grad_norm": 1.8895594175010904, "learning_rate": 1.9418806363638313e-05, "loss": 0.7282, "step": 5481 }, { "epoch": 0.4074321813452248, "grad_norm": 2.0204266536245936, "learning_rate": 1.941853677953933e-05, "loss": 0.9153, "step": 5482 }, { "epoch": 0.4075065031586771, "grad_norm": 2.0409703872039815, "learning_rate": 1.9418267134804294e-05, "loss": 0.832, "step": 5483 }, { "epoch": 0.40758082497212933, "grad_norm": 3.663755031735628, "learning_rate": 1.9417997429434945e-05, "loss": 0.792, "step": 5484 }, { "epoch": 0.40765514678558157, "grad_norm": 2.2017085895590207, "learning_rate": 1.9417727663433016e-05, "loss": 1.0275, "step": 5485 }, { "epoch": 0.4077294685990338, "grad_norm": 1.8081774793651268, "learning_rate": 1.9417457836800244e-05, "loss": 0.8111, "step": 5486 }, { "epoch": 0.40780379041248604, "grad_norm": 2.365805789072245, "learning_rate": 1.941718794953837e-05, "loss": 0.7482, "step": 5487 }, { "epoch": 0.40787811222593834, "grad_norm": 2.1939113427091432, "learning_rate": 1.9416918001649122e-05, "loss": 1.0799, "step": 5488 }, { "epoch": 0.4079524340393906, "grad_norm": 1.974085963406345, "learning_rate": 1.9416647993134254e-05, "loss": 0.9478, "step": 5489 }, { "epoch": 0.4080267558528428, "grad_norm": 1.9170723995469783, "learning_rate": 1.941637792399549e-05, "loss": 0.8097, "step": 5490 }, { "epoch": 0.40810107766629505, "grad_norm": 2.5737735141083524, "learning_rate": 1.9416107794234573e-05, "loss": 1.0647, "step": 5491 }, { "epoch": 0.4081753994797473, "grad_norm": 1.772787581477794, "learning_rate": 1.941583760385324e-05, "loss": 0.8441, "step": 5492 }, { "epoch": 0.40824972129319953, "grad_norm": 1.928348634158629, "learning_rate": 1.941556735285324e-05, "loss": 0.9037, "step": 5493 }, { "epoch": 0.4083240431066518, "grad_norm": 2.2297165030695494, "learning_rate": 1.9415297041236297e-05, "loss": 1.0161, "step": 5494 }, { "epoch": 0.40839836492010406, "grad_norm": 2.1503050935868266, "learning_rate": 1.9415026669004166e-05, "loss": 0.9001, "step": 5495 }, { "epoch": 0.4084726867335563, "grad_norm": 2.7587472876071226, "learning_rate": 1.941475623615858e-05, "loss": 0.9663, "step": 5496 }, { "epoch": 0.40854700854700854, "grad_norm": 1.6587513155138602, "learning_rate": 1.9414485742701284e-05, "loss": 0.9177, "step": 5497 }, { "epoch": 0.4086213303604608, "grad_norm": 1.916735772202766, "learning_rate": 1.9414215188634016e-05, "loss": 1.0582, "step": 5498 }, { "epoch": 0.40869565217391307, "grad_norm": 2.5055251864429477, "learning_rate": 1.9413944573958518e-05, "loss": 0.9601, "step": 5499 }, { "epoch": 0.4087699739873653, "grad_norm": 1.8794116085908328, "learning_rate": 1.9413673898676536e-05, "loss": 1.0419, "step": 5500 }, { "epoch": 0.40884429580081755, "grad_norm": 2.3422407880743537, "learning_rate": 1.9413403162789806e-05, "loss": 1.0686, "step": 5501 }, { "epoch": 0.4089186176142698, "grad_norm": 2.2889053183035686, "learning_rate": 1.941313236630008e-05, "loss": 0.6864, "step": 5502 }, { "epoch": 0.408992939427722, "grad_norm": 2.0670983665315656, "learning_rate": 1.941286150920909e-05, "loss": 0.9842, "step": 5503 }, { "epoch": 0.40906726124117426, "grad_norm": 2.328876046337254, "learning_rate": 1.941259059151859e-05, "loss": 1.0643, "step": 5504 }, { "epoch": 0.40914158305462656, "grad_norm": 1.7952520851767386, "learning_rate": 1.941231961323032e-05, "loss": 0.7714, "step": 5505 }, { "epoch": 0.4092159048680788, "grad_norm": 1.9325316702595048, "learning_rate": 1.9412048574346026e-05, "loss": 0.8273, "step": 5506 }, { "epoch": 0.40929022668153103, "grad_norm": 2.9470516970344405, "learning_rate": 1.941177747486745e-05, "loss": 1.0465, "step": 5507 }, { "epoch": 0.40936454849498327, "grad_norm": 2.5506383620935686, "learning_rate": 1.9411506314796337e-05, "loss": 0.8374, "step": 5508 }, { "epoch": 0.4094388703084355, "grad_norm": 2.3500016039965064, "learning_rate": 1.9411235094134438e-05, "loss": 0.9745, "step": 5509 }, { "epoch": 0.40951319212188775, "grad_norm": 2.700215472823054, "learning_rate": 1.9410963812883496e-05, "loss": 0.8727, "step": 5510 }, { "epoch": 0.40958751393534004, "grad_norm": 2.5069301253706926, "learning_rate": 1.9410692471045253e-05, "loss": 0.881, "step": 5511 }, { "epoch": 0.4096618357487923, "grad_norm": 1.9138760867334086, "learning_rate": 1.9410421068621463e-05, "loss": 0.8805, "step": 5512 }, { "epoch": 0.4097361575622445, "grad_norm": 2.967966138039022, "learning_rate": 1.941014960561387e-05, "loss": 1.0825, "step": 5513 }, { "epoch": 0.40981047937569676, "grad_norm": 2.9838979502061544, "learning_rate": 1.9409878082024223e-05, "loss": 1.0071, "step": 5514 }, { "epoch": 0.409884801189149, "grad_norm": 3.9362694505159057, "learning_rate": 1.9409606497854268e-05, "loss": 0.7443, "step": 5515 }, { "epoch": 0.4099591230026013, "grad_norm": 2.3302789359822422, "learning_rate": 1.9409334853105754e-05, "loss": 0.9906, "step": 5516 }, { "epoch": 0.4100334448160535, "grad_norm": 2.2972651554865147, "learning_rate": 1.940906314778043e-05, "loss": 0.7773, "step": 5517 }, { "epoch": 0.41010776662950577, "grad_norm": 1.8413950011842404, "learning_rate": 1.940879138188005e-05, "loss": 0.9204, "step": 5518 }, { "epoch": 0.410182088442958, "grad_norm": 1.6283325341038122, "learning_rate": 1.9408519555406355e-05, "loss": 0.7209, "step": 5519 }, { "epoch": 0.41025641025641024, "grad_norm": 1.8054073345123653, "learning_rate": 1.9408247668361098e-05, "loss": 0.8437, "step": 5520 }, { "epoch": 0.4103307320698625, "grad_norm": 2.170215467181556, "learning_rate": 1.9407975720746037e-05, "loss": 0.8963, "step": 5521 }, { "epoch": 0.4104050538833148, "grad_norm": 2.144613096632816, "learning_rate": 1.940770371256291e-05, "loss": 1.046, "step": 5522 }, { "epoch": 0.410479375696767, "grad_norm": 2.173456701777131, "learning_rate": 1.9407431643813478e-05, "loss": 1.033, "step": 5523 }, { "epoch": 0.41055369751021925, "grad_norm": 2.296673313921581, "learning_rate": 1.940715951449949e-05, "loss": 1.0017, "step": 5524 }, { "epoch": 0.4106280193236715, "grad_norm": 2.542173494693372, "learning_rate": 1.9406887324622694e-05, "loss": 0.8141, "step": 5525 }, { "epoch": 0.4107023411371237, "grad_norm": 2.5095027477273573, "learning_rate": 1.9406615074184846e-05, "loss": 0.9669, "step": 5526 }, { "epoch": 0.410776662950576, "grad_norm": 2.285334221049916, "learning_rate": 1.9406342763187703e-05, "loss": 0.9981, "step": 5527 }, { "epoch": 0.41085098476402826, "grad_norm": 2.2768891921882863, "learning_rate": 1.9406070391633007e-05, "loss": 0.9298, "step": 5528 }, { "epoch": 0.4109253065774805, "grad_norm": 2.488372740681137, "learning_rate": 1.9405797959522522e-05, "loss": 1.0492, "step": 5529 }, { "epoch": 0.41099962839093274, "grad_norm": 2.2003420423231836, "learning_rate": 1.9405525466858e-05, "loss": 0.9279, "step": 5530 }, { "epoch": 0.411073950204385, "grad_norm": 2.1963663996676965, "learning_rate": 1.9405252913641186e-05, "loss": 0.9882, "step": 5531 }, { "epoch": 0.4111482720178372, "grad_norm": 2.212310156188349, "learning_rate": 1.9404980299873844e-05, "loss": 0.8652, "step": 5532 }, { "epoch": 0.4112225938312895, "grad_norm": 2.031139872980262, "learning_rate": 1.9404707625557728e-05, "loss": 0.9523, "step": 5533 }, { "epoch": 0.41129691564474175, "grad_norm": 2.5521391881597966, "learning_rate": 1.9404434890694595e-05, "loss": 1.0973, "step": 5534 }, { "epoch": 0.411371237458194, "grad_norm": 3.039322521470073, "learning_rate": 1.9404162095286195e-05, "loss": 0.9386, "step": 5535 }, { "epoch": 0.4114455592716462, "grad_norm": 2.810213721855528, "learning_rate": 1.9403889239334284e-05, "loss": 0.9287, "step": 5536 }, { "epoch": 0.41151988108509846, "grad_norm": 2.116364731418232, "learning_rate": 1.9403616322840627e-05, "loss": 1.1262, "step": 5537 }, { "epoch": 0.4115942028985507, "grad_norm": 1.9610817684813333, "learning_rate": 1.9403343345806976e-05, "loss": 0.9131, "step": 5538 }, { "epoch": 0.411668524712003, "grad_norm": 2.1642579522529193, "learning_rate": 1.9403070308235088e-05, "loss": 0.8144, "step": 5539 }, { "epoch": 0.41174284652545523, "grad_norm": 1.932101383348651, "learning_rate": 1.940279721012672e-05, "loss": 0.9468, "step": 5540 }, { "epoch": 0.41181716833890747, "grad_norm": 2.1782448110746016, "learning_rate": 1.940252405148363e-05, "loss": 0.9629, "step": 5541 }, { "epoch": 0.4118914901523597, "grad_norm": 2.171811319827772, "learning_rate": 1.9402250832307583e-05, "loss": 0.8943, "step": 5542 }, { "epoch": 0.41196581196581195, "grad_norm": 1.900347545978029, "learning_rate": 1.940197755260033e-05, "loss": 0.9246, "step": 5543 }, { "epoch": 0.41204013377926424, "grad_norm": 2.234819381078545, "learning_rate": 1.9401704212363633e-05, "loss": 1.0657, "step": 5544 }, { "epoch": 0.4121144555927165, "grad_norm": 2.500638124379713, "learning_rate": 1.940143081159925e-05, "loss": 0.9411, "step": 5545 }, { "epoch": 0.4121887774061687, "grad_norm": 2.1411876269223153, "learning_rate": 1.9401157350308946e-05, "loss": 1.0509, "step": 5546 }, { "epoch": 0.41226309921962095, "grad_norm": 2.0128834175895443, "learning_rate": 1.9400883828494482e-05, "loss": 0.9219, "step": 5547 }, { "epoch": 0.4123374210330732, "grad_norm": 2.560111357333546, "learning_rate": 1.9400610246157613e-05, "loss": 1.0425, "step": 5548 }, { "epoch": 0.41241174284652543, "grad_norm": 1.9671574831916998, "learning_rate": 1.94003366033001e-05, "loss": 0.9434, "step": 5549 }, { "epoch": 0.4124860646599777, "grad_norm": 2.4969350610561674, "learning_rate": 1.940006289992371e-05, "loss": 0.7898, "step": 5550 }, { "epoch": 0.41256038647342996, "grad_norm": 2.287746196485312, "learning_rate": 1.93997891360302e-05, "loss": 0.7761, "step": 5551 }, { "epoch": 0.4126347082868822, "grad_norm": 2.093228552180147, "learning_rate": 1.939951531162134e-05, "loss": 0.9137, "step": 5552 }, { "epoch": 0.41270903010033444, "grad_norm": 1.968802307769158, "learning_rate": 1.9399241426698886e-05, "loss": 0.805, "step": 5553 }, { "epoch": 0.4127833519137867, "grad_norm": 2.5160025879462444, "learning_rate": 1.9398967481264607e-05, "loss": 0.8941, "step": 5554 }, { "epoch": 0.41285767372723897, "grad_norm": 3.939064736012554, "learning_rate": 1.939869347532026e-05, "loss": 1.0431, "step": 5555 }, { "epoch": 0.4129319955406912, "grad_norm": 1.9949403039111995, "learning_rate": 1.9398419408867607e-05, "loss": 0.874, "step": 5556 }, { "epoch": 0.41300631735414345, "grad_norm": 2.3446400136647023, "learning_rate": 1.9398145281908424e-05, "loss": 1.1069, "step": 5557 }, { "epoch": 0.4130806391675957, "grad_norm": 5.073883172615544, "learning_rate": 1.939787109444447e-05, "loss": 0.9548, "step": 5558 }, { "epoch": 0.4131549609810479, "grad_norm": 1.7111712013374134, "learning_rate": 1.93975968464775e-05, "loss": 0.7868, "step": 5559 }, { "epoch": 0.41322928279450016, "grad_norm": 1.9540151809740736, "learning_rate": 1.93973225380093e-05, "loss": 0.721, "step": 5560 }, { "epoch": 0.41330360460795246, "grad_norm": 2.2502007060745894, "learning_rate": 1.9397048169041622e-05, "loss": 0.8694, "step": 5561 }, { "epoch": 0.4133779264214047, "grad_norm": 2.2586202649561313, "learning_rate": 1.9396773739576233e-05, "loss": 0.9622, "step": 5562 }, { "epoch": 0.41345224823485693, "grad_norm": 1.7632682228172396, "learning_rate": 1.9396499249614904e-05, "loss": 0.781, "step": 5563 }, { "epoch": 0.41352657004830917, "grad_norm": 2.3622170366635236, "learning_rate": 1.93962246991594e-05, "loss": 1.0431, "step": 5564 }, { "epoch": 0.4136008918617614, "grad_norm": 2.2190445660091433, "learning_rate": 1.939595008821149e-05, "loss": 1.062, "step": 5565 }, { "epoch": 0.41367521367521365, "grad_norm": 1.8034061306616886, "learning_rate": 1.9395675416772942e-05, "loss": 0.7305, "step": 5566 }, { "epoch": 0.41374953548866594, "grad_norm": 2.007520311935092, "learning_rate": 1.939540068484552e-05, "loss": 1.0048, "step": 5567 }, { "epoch": 0.4138238573021182, "grad_norm": 1.971254171741663, "learning_rate": 1.9395125892430994e-05, "loss": 0.8978, "step": 5568 }, { "epoch": 0.4138981791155704, "grad_norm": 2.176687297000533, "learning_rate": 1.939485103953114e-05, "loss": 0.979, "step": 5569 }, { "epoch": 0.41397250092902266, "grad_norm": 2.2833990420608035, "learning_rate": 1.939457612614772e-05, "loss": 1.0238, "step": 5570 }, { "epoch": 0.4140468227424749, "grad_norm": 2.1264307773906013, "learning_rate": 1.9394301152282505e-05, "loss": 1.0132, "step": 5571 }, { "epoch": 0.4141211445559272, "grad_norm": 2.5525732808586117, "learning_rate": 1.939402611793727e-05, "loss": 0.9984, "step": 5572 }, { "epoch": 0.41419546636937943, "grad_norm": 2.1981049301432196, "learning_rate": 1.9393751023113784e-05, "loss": 1.008, "step": 5573 }, { "epoch": 0.41426978818283167, "grad_norm": 1.7834373377621526, "learning_rate": 1.939347586781381e-05, "loss": 0.6415, "step": 5574 }, { "epoch": 0.4143441099962839, "grad_norm": 2.0709984624135185, "learning_rate": 1.939320065203913e-05, "loss": 0.8607, "step": 5575 }, { "epoch": 0.41441843180973614, "grad_norm": 2.2128042994916317, "learning_rate": 1.939292537579151e-05, "loss": 0.9758, "step": 5576 }, { "epoch": 0.4144927536231884, "grad_norm": 1.9613566842436887, "learning_rate": 1.9392650039072723e-05, "loss": 0.8069, "step": 5577 }, { "epoch": 0.4145670754366407, "grad_norm": 1.9707729628986832, "learning_rate": 1.9392374641884545e-05, "loss": 1.0879, "step": 5578 }, { "epoch": 0.4146413972500929, "grad_norm": 2.0344160240610707, "learning_rate": 1.9392099184228742e-05, "loss": 0.9882, "step": 5579 }, { "epoch": 0.41471571906354515, "grad_norm": 2.1257265185012093, "learning_rate": 1.93918236661071e-05, "loss": 0.9335, "step": 5580 }, { "epoch": 0.4147900408769974, "grad_norm": 1.9519865649879937, "learning_rate": 1.9391548087521377e-05, "loss": 1.0144, "step": 5581 }, { "epoch": 0.41486436269044963, "grad_norm": 2.1810387800355175, "learning_rate": 1.9391272448473356e-05, "loss": 1.1061, "step": 5582 }, { "epoch": 0.4149386845039019, "grad_norm": 2.8026480705131487, "learning_rate": 1.939099674896481e-05, "loss": 1.111, "step": 5583 }, { "epoch": 0.41501300631735416, "grad_norm": 2.302041231418942, "learning_rate": 1.9390720988997514e-05, "loss": 0.9453, "step": 5584 }, { "epoch": 0.4150873281308064, "grad_norm": 1.9883106105060344, "learning_rate": 1.9390445168573248e-05, "loss": 0.7963, "step": 5585 }, { "epoch": 0.41516164994425864, "grad_norm": 2.142648123140624, "learning_rate": 1.939016928769378e-05, "loss": 0.8625, "step": 5586 }, { "epoch": 0.4152359717577109, "grad_norm": 2.4617387407117763, "learning_rate": 1.938989334636089e-05, "loss": 1.005, "step": 5587 }, { "epoch": 0.4153102935711631, "grad_norm": 2.384413137815561, "learning_rate": 1.9389617344576352e-05, "loss": 0.9652, "step": 5588 }, { "epoch": 0.4153846153846154, "grad_norm": 2.964635695520141, "learning_rate": 1.9389341282341946e-05, "loss": 1.0324, "step": 5589 }, { "epoch": 0.41545893719806765, "grad_norm": 2.778857456774204, "learning_rate": 1.938906515965945e-05, "loss": 0.8025, "step": 5590 }, { "epoch": 0.4155332590115199, "grad_norm": 2.444991756748109, "learning_rate": 1.9388788976530638e-05, "loss": 0.6825, "step": 5591 }, { "epoch": 0.4156075808249721, "grad_norm": 1.974857802915275, "learning_rate": 1.938851273295729e-05, "loss": 1.0148, "step": 5592 }, { "epoch": 0.41568190263842436, "grad_norm": 2.13466391059356, "learning_rate": 1.938823642894118e-05, "loss": 0.8621, "step": 5593 }, { "epoch": 0.4157562244518766, "grad_norm": 2.142315127098619, "learning_rate": 1.93879600644841e-05, "loss": 1.0714, "step": 5594 }, { "epoch": 0.4158305462653289, "grad_norm": 2.787831603602894, "learning_rate": 1.938768363958781e-05, "loss": 1.1551, "step": 5595 }, { "epoch": 0.41590486807878113, "grad_norm": 2.0597293158037844, "learning_rate": 1.9387407154254108e-05, "loss": 0.9431, "step": 5596 }, { "epoch": 0.41597918989223337, "grad_norm": 2.1353248156264737, "learning_rate": 1.938713060848476e-05, "loss": 0.9536, "step": 5597 }, { "epoch": 0.4160535117056856, "grad_norm": 2.7096261634659657, "learning_rate": 1.9386854002281558e-05, "loss": 0.9394, "step": 5598 }, { "epoch": 0.41612783351913785, "grad_norm": 2.133274663086162, "learning_rate": 1.938657733564627e-05, "loss": 0.7329, "step": 5599 }, { "epoch": 0.41620215533259014, "grad_norm": 2.264213831376213, "learning_rate": 1.938630060858069e-05, "loss": 1.1219, "step": 5600 }, { "epoch": 0.4162764771460424, "grad_norm": 2.209377592379767, "learning_rate": 1.938602382108659e-05, "loss": 0.9096, "step": 5601 }, { "epoch": 0.4163507989594946, "grad_norm": 1.8592238467637343, "learning_rate": 1.9385746973165757e-05, "loss": 0.9135, "step": 5602 }, { "epoch": 0.41642512077294686, "grad_norm": 2.1867910286774865, "learning_rate": 1.9385470064819973e-05, "loss": 0.9719, "step": 5603 }, { "epoch": 0.4164994425863991, "grad_norm": 2.052053933129165, "learning_rate": 1.9385193096051022e-05, "loss": 0.9339, "step": 5604 }, { "epoch": 0.41657376439985133, "grad_norm": 2.300325227065524, "learning_rate": 1.938491606686068e-05, "loss": 0.8091, "step": 5605 }, { "epoch": 0.4166480862133036, "grad_norm": 2.1112924591339044, "learning_rate": 1.9384638977250735e-05, "loss": 0.9517, "step": 5606 }, { "epoch": 0.41672240802675586, "grad_norm": 2.229364123002988, "learning_rate": 1.9384361827222975e-05, "loss": 1.0952, "step": 5607 }, { "epoch": 0.4167967298402081, "grad_norm": 2.099045228332065, "learning_rate": 1.9384084616779176e-05, "loss": 0.8766, "step": 5608 }, { "epoch": 0.41687105165366034, "grad_norm": 1.6974669984448758, "learning_rate": 1.9383807345921132e-05, "loss": 0.9366, "step": 5609 }, { "epoch": 0.4169453734671126, "grad_norm": 1.9645940413432068, "learning_rate": 1.938353001465062e-05, "loss": 0.8965, "step": 5610 }, { "epoch": 0.4170196952805649, "grad_norm": 2.152505024419476, "learning_rate": 1.938325262296943e-05, "loss": 0.9795, "step": 5611 }, { "epoch": 0.4170940170940171, "grad_norm": 2.572628989838315, "learning_rate": 1.9382975170879348e-05, "loss": 1.1103, "step": 5612 }, { "epoch": 0.41716833890746935, "grad_norm": 2.5089091716103145, "learning_rate": 1.9382697658382157e-05, "loss": 0.9826, "step": 5613 }, { "epoch": 0.4172426607209216, "grad_norm": 2.248599745111764, "learning_rate": 1.9382420085479645e-05, "loss": 1.0039, "step": 5614 }, { "epoch": 0.4173169825343738, "grad_norm": 2.5018837581084776, "learning_rate": 1.9382142452173597e-05, "loss": 1.137, "step": 5615 }, { "epoch": 0.41739130434782606, "grad_norm": 1.8034961449886315, "learning_rate": 1.9381864758465806e-05, "loss": 0.6628, "step": 5616 }, { "epoch": 0.41746562616127836, "grad_norm": 2.058435305388569, "learning_rate": 1.9381587004358054e-05, "loss": 0.8488, "step": 5617 }, { "epoch": 0.4175399479747306, "grad_norm": 2.68539638403626, "learning_rate": 1.9381309189852137e-05, "loss": 0.8878, "step": 5618 }, { "epoch": 0.41761426978818283, "grad_norm": 2.1590014639641013, "learning_rate": 1.9381031314949833e-05, "loss": 0.8977, "step": 5619 }, { "epoch": 0.4176885916016351, "grad_norm": 2.4992595248112583, "learning_rate": 1.9380753379652937e-05, "loss": 0.9259, "step": 5620 }, { "epoch": 0.4177629134150873, "grad_norm": 1.7938848809119197, "learning_rate": 1.9380475383963237e-05, "loss": 0.7815, "step": 5621 }, { "epoch": 0.41783723522853955, "grad_norm": 1.8780647625348412, "learning_rate": 1.9380197327882527e-05, "loss": 0.8809, "step": 5622 }, { "epoch": 0.41791155704199184, "grad_norm": 2.204376236865153, "learning_rate": 1.937991921141259e-05, "loss": 0.9414, "step": 5623 }, { "epoch": 0.4179858788554441, "grad_norm": 2.3998218649995686, "learning_rate": 1.937964103455522e-05, "loss": 0.8228, "step": 5624 }, { "epoch": 0.4180602006688963, "grad_norm": 1.9778593216452405, "learning_rate": 1.937936279731221e-05, "loss": 0.5726, "step": 5625 }, { "epoch": 0.41813452248234856, "grad_norm": 2.2764831058863537, "learning_rate": 1.9379084499685344e-05, "loss": 1.0374, "step": 5626 }, { "epoch": 0.4182088442958008, "grad_norm": 2.066402657590678, "learning_rate": 1.9378806141676426e-05, "loss": 0.8846, "step": 5627 }, { "epoch": 0.4182831661092531, "grad_norm": 3.0848671522807147, "learning_rate": 1.9378527723287232e-05, "loss": 0.9335, "step": 5628 }, { "epoch": 0.41835748792270533, "grad_norm": 2.144274342716479, "learning_rate": 1.9378249244519568e-05, "loss": 0.8193, "step": 5629 }, { "epoch": 0.41843180973615757, "grad_norm": 1.7649223996183463, "learning_rate": 1.9377970705375223e-05, "loss": 0.6683, "step": 5630 }, { "epoch": 0.4185061315496098, "grad_norm": 2.0439945116103084, "learning_rate": 1.9377692105855988e-05, "loss": 0.7892, "step": 5631 }, { "epoch": 0.41858045336306204, "grad_norm": 1.81726724766557, "learning_rate": 1.9377413445963657e-05, "loss": 0.7987, "step": 5632 }, { "epoch": 0.4186547751765143, "grad_norm": 1.969575084484752, "learning_rate": 1.9377134725700025e-05, "loss": 0.8748, "step": 5633 }, { "epoch": 0.4187290969899666, "grad_norm": 2.079975320393278, "learning_rate": 1.9376855945066885e-05, "loss": 0.9552, "step": 5634 }, { "epoch": 0.4188034188034188, "grad_norm": 2.575135589569401, "learning_rate": 1.9376577104066038e-05, "loss": 0.8804, "step": 5635 }, { "epoch": 0.41887774061687105, "grad_norm": 2.0092408670477795, "learning_rate": 1.937629820269927e-05, "loss": 0.8087, "step": 5636 }, { "epoch": 0.4189520624303233, "grad_norm": 1.9433746040519824, "learning_rate": 1.9376019240968382e-05, "loss": 0.8521, "step": 5637 }, { "epoch": 0.41902638424377553, "grad_norm": 1.982825303773877, "learning_rate": 1.9375740218875164e-05, "loss": 0.8023, "step": 5638 }, { "epoch": 0.4191007060572278, "grad_norm": 2.1766222754895113, "learning_rate": 1.9375461136421423e-05, "loss": 0.9433, "step": 5639 }, { "epoch": 0.41917502787068006, "grad_norm": 2.4489441996165264, "learning_rate": 1.9375181993608948e-05, "loss": 0.931, "step": 5640 }, { "epoch": 0.4192493496841323, "grad_norm": 1.6991473103408317, "learning_rate": 1.9374902790439537e-05, "loss": 0.7188, "step": 5641 }, { "epoch": 0.41932367149758454, "grad_norm": 2.053364999097333, "learning_rate": 1.9374623526914987e-05, "loss": 0.8885, "step": 5642 }, { "epoch": 0.4193979933110368, "grad_norm": 1.8598274022570895, "learning_rate": 1.93743442030371e-05, "loss": 0.9721, "step": 5643 }, { "epoch": 0.419472315124489, "grad_norm": 1.8757761966418383, "learning_rate": 1.937406481880767e-05, "loss": 0.7312, "step": 5644 }, { "epoch": 0.4195466369379413, "grad_norm": 2.445942549779319, "learning_rate": 1.9373785374228497e-05, "loss": 1.1525, "step": 5645 }, { "epoch": 0.41962095875139355, "grad_norm": 2.032039689560753, "learning_rate": 1.937350586930138e-05, "loss": 0.7678, "step": 5646 }, { "epoch": 0.4196952805648458, "grad_norm": 5.9590991111961875, "learning_rate": 1.9373226304028118e-05, "loss": 1.0722, "step": 5647 }, { "epoch": 0.419769602378298, "grad_norm": 2.286197054260778, "learning_rate": 1.9372946678410515e-05, "loss": 0.9993, "step": 5648 }, { "epoch": 0.41984392419175026, "grad_norm": 1.8300556855183787, "learning_rate": 1.9372666992450363e-05, "loss": 0.9575, "step": 5649 }, { "epoch": 0.4199182460052025, "grad_norm": 2.5126116450004092, "learning_rate": 1.937238724614947e-05, "loss": 1.0105, "step": 5650 }, { "epoch": 0.4199925678186548, "grad_norm": 2.0158033853259445, "learning_rate": 1.9372107439509634e-05, "loss": 0.8873, "step": 5651 }, { "epoch": 0.42006688963210703, "grad_norm": 2.198704522347755, "learning_rate": 1.9371827572532656e-05, "loss": 0.9197, "step": 5652 }, { "epoch": 0.42014121144555927, "grad_norm": 2.1454492551250897, "learning_rate": 1.937154764522034e-05, "loss": 0.8426, "step": 5653 }, { "epoch": 0.4202155332590115, "grad_norm": 2.275906094543935, "learning_rate": 1.9371267657574484e-05, "loss": 0.9182, "step": 5654 }, { "epoch": 0.42028985507246375, "grad_norm": 2.297712284893805, "learning_rate": 1.9370987609596896e-05, "loss": 1.0366, "step": 5655 }, { "epoch": 0.42036417688591604, "grad_norm": 1.949644929358019, "learning_rate": 1.9370707501289372e-05, "loss": 0.8869, "step": 5656 }, { "epoch": 0.4204384986993683, "grad_norm": 2.3665922572789895, "learning_rate": 1.9370427332653722e-05, "loss": 0.8139, "step": 5657 }, { "epoch": 0.4205128205128205, "grad_norm": 2.425118605267053, "learning_rate": 1.9370147103691742e-05, "loss": 0.8084, "step": 5658 }, { "epoch": 0.42058714232627276, "grad_norm": 3.5236884071679193, "learning_rate": 1.9369866814405248e-05, "loss": 1.0109, "step": 5659 }, { "epoch": 0.420661464139725, "grad_norm": 2.0443238710506413, "learning_rate": 1.9369586464796035e-05, "loss": 0.8716, "step": 5660 }, { "epoch": 0.42073578595317723, "grad_norm": 2.3167704987795172, "learning_rate": 1.936930605486591e-05, "loss": 1.0543, "step": 5661 }, { "epoch": 0.4208101077666295, "grad_norm": 2.2692557210013455, "learning_rate": 1.9369025584616677e-05, "loss": 0.8404, "step": 5662 }, { "epoch": 0.42088442958008176, "grad_norm": 2.9688590537754145, "learning_rate": 1.9368745054050148e-05, "loss": 0.9103, "step": 5663 }, { "epoch": 0.420958751393534, "grad_norm": 2.882266101599215, "learning_rate": 1.9368464463168123e-05, "loss": 1.1471, "step": 5664 }, { "epoch": 0.42103307320698624, "grad_norm": 2.389801235156561, "learning_rate": 1.9368183811972406e-05, "loss": 0.8475, "step": 5665 }, { "epoch": 0.4211073950204385, "grad_norm": 2.3061864194715307, "learning_rate": 1.9367903100464812e-05, "loss": 0.8542, "step": 5666 }, { "epoch": 0.4211817168338908, "grad_norm": 2.1289446007752626, "learning_rate": 1.936762232864714e-05, "loss": 1.0797, "step": 5667 }, { "epoch": 0.421256038647343, "grad_norm": 1.8981584174407182, "learning_rate": 1.9367341496521205e-05, "loss": 0.9228, "step": 5668 }, { "epoch": 0.42133036046079525, "grad_norm": 2.057891550465114, "learning_rate": 1.936706060408881e-05, "loss": 1.0113, "step": 5669 }, { "epoch": 0.4214046822742475, "grad_norm": 2.030901610514091, "learning_rate": 1.9366779651351767e-05, "loss": 0.7016, "step": 5670 }, { "epoch": 0.4214790040876997, "grad_norm": 2.0229955331825398, "learning_rate": 1.9366498638311882e-05, "loss": 0.7411, "step": 5671 }, { "epoch": 0.42155332590115197, "grad_norm": 2.253530443918218, "learning_rate": 1.9366217564970963e-05, "loss": 1.138, "step": 5672 }, { "epoch": 0.42162764771460426, "grad_norm": 2.4532280247368132, "learning_rate": 1.9365936431330822e-05, "loss": 0.8045, "step": 5673 }, { "epoch": 0.4217019695280565, "grad_norm": 8.755641135084852, "learning_rate": 1.936565523739327e-05, "loss": 1.0547, "step": 5674 }, { "epoch": 0.42177629134150874, "grad_norm": 2.5086631192181743, "learning_rate": 1.9365373983160112e-05, "loss": 0.6418, "step": 5675 }, { "epoch": 0.421850613154961, "grad_norm": 3.0729122194816445, "learning_rate": 1.9365092668633165e-05, "loss": 0.926, "step": 5676 }, { "epoch": 0.4219249349684132, "grad_norm": 1.9268715249105413, "learning_rate": 1.9364811293814237e-05, "loss": 0.6605, "step": 5677 }, { "epoch": 0.42199925678186545, "grad_norm": 1.918031644675036, "learning_rate": 1.936452985870514e-05, "loss": 0.8923, "step": 5678 }, { "epoch": 0.42207357859531774, "grad_norm": 1.8672830251328945, "learning_rate": 1.9364248363307686e-05, "loss": 0.4834, "step": 5679 }, { "epoch": 0.42214790040877, "grad_norm": 2.3962311028829633, "learning_rate": 1.9363966807623682e-05, "loss": 1.0313, "step": 5680 }, { "epoch": 0.4222222222222222, "grad_norm": 2.277500160018477, "learning_rate": 1.936368519165495e-05, "loss": 0.7773, "step": 5681 }, { "epoch": 0.42229654403567446, "grad_norm": 1.9577148835626317, "learning_rate": 1.93634035154033e-05, "loss": 0.866, "step": 5682 }, { "epoch": 0.4223708658491267, "grad_norm": 2.6296216100592447, "learning_rate": 1.936312177887054e-05, "loss": 0.9525, "step": 5683 }, { "epoch": 0.422445187662579, "grad_norm": 2.0136404089918076, "learning_rate": 1.936283998205849e-05, "loss": 0.8602, "step": 5684 }, { "epoch": 0.42251950947603123, "grad_norm": 1.7097649834594058, "learning_rate": 1.9362558124968963e-05, "loss": 0.7696, "step": 5685 }, { "epoch": 0.42259383128948347, "grad_norm": 1.8794554328411799, "learning_rate": 1.936227620760377e-05, "loss": 0.8284, "step": 5686 }, { "epoch": 0.4226681531029357, "grad_norm": 1.811939438623915, "learning_rate": 1.9361994229964728e-05, "loss": 0.7057, "step": 5687 }, { "epoch": 0.42274247491638794, "grad_norm": 3.203452631635699, "learning_rate": 1.9361712192053656e-05, "loss": 0.8285, "step": 5688 }, { "epoch": 0.4228167967298402, "grad_norm": 1.8148469227940378, "learning_rate": 1.9361430093872365e-05, "loss": 0.7992, "step": 5689 }, { "epoch": 0.4228911185432925, "grad_norm": 7.70340504739217, "learning_rate": 1.9361147935422674e-05, "loss": 0.9789, "step": 5690 }, { "epoch": 0.4229654403567447, "grad_norm": 1.9658082588168764, "learning_rate": 1.93608657167064e-05, "loss": 0.8119, "step": 5691 }, { "epoch": 0.42303976217019695, "grad_norm": 1.8814234478607652, "learning_rate": 1.9360583437725352e-05, "loss": 0.7841, "step": 5692 }, { "epoch": 0.4231140839836492, "grad_norm": 2.2744756500683905, "learning_rate": 1.9360301098481357e-05, "loss": 1.018, "step": 5693 }, { "epoch": 0.42318840579710143, "grad_norm": 2.282726119331956, "learning_rate": 1.936001869897623e-05, "loss": 0.8824, "step": 5694 }, { "epoch": 0.4232627276105537, "grad_norm": 3.5466311075785417, "learning_rate": 1.935973623921179e-05, "loss": 1.0164, "step": 5695 }, { "epoch": 0.42333704942400596, "grad_norm": 1.971582628228291, "learning_rate": 1.935945371918985e-05, "loss": 0.8446, "step": 5696 }, { "epoch": 0.4234113712374582, "grad_norm": 2.488121658945853, "learning_rate": 1.9359171138912234e-05, "loss": 0.9848, "step": 5697 }, { "epoch": 0.42348569305091044, "grad_norm": 1.993506145688147, "learning_rate": 1.9358888498380758e-05, "loss": 0.8827, "step": 5698 }, { "epoch": 0.4235600148643627, "grad_norm": 2.32067744569229, "learning_rate": 1.9358605797597247e-05, "loss": 1.0125, "step": 5699 }, { "epoch": 0.4236343366778149, "grad_norm": 2.070021139647222, "learning_rate": 1.935832303656352e-05, "loss": 0.7809, "step": 5700 }, { "epoch": 0.4237086584912672, "grad_norm": 1.8897426117326626, "learning_rate": 1.9358040215281385e-05, "loss": 0.8058, "step": 5701 }, { "epoch": 0.42378298030471945, "grad_norm": 16.19535518488006, "learning_rate": 1.935775733375268e-05, "loss": 1.0872, "step": 5702 }, { "epoch": 0.4238573021181717, "grad_norm": 2.463952758045088, "learning_rate": 1.935747439197922e-05, "loss": 0.7682, "step": 5703 }, { "epoch": 0.4239316239316239, "grad_norm": 1.6626562082597376, "learning_rate": 1.935719138996282e-05, "loss": 0.7114, "step": 5704 }, { "epoch": 0.42400594574507616, "grad_norm": 1.7447120459601693, "learning_rate": 1.935690832770531e-05, "loss": 0.8623, "step": 5705 }, { "epoch": 0.4240802675585284, "grad_norm": 1.7729753114633724, "learning_rate": 1.935662520520851e-05, "loss": 1.0036, "step": 5706 }, { "epoch": 0.4241545893719807, "grad_norm": 6.078134051358839, "learning_rate": 1.9356342022474242e-05, "loss": 1.086, "step": 5707 }, { "epoch": 0.42422891118543293, "grad_norm": 2.982574419897247, "learning_rate": 1.9356058779504327e-05, "loss": 0.8125, "step": 5708 }, { "epoch": 0.42430323299888517, "grad_norm": 2.9211011120309514, "learning_rate": 1.9355775476300596e-05, "loss": 1.1003, "step": 5709 }, { "epoch": 0.4243775548123374, "grad_norm": 3.2787189147495526, "learning_rate": 1.9355492112864863e-05, "loss": 1.1827, "step": 5710 }, { "epoch": 0.42445187662578965, "grad_norm": 2.6158121291583436, "learning_rate": 1.935520868919896e-05, "loss": 0.9384, "step": 5711 }, { "epoch": 0.42452619843924194, "grad_norm": 1.7315609795209193, "learning_rate": 1.9354925205304708e-05, "loss": 0.7091, "step": 5712 }, { "epoch": 0.4246005202526942, "grad_norm": 2.0275866646185516, "learning_rate": 1.9354641661183935e-05, "loss": 0.8383, "step": 5713 }, { "epoch": 0.4246748420661464, "grad_norm": 2.19407831682445, "learning_rate": 1.9354358056838463e-05, "loss": 0.7521, "step": 5714 }, { "epoch": 0.42474916387959866, "grad_norm": 3.701922092786722, "learning_rate": 1.935407439227012e-05, "loss": 0.9744, "step": 5715 }, { "epoch": 0.4248234856930509, "grad_norm": 20.5656047886571, "learning_rate": 1.9353790667480726e-05, "loss": 1.1425, "step": 5716 }, { "epoch": 0.42489780750650313, "grad_norm": 2.0987560717519873, "learning_rate": 1.9353506882472118e-05, "loss": 0.9601, "step": 5717 }, { "epoch": 0.4249721293199554, "grad_norm": 1.9122667396914284, "learning_rate": 1.935322303724612e-05, "loss": 0.8254, "step": 5718 }, { "epoch": 0.42504645113340767, "grad_norm": 1.989831403573034, "learning_rate": 1.9352939131804556e-05, "loss": 0.8649, "step": 5719 }, { "epoch": 0.4251207729468599, "grad_norm": 2.229150934298655, "learning_rate": 1.9352655166149252e-05, "loss": 0.8562, "step": 5720 }, { "epoch": 0.42519509476031214, "grad_norm": 2.4534408188098045, "learning_rate": 1.935237114028204e-05, "loss": 0.8923, "step": 5721 }, { "epoch": 0.4252694165737644, "grad_norm": 2.3206921972649472, "learning_rate": 1.935208705420475e-05, "loss": 0.7537, "step": 5722 }, { "epoch": 0.4253437383872167, "grad_norm": 2.809267185241734, "learning_rate": 1.935180290791921e-05, "loss": 0.9545, "step": 5723 }, { "epoch": 0.4254180602006689, "grad_norm": 2.4362349723166097, "learning_rate": 1.9351518701427247e-05, "loss": 0.8205, "step": 5724 }, { "epoch": 0.42549238201412115, "grad_norm": 2.119542727269398, "learning_rate": 1.9351234434730687e-05, "loss": 0.9851, "step": 5725 }, { "epoch": 0.4255667038275734, "grad_norm": 2.009550676492325, "learning_rate": 1.935095010783137e-05, "loss": 0.6414, "step": 5726 }, { "epoch": 0.4256410256410256, "grad_norm": 2.262811633441018, "learning_rate": 1.935066572073112e-05, "loss": 0.9615, "step": 5727 }, { "epoch": 0.42571534745447787, "grad_norm": 2.1894084328979906, "learning_rate": 1.935038127343177e-05, "loss": 1.0222, "step": 5728 }, { "epoch": 0.42578966926793016, "grad_norm": 1.8631201387595666, "learning_rate": 1.9350096765935152e-05, "loss": 0.7347, "step": 5729 }, { "epoch": 0.4258639910813824, "grad_norm": 2.9997774698544752, "learning_rate": 1.9349812198243093e-05, "loss": 0.9382, "step": 5730 }, { "epoch": 0.42593831289483464, "grad_norm": 3.52002954614149, "learning_rate": 1.9349527570357427e-05, "loss": 1.0196, "step": 5731 }, { "epoch": 0.4260126347082869, "grad_norm": 2.239658590857138, "learning_rate": 1.934924288227999e-05, "loss": 0.9618, "step": 5732 }, { "epoch": 0.4260869565217391, "grad_norm": 24.659594933510917, "learning_rate": 1.9348958134012612e-05, "loss": 0.8874, "step": 5733 }, { "epoch": 0.42616127833519135, "grad_norm": 2.4583127799961484, "learning_rate": 1.934867332555713e-05, "loss": 1.0335, "step": 5734 }, { "epoch": 0.42623560014864365, "grad_norm": 1.92123371615159, "learning_rate": 1.934838845691537e-05, "loss": 1.0007, "step": 5735 }, { "epoch": 0.4263099219620959, "grad_norm": 2.563033277259182, "learning_rate": 1.934810352808917e-05, "loss": 0.9182, "step": 5736 }, { "epoch": 0.4263842437755481, "grad_norm": 2.662890065961474, "learning_rate": 1.9347818539080364e-05, "loss": 1.0154, "step": 5737 }, { "epoch": 0.42645856558900036, "grad_norm": 2.553340851535678, "learning_rate": 1.9347533489890793e-05, "loss": 0.8894, "step": 5738 }, { "epoch": 0.4265328874024526, "grad_norm": 2.7580167565046674, "learning_rate": 1.9347248380522282e-05, "loss": 0.6182, "step": 5739 }, { "epoch": 0.4266072092159049, "grad_norm": 3.138030482695574, "learning_rate": 1.934696321097667e-05, "loss": 0.7433, "step": 5740 }, { "epoch": 0.42668153102935713, "grad_norm": 2.4408856540354535, "learning_rate": 1.9346677981255797e-05, "loss": 0.8936, "step": 5741 }, { "epoch": 0.42675585284280937, "grad_norm": 1.9394957317278925, "learning_rate": 1.934639269136149e-05, "loss": 0.8617, "step": 5742 }, { "epoch": 0.4268301746562616, "grad_norm": 2.5332017946906893, "learning_rate": 1.93461073412956e-05, "loss": 0.6698, "step": 5743 }, { "epoch": 0.42690449646971385, "grad_norm": 2.3148259840108185, "learning_rate": 1.934582193105995e-05, "loss": 1.0361, "step": 5744 }, { "epoch": 0.4269788182831661, "grad_norm": 2.5150080158516936, "learning_rate": 1.9345536460656386e-05, "loss": 0.8592, "step": 5745 }, { "epoch": 0.4270531400966184, "grad_norm": 2.4440288712320255, "learning_rate": 1.9345250930086746e-05, "loss": 1.105, "step": 5746 }, { "epoch": 0.4271274619100706, "grad_norm": 2.5967180141623607, "learning_rate": 1.934496533935286e-05, "loss": 0.7959, "step": 5747 }, { "epoch": 0.42720178372352285, "grad_norm": 2.680166265693037, "learning_rate": 1.9344679688456572e-05, "loss": 0.8582, "step": 5748 }, { "epoch": 0.4272761055369751, "grad_norm": 2.5726794333331306, "learning_rate": 1.9344393977399725e-05, "loss": 0.8302, "step": 5749 }, { "epoch": 0.42735042735042733, "grad_norm": 2.651047474921664, "learning_rate": 1.9344108206184152e-05, "loss": 1.1107, "step": 5750 }, { "epoch": 0.4274247491638796, "grad_norm": 2.5447073501475175, "learning_rate": 1.9343822374811696e-05, "loss": 0.9635, "step": 5751 }, { "epoch": 0.42749907097733186, "grad_norm": 2.215116247426091, "learning_rate": 1.9343536483284196e-05, "loss": 0.8361, "step": 5752 }, { "epoch": 0.4275733927907841, "grad_norm": 1.5271960047443551, "learning_rate": 1.9343250531603493e-05, "loss": 0.8613, "step": 5753 }, { "epoch": 0.42764771460423634, "grad_norm": 2.240791041250633, "learning_rate": 1.9342964519771427e-05, "loss": 0.9715, "step": 5754 }, { "epoch": 0.4277220364176886, "grad_norm": 2.2988519091221997, "learning_rate": 1.9342678447789843e-05, "loss": 0.7707, "step": 5755 }, { "epoch": 0.4277963582311408, "grad_norm": 1.8695035556104176, "learning_rate": 1.9342392315660578e-05, "loss": 0.8764, "step": 5756 }, { "epoch": 0.4278706800445931, "grad_norm": 1.9912735587884849, "learning_rate": 1.9342106123385472e-05, "loss": 0.7891, "step": 5757 }, { "epoch": 0.42794500185804535, "grad_norm": 1.8729772834344371, "learning_rate": 1.9341819870966375e-05, "loss": 0.7255, "step": 5758 }, { "epoch": 0.4280193236714976, "grad_norm": 2.571019451434956, "learning_rate": 1.934153355840513e-05, "loss": 0.7863, "step": 5759 }, { "epoch": 0.4280936454849498, "grad_norm": 2.7039612518440705, "learning_rate": 1.9341247185703573e-05, "loss": 1.0459, "step": 5760 }, { "epoch": 0.42816796729840206, "grad_norm": 2.5606291058373305, "learning_rate": 1.9340960752863555e-05, "loss": 0.9855, "step": 5761 }, { "epoch": 0.4282422891118543, "grad_norm": 2.205539804442886, "learning_rate": 1.9340674259886908e-05, "loss": 0.9029, "step": 5762 }, { "epoch": 0.4283166109253066, "grad_norm": 1.8965397178252088, "learning_rate": 1.9340387706775492e-05, "loss": 0.9148, "step": 5763 }, { "epoch": 0.42839093273875883, "grad_norm": 5.483677108611167, "learning_rate": 1.934010109353114e-05, "loss": 0.5897, "step": 5764 }, { "epoch": 0.4284652545522111, "grad_norm": 2.4356944605424733, "learning_rate": 1.9339814420155707e-05, "loss": 0.9334, "step": 5765 }, { "epoch": 0.4285395763656633, "grad_norm": 11.636459044013275, "learning_rate": 1.933952768665103e-05, "loss": 1.0744, "step": 5766 }, { "epoch": 0.42861389817911555, "grad_norm": 2.1182883774487116, "learning_rate": 1.933924089301896e-05, "loss": 0.948, "step": 5767 }, { "epoch": 0.42868821999256784, "grad_norm": 2.163315711531107, "learning_rate": 1.933895403926134e-05, "loss": 0.9561, "step": 5768 }, { "epoch": 0.4287625418060201, "grad_norm": 1.9731690900387755, "learning_rate": 1.933866712538002e-05, "loss": 0.7991, "step": 5769 }, { "epoch": 0.4288368636194723, "grad_norm": 2.1003943563892955, "learning_rate": 1.9338380151376842e-05, "loss": 0.8226, "step": 5770 }, { "epoch": 0.42891118543292456, "grad_norm": 2.4306518331457947, "learning_rate": 1.933809311725366e-05, "loss": 0.9322, "step": 5771 }, { "epoch": 0.4289855072463768, "grad_norm": 2.3304895376546892, "learning_rate": 1.9337806023012323e-05, "loss": 0.8112, "step": 5772 }, { "epoch": 0.42905982905982903, "grad_norm": 2.431756597473796, "learning_rate": 1.933751886865467e-05, "loss": 0.986, "step": 5773 }, { "epoch": 0.42913415087328133, "grad_norm": 3.033354876534104, "learning_rate": 1.9337231654182556e-05, "loss": 0.9049, "step": 5774 }, { "epoch": 0.42920847268673357, "grad_norm": 2.628556346912068, "learning_rate": 1.9336944379597827e-05, "loss": 1.0716, "step": 5775 }, { "epoch": 0.4292827945001858, "grad_norm": 2.053444405072034, "learning_rate": 1.9336657044902338e-05, "loss": 0.9191, "step": 5776 }, { "epoch": 0.42935711631363804, "grad_norm": 2.282691064143746, "learning_rate": 1.9336369650097935e-05, "loss": 1.0158, "step": 5777 }, { "epoch": 0.4294314381270903, "grad_norm": 2.0752956009825123, "learning_rate": 1.9336082195186468e-05, "loss": 0.8054, "step": 5778 }, { "epoch": 0.4295057599405426, "grad_norm": 2.78872948767042, "learning_rate": 1.9335794680169787e-05, "loss": 1.1588, "step": 5779 }, { "epoch": 0.4295800817539948, "grad_norm": 2.529335044373342, "learning_rate": 1.9335507105049745e-05, "loss": 0.9964, "step": 5780 }, { "epoch": 0.42965440356744705, "grad_norm": 2.4415165245582093, "learning_rate": 1.9335219469828193e-05, "loss": 0.9284, "step": 5781 }, { "epoch": 0.4297287253808993, "grad_norm": 2.454757672686373, "learning_rate": 1.9334931774506983e-05, "loss": 1.1219, "step": 5782 }, { "epoch": 0.42980304719435153, "grad_norm": 1.7881957002760875, "learning_rate": 1.9334644019087965e-05, "loss": 0.8142, "step": 5783 }, { "epoch": 0.42987736900780377, "grad_norm": 2.321175935289688, "learning_rate": 1.9334356203572997e-05, "loss": 0.852, "step": 5784 }, { "epoch": 0.42995169082125606, "grad_norm": 2.061183028428403, "learning_rate": 1.9334068327963924e-05, "loss": 0.8499, "step": 5785 }, { "epoch": 0.4300260126347083, "grad_norm": 2.217105497814463, "learning_rate": 1.9333780392262604e-05, "loss": 0.8762, "step": 5786 }, { "epoch": 0.43010033444816054, "grad_norm": 2.218121519452783, "learning_rate": 1.9333492396470893e-05, "loss": 0.8151, "step": 5787 }, { "epoch": 0.4301746562616128, "grad_norm": 2.8036681889229538, "learning_rate": 1.9333204340590638e-05, "loss": 1.1576, "step": 5788 }, { "epoch": 0.430248978075065, "grad_norm": 1.9376794019003023, "learning_rate": 1.9332916224623698e-05, "loss": 0.8234, "step": 5789 }, { "epoch": 0.43032329988851725, "grad_norm": 2.7915978550742864, "learning_rate": 1.933262804857193e-05, "loss": 0.5723, "step": 5790 }, { "epoch": 0.43039762170196955, "grad_norm": 2.098832087720511, "learning_rate": 1.933233981243719e-05, "loss": 0.9812, "step": 5791 }, { "epoch": 0.4304719435154218, "grad_norm": 2.1001371005272733, "learning_rate": 1.9332051516221323e-05, "loss": 0.7882, "step": 5792 }, { "epoch": 0.430546265328874, "grad_norm": 1.9543688793218663, "learning_rate": 1.93317631599262e-05, "loss": 0.8658, "step": 5793 }, { "epoch": 0.43062058714232626, "grad_norm": 2.7313374385814377, "learning_rate": 1.9331474743553665e-05, "loss": 0.8793, "step": 5794 }, { "epoch": 0.4306949089557785, "grad_norm": 2.369600146700605, "learning_rate": 1.9331186267105583e-05, "loss": 1.0216, "step": 5795 }, { "epoch": 0.4307692307692308, "grad_norm": 3.738638855582578, "learning_rate": 1.9330897730583803e-05, "loss": 0.9241, "step": 5796 }, { "epoch": 0.43084355258268303, "grad_norm": 2.044076537612336, "learning_rate": 1.933060913399019e-05, "loss": 1.0183, "step": 5797 }, { "epoch": 0.43091787439613527, "grad_norm": 1.8696900956941693, "learning_rate": 1.93303204773266e-05, "loss": 0.9664, "step": 5798 }, { "epoch": 0.4309921962095875, "grad_norm": 2.258638110734868, "learning_rate": 1.9330031760594893e-05, "loss": 0.9574, "step": 5799 }, { "epoch": 0.43106651802303975, "grad_norm": 2.264852100743869, "learning_rate": 1.9329742983796924e-05, "loss": 0.842, "step": 5800 }, { "epoch": 0.431140839836492, "grad_norm": 2.106486088576589, "learning_rate": 1.932945414693455e-05, "loss": 0.9191, "step": 5801 }, { "epoch": 0.4312151616499443, "grad_norm": 3.1805489853276834, "learning_rate": 1.9329165250009637e-05, "loss": 0.816, "step": 5802 }, { "epoch": 0.4312894834633965, "grad_norm": 1.9384005974466574, "learning_rate": 1.9328876293024046e-05, "loss": 0.9094, "step": 5803 }, { "epoch": 0.43136380527684876, "grad_norm": 2.803273138624739, "learning_rate": 1.932858727597963e-05, "loss": 0.9938, "step": 5804 }, { "epoch": 0.431438127090301, "grad_norm": 2.9051064074708197, "learning_rate": 1.932829819887825e-05, "loss": 0.9308, "step": 5805 }, { "epoch": 0.43151244890375323, "grad_norm": 2.117389859003006, "learning_rate": 1.9328009061721776e-05, "loss": 1.0187, "step": 5806 }, { "epoch": 0.4315867707172055, "grad_norm": 1.9369756100649291, "learning_rate": 1.932771986451206e-05, "loss": 0.8981, "step": 5807 }, { "epoch": 0.43166109253065776, "grad_norm": 10.00488629156786, "learning_rate": 1.932743060725097e-05, "loss": 1.0687, "step": 5808 }, { "epoch": 0.43173541434411, "grad_norm": 1.9853372257788002, "learning_rate": 1.9327141289940366e-05, "loss": 0.966, "step": 5809 }, { "epoch": 0.43180973615756224, "grad_norm": 3.6526664490781413, "learning_rate": 1.932685191258211e-05, "loss": 0.811, "step": 5810 }, { "epoch": 0.4318840579710145, "grad_norm": 2.02669902496002, "learning_rate": 1.932656247517806e-05, "loss": 0.926, "step": 5811 }, { "epoch": 0.4319583797844667, "grad_norm": 2.324526059529615, "learning_rate": 1.9326272977730092e-05, "loss": 0.746, "step": 5812 }, { "epoch": 0.432032701597919, "grad_norm": 1.980009209298788, "learning_rate": 1.9325983420240056e-05, "loss": 0.7221, "step": 5813 }, { "epoch": 0.43210702341137125, "grad_norm": 2.659048176286501, "learning_rate": 1.932569380270983e-05, "loss": 1.1734, "step": 5814 }, { "epoch": 0.4321813452248235, "grad_norm": 2.107094657185424, "learning_rate": 1.9325404125141265e-05, "loss": 0.9192, "step": 5815 }, { "epoch": 0.4322556670382757, "grad_norm": 2.2911908918702246, "learning_rate": 1.9325114387536236e-05, "loss": 1.0643, "step": 5816 }, { "epoch": 0.43232998885172796, "grad_norm": 2.7129751208810395, "learning_rate": 1.93248245898966e-05, "loss": 1.1166, "step": 5817 }, { "epoch": 0.4324043106651802, "grad_norm": 1.6581423178652561, "learning_rate": 1.9324534732224232e-05, "loss": 0.7128, "step": 5818 }, { "epoch": 0.4324786324786325, "grad_norm": 1.9079378948429986, "learning_rate": 1.9324244814520988e-05, "loss": 0.8116, "step": 5819 }, { "epoch": 0.43255295429208473, "grad_norm": 2.4510982952261937, "learning_rate": 1.9323954836788745e-05, "loss": 1.0287, "step": 5820 }, { "epoch": 0.432627276105537, "grad_norm": 2.138944033846626, "learning_rate": 1.932366479902936e-05, "loss": 0.9502, "step": 5821 }, { "epoch": 0.4327015979189892, "grad_norm": 2.31634592260392, "learning_rate": 1.932337470124471e-05, "loss": 0.9495, "step": 5822 }, { "epoch": 0.43277591973244145, "grad_norm": 2.156640949789392, "learning_rate": 1.9323084543436655e-05, "loss": 0.7822, "step": 5823 }, { "epoch": 0.43285024154589374, "grad_norm": 2.1764424555780244, "learning_rate": 1.9322794325607065e-05, "loss": 0.9023, "step": 5824 }, { "epoch": 0.432924563359346, "grad_norm": 1.7538249406311315, "learning_rate": 1.932250404775781e-05, "loss": 0.7925, "step": 5825 }, { "epoch": 0.4329988851727982, "grad_norm": 2.0760182926617947, "learning_rate": 1.9322213709890752e-05, "loss": 1.0688, "step": 5826 }, { "epoch": 0.43307320698625046, "grad_norm": 1.8747514180196987, "learning_rate": 1.9321923312007773e-05, "loss": 0.946, "step": 5827 }, { "epoch": 0.4331475287997027, "grad_norm": 1.8205889036704892, "learning_rate": 1.9321632854110733e-05, "loss": 0.8302, "step": 5828 }, { "epoch": 0.43322185061315494, "grad_norm": 1.7271398973565333, "learning_rate": 1.9321342336201504e-05, "loss": 0.7309, "step": 5829 }, { "epoch": 0.43329617242660723, "grad_norm": 2.009714211439215, "learning_rate": 1.9321051758281956e-05, "loss": 0.8151, "step": 5830 }, { "epoch": 0.43337049424005947, "grad_norm": 2.127950780720746, "learning_rate": 1.9320761120353963e-05, "loss": 0.8717, "step": 5831 }, { "epoch": 0.4334448160535117, "grad_norm": 2.510961653253325, "learning_rate": 1.9320470422419387e-05, "loss": 1.1798, "step": 5832 }, { "epoch": 0.43351913786696394, "grad_norm": 2.419189761311213, "learning_rate": 1.9320179664480113e-05, "loss": 0.9694, "step": 5833 }, { "epoch": 0.4335934596804162, "grad_norm": 2.3291349125382728, "learning_rate": 1.9319888846538004e-05, "loss": 0.968, "step": 5834 }, { "epoch": 0.4336677814938685, "grad_norm": 2.6977251338040675, "learning_rate": 1.9319597968594934e-05, "loss": 0.9694, "step": 5835 }, { "epoch": 0.4337421033073207, "grad_norm": 1.8872473906116978, "learning_rate": 1.9319307030652776e-05, "loss": 0.9324, "step": 5836 }, { "epoch": 0.43381642512077295, "grad_norm": 2.0933773579783352, "learning_rate": 1.93190160327134e-05, "loss": 0.8876, "step": 5837 }, { "epoch": 0.4338907469342252, "grad_norm": 2.3255510592623065, "learning_rate": 1.9318724974778686e-05, "loss": 0.8391, "step": 5838 }, { "epoch": 0.43396506874767743, "grad_norm": 2.3128324201726054, "learning_rate": 1.9318433856850504e-05, "loss": 0.9365, "step": 5839 }, { "epoch": 0.43403939056112967, "grad_norm": 2.3717841850112715, "learning_rate": 1.9318142678930726e-05, "loss": 1.1198, "step": 5840 }, { "epoch": 0.43411371237458196, "grad_norm": 2.026444649691956, "learning_rate": 1.9317851441021232e-05, "loss": 1.0585, "step": 5841 }, { "epoch": 0.4341880341880342, "grad_norm": 2.100436402813169, "learning_rate": 1.9317560143123892e-05, "loss": 0.8942, "step": 5842 }, { "epoch": 0.43426235600148644, "grad_norm": 2.1758574407118347, "learning_rate": 1.9317268785240584e-05, "loss": 0.8489, "step": 5843 }, { "epoch": 0.4343366778149387, "grad_norm": 1.8736553170614165, "learning_rate": 1.931697736737318e-05, "loss": 0.9709, "step": 5844 }, { "epoch": 0.4344109996283909, "grad_norm": 2.1329877457866226, "learning_rate": 1.9316685889523562e-05, "loss": 0.8511, "step": 5845 }, { "epoch": 0.4344853214418432, "grad_norm": 2.3089029089078426, "learning_rate": 1.9316394351693603e-05, "loss": 0.9403, "step": 5846 }, { "epoch": 0.43455964325529545, "grad_norm": 1.9807558489026011, "learning_rate": 1.9316102753885183e-05, "loss": 0.8537, "step": 5847 }, { "epoch": 0.4346339650687477, "grad_norm": 1.9570315784503733, "learning_rate": 1.9315811096100173e-05, "loss": 0.784, "step": 5848 }, { "epoch": 0.4347082868821999, "grad_norm": 2.371857965350599, "learning_rate": 1.9315519378340456e-05, "loss": 0.998, "step": 5849 }, { "epoch": 0.43478260869565216, "grad_norm": 2.0516288128927034, "learning_rate": 1.931522760060791e-05, "loss": 0.911, "step": 5850 }, { "epoch": 0.4348569305091044, "grad_norm": 1.7935054337570804, "learning_rate": 1.931493576290441e-05, "loss": 0.7131, "step": 5851 }, { "epoch": 0.4349312523225567, "grad_norm": 2.0226387368242675, "learning_rate": 1.9314643865231837e-05, "loss": 0.7847, "step": 5852 }, { "epoch": 0.43500557413600893, "grad_norm": 2.3667097687307854, "learning_rate": 1.931435190759207e-05, "loss": 0.9224, "step": 5853 }, { "epoch": 0.43507989594946117, "grad_norm": 2.09280030695155, "learning_rate": 1.931405988998699e-05, "loss": 1.1262, "step": 5854 }, { "epoch": 0.4351542177629134, "grad_norm": 2.6108190508908993, "learning_rate": 1.9313767812418476e-05, "loss": 1.0211, "step": 5855 }, { "epoch": 0.43522853957636565, "grad_norm": 2.3288816624225133, "learning_rate": 1.9313475674888407e-05, "loss": 0.921, "step": 5856 }, { "epoch": 0.4353028613898179, "grad_norm": 1.9615574428645295, "learning_rate": 1.9313183477398665e-05, "loss": 1.0727, "step": 5857 }, { "epoch": 0.4353771832032702, "grad_norm": 2.33903480611879, "learning_rate": 1.931289121995113e-05, "loss": 0.8815, "step": 5858 }, { "epoch": 0.4354515050167224, "grad_norm": 2.163011380218098, "learning_rate": 1.9312598902547686e-05, "loss": 0.9821, "step": 5859 }, { "epoch": 0.43552582683017466, "grad_norm": 2.456057033878271, "learning_rate": 1.9312306525190214e-05, "loss": 1.1458, "step": 5860 }, { "epoch": 0.4356001486436269, "grad_norm": 9.110785479606074, "learning_rate": 1.9312014087880592e-05, "loss": 1.0175, "step": 5861 }, { "epoch": 0.43567447045707913, "grad_norm": 2.0552077343458213, "learning_rate": 1.931172159062071e-05, "loss": 0.7668, "step": 5862 }, { "epoch": 0.4357487922705314, "grad_norm": 1.654633249604274, "learning_rate": 1.9311429033412447e-05, "loss": 0.8227, "step": 5863 }, { "epoch": 0.43582311408398366, "grad_norm": 1.8086539197167275, "learning_rate": 1.9311136416257687e-05, "loss": 0.682, "step": 5864 }, { "epoch": 0.4358974358974359, "grad_norm": 1.911894907782636, "learning_rate": 1.9310843739158317e-05, "loss": 0.6916, "step": 5865 }, { "epoch": 0.43597175771088814, "grad_norm": 2.2044078292176303, "learning_rate": 1.931055100211621e-05, "loss": 0.838, "step": 5866 }, { "epoch": 0.4360460795243404, "grad_norm": 2.0901297021907563, "learning_rate": 1.9310258205133266e-05, "loss": 0.743, "step": 5867 }, { "epoch": 0.4361204013377926, "grad_norm": 2.126301025995748, "learning_rate": 1.930996534821136e-05, "loss": 0.9105, "step": 5868 }, { "epoch": 0.4361947231512449, "grad_norm": 2.1804824076649103, "learning_rate": 1.930967243135238e-05, "loss": 1.1409, "step": 5869 }, { "epoch": 0.43626904496469715, "grad_norm": 2.0419831301553506, "learning_rate": 1.9309379454558213e-05, "loss": 0.9328, "step": 5870 }, { "epoch": 0.4363433667781494, "grad_norm": 2.3310193731228463, "learning_rate": 1.9309086417830744e-05, "loss": 0.9709, "step": 5871 }, { "epoch": 0.4364176885916016, "grad_norm": 2.032176787372313, "learning_rate": 1.930879332117186e-05, "loss": 0.7617, "step": 5872 }, { "epoch": 0.43649201040505387, "grad_norm": 2.0516694690612547, "learning_rate": 1.9308500164583445e-05, "loss": 1.0549, "step": 5873 }, { "epoch": 0.43656633221850616, "grad_norm": 2.931959399390811, "learning_rate": 1.9308206948067392e-05, "loss": 1.0741, "step": 5874 }, { "epoch": 0.4366406540319584, "grad_norm": 2.6284259067690265, "learning_rate": 1.9307913671625584e-05, "loss": 0.9947, "step": 5875 }, { "epoch": 0.43671497584541064, "grad_norm": 2.272019372416205, "learning_rate": 1.9307620335259912e-05, "loss": 1.0765, "step": 5876 }, { "epoch": 0.4367892976588629, "grad_norm": 1.9515655813247106, "learning_rate": 1.930732693897226e-05, "loss": 0.7538, "step": 5877 }, { "epoch": 0.4368636194723151, "grad_norm": 1.9964634966310477, "learning_rate": 1.9307033482764524e-05, "loss": 0.576, "step": 5878 }, { "epoch": 0.43693794128576735, "grad_norm": 1.9734812460520754, "learning_rate": 1.9306739966638586e-05, "loss": 0.8177, "step": 5879 }, { "epoch": 0.43701226309921964, "grad_norm": 2.2826954848198953, "learning_rate": 1.9306446390596343e-05, "loss": 0.9276, "step": 5880 }, { "epoch": 0.4370865849126719, "grad_norm": 2.33172263209285, "learning_rate": 1.9306152754639678e-05, "loss": 1.0463, "step": 5881 }, { "epoch": 0.4371609067261241, "grad_norm": 2.3224487096331723, "learning_rate": 1.9305859058770485e-05, "loss": 0.8847, "step": 5882 }, { "epoch": 0.43723522853957636, "grad_norm": 2.876277186012974, "learning_rate": 1.9305565302990656e-05, "loss": 0.8021, "step": 5883 }, { "epoch": 0.4373095503530286, "grad_norm": 2.1503708749764803, "learning_rate": 1.9305271487302077e-05, "loss": 0.8813, "step": 5884 }, { "epoch": 0.43738387216648084, "grad_norm": 1.9551108738993361, "learning_rate": 1.9304977611706645e-05, "loss": 0.8979, "step": 5885 }, { "epoch": 0.43745819397993313, "grad_norm": 2.461284550992366, "learning_rate": 1.930468367620625e-05, "loss": 0.93, "step": 5886 }, { "epoch": 0.43753251579338537, "grad_norm": 1.8302015353556889, "learning_rate": 1.9304389680802783e-05, "loss": 0.7671, "step": 5887 }, { "epoch": 0.4376068376068376, "grad_norm": 2.1464681561567973, "learning_rate": 1.930409562549814e-05, "loss": 0.9835, "step": 5888 }, { "epoch": 0.43768115942028984, "grad_norm": 2.2566065696489637, "learning_rate": 1.9303801510294212e-05, "loss": 0.8721, "step": 5889 }, { "epoch": 0.4377554812337421, "grad_norm": 2.207411755855743, "learning_rate": 1.930350733519289e-05, "loss": 0.8716, "step": 5890 }, { "epoch": 0.4378298030471944, "grad_norm": 2.4094878944277185, "learning_rate": 1.9303213100196073e-05, "loss": 0.8325, "step": 5891 }, { "epoch": 0.4379041248606466, "grad_norm": 1.9949303646100653, "learning_rate": 1.930291880530565e-05, "loss": 1.0364, "step": 5892 }, { "epoch": 0.43797844667409885, "grad_norm": 1.9153689789088644, "learning_rate": 1.9302624450523522e-05, "loss": 0.8637, "step": 5893 }, { "epoch": 0.4380527684875511, "grad_norm": 1.980992518714052, "learning_rate": 1.930233003585158e-05, "loss": 0.8873, "step": 5894 }, { "epoch": 0.43812709030100333, "grad_norm": 2.58096245452459, "learning_rate": 1.930203556129172e-05, "loss": 0.7755, "step": 5895 }, { "epoch": 0.43820141211445557, "grad_norm": 2.2193975119677987, "learning_rate": 1.9301741026845832e-05, "loss": 1.096, "step": 5896 }, { "epoch": 0.43827573392790786, "grad_norm": 2.9596885839975267, "learning_rate": 1.9301446432515823e-05, "loss": 0.9374, "step": 5897 }, { "epoch": 0.4383500557413601, "grad_norm": 1.8549200631433682, "learning_rate": 1.9301151778303583e-05, "loss": 1.0116, "step": 5898 }, { "epoch": 0.43842437755481234, "grad_norm": 2.129095533288967, "learning_rate": 1.9300857064211012e-05, "loss": 0.9759, "step": 5899 }, { "epoch": 0.4384986993682646, "grad_norm": 2.075678827477168, "learning_rate": 1.930056229024001e-05, "loss": 0.9515, "step": 5900 }, { "epoch": 0.4385730211817168, "grad_norm": 2.45325775925573, "learning_rate": 1.9300267456392464e-05, "loss": 1.1232, "step": 5901 }, { "epoch": 0.4386473429951691, "grad_norm": 2.0448965737351017, "learning_rate": 1.929997256267028e-05, "loss": 0.9136, "step": 5902 }, { "epoch": 0.43872166480862135, "grad_norm": 3.0838292340722666, "learning_rate": 1.9299677609075353e-05, "loss": 0.8693, "step": 5903 }, { "epoch": 0.4387959866220736, "grad_norm": 2.076132273922563, "learning_rate": 1.929938259560959e-05, "loss": 0.6647, "step": 5904 }, { "epoch": 0.4388703084355258, "grad_norm": 2.3736098695477947, "learning_rate": 1.9299087522274878e-05, "loss": 0.9132, "step": 5905 }, { "epoch": 0.43894463024897806, "grad_norm": 2.552692120739062, "learning_rate": 1.929879238907313e-05, "loss": 0.9281, "step": 5906 }, { "epoch": 0.4390189520624303, "grad_norm": 1.874212828820964, "learning_rate": 1.9298497196006233e-05, "loss": 0.9803, "step": 5907 }, { "epoch": 0.4390932738758826, "grad_norm": 1.884036744588502, "learning_rate": 1.9298201943076096e-05, "loss": 0.9742, "step": 5908 }, { "epoch": 0.43916759568933483, "grad_norm": 1.8205432957711802, "learning_rate": 1.9297906630284616e-05, "loss": 0.5974, "step": 5909 }, { "epoch": 0.43924191750278707, "grad_norm": 1.990080467921921, "learning_rate": 1.9297611257633697e-05, "loss": 0.849, "step": 5910 }, { "epoch": 0.4393162393162393, "grad_norm": 2.1499385245848863, "learning_rate": 1.9297315825125236e-05, "loss": 0.891, "step": 5911 }, { "epoch": 0.43939056112969155, "grad_norm": 2.049473437869605, "learning_rate": 1.9297020332761142e-05, "loss": 0.7393, "step": 5912 }, { "epoch": 0.4394648829431438, "grad_norm": 1.9069735364415026, "learning_rate": 1.9296724780543314e-05, "loss": 0.7191, "step": 5913 }, { "epoch": 0.4395392047565961, "grad_norm": 2.1464857592876188, "learning_rate": 1.929642916847365e-05, "loss": 1.0229, "step": 5914 }, { "epoch": 0.4396135265700483, "grad_norm": 2.203970293200295, "learning_rate": 1.929613349655406e-05, "loss": 1.0708, "step": 5915 }, { "epoch": 0.43968784838350056, "grad_norm": 2.0408777022521565, "learning_rate": 1.9295837764786448e-05, "loss": 0.8974, "step": 5916 }, { "epoch": 0.4397621701969528, "grad_norm": 2.2329840986767215, "learning_rate": 1.929554197317271e-05, "loss": 0.7724, "step": 5917 }, { "epoch": 0.43983649201040503, "grad_norm": 1.8513342985967403, "learning_rate": 1.9295246121714755e-05, "loss": 0.8451, "step": 5918 }, { "epoch": 0.4399108138238573, "grad_norm": 2.2470366695544364, "learning_rate": 1.929495021041449e-05, "loss": 1.0906, "step": 5919 }, { "epoch": 0.43998513563730957, "grad_norm": 1.734398735350484, "learning_rate": 1.929465423927382e-05, "loss": 0.8917, "step": 5920 }, { "epoch": 0.4400594574507618, "grad_norm": 9.352541261012282, "learning_rate": 1.9294358208294642e-05, "loss": 0.6627, "step": 5921 }, { "epoch": 0.44013377926421404, "grad_norm": 1.966406589724374, "learning_rate": 1.9294062117478874e-05, "loss": 0.5859, "step": 5922 }, { "epoch": 0.4402081010776663, "grad_norm": 2.1278046811053106, "learning_rate": 1.9293765966828416e-05, "loss": 0.7886, "step": 5923 }, { "epoch": 0.4402824228911185, "grad_norm": 2.1288382013943528, "learning_rate": 1.929346975634517e-05, "loss": 0.8797, "step": 5924 }, { "epoch": 0.4403567447045708, "grad_norm": 2.73253776100323, "learning_rate": 1.9293173486031052e-05, "loss": 1.1695, "step": 5925 }, { "epoch": 0.44043106651802305, "grad_norm": 2.09880118221421, "learning_rate": 1.9292877155887964e-05, "loss": 0.9124, "step": 5926 }, { "epoch": 0.4405053883314753, "grad_norm": 2.025246854760844, "learning_rate": 1.9292580765917814e-05, "loss": 0.8657, "step": 5927 }, { "epoch": 0.4405797101449275, "grad_norm": 1.9590546628760028, "learning_rate": 1.9292284316122513e-05, "loss": 0.9448, "step": 5928 }, { "epoch": 0.44065403195837977, "grad_norm": 2.040825742770263, "learning_rate": 1.9291987806503968e-05, "loss": 0.8329, "step": 5929 }, { "epoch": 0.44072835377183206, "grad_norm": 2.0187358702356817, "learning_rate": 1.9291691237064084e-05, "loss": 0.7759, "step": 5930 }, { "epoch": 0.4408026755852843, "grad_norm": 1.9472507057516129, "learning_rate": 1.9291394607804778e-05, "loss": 0.7983, "step": 5931 }, { "epoch": 0.44087699739873654, "grad_norm": 2.0972669866663605, "learning_rate": 1.9291097918727955e-05, "loss": 1.0088, "step": 5932 }, { "epoch": 0.4409513192121888, "grad_norm": 2.87744556086303, "learning_rate": 1.9290801169835525e-05, "loss": 0.6143, "step": 5933 }, { "epoch": 0.441025641025641, "grad_norm": 2.1662937973873087, "learning_rate": 1.92905043611294e-05, "loss": 0.7468, "step": 5934 }, { "epoch": 0.44109996283909325, "grad_norm": 2.0741123209306833, "learning_rate": 1.929020749261149e-05, "loss": 0.8778, "step": 5935 }, { "epoch": 0.44117428465254555, "grad_norm": 2.1004573694548343, "learning_rate": 1.9289910564283704e-05, "loss": 1.081, "step": 5936 }, { "epoch": 0.4412486064659978, "grad_norm": 2.8976198066284957, "learning_rate": 1.9289613576147953e-05, "loss": 0.7956, "step": 5937 }, { "epoch": 0.44132292827945, "grad_norm": 1.9111514105246774, "learning_rate": 1.928931652820616e-05, "loss": 0.8125, "step": 5938 }, { "epoch": 0.44139725009290226, "grad_norm": 2.1618224249783613, "learning_rate": 1.9289019420460223e-05, "loss": 0.9103, "step": 5939 }, { "epoch": 0.4414715719063545, "grad_norm": 1.9200051540946494, "learning_rate": 1.9288722252912065e-05, "loss": 0.8812, "step": 5940 }, { "epoch": 0.44154589371980674, "grad_norm": 3.340237818226395, "learning_rate": 1.928842502556359e-05, "loss": 0.7841, "step": 5941 }, { "epoch": 0.44162021553325903, "grad_norm": 2.237913159506832, "learning_rate": 1.928812773841672e-05, "loss": 0.9998, "step": 5942 }, { "epoch": 0.44169453734671127, "grad_norm": 2.0721266019054, "learning_rate": 1.9287830391473367e-05, "loss": 0.9063, "step": 5943 }, { "epoch": 0.4417688591601635, "grad_norm": 2.7180313158910536, "learning_rate": 1.928753298473544e-05, "loss": 1.0635, "step": 5944 }, { "epoch": 0.44184318097361575, "grad_norm": 2.6091072558947244, "learning_rate": 1.928723551820486e-05, "loss": 0.9864, "step": 5945 }, { "epoch": 0.441917502787068, "grad_norm": 1.7153133808224588, "learning_rate": 1.9286937991883538e-05, "loss": 0.5597, "step": 5946 }, { "epoch": 0.4419918246005203, "grad_norm": 2.43765155929564, "learning_rate": 1.9286640405773392e-05, "loss": 0.7807, "step": 5947 }, { "epoch": 0.4420661464139725, "grad_norm": 2.6975467399014925, "learning_rate": 1.928634275987634e-05, "loss": 0.954, "step": 5948 }, { "epoch": 0.44214046822742475, "grad_norm": 2.3021807001136074, "learning_rate": 1.928604505419429e-05, "loss": 0.9159, "step": 5949 }, { "epoch": 0.442214790040877, "grad_norm": 2.7923163538197424, "learning_rate": 1.9285747288729167e-05, "loss": 0.9822, "step": 5950 }, { "epoch": 0.44228911185432923, "grad_norm": 2.401824147827131, "learning_rate": 1.9285449463482884e-05, "loss": 1.0203, "step": 5951 }, { "epoch": 0.44236343366778147, "grad_norm": 1.9758749727911709, "learning_rate": 1.928515157845736e-05, "loss": 0.7278, "step": 5952 }, { "epoch": 0.44243775548123376, "grad_norm": 2.1713466539361885, "learning_rate": 1.928485363365451e-05, "loss": 0.8389, "step": 5953 }, { "epoch": 0.442512077294686, "grad_norm": 2.0981694457932774, "learning_rate": 1.9284555629076257e-05, "loss": 0.9273, "step": 5954 }, { "epoch": 0.44258639910813824, "grad_norm": 2.049125427895432, "learning_rate": 1.9284257564724513e-05, "loss": 0.6878, "step": 5955 }, { "epoch": 0.4426607209215905, "grad_norm": 3.238054738143216, "learning_rate": 1.9283959440601204e-05, "loss": 0.7954, "step": 5956 }, { "epoch": 0.4427350427350427, "grad_norm": 1.8278676450635778, "learning_rate": 1.9283661256708242e-05, "loss": 0.9667, "step": 5957 }, { "epoch": 0.442809364548495, "grad_norm": 2.636433149909624, "learning_rate": 1.9283363013047554e-05, "loss": 0.9752, "step": 5958 }, { "epoch": 0.44288368636194725, "grad_norm": 2.201798637610238, "learning_rate": 1.9283064709621055e-05, "loss": 1.0817, "step": 5959 }, { "epoch": 0.4429580081753995, "grad_norm": 2.3194349515289163, "learning_rate": 1.9282766346430666e-05, "loss": 0.7963, "step": 5960 }, { "epoch": 0.4430323299888517, "grad_norm": 2.4950490857779823, "learning_rate": 1.928246792347831e-05, "loss": 0.8162, "step": 5961 }, { "epoch": 0.44310665180230396, "grad_norm": 2.1241135836547476, "learning_rate": 1.9282169440765908e-05, "loss": 0.6971, "step": 5962 }, { "epoch": 0.4431809736157562, "grad_norm": 1.9277979974175716, "learning_rate": 1.928187089829538e-05, "loss": 0.7298, "step": 5963 }, { "epoch": 0.4432552954292085, "grad_norm": 2.1205743090890987, "learning_rate": 1.928157229606865e-05, "loss": 0.7477, "step": 5964 }, { "epoch": 0.44332961724266073, "grad_norm": 3.2429271667278594, "learning_rate": 1.9281273634087635e-05, "loss": 0.9853, "step": 5965 }, { "epoch": 0.443403939056113, "grad_norm": 2.110382475435937, "learning_rate": 1.9280974912354267e-05, "loss": 0.9561, "step": 5966 }, { "epoch": 0.4434782608695652, "grad_norm": 2.2446697392582253, "learning_rate": 1.928067613087046e-05, "loss": 0.998, "step": 5967 }, { "epoch": 0.44355258268301745, "grad_norm": 5.183227209678275, "learning_rate": 1.9280377289638145e-05, "loss": 0.9726, "step": 5968 }, { "epoch": 0.4436269044964697, "grad_norm": 1.8118141203082025, "learning_rate": 1.928007838865924e-05, "loss": 0.7111, "step": 5969 }, { "epoch": 0.443701226309922, "grad_norm": 1.8373851519204072, "learning_rate": 1.9279779427935672e-05, "loss": 0.9054, "step": 5970 }, { "epoch": 0.4437755481233742, "grad_norm": 2.2677808646557516, "learning_rate": 1.927948040746937e-05, "loss": 0.8946, "step": 5971 }, { "epoch": 0.44384986993682646, "grad_norm": 1.7634789686897099, "learning_rate": 1.927918132726225e-05, "loss": 0.8545, "step": 5972 }, { "epoch": 0.4439241917502787, "grad_norm": 2.6060221217833197, "learning_rate": 1.9278882187316244e-05, "loss": 0.7352, "step": 5973 }, { "epoch": 0.44399851356373093, "grad_norm": 2.6597295619250354, "learning_rate": 1.927858298763327e-05, "loss": 1.0302, "step": 5974 }, { "epoch": 0.44407283537718323, "grad_norm": 2.1727722185580336, "learning_rate": 1.9278283728215267e-05, "loss": 1.0227, "step": 5975 }, { "epoch": 0.44414715719063547, "grad_norm": 2.1588851514156544, "learning_rate": 1.9277984409064154e-05, "loss": 1.005, "step": 5976 }, { "epoch": 0.4442214790040877, "grad_norm": 2.0056340138173145, "learning_rate": 1.927768503018186e-05, "loss": 0.8563, "step": 5977 }, { "epoch": 0.44429580081753994, "grad_norm": 1.9845616082845432, "learning_rate": 1.927738559157031e-05, "loss": 1.081, "step": 5978 }, { "epoch": 0.4443701226309922, "grad_norm": 2.0460615479624753, "learning_rate": 1.9277086093231426e-05, "loss": 0.981, "step": 5979 }, { "epoch": 0.4444444444444444, "grad_norm": 2.721105809309863, "learning_rate": 1.9276786535167152e-05, "loss": 0.8928, "step": 5980 }, { "epoch": 0.4445187662578967, "grad_norm": 1.9078003118520768, "learning_rate": 1.9276486917379405e-05, "loss": 0.8026, "step": 5981 }, { "epoch": 0.44459308807134895, "grad_norm": 2.68281337097562, "learning_rate": 1.9276187239870116e-05, "loss": 0.8274, "step": 5982 }, { "epoch": 0.4446674098848012, "grad_norm": 2.0386765057003173, "learning_rate": 1.9275887502641213e-05, "loss": 0.7788, "step": 5983 }, { "epoch": 0.44474173169825343, "grad_norm": 2.465413314508533, "learning_rate": 1.927558770569463e-05, "loss": 0.77, "step": 5984 }, { "epoch": 0.44481605351170567, "grad_norm": 1.9790442073214543, "learning_rate": 1.9275287849032293e-05, "loss": 0.9814, "step": 5985 }, { "epoch": 0.44489037532515796, "grad_norm": 2.2321917038327626, "learning_rate": 1.927498793265614e-05, "loss": 0.8188, "step": 5986 }, { "epoch": 0.4449646971386102, "grad_norm": 2.6024133104897267, "learning_rate": 1.9274687956568088e-05, "loss": 0.8067, "step": 5987 }, { "epoch": 0.44503901895206244, "grad_norm": 2.162868574692812, "learning_rate": 1.927438792077008e-05, "loss": 0.8675, "step": 5988 }, { "epoch": 0.4451133407655147, "grad_norm": 2.6412794896970224, "learning_rate": 1.927408782526404e-05, "loss": 0.8979, "step": 5989 }, { "epoch": 0.4451876625789669, "grad_norm": 1.8906425057021186, "learning_rate": 1.927378767005191e-05, "loss": 0.7803, "step": 5990 }, { "epoch": 0.44526198439241915, "grad_norm": 1.8914639044944228, "learning_rate": 1.9273487455135613e-05, "loss": 0.7558, "step": 5991 }, { "epoch": 0.44533630620587145, "grad_norm": 2.1935514050575464, "learning_rate": 1.9273187180517085e-05, "loss": 0.9669, "step": 5992 }, { "epoch": 0.4454106280193237, "grad_norm": 2.4733169850115364, "learning_rate": 1.927288684619826e-05, "loss": 0.9514, "step": 5993 }, { "epoch": 0.4454849498327759, "grad_norm": 2.222669477493577, "learning_rate": 1.927258645218107e-05, "loss": 0.8025, "step": 5994 }, { "epoch": 0.44555927164622816, "grad_norm": 2.979394238908606, "learning_rate": 1.9272285998467447e-05, "loss": 0.8744, "step": 5995 }, { "epoch": 0.4456335934596804, "grad_norm": 3.8069850737653126, "learning_rate": 1.927198548505933e-05, "loss": 0.7637, "step": 5996 }, { "epoch": 0.44570791527313264, "grad_norm": 2.265021328459046, "learning_rate": 1.9271684911958653e-05, "loss": 0.8351, "step": 5997 }, { "epoch": 0.44578223708658493, "grad_norm": 3.2994167330073325, "learning_rate": 1.9271384279167348e-05, "loss": 1.0273, "step": 5998 }, { "epoch": 0.44585655890003717, "grad_norm": 1.9394214653620834, "learning_rate": 1.9271083586687355e-05, "loss": 0.859, "step": 5999 }, { "epoch": 0.4459308807134894, "grad_norm": 2.1062977064762904, "learning_rate": 1.92707828345206e-05, "loss": 0.8118, "step": 6000 }, { "epoch": 0.44600520252694165, "grad_norm": 2.0489302445385693, "learning_rate": 1.927048202266903e-05, "loss": 0.8727, "step": 6001 }, { "epoch": 0.4460795243403939, "grad_norm": 2.0312247697682038, "learning_rate": 1.927018115113458e-05, "loss": 0.8944, "step": 6002 }, { "epoch": 0.4461538461538462, "grad_norm": 1.896273779169177, "learning_rate": 1.9269880219919184e-05, "loss": 0.6806, "step": 6003 }, { "epoch": 0.4462281679672984, "grad_norm": 1.9174287603960831, "learning_rate": 1.926957922902478e-05, "loss": 0.8401, "step": 6004 }, { "epoch": 0.44630248978075066, "grad_norm": 1.9297607606473821, "learning_rate": 1.9269278178453303e-05, "loss": 0.9895, "step": 6005 }, { "epoch": 0.4463768115942029, "grad_norm": 1.8473456301865148, "learning_rate": 1.9268977068206696e-05, "loss": 0.6844, "step": 6006 }, { "epoch": 0.44645113340765513, "grad_norm": 1.8922629345368247, "learning_rate": 1.9268675898286896e-05, "loss": 0.6689, "step": 6007 }, { "epoch": 0.44652545522110737, "grad_norm": 2.131363629335645, "learning_rate": 1.926837466869584e-05, "loss": 1.0459, "step": 6008 }, { "epoch": 0.44659977703455966, "grad_norm": 2.245788395993885, "learning_rate": 1.926807337943547e-05, "loss": 0.953, "step": 6009 }, { "epoch": 0.4466740988480119, "grad_norm": 2.05915105300722, "learning_rate": 1.9267772030507723e-05, "loss": 0.9543, "step": 6010 }, { "epoch": 0.44674842066146414, "grad_norm": 1.749718780289323, "learning_rate": 1.926747062191454e-05, "loss": 0.6936, "step": 6011 }, { "epoch": 0.4468227424749164, "grad_norm": 2.0583636379412193, "learning_rate": 1.9267169153657864e-05, "loss": 1.0613, "step": 6012 }, { "epoch": 0.4468970642883686, "grad_norm": 4.053272562197205, "learning_rate": 1.9266867625739637e-05, "loss": 1.129, "step": 6013 }, { "epoch": 0.4469713861018209, "grad_norm": 2.6273365694287465, "learning_rate": 1.9266566038161792e-05, "loss": 1.1766, "step": 6014 }, { "epoch": 0.44704570791527315, "grad_norm": 2.3779244344193367, "learning_rate": 1.9266264390926278e-05, "loss": 1.1171, "step": 6015 }, { "epoch": 0.4471200297287254, "grad_norm": 2.8000507364122096, "learning_rate": 1.9265962684035034e-05, "loss": 0.9233, "step": 6016 }, { "epoch": 0.4471943515421776, "grad_norm": 2.0873760118603117, "learning_rate": 1.9265660917490003e-05, "loss": 0.4868, "step": 6017 }, { "epoch": 0.44726867335562986, "grad_norm": 1.6330251865983518, "learning_rate": 1.9265359091293128e-05, "loss": 0.6879, "step": 6018 }, { "epoch": 0.4473429951690821, "grad_norm": 2.321177527580637, "learning_rate": 1.926505720544635e-05, "loss": 0.981, "step": 6019 }, { "epoch": 0.4474173169825344, "grad_norm": 2.4258300642251456, "learning_rate": 1.926475525995162e-05, "loss": 0.7443, "step": 6020 }, { "epoch": 0.44749163879598663, "grad_norm": 2.7683906885339296, "learning_rate": 1.9264453254810868e-05, "loss": 1.0166, "step": 6021 }, { "epoch": 0.4475659606094389, "grad_norm": 2.067567147598973, "learning_rate": 1.926415119002605e-05, "loss": 1.0052, "step": 6022 }, { "epoch": 0.4476402824228911, "grad_norm": 2.2789508094895305, "learning_rate": 1.926384906559911e-05, "loss": 0.9473, "step": 6023 }, { "epoch": 0.44771460423634335, "grad_norm": 2.3604478440305394, "learning_rate": 1.9263546881531988e-05, "loss": 0.9996, "step": 6024 }, { "epoch": 0.4477889260497956, "grad_norm": 2.3710105582240675, "learning_rate": 1.9263244637826634e-05, "loss": 0.8186, "step": 6025 }, { "epoch": 0.4478632478632479, "grad_norm": 2.4080283204988775, "learning_rate": 1.926294233448499e-05, "loss": 1.0843, "step": 6026 }, { "epoch": 0.4479375696767001, "grad_norm": 2.0912510807047826, "learning_rate": 1.9262639971509004e-05, "loss": 0.83, "step": 6027 }, { "epoch": 0.44801189149015236, "grad_norm": 2.124426085448981, "learning_rate": 1.926233754890062e-05, "loss": 0.7895, "step": 6028 }, { "epoch": 0.4480862133036046, "grad_norm": 2.15207610277202, "learning_rate": 1.9262035066661793e-05, "loss": 0.927, "step": 6029 }, { "epoch": 0.44816053511705684, "grad_norm": 1.7617488555167304, "learning_rate": 1.926173252479446e-05, "loss": 0.7768, "step": 6030 }, { "epoch": 0.44823485693050913, "grad_norm": 1.9921603426632537, "learning_rate": 1.9261429923300577e-05, "loss": 0.6754, "step": 6031 }, { "epoch": 0.44830917874396137, "grad_norm": 1.7690755242190446, "learning_rate": 1.9261127262182086e-05, "loss": 0.7287, "step": 6032 }, { "epoch": 0.4483835005574136, "grad_norm": 2.252568983387503, "learning_rate": 1.9260824541440942e-05, "loss": 0.8903, "step": 6033 }, { "epoch": 0.44845782237086584, "grad_norm": 1.9899747021634682, "learning_rate": 1.9260521761079087e-05, "loss": 0.8766, "step": 6034 }, { "epoch": 0.4485321441843181, "grad_norm": 2.239304291074488, "learning_rate": 1.9260218921098475e-05, "loss": 1.056, "step": 6035 }, { "epoch": 0.4486064659977703, "grad_norm": 2.8332500212203677, "learning_rate": 1.9259916021501053e-05, "loss": 1.0473, "step": 6036 }, { "epoch": 0.4486807878112226, "grad_norm": 2.9422573165699935, "learning_rate": 1.925961306228877e-05, "loss": 0.9687, "step": 6037 }, { "epoch": 0.44875510962467485, "grad_norm": 2.275327846507254, "learning_rate": 1.925931004346358e-05, "loss": 0.9604, "step": 6038 }, { "epoch": 0.4488294314381271, "grad_norm": 2.3601896095192023, "learning_rate": 1.9259006965027435e-05, "loss": 0.9541, "step": 6039 }, { "epoch": 0.44890375325157933, "grad_norm": 2.296458737429887, "learning_rate": 1.9258703826982282e-05, "loss": 0.8747, "step": 6040 }, { "epoch": 0.44897807506503157, "grad_norm": 2.1376964239444627, "learning_rate": 1.9258400629330072e-05, "loss": 0.927, "step": 6041 }, { "epoch": 0.44905239687848386, "grad_norm": 2.3716123320499443, "learning_rate": 1.925809737207276e-05, "loss": 0.9849, "step": 6042 }, { "epoch": 0.4491267186919361, "grad_norm": 1.8399151959872837, "learning_rate": 1.92577940552123e-05, "loss": 0.7081, "step": 6043 }, { "epoch": 0.44920104050538834, "grad_norm": 1.9859774560363186, "learning_rate": 1.925749067875064e-05, "loss": 0.7686, "step": 6044 }, { "epoch": 0.4492753623188406, "grad_norm": 1.6122844527166582, "learning_rate": 1.9257187242689736e-05, "loss": 0.7074, "step": 6045 }, { "epoch": 0.4493496841322928, "grad_norm": 2.0023561383896467, "learning_rate": 1.925688374703154e-05, "loss": 0.8405, "step": 6046 }, { "epoch": 0.44942400594574505, "grad_norm": 2.371318060255557, "learning_rate": 1.9256580191778007e-05, "loss": 0.7447, "step": 6047 }, { "epoch": 0.44949832775919735, "grad_norm": 2.3186023553559694, "learning_rate": 1.9256276576931093e-05, "loss": 0.8805, "step": 6048 }, { "epoch": 0.4495726495726496, "grad_norm": 2.6268058905373954, "learning_rate": 1.9255972902492748e-05, "loss": 0.8955, "step": 6049 }, { "epoch": 0.4496469713861018, "grad_norm": 2.2337024059657296, "learning_rate": 1.925566916846493e-05, "loss": 0.9757, "step": 6050 }, { "epoch": 0.44972129319955406, "grad_norm": 2.0246537227299766, "learning_rate": 1.92553653748496e-05, "loss": 0.7837, "step": 6051 }, { "epoch": 0.4497956150130063, "grad_norm": 1.8445001911955894, "learning_rate": 1.9255061521648698e-05, "loss": 0.8548, "step": 6052 }, { "epoch": 0.44986993682645854, "grad_norm": 2.2028827767712174, "learning_rate": 1.92547576088642e-05, "loss": 0.9556, "step": 6053 }, { "epoch": 0.44994425863991083, "grad_norm": 2.346786003011163, "learning_rate": 1.9254453636498044e-05, "loss": 0.9167, "step": 6054 }, { "epoch": 0.45001858045336307, "grad_norm": 2.7865882849218417, "learning_rate": 1.92541496045522e-05, "loss": 1.1515, "step": 6055 }, { "epoch": 0.4500929022668153, "grad_norm": 2.0100766741683747, "learning_rate": 1.925384551302862e-05, "loss": 0.9312, "step": 6056 }, { "epoch": 0.45016722408026755, "grad_norm": 1.837385509766372, "learning_rate": 1.9253541361929264e-05, "loss": 0.6102, "step": 6057 }, { "epoch": 0.4502415458937198, "grad_norm": 1.8217628903629433, "learning_rate": 1.9253237151256088e-05, "loss": 0.8018, "step": 6058 }, { "epoch": 0.4503158677071721, "grad_norm": 3.65118704273555, "learning_rate": 1.925293288101105e-05, "loss": 0.6464, "step": 6059 }, { "epoch": 0.4503901895206243, "grad_norm": 1.9485636955198142, "learning_rate": 1.9252628551196112e-05, "loss": 0.826, "step": 6060 }, { "epoch": 0.45046451133407656, "grad_norm": 3.058262420255344, "learning_rate": 1.925232416181323e-05, "loss": 0.945, "step": 6061 }, { "epoch": 0.4505388331475288, "grad_norm": 2.275708125786642, "learning_rate": 1.925201971286437e-05, "loss": 0.9021, "step": 6062 }, { "epoch": 0.45061315496098103, "grad_norm": 2.996996958794133, "learning_rate": 1.925171520435148e-05, "loss": 0.835, "step": 6063 }, { "epoch": 0.45068747677443327, "grad_norm": 1.695088920683738, "learning_rate": 1.9251410636276534e-05, "loss": 0.6983, "step": 6064 }, { "epoch": 0.45076179858788556, "grad_norm": 2.0166871379068803, "learning_rate": 1.9251106008641483e-05, "loss": 0.9753, "step": 6065 }, { "epoch": 0.4508361204013378, "grad_norm": 2.666428302489777, "learning_rate": 1.925080132144829e-05, "loss": 0.9713, "step": 6066 }, { "epoch": 0.45091044221479004, "grad_norm": 2.093218217631844, "learning_rate": 1.925049657469892e-05, "loss": 0.8594, "step": 6067 }, { "epoch": 0.4509847640282423, "grad_norm": 2.882696213180492, "learning_rate": 1.9250191768395333e-05, "loss": 0.6139, "step": 6068 }, { "epoch": 0.4510590858416945, "grad_norm": 2.973011154119158, "learning_rate": 1.9249886902539492e-05, "loss": 0.8266, "step": 6069 }, { "epoch": 0.4511334076551468, "grad_norm": 2.608475567108913, "learning_rate": 1.9249581977133356e-05, "loss": 0.876, "step": 6070 }, { "epoch": 0.45120772946859905, "grad_norm": 2.613433631631627, "learning_rate": 1.9249276992178897e-05, "loss": 0.5158, "step": 6071 }, { "epoch": 0.4512820512820513, "grad_norm": 2.216974035679971, "learning_rate": 1.924897194767807e-05, "loss": 0.6307, "step": 6072 }, { "epoch": 0.4513563730955035, "grad_norm": 2.2949072726235205, "learning_rate": 1.9248666843632836e-05, "loss": 0.8757, "step": 6073 }, { "epoch": 0.45143069490895577, "grad_norm": 2.883153212117488, "learning_rate": 1.9248361680045172e-05, "loss": 0.9941, "step": 6074 }, { "epoch": 0.451505016722408, "grad_norm": 2.0002964905001233, "learning_rate": 1.9248056456917032e-05, "loss": 1.0374, "step": 6075 }, { "epoch": 0.4515793385358603, "grad_norm": 2.7233529358998068, "learning_rate": 1.9247751174250386e-05, "loss": 0.9131, "step": 6076 }, { "epoch": 0.45165366034931254, "grad_norm": 2.090408738997147, "learning_rate": 1.9247445832047197e-05, "loss": 0.9263, "step": 6077 }, { "epoch": 0.4517279821627648, "grad_norm": 1.9609545851052759, "learning_rate": 1.924714043030943e-05, "loss": 0.7966, "step": 6078 }, { "epoch": 0.451802303976217, "grad_norm": 2.25767189035412, "learning_rate": 1.9246834969039058e-05, "loss": 0.8682, "step": 6079 }, { "epoch": 0.45187662578966925, "grad_norm": 2.4900522437101054, "learning_rate": 1.9246529448238037e-05, "loss": 0.8289, "step": 6080 }, { "epoch": 0.4519509476031215, "grad_norm": 2.088271603636692, "learning_rate": 1.924622386790834e-05, "loss": 0.6786, "step": 6081 }, { "epoch": 0.4520252694165738, "grad_norm": 1.8216973591308836, "learning_rate": 1.9245918228051933e-05, "loss": 0.8671, "step": 6082 }, { "epoch": 0.452099591230026, "grad_norm": 2.726155245582583, "learning_rate": 1.9245612528670786e-05, "loss": 0.8746, "step": 6083 }, { "epoch": 0.45217391304347826, "grad_norm": 2.151911200549273, "learning_rate": 1.9245306769766863e-05, "loss": 0.9247, "step": 6084 }, { "epoch": 0.4522482348569305, "grad_norm": 1.862612683252068, "learning_rate": 1.9245000951342137e-05, "loss": 1.063, "step": 6085 }, { "epoch": 0.45232255667038274, "grad_norm": 2.0355606904019212, "learning_rate": 1.9244695073398575e-05, "loss": 0.8869, "step": 6086 }, { "epoch": 0.45239687848383503, "grad_norm": 2.086974353285175, "learning_rate": 1.9244389135938144e-05, "loss": 0.9845, "step": 6087 }, { "epoch": 0.45247120029728727, "grad_norm": 2.3270721853281198, "learning_rate": 1.9244083138962817e-05, "loss": 0.9301, "step": 6088 }, { "epoch": 0.4525455221107395, "grad_norm": 2.061500339182194, "learning_rate": 1.9243777082474562e-05, "loss": 0.8965, "step": 6089 }, { "epoch": 0.45261984392419174, "grad_norm": 2.6297389246966714, "learning_rate": 1.9243470966475348e-05, "loss": 0.939, "step": 6090 }, { "epoch": 0.452694165737644, "grad_norm": 2.27604666153921, "learning_rate": 1.924316479096715e-05, "loss": 1.082, "step": 6091 }, { "epoch": 0.4527684875510962, "grad_norm": 2.045501778623936, "learning_rate": 1.9242858555951932e-05, "loss": 0.8992, "step": 6092 }, { "epoch": 0.4528428093645485, "grad_norm": 1.8629608829274247, "learning_rate": 1.9242552261431674e-05, "loss": 0.7895, "step": 6093 }, { "epoch": 0.45291713117800075, "grad_norm": 2.1620505922961404, "learning_rate": 1.9242245907408344e-05, "loss": 0.868, "step": 6094 }, { "epoch": 0.452991452991453, "grad_norm": 2.619140028889386, "learning_rate": 1.9241939493883915e-05, "loss": 1.1425, "step": 6095 }, { "epoch": 0.45306577480490523, "grad_norm": 1.6773741002265263, "learning_rate": 1.924163302086036e-05, "loss": 0.8801, "step": 6096 }, { "epoch": 0.45314009661835747, "grad_norm": 1.9615595208865375, "learning_rate": 1.9241326488339647e-05, "loss": 0.8313, "step": 6097 }, { "epoch": 0.45321441843180976, "grad_norm": 1.9677515895485453, "learning_rate": 1.9241019896323756e-05, "loss": 1.0019, "step": 6098 }, { "epoch": 0.453288740245262, "grad_norm": 1.9884685115521927, "learning_rate": 1.924071324481466e-05, "loss": 0.8183, "step": 6099 }, { "epoch": 0.45336306205871424, "grad_norm": 2.895247600882343, "learning_rate": 1.924040653381433e-05, "loss": 0.8843, "step": 6100 }, { "epoch": 0.4534373838721665, "grad_norm": 2.5609564241079674, "learning_rate": 1.924009976332474e-05, "loss": 0.723, "step": 6101 }, { "epoch": 0.4535117056856187, "grad_norm": 2.508515411431939, "learning_rate": 1.923979293334787e-05, "loss": 0.8023, "step": 6102 }, { "epoch": 0.45358602749907095, "grad_norm": 2.5358616746352483, "learning_rate": 1.923948604388569e-05, "loss": 0.9019, "step": 6103 }, { "epoch": 0.45366034931252325, "grad_norm": 2.4071213688709867, "learning_rate": 1.9239179094940182e-05, "loss": 1.153, "step": 6104 }, { "epoch": 0.4537346711259755, "grad_norm": 3.4072719731342316, "learning_rate": 1.9238872086513316e-05, "loss": 0.6991, "step": 6105 }, { "epoch": 0.4538089929394277, "grad_norm": 2.1542623934996934, "learning_rate": 1.923856501860707e-05, "loss": 0.8399, "step": 6106 }, { "epoch": 0.45388331475287996, "grad_norm": 1.8255158438704775, "learning_rate": 1.9238257891223423e-05, "loss": 1.0032, "step": 6107 }, { "epoch": 0.4539576365663322, "grad_norm": 2.0746226141198574, "learning_rate": 1.9237950704364353e-05, "loss": 0.8567, "step": 6108 }, { "epoch": 0.45403195837978444, "grad_norm": 2.5766191114121946, "learning_rate": 1.9237643458031834e-05, "loss": 0.7723, "step": 6109 }, { "epoch": 0.45410628019323673, "grad_norm": 1.7558398869117813, "learning_rate": 1.9237336152227845e-05, "loss": 0.4396, "step": 6110 }, { "epoch": 0.45418060200668897, "grad_norm": 1.7558495812902013, "learning_rate": 1.9237028786954366e-05, "loss": 0.6737, "step": 6111 }, { "epoch": 0.4542549238201412, "grad_norm": 2.615934376780606, "learning_rate": 1.9236721362213375e-05, "loss": 0.7814, "step": 6112 }, { "epoch": 0.45432924563359345, "grad_norm": 2.609196050288469, "learning_rate": 1.923641387800685e-05, "loss": 0.9439, "step": 6113 }, { "epoch": 0.4544035674470457, "grad_norm": 2.445236381760316, "learning_rate": 1.9236106334336773e-05, "loss": 0.8925, "step": 6114 }, { "epoch": 0.454477889260498, "grad_norm": 2.3110007364543366, "learning_rate": 1.923579873120512e-05, "loss": 0.7635, "step": 6115 }, { "epoch": 0.4545522110739502, "grad_norm": 1.990713776601368, "learning_rate": 1.9235491068613878e-05, "loss": 0.8031, "step": 6116 }, { "epoch": 0.45462653288740246, "grad_norm": 2.080910243419084, "learning_rate": 1.923518334656502e-05, "loss": 0.7186, "step": 6117 }, { "epoch": 0.4547008547008547, "grad_norm": 2.481771417897615, "learning_rate": 1.923487556506053e-05, "loss": 0.9599, "step": 6118 }, { "epoch": 0.45477517651430693, "grad_norm": 2.3578084359771774, "learning_rate": 1.9234567724102395e-05, "loss": 0.985, "step": 6119 }, { "epoch": 0.45484949832775917, "grad_norm": 1.9979619766131143, "learning_rate": 1.923425982369259e-05, "loss": 0.6546, "step": 6120 }, { "epoch": 0.45492382014121147, "grad_norm": 2.2334605808811867, "learning_rate": 1.92339518638331e-05, "loss": 0.8023, "step": 6121 }, { "epoch": 0.4549981419546637, "grad_norm": 1.8279503016402379, "learning_rate": 1.9233643844525905e-05, "loss": 0.8148, "step": 6122 }, { "epoch": 0.45507246376811594, "grad_norm": 2.6449989121107684, "learning_rate": 1.923333576577299e-05, "loss": 0.8893, "step": 6123 }, { "epoch": 0.4551467855815682, "grad_norm": 2.0480687074089903, "learning_rate": 1.923302762757634e-05, "loss": 0.7281, "step": 6124 }, { "epoch": 0.4552211073950204, "grad_norm": 2.257356201170704, "learning_rate": 1.9232719429937937e-05, "loss": 0.8425, "step": 6125 }, { "epoch": 0.4552954292084727, "grad_norm": 2.183237242476477, "learning_rate": 1.9232411172859766e-05, "loss": 0.7448, "step": 6126 }, { "epoch": 0.45536975102192495, "grad_norm": 8.561792309557031, "learning_rate": 1.9232102856343812e-05, "loss": 0.7749, "step": 6127 }, { "epoch": 0.4554440728353772, "grad_norm": 2.0200212048122754, "learning_rate": 1.9231794480392057e-05, "loss": 0.7367, "step": 6128 }, { "epoch": 0.45551839464882943, "grad_norm": 2.0126550270967734, "learning_rate": 1.9231486045006488e-05, "loss": 0.8163, "step": 6129 }, { "epoch": 0.45559271646228167, "grad_norm": 1.9446184286485702, "learning_rate": 1.9231177550189092e-05, "loss": 0.9582, "step": 6130 }, { "epoch": 0.4556670382757339, "grad_norm": 1.9331997419589235, "learning_rate": 1.9230868995941853e-05, "loss": 0.9008, "step": 6131 }, { "epoch": 0.4557413600891862, "grad_norm": 2.1672246963943627, "learning_rate": 1.9230560382266758e-05, "loss": 0.9177, "step": 6132 }, { "epoch": 0.45581568190263844, "grad_norm": 2.282852450182326, "learning_rate": 1.9230251709165793e-05, "loss": 0.9569, "step": 6133 }, { "epoch": 0.4558900037160907, "grad_norm": 2.9956269375512514, "learning_rate": 1.922994297664095e-05, "loss": 0.924, "step": 6134 }, { "epoch": 0.4559643255295429, "grad_norm": 1.8462795238468552, "learning_rate": 1.922963418469421e-05, "loss": 0.8326, "step": 6135 }, { "epoch": 0.45603864734299515, "grad_norm": 3.768373933289176, "learning_rate": 1.9229325333327564e-05, "loss": 0.9989, "step": 6136 }, { "epoch": 0.4561129691564474, "grad_norm": 2.8845884831850532, "learning_rate": 1.9229016422543e-05, "loss": 0.8855, "step": 6137 }, { "epoch": 0.4561872909698997, "grad_norm": 2.402685831141557, "learning_rate": 1.922870745234251e-05, "loss": 0.8608, "step": 6138 }, { "epoch": 0.4562616127833519, "grad_norm": 6.650282695090408, "learning_rate": 1.9228398422728077e-05, "loss": 0.9364, "step": 6139 }, { "epoch": 0.45633593459680416, "grad_norm": 2.652042588257419, "learning_rate": 1.9228089333701697e-05, "loss": 0.888, "step": 6140 }, { "epoch": 0.4564102564102564, "grad_norm": 2.534715367868423, "learning_rate": 1.9227780185265358e-05, "loss": 0.7437, "step": 6141 }, { "epoch": 0.45648457822370864, "grad_norm": 6.64754337507927, "learning_rate": 1.922747097742104e-05, "loss": 0.9533, "step": 6142 }, { "epoch": 0.45655890003716093, "grad_norm": 5.39630744038646, "learning_rate": 1.922716171017075e-05, "loss": 0.684, "step": 6143 }, { "epoch": 0.45663322185061317, "grad_norm": 3.8441495398884844, "learning_rate": 1.9226852383516474e-05, "loss": 0.7025, "step": 6144 }, { "epoch": 0.4567075436640654, "grad_norm": 3.3446930137072943, "learning_rate": 1.9226542997460196e-05, "loss": 0.7281, "step": 6145 }, { "epoch": 0.45678186547751765, "grad_norm": 3.771150816621577, "learning_rate": 1.9226233552003914e-05, "loss": 0.8272, "step": 6146 }, { "epoch": 0.4568561872909699, "grad_norm": 1.8823812552595032, "learning_rate": 1.922592404714962e-05, "loss": 0.8752, "step": 6147 }, { "epoch": 0.4569305091044221, "grad_norm": 2.674014761843747, "learning_rate": 1.9225614482899307e-05, "loss": 0.8063, "step": 6148 }, { "epoch": 0.4570048309178744, "grad_norm": 2.0051602451196726, "learning_rate": 1.9225304859254963e-05, "loss": 0.9506, "step": 6149 }, { "epoch": 0.45707915273132665, "grad_norm": 2.652603809977357, "learning_rate": 1.9224995176218587e-05, "loss": 1.0268, "step": 6150 }, { "epoch": 0.4571534745447789, "grad_norm": 2.3457785687747426, "learning_rate": 1.922468543379217e-05, "loss": 0.8351, "step": 6151 }, { "epoch": 0.45722779635823113, "grad_norm": 2.419917228802435, "learning_rate": 1.9224375631977706e-05, "loss": 0.9139, "step": 6152 }, { "epoch": 0.45730211817168337, "grad_norm": 2.465877505740848, "learning_rate": 1.9224065770777192e-05, "loss": 0.8909, "step": 6153 }, { "epoch": 0.45737643998513566, "grad_norm": 5.180434722896198, "learning_rate": 1.922375585019262e-05, "loss": 0.9296, "step": 6154 }, { "epoch": 0.4574507617985879, "grad_norm": 1.841861957178239, "learning_rate": 1.922344587022599e-05, "loss": 0.7065, "step": 6155 }, { "epoch": 0.45752508361204014, "grad_norm": 10.01823719415513, "learning_rate": 1.9223135830879292e-05, "loss": 1.1837, "step": 6156 }, { "epoch": 0.4575994054254924, "grad_norm": 2.301511076993371, "learning_rate": 1.922282573215452e-05, "loss": 0.8768, "step": 6157 }, { "epoch": 0.4576737272389446, "grad_norm": 2.153360220775173, "learning_rate": 1.9222515574053682e-05, "loss": 0.9355, "step": 6158 }, { "epoch": 0.45774804905239685, "grad_norm": 1.7138344386905138, "learning_rate": 1.9222205356578764e-05, "loss": 0.8532, "step": 6159 }, { "epoch": 0.45782237086584915, "grad_norm": 2.3796912471218623, "learning_rate": 1.9221895079731766e-05, "loss": 0.9304, "step": 6160 }, { "epoch": 0.4578966926793014, "grad_norm": 1.9333946738920267, "learning_rate": 1.9221584743514684e-05, "loss": 0.9853, "step": 6161 }, { "epoch": 0.4579710144927536, "grad_norm": 2.1220933220618083, "learning_rate": 1.922127434792952e-05, "loss": 0.9499, "step": 6162 }, { "epoch": 0.45804533630620586, "grad_norm": 2.0698908815868484, "learning_rate": 1.9220963892978267e-05, "loss": 0.7933, "step": 6163 }, { "epoch": 0.4581196581196581, "grad_norm": 2.2056276370153833, "learning_rate": 1.9220653378662933e-05, "loss": 0.8406, "step": 6164 }, { "epoch": 0.45819397993311034, "grad_norm": 2.0102400872219888, "learning_rate": 1.9220342804985504e-05, "loss": 0.8008, "step": 6165 }, { "epoch": 0.45826830174656263, "grad_norm": 2.2163885356119595, "learning_rate": 1.9220032171947993e-05, "loss": 0.7463, "step": 6166 }, { "epoch": 0.4583426235600149, "grad_norm": 2.3878829896201443, "learning_rate": 1.9219721479552388e-05, "loss": 0.949, "step": 6167 }, { "epoch": 0.4584169453734671, "grad_norm": 2.0827634052833996, "learning_rate": 1.92194107278007e-05, "loss": 0.8935, "step": 6168 }, { "epoch": 0.45849126718691935, "grad_norm": 2.4317262682663223, "learning_rate": 1.921909991669492e-05, "loss": 0.9994, "step": 6169 }, { "epoch": 0.4585655890003716, "grad_norm": 2.3484245542961046, "learning_rate": 1.9218789046237055e-05, "loss": 0.9539, "step": 6170 }, { "epoch": 0.4586399108138239, "grad_norm": 2.1315613242644686, "learning_rate": 1.92184781164291e-05, "loss": 0.9183, "step": 6171 }, { "epoch": 0.4587142326272761, "grad_norm": 2.0942211198991956, "learning_rate": 1.9218167127273065e-05, "loss": 1.0527, "step": 6172 }, { "epoch": 0.45878855444072836, "grad_norm": 2.193628408266317, "learning_rate": 1.9217856078770948e-05, "loss": 0.8147, "step": 6173 }, { "epoch": 0.4588628762541806, "grad_norm": 2.270821541098445, "learning_rate": 1.9217544970924754e-05, "loss": 0.8769, "step": 6174 }, { "epoch": 0.45893719806763283, "grad_norm": 2.222098474721122, "learning_rate": 1.921723380373648e-05, "loss": 1.1429, "step": 6175 }, { "epoch": 0.4590115198810851, "grad_norm": 2.0493860853352825, "learning_rate": 1.9216922577208134e-05, "loss": 0.8354, "step": 6176 }, { "epoch": 0.45908584169453737, "grad_norm": 2.0931622414865267, "learning_rate": 1.921661129134172e-05, "loss": 1.0271, "step": 6177 }, { "epoch": 0.4591601635079896, "grad_norm": 2.4398625092263373, "learning_rate": 1.921629994613924e-05, "loss": 1.0125, "step": 6178 }, { "epoch": 0.45923448532144184, "grad_norm": 3.4403194105774877, "learning_rate": 1.92159885416027e-05, "loss": 0.7613, "step": 6179 }, { "epoch": 0.4593088071348941, "grad_norm": 1.8833485317930234, "learning_rate": 1.9215677077734104e-05, "loss": 0.7933, "step": 6180 }, { "epoch": 0.4593831289483463, "grad_norm": 2.2957191371653396, "learning_rate": 1.9215365554535455e-05, "loss": 0.9184, "step": 6181 }, { "epoch": 0.4594574507617986, "grad_norm": 2.0392678163099056, "learning_rate": 1.9215053972008762e-05, "loss": 0.7951, "step": 6182 }, { "epoch": 0.45953177257525085, "grad_norm": 2.2186578098502223, "learning_rate": 1.921474233015603e-05, "loss": 1.0714, "step": 6183 }, { "epoch": 0.4596060943887031, "grad_norm": 2.045480381500454, "learning_rate": 1.9214430628979262e-05, "loss": 0.8793, "step": 6184 }, { "epoch": 0.45968041620215533, "grad_norm": 1.7808477838730246, "learning_rate": 1.921411886848047e-05, "loss": 0.7632, "step": 6185 }, { "epoch": 0.45975473801560757, "grad_norm": 1.951457460766005, "learning_rate": 1.921380704866166e-05, "loss": 0.9822, "step": 6186 }, { "epoch": 0.4598290598290598, "grad_norm": 2.485228981648891, "learning_rate": 1.921349516952484e-05, "loss": 0.8271, "step": 6187 }, { "epoch": 0.4599033816425121, "grad_norm": 1.7350216267090652, "learning_rate": 1.921318323107201e-05, "loss": 0.8733, "step": 6188 }, { "epoch": 0.45997770345596434, "grad_norm": 2.061475716091344, "learning_rate": 1.9212871233305187e-05, "loss": 0.6793, "step": 6189 }, { "epoch": 0.4600520252694166, "grad_norm": 2.249390017599531, "learning_rate": 1.921255917622638e-05, "loss": 0.9967, "step": 6190 }, { "epoch": 0.4601263470828688, "grad_norm": 2.314122059615112, "learning_rate": 1.9212247059837593e-05, "loss": 0.9318, "step": 6191 }, { "epoch": 0.46020066889632105, "grad_norm": 2.382419765607011, "learning_rate": 1.921193488414084e-05, "loss": 0.883, "step": 6192 }, { "epoch": 0.46027499070977335, "grad_norm": 1.9091300914021843, "learning_rate": 1.9211622649138123e-05, "loss": 0.9039, "step": 6193 }, { "epoch": 0.4603493125232256, "grad_norm": 2.1513203302222723, "learning_rate": 1.921131035483146e-05, "loss": 0.937, "step": 6194 }, { "epoch": 0.4604236343366778, "grad_norm": 1.9548002982893735, "learning_rate": 1.921099800122286e-05, "loss": 0.9566, "step": 6195 }, { "epoch": 0.46049795615013006, "grad_norm": 2.2227416060228973, "learning_rate": 1.921068558831433e-05, "loss": 0.9048, "step": 6196 }, { "epoch": 0.4605722779635823, "grad_norm": 1.942750382567767, "learning_rate": 1.9210373116107886e-05, "loss": 0.9493, "step": 6197 }, { "epoch": 0.46064659977703454, "grad_norm": 2.2214064286754183, "learning_rate": 1.9210060584605536e-05, "loss": 0.7215, "step": 6198 }, { "epoch": 0.46072092159048683, "grad_norm": 2.0832149925742875, "learning_rate": 1.92097479938093e-05, "loss": 1.0801, "step": 6199 }, { "epoch": 0.46079524340393907, "grad_norm": 1.8077459876318547, "learning_rate": 1.9209435343721178e-05, "loss": 0.9361, "step": 6200 }, { "epoch": 0.4608695652173913, "grad_norm": 2.7348249465570005, "learning_rate": 1.920912263434319e-05, "loss": 0.7011, "step": 6201 }, { "epoch": 0.46094388703084355, "grad_norm": 3.013022948978227, "learning_rate": 1.9208809865677348e-05, "loss": 1.0059, "step": 6202 }, { "epoch": 0.4610182088442958, "grad_norm": 2.2175407037066677, "learning_rate": 1.9208497037725668e-05, "loss": 1.059, "step": 6203 }, { "epoch": 0.461092530657748, "grad_norm": 2.038539802661762, "learning_rate": 1.9208184150490158e-05, "loss": 0.9728, "step": 6204 }, { "epoch": 0.4611668524712003, "grad_norm": 2.4665916443205766, "learning_rate": 1.9207871203972842e-05, "loss": 1.1257, "step": 6205 }, { "epoch": 0.46124117428465256, "grad_norm": 2.0538828357046297, "learning_rate": 1.9207558198175725e-05, "loss": 0.8819, "step": 6206 }, { "epoch": 0.4613154960981048, "grad_norm": 1.8543022777905973, "learning_rate": 1.9207245133100827e-05, "loss": 0.5885, "step": 6207 }, { "epoch": 0.46138981791155703, "grad_norm": 1.8582320688983047, "learning_rate": 1.9206932008750164e-05, "loss": 0.8163, "step": 6208 }, { "epoch": 0.46146413972500927, "grad_norm": 1.9864009283565203, "learning_rate": 1.9206618825125747e-05, "loss": 0.8289, "step": 6209 }, { "epoch": 0.46153846153846156, "grad_norm": 1.8981535759068038, "learning_rate": 1.9206305582229597e-05, "loss": 0.9358, "step": 6210 }, { "epoch": 0.4616127833519138, "grad_norm": 2.086967049466385, "learning_rate": 1.9205992280063733e-05, "loss": 1.0478, "step": 6211 }, { "epoch": 0.46168710516536604, "grad_norm": 1.8358538828540212, "learning_rate": 1.9205678918630163e-05, "loss": 0.7396, "step": 6212 }, { "epoch": 0.4617614269788183, "grad_norm": 2.0029771153002054, "learning_rate": 1.9205365497930915e-05, "loss": 0.8871, "step": 6213 }, { "epoch": 0.4618357487922705, "grad_norm": 1.9961278298308942, "learning_rate": 1.9205052017967997e-05, "loss": 0.8319, "step": 6214 }, { "epoch": 0.46191007060572276, "grad_norm": 1.9948276796789248, "learning_rate": 1.9204738478743435e-05, "loss": 0.7185, "step": 6215 }, { "epoch": 0.46198439241917505, "grad_norm": 2.0731550221732236, "learning_rate": 1.9204424880259242e-05, "loss": 0.8023, "step": 6216 }, { "epoch": 0.4620587142326273, "grad_norm": 2.345485380060289, "learning_rate": 1.920411122251744e-05, "loss": 1.1054, "step": 6217 }, { "epoch": 0.4621330360460795, "grad_norm": 1.994386072783293, "learning_rate": 1.9203797505520046e-05, "loss": 0.8892, "step": 6218 }, { "epoch": 0.46220735785953176, "grad_norm": 2.3032202751497466, "learning_rate": 1.9203483729269083e-05, "loss": 1.1142, "step": 6219 }, { "epoch": 0.462281679672984, "grad_norm": 2.4222148844294895, "learning_rate": 1.920316989376657e-05, "loss": 0.8387, "step": 6220 }, { "epoch": 0.4623560014864363, "grad_norm": 2.0194859468209043, "learning_rate": 1.9202855999014526e-05, "loss": 0.8852, "step": 6221 }, { "epoch": 0.46243032329988853, "grad_norm": 2.1762221777399193, "learning_rate": 1.9202542045014972e-05, "loss": 0.9687, "step": 6222 }, { "epoch": 0.4625046451133408, "grad_norm": 2.177578820395841, "learning_rate": 1.920222803176993e-05, "loss": 1.0583, "step": 6223 }, { "epoch": 0.462578966926793, "grad_norm": 1.9946684915399968, "learning_rate": 1.920191395928142e-05, "loss": 0.8828, "step": 6224 }, { "epoch": 0.46265328874024525, "grad_norm": 2.24903848314471, "learning_rate": 1.9201599827551467e-05, "loss": 0.9676, "step": 6225 }, { "epoch": 0.4627276105536975, "grad_norm": 1.8254906916188067, "learning_rate": 1.9201285636582093e-05, "loss": 0.9318, "step": 6226 }, { "epoch": 0.4628019323671498, "grad_norm": 1.8414122739135963, "learning_rate": 1.9200971386375318e-05, "loss": 0.829, "step": 6227 }, { "epoch": 0.462876254180602, "grad_norm": 1.9416770574935875, "learning_rate": 1.9200657076933166e-05, "loss": 0.7666, "step": 6228 }, { "epoch": 0.46295057599405426, "grad_norm": 1.7869377028161293, "learning_rate": 1.9200342708257662e-05, "loss": 0.9131, "step": 6229 }, { "epoch": 0.4630248978075065, "grad_norm": 2.0023953618600063, "learning_rate": 1.9200028280350832e-05, "loss": 0.94, "step": 6230 }, { "epoch": 0.46309921962095874, "grad_norm": 2.335661841306866, "learning_rate": 1.919971379321469e-05, "loss": 0.9196, "step": 6231 }, { "epoch": 0.463173541434411, "grad_norm": 2.3275798475368976, "learning_rate": 1.9199399246851274e-05, "loss": 0.9566, "step": 6232 }, { "epoch": 0.46324786324786327, "grad_norm": 2.17448207723703, "learning_rate": 1.9199084641262603e-05, "loss": 0.8332, "step": 6233 }, { "epoch": 0.4633221850613155, "grad_norm": 2.230058901950219, "learning_rate": 1.9198769976450696e-05, "loss": 1.0266, "step": 6234 }, { "epoch": 0.46339650687476774, "grad_norm": 1.9365483948596087, "learning_rate": 1.9198455252417593e-05, "loss": 0.8319, "step": 6235 }, { "epoch": 0.46347082868822, "grad_norm": 2.3019379153800252, "learning_rate": 1.919814046916531e-05, "loss": 1.0149, "step": 6236 }, { "epoch": 0.4635451505016722, "grad_norm": 1.664781870731222, "learning_rate": 1.9197825626695875e-05, "loss": 0.7542, "step": 6237 }, { "epoch": 0.4636194723151245, "grad_norm": 2.648504595259321, "learning_rate": 1.9197510725011313e-05, "loss": 0.9666, "step": 6238 }, { "epoch": 0.46369379412857675, "grad_norm": 1.8975383436976958, "learning_rate": 1.9197195764113656e-05, "loss": 0.5418, "step": 6239 }, { "epoch": 0.463768115942029, "grad_norm": 7.988413932054067, "learning_rate": 1.9196880744004933e-05, "loss": 1.1483, "step": 6240 }, { "epoch": 0.46384243775548123, "grad_norm": 2.004734500649948, "learning_rate": 1.9196565664687167e-05, "loss": 0.7658, "step": 6241 }, { "epoch": 0.46391675956893347, "grad_norm": 1.754053770979988, "learning_rate": 1.9196250526162387e-05, "loss": 0.9496, "step": 6242 }, { "epoch": 0.4639910813823857, "grad_norm": 2.0781695579343937, "learning_rate": 1.9195935328432623e-05, "loss": 0.8046, "step": 6243 }, { "epoch": 0.464065403195838, "grad_norm": 1.8373303269012975, "learning_rate": 1.919562007149991e-05, "loss": 0.6831, "step": 6244 }, { "epoch": 0.46413972500929024, "grad_norm": 2.068686884135119, "learning_rate": 1.9195304755366266e-05, "loss": 0.8998, "step": 6245 }, { "epoch": 0.4642140468227425, "grad_norm": 2.4849830361674656, "learning_rate": 1.919498938003373e-05, "loss": 0.9548, "step": 6246 }, { "epoch": 0.4642883686361947, "grad_norm": 2.200996181271254, "learning_rate": 1.919467394550433e-05, "loss": 0.7222, "step": 6247 }, { "epoch": 0.46436269044964695, "grad_norm": 2.7522498454174005, "learning_rate": 1.9194358451780095e-05, "loss": 0.7232, "step": 6248 }, { "epoch": 0.46443701226309925, "grad_norm": 2.678489137616425, "learning_rate": 1.9194042898863057e-05, "loss": 0.9636, "step": 6249 }, { "epoch": 0.4645113340765515, "grad_norm": 2.0937292377825716, "learning_rate": 1.9193727286755245e-05, "loss": 0.6659, "step": 6250 }, { "epoch": 0.4645856558900037, "grad_norm": 2.4513559906622926, "learning_rate": 1.9193411615458698e-05, "loss": 0.7177, "step": 6251 }, { "epoch": 0.46465997770345596, "grad_norm": 2.0728689516868073, "learning_rate": 1.9193095884975443e-05, "loss": 0.8011, "step": 6252 }, { "epoch": 0.4647342995169082, "grad_norm": 2.32044829696799, "learning_rate": 1.9192780095307516e-05, "loss": 1.1356, "step": 6253 }, { "epoch": 0.46480862133036044, "grad_norm": 2.264583589990819, "learning_rate": 1.9192464246456946e-05, "loss": 0.9195, "step": 6254 }, { "epoch": 0.46488294314381273, "grad_norm": 3.5003025907104672, "learning_rate": 1.9192148338425766e-05, "loss": 0.8818, "step": 6255 }, { "epoch": 0.46495726495726497, "grad_norm": 2.100935469549347, "learning_rate": 1.9191832371216013e-05, "loss": 0.9305, "step": 6256 }, { "epoch": 0.4650315867707172, "grad_norm": 2.0152928587017884, "learning_rate": 1.919151634482972e-05, "loss": 0.8474, "step": 6257 }, { "epoch": 0.46510590858416945, "grad_norm": 2.6649522382881137, "learning_rate": 1.919120025926892e-05, "loss": 1.0794, "step": 6258 }, { "epoch": 0.4651802303976217, "grad_norm": 2.652856436683747, "learning_rate": 1.9190884114535655e-05, "loss": 0.9795, "step": 6259 }, { "epoch": 0.4652545522110739, "grad_norm": 2.979000367072158, "learning_rate": 1.919056791063195e-05, "loss": 0.8684, "step": 6260 }, { "epoch": 0.4653288740245262, "grad_norm": 2.684804734865299, "learning_rate": 1.919025164755985e-05, "loss": 0.8499, "step": 6261 }, { "epoch": 0.46540319583797846, "grad_norm": 2.2592648100457655, "learning_rate": 1.9189935325321384e-05, "loss": 0.8656, "step": 6262 }, { "epoch": 0.4654775176514307, "grad_norm": 2.1686040733276077, "learning_rate": 1.918961894391859e-05, "loss": 0.9124, "step": 6263 }, { "epoch": 0.46555183946488293, "grad_norm": 1.9707284932207465, "learning_rate": 1.9189302503353508e-05, "loss": 0.8398, "step": 6264 }, { "epoch": 0.46562616127833517, "grad_norm": 2.7670555925460474, "learning_rate": 1.9188986003628175e-05, "loss": 0.84, "step": 6265 }, { "epoch": 0.46570048309178746, "grad_norm": 1.956488714273051, "learning_rate": 1.9188669444744624e-05, "loss": 0.8646, "step": 6266 }, { "epoch": 0.4657748049052397, "grad_norm": 1.9335406870067013, "learning_rate": 1.9188352826704895e-05, "loss": 0.8412, "step": 6267 }, { "epoch": 0.46584912671869194, "grad_norm": 1.8767511194619877, "learning_rate": 1.9188036149511027e-05, "loss": 0.7017, "step": 6268 }, { "epoch": 0.4659234485321442, "grad_norm": 2.7917380110819843, "learning_rate": 1.918771941316506e-05, "loss": 0.7555, "step": 6269 }, { "epoch": 0.4659977703455964, "grad_norm": 2.2005727024716597, "learning_rate": 1.9187402617669037e-05, "loss": 0.8985, "step": 6270 }, { "epoch": 0.46607209215904866, "grad_norm": 2.3213705117638463, "learning_rate": 1.918708576302499e-05, "loss": 0.9325, "step": 6271 }, { "epoch": 0.46614641397250095, "grad_norm": 2.888644660090329, "learning_rate": 1.9186768849234958e-05, "loss": 0.9192, "step": 6272 }, { "epoch": 0.4662207357859532, "grad_norm": 2.0488769747514626, "learning_rate": 1.9186451876300988e-05, "loss": 0.7551, "step": 6273 }, { "epoch": 0.4662950575994054, "grad_norm": 2.820257944730131, "learning_rate": 1.9186134844225117e-05, "loss": 1.0326, "step": 6274 }, { "epoch": 0.46636937941285767, "grad_norm": 2.1895861444880365, "learning_rate": 1.9185817753009385e-05, "loss": 0.8527, "step": 6275 }, { "epoch": 0.4664437012263099, "grad_norm": 4.627349236172513, "learning_rate": 1.9185500602655835e-05, "loss": 0.9739, "step": 6276 }, { "epoch": 0.4665180230397622, "grad_norm": 1.9660107193626364, "learning_rate": 1.918518339316651e-05, "loss": 0.6013, "step": 6277 }, { "epoch": 0.46659234485321444, "grad_norm": 2.0702934578792798, "learning_rate": 1.918486612454345e-05, "loss": 0.7611, "step": 6278 }, { "epoch": 0.4666666666666667, "grad_norm": 1.99904562107373, "learning_rate": 1.91845487967887e-05, "loss": 0.7595, "step": 6279 }, { "epoch": 0.4667409884801189, "grad_norm": 2.280125739278052, "learning_rate": 1.9184231409904298e-05, "loss": 0.9352, "step": 6280 }, { "epoch": 0.46681531029357115, "grad_norm": 2.21882407033423, "learning_rate": 1.9183913963892293e-05, "loss": 0.7868, "step": 6281 }, { "epoch": 0.4668896321070234, "grad_norm": 1.9783805846531624, "learning_rate": 1.918359645875473e-05, "loss": 1.012, "step": 6282 }, { "epoch": 0.4669639539204757, "grad_norm": 2.3117331749325922, "learning_rate": 1.9183278894493643e-05, "loss": 0.8033, "step": 6283 }, { "epoch": 0.4670382757339279, "grad_norm": 2.7655836067170503, "learning_rate": 1.9182961271111088e-05, "loss": 0.9013, "step": 6284 }, { "epoch": 0.46711259754738016, "grad_norm": 2.4565167866264233, "learning_rate": 1.9182643588609103e-05, "loss": 1.0934, "step": 6285 }, { "epoch": 0.4671869193608324, "grad_norm": 2.3554325525912256, "learning_rate": 1.9182325846989734e-05, "loss": 0.834, "step": 6286 }, { "epoch": 0.46726124117428464, "grad_norm": 2.6335497937021275, "learning_rate": 1.9182008046255028e-05, "loss": 1.005, "step": 6287 }, { "epoch": 0.4673355629877369, "grad_norm": 3.3836765358051366, "learning_rate": 1.918169018640703e-05, "loss": 0.7927, "step": 6288 }, { "epoch": 0.46740988480118917, "grad_norm": 2.032622633830955, "learning_rate": 1.918137226744779e-05, "loss": 0.8414, "step": 6289 }, { "epoch": 0.4674842066146414, "grad_norm": 2.2913511652013505, "learning_rate": 1.918105428937935e-05, "loss": 0.8503, "step": 6290 }, { "epoch": 0.46755852842809364, "grad_norm": 2.523099699347548, "learning_rate": 1.9180736252203758e-05, "loss": 1.1319, "step": 6291 }, { "epoch": 0.4676328502415459, "grad_norm": 2.771325031787363, "learning_rate": 1.9180418155923064e-05, "loss": 0.7775, "step": 6292 }, { "epoch": 0.4677071720549981, "grad_norm": 1.8389632991449782, "learning_rate": 1.9180100000539312e-05, "loss": 0.6879, "step": 6293 }, { "epoch": 0.4677814938684504, "grad_norm": 3.5391165395159336, "learning_rate": 1.9179781786054554e-05, "loss": 0.889, "step": 6294 }, { "epoch": 0.46785581568190265, "grad_norm": 2.4694388730153265, "learning_rate": 1.9179463512470837e-05, "loss": 0.9615, "step": 6295 }, { "epoch": 0.4679301374953549, "grad_norm": 2.256615223752367, "learning_rate": 1.917914517979021e-05, "loss": 0.8009, "step": 6296 }, { "epoch": 0.46800445930880713, "grad_norm": 1.842015946489209, "learning_rate": 1.9178826788014722e-05, "loss": 0.7861, "step": 6297 }, { "epoch": 0.46807878112225937, "grad_norm": 1.8770442956061257, "learning_rate": 1.9178508337146423e-05, "loss": 0.8991, "step": 6298 }, { "epoch": 0.4681531029357116, "grad_norm": 1.854468848867134, "learning_rate": 1.9178189827187367e-05, "loss": 0.8367, "step": 6299 }, { "epoch": 0.4682274247491639, "grad_norm": 2.146189111855802, "learning_rate": 1.9177871258139598e-05, "loss": 0.9369, "step": 6300 }, { "epoch": 0.46830174656261614, "grad_norm": 2.3662165858799122, "learning_rate": 1.9177552630005172e-05, "loss": 0.9153, "step": 6301 }, { "epoch": 0.4683760683760684, "grad_norm": 2.4008508896539005, "learning_rate": 1.9177233942786136e-05, "loss": 0.8937, "step": 6302 }, { "epoch": 0.4684503901895206, "grad_norm": 2.207164448228442, "learning_rate": 1.9176915196484546e-05, "loss": 0.917, "step": 6303 }, { "epoch": 0.46852471200297285, "grad_norm": 2.289888027517301, "learning_rate": 1.917659639110245e-05, "loss": 0.8106, "step": 6304 }, { "epoch": 0.46859903381642515, "grad_norm": 2.036238968551469, "learning_rate": 1.9176277526641907e-05, "loss": 0.7439, "step": 6305 }, { "epoch": 0.4686733556298774, "grad_norm": 2.185975127497263, "learning_rate": 1.917595860310496e-05, "loss": 0.9588, "step": 6306 }, { "epoch": 0.4687476774433296, "grad_norm": 1.8434973086590074, "learning_rate": 1.917563962049367e-05, "loss": 0.9409, "step": 6307 }, { "epoch": 0.46882199925678186, "grad_norm": 1.880713867351606, "learning_rate": 1.9175320578810092e-05, "loss": 0.6925, "step": 6308 }, { "epoch": 0.4688963210702341, "grad_norm": 2.391455162248525, "learning_rate": 1.917500147805627e-05, "loss": 1.0687, "step": 6309 }, { "epoch": 0.46897064288368634, "grad_norm": 1.8600189694915852, "learning_rate": 1.9174682318234268e-05, "loss": 0.6859, "step": 6310 }, { "epoch": 0.46904496469713863, "grad_norm": 3.160713531062432, "learning_rate": 1.9174363099346136e-05, "loss": 0.7514, "step": 6311 }, { "epoch": 0.46911928651059087, "grad_norm": 2.914243983488333, "learning_rate": 1.917404382139393e-05, "loss": 0.8465, "step": 6312 }, { "epoch": 0.4691936083240431, "grad_norm": 1.9979052486181155, "learning_rate": 1.9173724484379706e-05, "loss": 0.6755, "step": 6313 }, { "epoch": 0.46926793013749535, "grad_norm": 2.317197296215569, "learning_rate": 1.9173405088305518e-05, "loss": 0.8691, "step": 6314 }, { "epoch": 0.4693422519509476, "grad_norm": 2.668589590407237, "learning_rate": 1.917308563317343e-05, "loss": 1.1711, "step": 6315 }, { "epoch": 0.4694165737643998, "grad_norm": 5.787325311838035, "learning_rate": 1.9172766118985487e-05, "loss": 1.1406, "step": 6316 }, { "epoch": 0.4694908955778521, "grad_norm": 2.6741739565384783, "learning_rate": 1.9172446545743754e-05, "loss": 0.8133, "step": 6317 }, { "epoch": 0.46956521739130436, "grad_norm": 2.127942599192269, "learning_rate": 1.9172126913450285e-05, "loss": 0.9693, "step": 6318 }, { "epoch": 0.4696395392047566, "grad_norm": 1.7959317091611673, "learning_rate": 1.917180722210714e-05, "loss": 0.7826, "step": 6319 }, { "epoch": 0.46971386101820883, "grad_norm": 2.232283401005321, "learning_rate": 1.9171487471716377e-05, "loss": 0.6093, "step": 6320 }, { "epoch": 0.46978818283166107, "grad_norm": 2.2593364035010435, "learning_rate": 1.917116766228005e-05, "loss": 0.8755, "step": 6321 }, { "epoch": 0.46986250464511337, "grad_norm": 2.057851462364334, "learning_rate": 1.9170847793800225e-05, "loss": 0.8483, "step": 6322 }, { "epoch": 0.4699368264585656, "grad_norm": 2.6670140254716324, "learning_rate": 1.9170527866278955e-05, "loss": 0.992, "step": 6323 }, { "epoch": 0.47001114827201784, "grad_norm": 2.409942778913848, "learning_rate": 1.9170207879718306e-05, "loss": 0.7689, "step": 6324 }, { "epoch": 0.4700854700854701, "grad_norm": 2.0213009702932014, "learning_rate": 1.9169887834120332e-05, "loss": 0.9045, "step": 6325 }, { "epoch": 0.4701597918989223, "grad_norm": 2.1357701290101656, "learning_rate": 1.9169567729487095e-05, "loss": 0.8736, "step": 6326 }, { "epoch": 0.47023411371237456, "grad_norm": 2.2209185353175136, "learning_rate": 1.916924756582066e-05, "loss": 0.891, "step": 6327 }, { "epoch": 0.47030843552582685, "grad_norm": 2.8453288582404292, "learning_rate": 1.916892734312308e-05, "loss": 0.8378, "step": 6328 }, { "epoch": 0.4703827573392791, "grad_norm": 2.448793325684735, "learning_rate": 1.916860706139642e-05, "loss": 0.9844, "step": 6329 }, { "epoch": 0.47045707915273133, "grad_norm": 2.303975099472837, "learning_rate": 1.9168286720642752e-05, "loss": 0.9196, "step": 6330 }, { "epoch": 0.47053140096618357, "grad_norm": 2.2193132967430302, "learning_rate": 1.9167966320864124e-05, "loss": 0.7629, "step": 6331 }, { "epoch": 0.4706057227796358, "grad_norm": 2.214830964325619, "learning_rate": 1.9167645862062607e-05, "loss": 0.8193, "step": 6332 }, { "epoch": 0.4706800445930881, "grad_norm": 1.930124378517708, "learning_rate": 1.916732534424026e-05, "loss": 0.9269, "step": 6333 }, { "epoch": 0.47075436640654034, "grad_norm": 2.116762232681554, "learning_rate": 1.916700476739915e-05, "loss": 0.8743, "step": 6334 }, { "epoch": 0.4708286882199926, "grad_norm": 2.102736339287027, "learning_rate": 1.9166684131541337e-05, "loss": 0.9022, "step": 6335 }, { "epoch": 0.4709030100334448, "grad_norm": 2.1113570794651846, "learning_rate": 1.9166363436668888e-05, "loss": 0.5927, "step": 6336 }, { "epoch": 0.47097733184689705, "grad_norm": 3.0294471551720457, "learning_rate": 1.9166042682783868e-05, "loss": 0.8022, "step": 6337 }, { "epoch": 0.4710516536603493, "grad_norm": 2.188963029218422, "learning_rate": 1.9165721869888338e-05, "loss": 0.9861, "step": 6338 }, { "epoch": 0.4711259754738016, "grad_norm": 1.9505489474297713, "learning_rate": 1.9165400997984367e-05, "loss": 0.7874, "step": 6339 }, { "epoch": 0.4712002972872538, "grad_norm": 2.178765667016585, "learning_rate": 1.916508006707402e-05, "loss": 0.9954, "step": 6340 }, { "epoch": 0.47127461910070606, "grad_norm": 5.555004507382744, "learning_rate": 1.9164759077159366e-05, "loss": 0.9755, "step": 6341 }, { "epoch": 0.4713489409141583, "grad_norm": 1.9404745827237009, "learning_rate": 1.9164438028242463e-05, "loss": 0.905, "step": 6342 }, { "epoch": 0.47142326272761054, "grad_norm": 1.650909734073174, "learning_rate": 1.9164116920325392e-05, "loss": 0.6648, "step": 6343 }, { "epoch": 0.4714975845410628, "grad_norm": 2.0150259077260197, "learning_rate": 1.9163795753410203e-05, "loss": 0.8211, "step": 6344 }, { "epoch": 0.47157190635451507, "grad_norm": 2.408142983674193, "learning_rate": 1.9163474527498976e-05, "loss": 0.9218, "step": 6345 }, { "epoch": 0.4716462281679673, "grad_norm": 2.116043446558157, "learning_rate": 1.9163153242593778e-05, "loss": 0.6167, "step": 6346 }, { "epoch": 0.47172054998141955, "grad_norm": 2.7348426405664505, "learning_rate": 1.9162831898696672e-05, "loss": 1.0048, "step": 6347 }, { "epoch": 0.4717948717948718, "grad_norm": 3.641757320222086, "learning_rate": 1.916251049580973e-05, "loss": 0.8339, "step": 6348 }, { "epoch": 0.471869193608324, "grad_norm": 2.145352522929659, "learning_rate": 1.916218903393502e-05, "loss": 0.757, "step": 6349 }, { "epoch": 0.4719435154217763, "grad_norm": 2.2824738689101607, "learning_rate": 1.9161867513074612e-05, "loss": 0.9573, "step": 6350 }, { "epoch": 0.47201783723522855, "grad_norm": 2.2268232027381702, "learning_rate": 1.9161545933230577e-05, "loss": 1.0601, "step": 6351 }, { "epoch": 0.4720921590486808, "grad_norm": 1.8722731206020122, "learning_rate": 1.9161224294404984e-05, "loss": 0.9338, "step": 6352 }, { "epoch": 0.47216648086213303, "grad_norm": 2.415494195249989, "learning_rate": 1.9160902596599905e-05, "loss": 1.0161, "step": 6353 }, { "epoch": 0.47224080267558527, "grad_norm": 2.1288861033300135, "learning_rate": 1.9160580839817412e-05, "loss": 0.8284, "step": 6354 }, { "epoch": 0.4723151244890375, "grad_norm": 2.3768735122362505, "learning_rate": 1.9160259024059576e-05, "loss": 0.8089, "step": 6355 }, { "epoch": 0.4723894463024898, "grad_norm": 1.8714749376395863, "learning_rate": 1.9159937149328463e-05, "loss": 0.819, "step": 6356 }, { "epoch": 0.47246376811594204, "grad_norm": 2.2397247354433163, "learning_rate": 1.915961521562615e-05, "loss": 0.9915, "step": 6357 }, { "epoch": 0.4725380899293943, "grad_norm": 2.0352767203000472, "learning_rate": 1.9159293222954717e-05, "loss": 0.9619, "step": 6358 }, { "epoch": 0.4726124117428465, "grad_norm": 6.320884866617985, "learning_rate": 1.9158971171316223e-05, "loss": 1.0126, "step": 6359 }, { "epoch": 0.47268673355629875, "grad_norm": 2.729572842510405, "learning_rate": 1.9158649060712747e-05, "loss": 0.9492, "step": 6360 }, { "epoch": 0.47276105536975105, "grad_norm": 2.4637031081768743, "learning_rate": 1.915832689114637e-05, "loss": 0.6821, "step": 6361 }, { "epoch": 0.4728353771832033, "grad_norm": 2.2143434936548103, "learning_rate": 1.9158004662619153e-05, "loss": 0.8386, "step": 6362 }, { "epoch": 0.4729096989966555, "grad_norm": 2.938384167921149, "learning_rate": 1.915768237513318e-05, "loss": 0.9351, "step": 6363 }, { "epoch": 0.47298402081010776, "grad_norm": 2.3753000190262066, "learning_rate": 1.915736002869052e-05, "loss": 0.9654, "step": 6364 }, { "epoch": 0.47305834262356, "grad_norm": 2.951580463085267, "learning_rate": 1.9157037623293258e-05, "loss": 1.0188, "step": 6365 }, { "epoch": 0.47313266443701224, "grad_norm": 3.1917376751315767, "learning_rate": 1.9156715158943456e-05, "loss": 0.888, "step": 6366 }, { "epoch": 0.47320698625046453, "grad_norm": 3.086481422365427, "learning_rate": 1.91563926356432e-05, "loss": 0.8565, "step": 6367 }, { "epoch": 0.4732813080639168, "grad_norm": 2.5212616524799425, "learning_rate": 1.915607005339456e-05, "loss": 0.9238, "step": 6368 }, { "epoch": 0.473355629877369, "grad_norm": 2.517135999086289, "learning_rate": 1.9155747412199624e-05, "loss": 0.7182, "step": 6369 }, { "epoch": 0.47342995169082125, "grad_norm": 2.9414605932582782, "learning_rate": 1.9155424712060455e-05, "loss": 0.9995, "step": 6370 }, { "epoch": 0.4735042735042735, "grad_norm": 3.214859892058955, "learning_rate": 1.915510195297914e-05, "loss": 0.9552, "step": 6371 }, { "epoch": 0.4735785953177257, "grad_norm": 2.538080775892732, "learning_rate": 1.9154779134957747e-05, "loss": 0.7282, "step": 6372 }, { "epoch": 0.473652917131178, "grad_norm": 2.5864989480275207, "learning_rate": 1.915445625799837e-05, "loss": 0.9177, "step": 6373 }, { "epoch": 0.47372723894463026, "grad_norm": 2.4734568372109376, "learning_rate": 1.9154133322103073e-05, "loss": 1.0591, "step": 6374 }, { "epoch": 0.4738015607580825, "grad_norm": 2.1043585121435715, "learning_rate": 1.915381032727394e-05, "loss": 0.8668, "step": 6375 }, { "epoch": 0.47387588257153473, "grad_norm": 2.402289643128878, "learning_rate": 1.9153487273513054e-05, "loss": 0.7533, "step": 6376 }, { "epoch": 0.473950204384987, "grad_norm": 2.1781675139620074, "learning_rate": 1.915316416082249e-05, "loss": 1.0946, "step": 6377 }, { "epoch": 0.47402452619843927, "grad_norm": 2.8611889017868313, "learning_rate": 1.915284098920433e-05, "loss": 1.0604, "step": 6378 }, { "epoch": 0.4740988480118915, "grad_norm": 2.5759418827347, "learning_rate": 1.9152517758660658e-05, "loss": 0.9972, "step": 6379 }, { "epoch": 0.47417316982534374, "grad_norm": 2.7113753774138063, "learning_rate": 1.9152194469193548e-05, "loss": 0.9552, "step": 6380 }, { "epoch": 0.474247491638796, "grad_norm": 3.765254861504439, "learning_rate": 1.9151871120805085e-05, "loss": 1.0225, "step": 6381 }, { "epoch": 0.4743218134522482, "grad_norm": 3.129009457848463, "learning_rate": 1.9151547713497352e-05, "loss": 0.8393, "step": 6382 }, { "epoch": 0.47439613526570046, "grad_norm": 2.283205756376658, "learning_rate": 1.9151224247272427e-05, "loss": 0.8536, "step": 6383 }, { "epoch": 0.47447045707915275, "grad_norm": 1.7557700429698286, "learning_rate": 1.91509007221324e-05, "loss": 0.8128, "step": 6384 }, { "epoch": 0.474544778892605, "grad_norm": 2.0675853018327865, "learning_rate": 1.9150577138079347e-05, "loss": 0.8488, "step": 6385 }, { "epoch": 0.47461910070605723, "grad_norm": 2.2296395583121087, "learning_rate": 1.915025349511535e-05, "loss": 0.7766, "step": 6386 }, { "epoch": 0.47469342251950947, "grad_norm": 2.2361627231240746, "learning_rate": 1.91499297932425e-05, "loss": 0.8856, "step": 6387 }, { "epoch": 0.4747677443329617, "grad_norm": 3.6562874906857536, "learning_rate": 1.9149606032462877e-05, "loss": 0.8787, "step": 6388 }, { "epoch": 0.474842066146414, "grad_norm": 9.249982459836897, "learning_rate": 1.914928221277856e-05, "loss": 0.6978, "step": 6389 }, { "epoch": 0.47491638795986624, "grad_norm": 2.0821932005633714, "learning_rate": 1.914895833419164e-05, "loss": 0.8061, "step": 6390 }, { "epoch": 0.4749907097733185, "grad_norm": 2.615661481067984, "learning_rate": 1.9148634396704206e-05, "loss": 0.8998, "step": 6391 }, { "epoch": 0.4750650315867707, "grad_norm": 2.214804684928033, "learning_rate": 1.9148310400318337e-05, "loss": 0.9036, "step": 6392 }, { "epoch": 0.47513935340022295, "grad_norm": 2.525160910153366, "learning_rate": 1.9147986345036115e-05, "loss": 0.95, "step": 6393 }, { "epoch": 0.4752136752136752, "grad_norm": 1.7482965648007904, "learning_rate": 1.9147662230859635e-05, "loss": 0.6505, "step": 6394 }, { "epoch": 0.4752879970271275, "grad_norm": 2.5617872026421793, "learning_rate": 1.9147338057790984e-05, "loss": 0.9304, "step": 6395 }, { "epoch": 0.4753623188405797, "grad_norm": 2.386114803752948, "learning_rate": 1.9147013825832238e-05, "loss": 1.0771, "step": 6396 }, { "epoch": 0.47543664065403196, "grad_norm": 1.6777816546658475, "learning_rate": 1.9146689534985495e-05, "loss": 0.8323, "step": 6397 }, { "epoch": 0.4755109624674842, "grad_norm": 2.325409993391743, "learning_rate": 1.914636518525284e-05, "loss": 0.9178, "step": 6398 }, { "epoch": 0.47558528428093644, "grad_norm": 2.138015324479644, "learning_rate": 1.914604077663636e-05, "loss": 1.011, "step": 6399 }, { "epoch": 0.4756596060943887, "grad_norm": 2.1314508046415477, "learning_rate": 1.9145716309138144e-05, "loss": 0.7305, "step": 6400 }, { "epoch": 0.47573392790784097, "grad_norm": 2.3181878127233215, "learning_rate": 1.9145391782760284e-05, "loss": 0.7206, "step": 6401 }, { "epoch": 0.4758082497212932, "grad_norm": 3.272879764936846, "learning_rate": 1.914506719750486e-05, "loss": 0.9828, "step": 6402 }, { "epoch": 0.47588257153474545, "grad_norm": 2.780464720505354, "learning_rate": 1.914474255337397e-05, "loss": 0.8892, "step": 6403 }, { "epoch": 0.4759568933481977, "grad_norm": 5.4838653640785076, "learning_rate": 1.9144417850369704e-05, "loss": 1.0729, "step": 6404 }, { "epoch": 0.4760312151616499, "grad_norm": 2.1828606942409094, "learning_rate": 1.9144093088494153e-05, "loss": 0.8428, "step": 6405 }, { "epoch": 0.4761055369751022, "grad_norm": 1.8464357991787155, "learning_rate": 1.91437682677494e-05, "loss": 0.6717, "step": 6406 }, { "epoch": 0.47617985878855446, "grad_norm": 2.8624174270359544, "learning_rate": 1.9143443388137542e-05, "loss": 0.904, "step": 6407 }, { "epoch": 0.4762541806020067, "grad_norm": 2.04873999879101, "learning_rate": 1.9143118449660673e-05, "loss": 0.7885, "step": 6408 }, { "epoch": 0.47632850241545893, "grad_norm": 2.5533999273352457, "learning_rate": 1.9142793452320876e-05, "loss": 1.0565, "step": 6409 }, { "epoch": 0.47640282422891117, "grad_norm": 2.181603967204545, "learning_rate": 1.9142468396120254e-05, "loss": 0.9969, "step": 6410 }, { "epoch": 0.4764771460423634, "grad_norm": 2.039630506965106, "learning_rate": 1.9142143281060896e-05, "loss": 0.8796, "step": 6411 }, { "epoch": 0.4765514678558157, "grad_norm": 2.3914876748835967, "learning_rate": 1.9141818107144887e-05, "loss": 0.9308, "step": 6412 }, { "epoch": 0.47662578966926794, "grad_norm": 3.1999390812167374, "learning_rate": 1.914149287437433e-05, "loss": 0.9967, "step": 6413 }, { "epoch": 0.4767001114827202, "grad_norm": 2.5544443982893528, "learning_rate": 1.914116758275132e-05, "loss": 1.0101, "step": 6414 }, { "epoch": 0.4767744332961724, "grad_norm": 2.0068634402896164, "learning_rate": 1.914084223227795e-05, "loss": 0.7609, "step": 6415 }, { "epoch": 0.47684875510962466, "grad_norm": 2.5307260432041487, "learning_rate": 1.9140516822956305e-05, "loss": 0.845, "step": 6416 }, { "epoch": 0.47692307692307695, "grad_norm": 2.056511713215976, "learning_rate": 1.914019135478849e-05, "loss": 0.985, "step": 6417 }, { "epoch": 0.4769973987365292, "grad_norm": 3.379484455837552, "learning_rate": 1.9139865827776596e-05, "loss": 0.8654, "step": 6418 }, { "epoch": 0.4770717205499814, "grad_norm": 2.111983014418219, "learning_rate": 1.913954024192272e-05, "loss": 0.8534, "step": 6419 }, { "epoch": 0.47714604236343366, "grad_norm": 2.2315276576666725, "learning_rate": 1.9139214597228958e-05, "loss": 0.8661, "step": 6420 }, { "epoch": 0.4772203641768859, "grad_norm": 2.1477832296861794, "learning_rate": 1.9138888893697406e-05, "loss": 0.7814, "step": 6421 }, { "epoch": 0.47729468599033814, "grad_norm": 1.7109951254316564, "learning_rate": 1.9138563131330167e-05, "loss": 0.8622, "step": 6422 }, { "epoch": 0.47736900780379043, "grad_norm": 2.0918423123795638, "learning_rate": 1.9138237310129327e-05, "loss": 0.8302, "step": 6423 }, { "epoch": 0.4774433296172427, "grad_norm": 2.052938958996599, "learning_rate": 1.913791143009699e-05, "loss": 0.9543, "step": 6424 }, { "epoch": 0.4775176514306949, "grad_norm": 2.248994123134797, "learning_rate": 1.9137585491235257e-05, "loss": 0.7629, "step": 6425 }, { "epoch": 0.47759197324414715, "grad_norm": 2.3008139963865055, "learning_rate": 1.913725949354622e-05, "loss": 0.8449, "step": 6426 }, { "epoch": 0.4776662950575994, "grad_norm": 2.1669879066892204, "learning_rate": 1.913693343703198e-05, "loss": 0.7654, "step": 6427 }, { "epoch": 0.4777406168710516, "grad_norm": 2.022644018088614, "learning_rate": 1.9136607321694634e-05, "loss": 0.8829, "step": 6428 }, { "epoch": 0.4778149386845039, "grad_norm": 2.1669123999691142, "learning_rate": 1.913628114753629e-05, "loss": 0.8713, "step": 6429 }, { "epoch": 0.47788926049795616, "grad_norm": 2.291403989096542, "learning_rate": 1.913595491455904e-05, "loss": 0.8167, "step": 6430 }, { "epoch": 0.4779635823114084, "grad_norm": 2.450760243809941, "learning_rate": 1.9135628622764986e-05, "loss": 0.8638, "step": 6431 }, { "epoch": 0.47803790412486064, "grad_norm": 3.734726451577574, "learning_rate": 1.9135302272156228e-05, "loss": 0.8611, "step": 6432 }, { "epoch": 0.4781122259383129, "grad_norm": 2.5945288377858233, "learning_rate": 1.913497586273487e-05, "loss": 1.0281, "step": 6433 }, { "epoch": 0.47818654775176517, "grad_norm": 2.1889730185102425, "learning_rate": 1.9134649394503005e-05, "loss": 0.9331, "step": 6434 }, { "epoch": 0.4782608695652174, "grad_norm": 2.3870711362526484, "learning_rate": 1.913432286746275e-05, "loss": 0.993, "step": 6435 }, { "epoch": 0.47833519137866964, "grad_norm": 2.5062575726766085, "learning_rate": 1.9133996281616194e-05, "loss": 1.0489, "step": 6436 }, { "epoch": 0.4784095131921219, "grad_norm": 3.0942934092063243, "learning_rate": 1.9133669636965444e-05, "loss": 1.1716, "step": 6437 }, { "epoch": 0.4784838350055741, "grad_norm": 3.6393540597275775, "learning_rate": 1.9133342933512602e-05, "loss": 0.8657, "step": 6438 }, { "epoch": 0.47855815681902636, "grad_norm": 2.020942695074438, "learning_rate": 1.9133016171259774e-05, "loss": 1.0885, "step": 6439 }, { "epoch": 0.47863247863247865, "grad_norm": 4.014985392920762, "learning_rate": 1.9132689350209062e-05, "loss": 0.8733, "step": 6440 }, { "epoch": 0.4787068004459309, "grad_norm": 2.195523775245394, "learning_rate": 1.913236247036257e-05, "loss": 0.7539, "step": 6441 }, { "epoch": 0.47878112225938313, "grad_norm": 3.0810933811924186, "learning_rate": 1.91320355317224e-05, "loss": 1.0846, "step": 6442 }, { "epoch": 0.47885544407283537, "grad_norm": 2.373528089807893, "learning_rate": 1.9131708534290657e-05, "loss": 0.9362, "step": 6443 }, { "epoch": 0.4789297658862876, "grad_norm": 3.0590505854927117, "learning_rate": 1.9131381478069454e-05, "loss": 1.0082, "step": 6444 }, { "epoch": 0.4790040876997399, "grad_norm": 2.0716357689585743, "learning_rate": 1.9131054363060888e-05, "loss": 0.8791, "step": 6445 }, { "epoch": 0.47907840951319214, "grad_norm": 2.0469438175472114, "learning_rate": 1.913072718926707e-05, "loss": 1.0763, "step": 6446 }, { "epoch": 0.4791527313266444, "grad_norm": 3.47600778997272, "learning_rate": 1.91303999566901e-05, "loss": 0.8901, "step": 6447 }, { "epoch": 0.4792270531400966, "grad_norm": 2.226569763772308, "learning_rate": 1.913007266533209e-05, "loss": 0.9349, "step": 6448 }, { "epoch": 0.47930137495354885, "grad_norm": 2.573070527532695, "learning_rate": 1.912974531519515e-05, "loss": 0.9289, "step": 6449 }, { "epoch": 0.4793756967670011, "grad_norm": 1.7534401487492215, "learning_rate": 1.9129417906281377e-05, "loss": 0.6828, "step": 6450 }, { "epoch": 0.4794500185804534, "grad_norm": 1.9485360146786417, "learning_rate": 1.912909043859289e-05, "loss": 0.6716, "step": 6451 }, { "epoch": 0.4795243403939056, "grad_norm": 2.3820473703242957, "learning_rate": 1.912876291213179e-05, "loss": 0.8077, "step": 6452 }, { "epoch": 0.47959866220735786, "grad_norm": 2.9868405417715116, "learning_rate": 1.912843532690019e-05, "loss": 0.9475, "step": 6453 }, { "epoch": 0.4796729840208101, "grad_norm": 2.013223192241549, "learning_rate": 1.9128107682900198e-05, "loss": 0.8162, "step": 6454 }, { "epoch": 0.47974730583426234, "grad_norm": 2.2789552289517863, "learning_rate": 1.912777998013392e-05, "loss": 0.9958, "step": 6455 }, { "epoch": 0.4798216276477146, "grad_norm": 2.3520925400501524, "learning_rate": 1.9127452218603467e-05, "loss": 0.8811, "step": 6456 }, { "epoch": 0.47989594946116687, "grad_norm": 2.5498155861509924, "learning_rate": 1.9127124398310953e-05, "loss": 0.8237, "step": 6457 }, { "epoch": 0.4799702712746191, "grad_norm": 2.161625507290806, "learning_rate": 1.9126796519258485e-05, "loss": 1.0142, "step": 6458 }, { "epoch": 0.48004459308807135, "grad_norm": 2.0743950634492214, "learning_rate": 1.9126468581448172e-05, "loss": 0.836, "step": 6459 }, { "epoch": 0.4801189149015236, "grad_norm": 2.1727621860749835, "learning_rate": 1.9126140584882128e-05, "loss": 0.8384, "step": 6460 }, { "epoch": 0.4801932367149758, "grad_norm": 2.0977484242618316, "learning_rate": 1.9125812529562466e-05, "loss": 0.6683, "step": 6461 }, { "epoch": 0.4802675585284281, "grad_norm": 2.465633620291911, "learning_rate": 1.91254844154913e-05, "loss": 0.9704, "step": 6462 }, { "epoch": 0.48034188034188036, "grad_norm": 2.9390434091774034, "learning_rate": 1.9125156242670732e-05, "loss": 0.9184, "step": 6463 }, { "epoch": 0.4804162021553326, "grad_norm": 2.564575947897222, "learning_rate": 1.9124828011102888e-05, "loss": 0.9312, "step": 6464 }, { "epoch": 0.48049052396878483, "grad_norm": 2.228479347851104, "learning_rate": 1.912449972078987e-05, "loss": 1.0716, "step": 6465 }, { "epoch": 0.48056484578223707, "grad_norm": 2.348499345749794, "learning_rate": 1.91241713717338e-05, "loss": 0.9731, "step": 6466 }, { "epoch": 0.4806391675956893, "grad_norm": 2.8804468911970926, "learning_rate": 1.9123842963936785e-05, "loss": 0.9128, "step": 6467 }, { "epoch": 0.4807134894091416, "grad_norm": 2.232749921818821, "learning_rate": 1.9123514497400944e-05, "loss": 0.8931, "step": 6468 }, { "epoch": 0.48078781122259384, "grad_norm": 2.4504549336952564, "learning_rate": 1.912318597212839e-05, "loss": 0.9834, "step": 6469 }, { "epoch": 0.4808621330360461, "grad_norm": 1.903662653498667, "learning_rate": 1.9122857388121235e-05, "loss": 0.7961, "step": 6470 }, { "epoch": 0.4809364548494983, "grad_norm": 1.8999014854883804, "learning_rate": 1.91225287453816e-05, "loss": 1.0347, "step": 6471 }, { "epoch": 0.48101077666295056, "grad_norm": 5.020820050315602, "learning_rate": 1.91222000439116e-05, "loss": 0.9503, "step": 6472 }, { "epoch": 0.48108509847640285, "grad_norm": 1.9014151799440673, "learning_rate": 1.9121871283713345e-05, "loss": 0.7662, "step": 6473 }, { "epoch": 0.4811594202898551, "grad_norm": 2.2408381682343035, "learning_rate": 1.9121542464788962e-05, "loss": 0.9401, "step": 6474 }, { "epoch": 0.4812337421033073, "grad_norm": 2.4585088778704183, "learning_rate": 1.9121213587140557e-05, "loss": 0.9197, "step": 6475 }, { "epoch": 0.48130806391675957, "grad_norm": 2.058047705737307, "learning_rate": 1.912088465077025e-05, "loss": 0.7697, "step": 6476 }, { "epoch": 0.4813823857302118, "grad_norm": 2.011013083109044, "learning_rate": 1.912055565568017e-05, "loss": 0.8245, "step": 6477 }, { "epoch": 0.48145670754366404, "grad_norm": 2.1705612375796552, "learning_rate": 1.9120226601872418e-05, "loss": 0.975, "step": 6478 }, { "epoch": 0.48153102935711634, "grad_norm": 1.9542758576713226, "learning_rate": 1.9119897489349123e-05, "loss": 0.8244, "step": 6479 }, { "epoch": 0.4816053511705686, "grad_norm": 2.2307985987278323, "learning_rate": 1.9119568318112403e-05, "loss": 0.8841, "step": 6480 }, { "epoch": 0.4816796729840208, "grad_norm": 2.2584381623584586, "learning_rate": 1.911923908816437e-05, "loss": 0.756, "step": 6481 }, { "epoch": 0.48175399479747305, "grad_norm": 1.9824819556812956, "learning_rate": 1.9118909799507154e-05, "loss": 0.9097, "step": 6482 }, { "epoch": 0.4818283166109253, "grad_norm": 2.15147436681129, "learning_rate": 1.911858045214287e-05, "loss": 0.9264, "step": 6483 }, { "epoch": 0.4819026384243775, "grad_norm": 2.033208963325338, "learning_rate": 1.9118251046073638e-05, "loss": 0.9928, "step": 6484 }, { "epoch": 0.4819769602378298, "grad_norm": 2.1176236717657146, "learning_rate": 1.9117921581301574e-05, "loss": 0.8893, "step": 6485 }, { "epoch": 0.48205128205128206, "grad_norm": 2.424579763356087, "learning_rate": 1.911759205782881e-05, "loss": 0.6913, "step": 6486 }, { "epoch": 0.4821256038647343, "grad_norm": 2.41215167836013, "learning_rate": 1.9117262475657457e-05, "loss": 0.8838, "step": 6487 }, { "epoch": 0.48219992567818654, "grad_norm": 4.4765534909687865, "learning_rate": 1.9116932834789642e-05, "loss": 0.9283, "step": 6488 }, { "epoch": 0.4822742474916388, "grad_norm": 2.399132616647908, "learning_rate": 1.9116603135227486e-05, "loss": 1.0823, "step": 6489 }, { "epoch": 0.48234856930509107, "grad_norm": 5.171097219591104, "learning_rate": 1.9116273376973116e-05, "loss": 1.0873, "step": 6490 }, { "epoch": 0.4824228911185433, "grad_norm": 1.9640100572318642, "learning_rate": 1.9115943560028646e-05, "loss": 0.7372, "step": 6491 }, { "epoch": 0.48249721293199554, "grad_norm": 2.1152315944899636, "learning_rate": 1.911561368439621e-05, "loss": 0.7713, "step": 6492 }, { "epoch": 0.4825715347454478, "grad_norm": 2.0817239587672765, "learning_rate": 1.911528375007792e-05, "loss": 0.5481, "step": 6493 }, { "epoch": 0.4826458565589, "grad_norm": 2.592052969346051, "learning_rate": 1.911495375707591e-05, "loss": 0.7922, "step": 6494 }, { "epoch": 0.48272017837235226, "grad_norm": 2.3888267354619934, "learning_rate": 1.9114623705392302e-05, "loss": 0.8106, "step": 6495 }, { "epoch": 0.48279450018580455, "grad_norm": 2.144620889967195, "learning_rate": 1.9114293595029216e-05, "loss": 0.8112, "step": 6496 }, { "epoch": 0.4828688219992568, "grad_norm": 2.7003502982667014, "learning_rate": 1.9113963425988782e-05, "loss": 1.016, "step": 6497 }, { "epoch": 0.48294314381270903, "grad_norm": 1.730725797569103, "learning_rate": 1.9113633198273126e-05, "loss": 0.8407, "step": 6498 }, { "epoch": 0.48301746562616127, "grad_norm": 2.2252236591035897, "learning_rate": 1.911330291188437e-05, "loss": 0.7562, "step": 6499 }, { "epoch": 0.4830917874396135, "grad_norm": 2.856448998761822, "learning_rate": 1.911297256682464e-05, "loss": 0.9696, "step": 6500 }, { "epoch": 0.4831661092530658, "grad_norm": 2.621972099064385, "learning_rate": 1.911264216309607e-05, "loss": 0.9832, "step": 6501 }, { "epoch": 0.48324043106651804, "grad_norm": 1.8038931047079811, "learning_rate": 1.9112311700700782e-05, "loss": 0.744, "step": 6502 }, { "epoch": 0.4833147528799703, "grad_norm": 2.402937332397769, "learning_rate": 1.9111981179640903e-05, "loss": 0.9027, "step": 6503 }, { "epoch": 0.4833890746934225, "grad_norm": 2.314723333670967, "learning_rate": 1.9111650599918564e-05, "loss": 0.8574, "step": 6504 }, { "epoch": 0.48346339650687475, "grad_norm": 2.218406008120316, "learning_rate": 1.9111319961535887e-05, "loss": 0.7874, "step": 6505 }, { "epoch": 0.483537718320327, "grad_norm": 2.8119118957246103, "learning_rate": 1.911098926449501e-05, "loss": 0.8343, "step": 6506 }, { "epoch": 0.4836120401337793, "grad_norm": 2.328824961933176, "learning_rate": 1.9110658508798053e-05, "loss": 1.0302, "step": 6507 }, { "epoch": 0.4836863619472315, "grad_norm": 2.98591225938234, "learning_rate": 1.9110327694447154e-05, "loss": 0.8541, "step": 6508 }, { "epoch": 0.48376068376068376, "grad_norm": 2.4681835727255024, "learning_rate": 1.910999682144443e-05, "loss": 0.8198, "step": 6509 }, { "epoch": 0.483835005574136, "grad_norm": 2.2436256733396647, "learning_rate": 1.9109665889792026e-05, "loss": 0.7413, "step": 6510 }, { "epoch": 0.48390932738758824, "grad_norm": 3.367398663523765, "learning_rate": 1.9109334899492062e-05, "loss": 1.0155, "step": 6511 }, { "epoch": 0.4839836492010405, "grad_norm": 2.2637867749790543, "learning_rate": 1.9109003850546678e-05, "loss": 0.9962, "step": 6512 }, { "epoch": 0.48405797101449277, "grad_norm": 7.2204820567524175, "learning_rate": 1.910867274295799e-05, "loss": 1.0142, "step": 6513 }, { "epoch": 0.484132292827945, "grad_norm": 2.3752246426199592, "learning_rate": 1.910834157672815e-05, "loss": 1.04, "step": 6514 }, { "epoch": 0.48420661464139725, "grad_norm": 2.0517297408946296, "learning_rate": 1.9108010351859273e-05, "loss": 0.8341, "step": 6515 }, { "epoch": 0.4842809364548495, "grad_norm": 1.9465401558755582, "learning_rate": 1.9107679068353498e-05, "loss": 0.9628, "step": 6516 }, { "epoch": 0.4843552582683017, "grad_norm": 2.256932856837439, "learning_rate": 1.910734772621296e-05, "loss": 0.7831, "step": 6517 }, { "epoch": 0.484429580081754, "grad_norm": 2.4564245319790725, "learning_rate": 1.910701632543979e-05, "loss": 0.9318, "step": 6518 }, { "epoch": 0.48450390189520626, "grad_norm": 2.7888076227389313, "learning_rate": 1.9106684866036122e-05, "loss": 0.9602, "step": 6519 }, { "epoch": 0.4845782237086585, "grad_norm": 3.9750544655850333, "learning_rate": 1.9106353348004088e-05, "loss": 0.9516, "step": 6520 }, { "epoch": 0.48465254552211073, "grad_norm": 2.044910023860326, "learning_rate": 1.9106021771345824e-05, "loss": 0.8937, "step": 6521 }, { "epoch": 0.48472686733556297, "grad_norm": 2.261053872357942, "learning_rate": 1.9105690136063464e-05, "loss": 0.7536, "step": 6522 }, { "epoch": 0.4848011891490152, "grad_norm": 3.4934667599743894, "learning_rate": 1.9105358442159144e-05, "loss": 0.9534, "step": 6523 }, { "epoch": 0.4848755109624675, "grad_norm": 2.6049403186798714, "learning_rate": 1.9105026689634997e-05, "loss": 0.9427, "step": 6524 }, { "epoch": 0.48494983277591974, "grad_norm": 1.9594079338135808, "learning_rate": 1.910469487849316e-05, "loss": 0.852, "step": 6525 }, { "epoch": 0.485024154589372, "grad_norm": 2.1274798468206475, "learning_rate": 1.9104363008735774e-05, "loss": 0.8079, "step": 6526 }, { "epoch": 0.4850984764028242, "grad_norm": 2.2624028948480404, "learning_rate": 1.910403108036497e-05, "loss": 0.9509, "step": 6527 }, { "epoch": 0.48517279821627646, "grad_norm": 3.2985212604972505, "learning_rate": 1.9103699093382887e-05, "loss": 1.2467, "step": 6528 }, { "epoch": 0.48524712002972875, "grad_norm": 2.045891928174954, "learning_rate": 1.910336704779166e-05, "loss": 0.7001, "step": 6529 }, { "epoch": 0.485321441843181, "grad_norm": 2.335789149663611, "learning_rate": 1.9103034943593425e-05, "loss": 1.0972, "step": 6530 }, { "epoch": 0.48539576365663323, "grad_norm": 39.72564982527715, "learning_rate": 1.9102702780790327e-05, "loss": 0.8384, "step": 6531 }, { "epoch": 0.48547008547008547, "grad_norm": 2.5810005968636855, "learning_rate": 1.91023705593845e-05, "loss": 0.8886, "step": 6532 }, { "epoch": 0.4855444072835377, "grad_norm": 5.911240926560942, "learning_rate": 1.9102038279378085e-05, "loss": 0.8192, "step": 6533 }, { "epoch": 0.48561872909698994, "grad_norm": 2.7955899142620244, "learning_rate": 1.9101705940773217e-05, "loss": 0.8052, "step": 6534 }, { "epoch": 0.48569305091044224, "grad_norm": 3.8215583554470385, "learning_rate": 1.9101373543572043e-05, "loss": 1.0704, "step": 6535 }, { "epoch": 0.4857673727238945, "grad_norm": 5.670218779329848, "learning_rate": 1.9101041087776694e-05, "loss": 0.9695, "step": 6536 }, { "epoch": 0.4858416945373467, "grad_norm": 3.6413609714713835, "learning_rate": 1.9100708573389318e-05, "loss": 0.9138, "step": 6537 }, { "epoch": 0.48591601635079895, "grad_norm": 2.8630432352399797, "learning_rate": 1.910037600041205e-05, "loss": 1.0464, "step": 6538 }, { "epoch": 0.4859903381642512, "grad_norm": 4.225554297413854, "learning_rate": 1.9100043368847034e-05, "loss": 0.9583, "step": 6539 }, { "epoch": 0.48606465997770343, "grad_norm": 4.208116859269204, "learning_rate": 1.909971067869641e-05, "loss": 1.0867, "step": 6540 }, { "epoch": 0.4861389817911557, "grad_norm": 4.323352383087215, "learning_rate": 1.909937792996232e-05, "loss": 0.9343, "step": 6541 }, { "epoch": 0.48621330360460796, "grad_norm": 2.580528595044767, "learning_rate": 1.9099045122646913e-05, "loss": 0.7004, "step": 6542 }, { "epoch": 0.4862876254180602, "grad_norm": 3.372044150174318, "learning_rate": 1.9098712256752322e-05, "loss": 0.9541, "step": 6543 }, { "epoch": 0.48636194723151244, "grad_norm": 2.822922808942761, "learning_rate": 1.909837933228069e-05, "loss": 0.783, "step": 6544 }, { "epoch": 0.4864362690449647, "grad_norm": 4.629971886572446, "learning_rate": 1.909804634923417e-05, "loss": 0.9598, "step": 6545 }, { "epoch": 0.48651059085841697, "grad_norm": 9.861603452072407, "learning_rate": 1.9097713307614896e-05, "loss": 0.8853, "step": 6546 }, { "epoch": 0.4865849126718692, "grad_norm": 4.954040559010372, "learning_rate": 1.9097380207425016e-05, "loss": 0.9524, "step": 6547 }, { "epoch": 0.48665923448532145, "grad_norm": 10.281603393902452, "learning_rate": 1.9097047048666673e-05, "loss": 0.8467, "step": 6548 }, { "epoch": 0.4867335562987737, "grad_norm": 5.397856924947245, "learning_rate": 1.9096713831342016e-05, "loss": 0.9432, "step": 6549 }, { "epoch": 0.4868078781122259, "grad_norm": 26.555067935167855, "learning_rate": 1.9096380555453186e-05, "loss": 1.0637, "step": 6550 }, { "epoch": 0.48688219992567816, "grad_norm": 5.649768854794216, "learning_rate": 1.909604722100233e-05, "loss": 1.043, "step": 6551 }, { "epoch": 0.48695652173913045, "grad_norm": 7.906561835769034, "learning_rate": 1.9095713827991595e-05, "loss": 1.0713, "step": 6552 }, { "epoch": 0.4870308435525827, "grad_norm": 7.07839064181456, "learning_rate": 1.9095380376423124e-05, "loss": 0.9501, "step": 6553 }, { "epoch": 0.48710516536603493, "grad_norm": 4.501484393532146, "learning_rate": 1.909504686629907e-05, "loss": 0.9008, "step": 6554 }, { "epoch": 0.48717948717948717, "grad_norm": 8.098049939257706, "learning_rate": 1.909471329762157e-05, "loss": 0.8872, "step": 6555 }, { "epoch": 0.4872538089929394, "grad_norm": 8.274195386542184, "learning_rate": 1.9094379670392785e-05, "loss": 0.94, "step": 6556 }, { "epoch": 0.4873281308063917, "grad_norm": 4.657986265419121, "learning_rate": 1.9094045984614852e-05, "loss": 0.8149, "step": 6557 }, { "epoch": 0.48740245261984394, "grad_norm": 6.11047031784448, "learning_rate": 1.909371224028992e-05, "loss": 0.9255, "step": 6558 }, { "epoch": 0.4874767744332962, "grad_norm": 15.593643261226594, "learning_rate": 1.9093378437420146e-05, "loss": 1.114, "step": 6559 }, { "epoch": 0.4875510962467484, "grad_norm": 3.5027932570015614, "learning_rate": 1.9093044576007668e-05, "loss": 0.8542, "step": 6560 }, { "epoch": 0.48762541806020065, "grad_norm": 7.964685706863054, "learning_rate": 1.9092710656054646e-05, "loss": 1.0572, "step": 6561 }, { "epoch": 0.4876997398736529, "grad_norm": 4.051301558254121, "learning_rate": 1.909237667756322e-05, "loss": 1.0512, "step": 6562 }, { "epoch": 0.4877740616871052, "grad_norm": 7.562629379138599, "learning_rate": 1.909204264053555e-05, "loss": 1.1035, "step": 6563 }, { "epoch": 0.4878483835005574, "grad_norm": 3.159833435531115, "learning_rate": 1.9091708544973776e-05, "loss": 0.7729, "step": 6564 }, { "epoch": 0.48792270531400966, "grad_norm": 9.131124997351126, "learning_rate": 1.9091374390880057e-05, "loss": 0.9464, "step": 6565 }, { "epoch": 0.4879970271274619, "grad_norm": 3.1294639096820984, "learning_rate": 1.9091040178256542e-05, "loss": 1.0858, "step": 6566 }, { "epoch": 0.48807134894091414, "grad_norm": 3.183274629455144, "learning_rate": 1.9090705907105378e-05, "loss": 1.0475, "step": 6567 }, { "epoch": 0.48814567075436643, "grad_norm": 2.345279312216687, "learning_rate": 1.9090371577428723e-05, "loss": 0.7443, "step": 6568 }, { "epoch": 0.4882199925678187, "grad_norm": 4.921971064653905, "learning_rate": 1.9090037189228732e-05, "loss": 0.9252, "step": 6569 }, { "epoch": 0.4882943143812709, "grad_norm": 2.802121426833485, "learning_rate": 1.9089702742507547e-05, "loss": 0.8058, "step": 6570 }, { "epoch": 0.48836863619472315, "grad_norm": 2.2087537453274626, "learning_rate": 1.9089368237267328e-05, "loss": 0.7827, "step": 6571 }, { "epoch": 0.4884429580081754, "grad_norm": 2.7601785527309164, "learning_rate": 1.908903367351023e-05, "loss": 0.8665, "step": 6572 }, { "epoch": 0.4885172798216276, "grad_norm": 2.2636851775487457, "learning_rate": 1.9088699051238405e-05, "loss": 0.8628, "step": 6573 }, { "epoch": 0.4885916016350799, "grad_norm": 6.053461547725452, "learning_rate": 1.908836437045401e-05, "loss": 0.801, "step": 6574 }, { "epoch": 0.48866592344853216, "grad_norm": 2.1214149871612054, "learning_rate": 1.908802963115919e-05, "loss": 1.0036, "step": 6575 }, { "epoch": 0.4887402452619844, "grad_norm": 2.5659487674708896, "learning_rate": 1.9087694833356113e-05, "loss": 0.9257, "step": 6576 }, { "epoch": 0.48881456707543663, "grad_norm": 2.290820593844773, "learning_rate": 1.9087359977046925e-05, "loss": 0.9759, "step": 6577 }, { "epoch": 0.4888888888888889, "grad_norm": 2.3016956683053693, "learning_rate": 1.9087025062233785e-05, "loss": 0.9794, "step": 6578 }, { "epoch": 0.4889632107023411, "grad_norm": 1.7610494937120436, "learning_rate": 1.908669008891885e-05, "loss": 0.7502, "step": 6579 }, { "epoch": 0.4890375325157934, "grad_norm": 2.207908190336241, "learning_rate": 1.9086355057104272e-05, "loss": 1.0033, "step": 6580 }, { "epoch": 0.48911185432924564, "grad_norm": 1.7156348161801271, "learning_rate": 1.9086019966792217e-05, "loss": 0.9254, "step": 6581 }, { "epoch": 0.4891861761426979, "grad_norm": 1.7923957631488119, "learning_rate": 1.9085684817984832e-05, "loss": 0.7555, "step": 6582 }, { "epoch": 0.4892604979561501, "grad_norm": 2.1665224599492316, "learning_rate": 1.9085349610684283e-05, "loss": 1.0296, "step": 6583 }, { "epoch": 0.48933481976960236, "grad_norm": 2.33814086387336, "learning_rate": 1.9085014344892723e-05, "loss": 0.9237, "step": 6584 }, { "epoch": 0.48940914158305465, "grad_norm": 2.9694597537467766, "learning_rate": 1.9084679020612313e-05, "loss": 1.1429, "step": 6585 }, { "epoch": 0.4894834633965069, "grad_norm": 2.263181119686594, "learning_rate": 1.9084343637845212e-05, "loss": 0.9508, "step": 6586 }, { "epoch": 0.48955778520995913, "grad_norm": 2.101999724493393, "learning_rate": 1.9084008196593576e-05, "loss": 0.9303, "step": 6587 }, { "epoch": 0.48963210702341137, "grad_norm": 3.2336381864837884, "learning_rate": 1.9083672696859567e-05, "loss": 0.7796, "step": 6588 }, { "epoch": 0.4897064288368636, "grad_norm": 2.434474319456708, "learning_rate": 1.9083337138645346e-05, "loss": 0.9267, "step": 6589 }, { "epoch": 0.48978075065031584, "grad_norm": 2.4378938419283074, "learning_rate": 1.908300152195307e-05, "loss": 0.7524, "step": 6590 }, { "epoch": 0.48985507246376814, "grad_norm": 2.090596199465999, "learning_rate": 1.90826658467849e-05, "loss": 0.7002, "step": 6591 }, { "epoch": 0.4899293942772204, "grad_norm": 2.111217190974715, "learning_rate": 1.9082330113143003e-05, "loss": 0.7917, "step": 6592 }, { "epoch": 0.4900037160906726, "grad_norm": 2.232721374475937, "learning_rate": 1.9081994321029534e-05, "loss": 1.0702, "step": 6593 }, { "epoch": 0.49007803790412485, "grad_norm": 2.066407632944212, "learning_rate": 1.9081658470446655e-05, "loss": 0.9434, "step": 6594 }, { "epoch": 0.4901523597175771, "grad_norm": 2.100663536349948, "learning_rate": 1.908132256139653e-05, "loss": 0.7228, "step": 6595 }, { "epoch": 0.4902266815310294, "grad_norm": 2.2248973422159573, "learning_rate": 1.9080986593881322e-05, "loss": 0.6592, "step": 6596 }, { "epoch": 0.4903010033444816, "grad_norm": 2.8099422686612803, "learning_rate": 1.9080650567903196e-05, "loss": 0.8863, "step": 6597 }, { "epoch": 0.49037532515793386, "grad_norm": 2.1898099489448883, "learning_rate": 1.908031448346431e-05, "loss": 1.0324, "step": 6598 }, { "epoch": 0.4904496469713861, "grad_norm": 1.8729534270693575, "learning_rate": 1.9079978340566835e-05, "loss": 0.8989, "step": 6599 }, { "epoch": 0.49052396878483834, "grad_norm": 2.6833193604482335, "learning_rate": 1.9079642139212925e-05, "loss": 0.942, "step": 6600 }, { "epoch": 0.4905982905982906, "grad_norm": 2.308685062708812, "learning_rate": 1.9079305879404753e-05, "loss": 0.7777, "step": 6601 }, { "epoch": 0.49067261241174287, "grad_norm": 2.366749537350678, "learning_rate": 1.9078969561144477e-05, "loss": 0.8234, "step": 6602 }, { "epoch": 0.4907469342251951, "grad_norm": 2.076781062445786, "learning_rate": 1.9078633184434273e-05, "loss": 1.0978, "step": 6603 }, { "epoch": 0.49082125603864735, "grad_norm": 1.9880793355180308, "learning_rate": 1.9078296749276294e-05, "loss": 0.9766, "step": 6604 }, { "epoch": 0.4908955778520996, "grad_norm": 1.9480579129517093, "learning_rate": 1.9077960255672714e-05, "loss": 0.9318, "step": 6605 }, { "epoch": 0.4909698996655518, "grad_norm": 2.810086060585722, "learning_rate": 1.9077623703625697e-05, "loss": 0.8848, "step": 6606 }, { "epoch": 0.49104422147900406, "grad_norm": 2.5729637646725863, "learning_rate": 1.9077287093137407e-05, "loss": 0.9548, "step": 6607 }, { "epoch": 0.49111854329245636, "grad_norm": 2.1920680882408816, "learning_rate": 1.9076950424210014e-05, "loss": 0.9208, "step": 6608 }, { "epoch": 0.4911928651059086, "grad_norm": 1.7094238143021354, "learning_rate": 1.907661369684569e-05, "loss": 0.9307, "step": 6609 }, { "epoch": 0.49126718691936083, "grad_norm": 1.6977497023537471, "learning_rate": 1.907627691104659e-05, "loss": 0.6714, "step": 6610 }, { "epoch": 0.49134150873281307, "grad_norm": 2.402372273379121, "learning_rate": 1.9075940066814898e-05, "loss": 0.83, "step": 6611 }, { "epoch": 0.4914158305462653, "grad_norm": 1.9099505891825495, "learning_rate": 1.907560316415277e-05, "loss": 0.8858, "step": 6612 }, { "epoch": 0.4914901523597176, "grad_norm": 2.4884726704955593, "learning_rate": 1.9075266203062384e-05, "loss": 0.997, "step": 6613 }, { "epoch": 0.49156447417316984, "grad_norm": 1.9411801863380262, "learning_rate": 1.90749291835459e-05, "loss": 0.9223, "step": 6614 }, { "epoch": 0.4916387959866221, "grad_norm": 2.1372437413351006, "learning_rate": 1.9074592105605493e-05, "loss": 0.8443, "step": 6615 }, { "epoch": 0.4917131178000743, "grad_norm": 2.0612706142177615, "learning_rate": 1.9074254969243337e-05, "loss": 0.8781, "step": 6616 }, { "epoch": 0.49178743961352656, "grad_norm": 1.9058006967783747, "learning_rate": 1.9073917774461597e-05, "loss": 1.0585, "step": 6617 }, { "epoch": 0.4918617614269788, "grad_norm": 3.258197070398666, "learning_rate": 1.9073580521262444e-05, "loss": 1.0041, "step": 6618 }, { "epoch": 0.4919360832404311, "grad_norm": 1.9492636267276369, "learning_rate": 1.907324320964805e-05, "loss": 1.1506, "step": 6619 }, { "epoch": 0.4920104050538833, "grad_norm": 2.0486744247647954, "learning_rate": 1.907290583962059e-05, "loss": 0.9226, "step": 6620 }, { "epoch": 0.49208472686733556, "grad_norm": 10.091309525271523, "learning_rate": 1.907256841118223e-05, "loss": 0.8897, "step": 6621 }, { "epoch": 0.4921590486807878, "grad_norm": 7.371487019551267, "learning_rate": 1.9072230924335148e-05, "loss": 1.0187, "step": 6622 }, { "epoch": 0.49223337049424004, "grad_norm": 1.9208203229061003, "learning_rate": 1.907189337908151e-05, "loss": 0.7595, "step": 6623 }, { "epoch": 0.49230769230769234, "grad_norm": 1.9013659241166843, "learning_rate": 1.9071555775423497e-05, "loss": 0.864, "step": 6624 }, { "epoch": 0.4923820141211446, "grad_norm": 1.9370862483807159, "learning_rate": 1.907121811336328e-05, "loss": 0.8585, "step": 6625 }, { "epoch": 0.4924563359345968, "grad_norm": 4.248568549301616, "learning_rate": 1.9070880392903025e-05, "loss": 0.9452, "step": 6626 }, { "epoch": 0.49253065774804905, "grad_norm": 7.03030685631473, "learning_rate": 1.9070542614044915e-05, "loss": 0.9935, "step": 6627 }, { "epoch": 0.4926049795615013, "grad_norm": 1.9728581410461488, "learning_rate": 1.9070204776791124e-05, "loss": 0.7289, "step": 6628 }, { "epoch": 0.4926793013749535, "grad_norm": 2.2972556833412714, "learning_rate": 1.9069866881143823e-05, "loss": 0.9223, "step": 6629 }, { "epoch": 0.4927536231884058, "grad_norm": 2.2175862327368385, "learning_rate": 1.9069528927105192e-05, "loss": 0.8664, "step": 6630 }, { "epoch": 0.49282794500185806, "grad_norm": 3.034842529165023, "learning_rate": 1.9069190914677405e-05, "loss": 1.007, "step": 6631 }, { "epoch": 0.4929022668153103, "grad_norm": 2.2508719777048003, "learning_rate": 1.9068852843862637e-05, "loss": 0.9681, "step": 6632 }, { "epoch": 0.49297658862876254, "grad_norm": 2.6828015509192085, "learning_rate": 1.9068514714663064e-05, "loss": 0.8637, "step": 6633 }, { "epoch": 0.4930509104422148, "grad_norm": 2.026083943066815, "learning_rate": 1.906817652708086e-05, "loss": 1.0419, "step": 6634 }, { "epoch": 0.493125232255667, "grad_norm": 1.6884625650384721, "learning_rate": 1.9067838281118212e-05, "loss": 0.6781, "step": 6635 }, { "epoch": 0.4931995540691193, "grad_norm": 2.0471920417381386, "learning_rate": 1.906749997677729e-05, "loss": 1.0077, "step": 6636 }, { "epoch": 0.49327387588257154, "grad_norm": 2.1676066076547262, "learning_rate": 1.906716161406027e-05, "loss": 0.707, "step": 6637 }, { "epoch": 0.4933481976960238, "grad_norm": 2.6805721769851063, "learning_rate": 1.906682319296934e-05, "loss": 0.8424, "step": 6638 }, { "epoch": 0.493422519509476, "grad_norm": 2.0755749756781943, "learning_rate": 1.9066484713506667e-05, "loss": 0.88, "step": 6639 }, { "epoch": 0.49349684132292826, "grad_norm": 2.304902167255017, "learning_rate": 1.906614617567444e-05, "loss": 1.1032, "step": 6640 }, { "epoch": 0.49357116313638055, "grad_norm": 6.250050792264445, "learning_rate": 1.9065807579474835e-05, "loss": 1.0107, "step": 6641 }, { "epoch": 0.4936454849498328, "grad_norm": 2.3816444524719946, "learning_rate": 1.906546892491003e-05, "loss": 1.0, "step": 6642 }, { "epoch": 0.49371980676328503, "grad_norm": 2.344589205197716, "learning_rate": 1.9065130211982203e-05, "loss": 0.9619, "step": 6643 }, { "epoch": 0.49379412857673727, "grad_norm": 9.706315052542436, "learning_rate": 1.9064791440693542e-05, "loss": 1.0775, "step": 6644 }, { "epoch": 0.4938684503901895, "grad_norm": 2.3147473351275765, "learning_rate": 1.9064452611046223e-05, "loss": 1.1238, "step": 6645 }, { "epoch": 0.49394277220364174, "grad_norm": 2.272691665248082, "learning_rate": 1.9064113723042426e-05, "loss": 0.8075, "step": 6646 }, { "epoch": 0.49401709401709404, "grad_norm": 2.4149685240473815, "learning_rate": 1.9063774776684338e-05, "loss": 0.9527, "step": 6647 }, { "epoch": 0.4940914158305463, "grad_norm": 2.820773513395996, "learning_rate": 1.9063435771974136e-05, "loss": 0.9862, "step": 6648 }, { "epoch": 0.4941657376439985, "grad_norm": 2.3103457770888993, "learning_rate": 1.906309670891401e-05, "loss": 1.004, "step": 6649 }, { "epoch": 0.49424005945745075, "grad_norm": 2.339705163616034, "learning_rate": 1.9062757587506134e-05, "loss": 0.8903, "step": 6650 }, { "epoch": 0.494314381270903, "grad_norm": 2.5563773961835974, "learning_rate": 1.9062418407752694e-05, "loss": 1.0636, "step": 6651 }, { "epoch": 0.4943887030843553, "grad_norm": 1.7936238387779964, "learning_rate": 1.9062079169655872e-05, "loss": 0.6904, "step": 6652 }, { "epoch": 0.4944630248978075, "grad_norm": 2.927697352699945, "learning_rate": 1.906173987321786e-05, "loss": 0.9715, "step": 6653 }, { "epoch": 0.49453734671125976, "grad_norm": 2.4476186345714197, "learning_rate": 1.9061400518440837e-05, "loss": 1.0086, "step": 6654 }, { "epoch": 0.494611668524712, "grad_norm": 2.6358116759677372, "learning_rate": 1.9061061105326985e-05, "loss": 0.9096, "step": 6655 }, { "epoch": 0.49468599033816424, "grad_norm": 2.101432386991227, "learning_rate": 1.906072163387849e-05, "loss": 0.8335, "step": 6656 }, { "epoch": 0.4947603121516165, "grad_norm": 11.502869368163468, "learning_rate": 1.9060382104097543e-05, "loss": 0.9913, "step": 6657 }, { "epoch": 0.49483463396506877, "grad_norm": 3.3513170427804475, "learning_rate": 1.9060042515986324e-05, "loss": 0.9599, "step": 6658 }, { "epoch": 0.494908955778521, "grad_norm": 1.721680859455149, "learning_rate": 1.9059702869547024e-05, "loss": 0.7752, "step": 6659 }, { "epoch": 0.49498327759197325, "grad_norm": 1.973263192890649, "learning_rate": 1.9059363164781825e-05, "loss": 0.7879, "step": 6660 }, { "epoch": 0.4950575994054255, "grad_norm": 3.079764057287819, "learning_rate": 1.905902340169291e-05, "loss": 0.9201, "step": 6661 }, { "epoch": 0.4951319212188777, "grad_norm": 2.8031085561772215, "learning_rate": 1.9058683580282483e-05, "loss": 1.0643, "step": 6662 }, { "epoch": 0.49520624303232996, "grad_norm": 1.8123473054118164, "learning_rate": 1.9058343700552715e-05, "loss": 0.7605, "step": 6663 }, { "epoch": 0.49528056484578226, "grad_norm": 2.450305019043694, "learning_rate": 1.9058003762505803e-05, "loss": 0.9108, "step": 6664 }, { "epoch": 0.4953548866592345, "grad_norm": 2.5049980159720207, "learning_rate": 1.905766376614393e-05, "loss": 0.9067, "step": 6665 }, { "epoch": 0.49542920847268673, "grad_norm": 2.0497568053723465, "learning_rate": 1.905732371146929e-05, "loss": 1.0474, "step": 6666 }, { "epoch": 0.49550353028613897, "grad_norm": 2.0859661908471976, "learning_rate": 1.905698359848407e-05, "loss": 0.8457, "step": 6667 }, { "epoch": 0.4955778520995912, "grad_norm": 2.7295201011426173, "learning_rate": 1.9056643427190456e-05, "loss": 0.8245, "step": 6668 }, { "epoch": 0.4956521739130435, "grad_norm": 2.177423353424784, "learning_rate": 1.9056303197590646e-05, "loss": 1.023, "step": 6669 }, { "epoch": 0.49572649572649574, "grad_norm": 1.8215203140573553, "learning_rate": 1.9055962909686824e-05, "loss": 0.7813, "step": 6670 }, { "epoch": 0.495800817539948, "grad_norm": 2.1537356970427033, "learning_rate": 1.905562256348118e-05, "loss": 0.9058, "step": 6671 }, { "epoch": 0.4958751393534002, "grad_norm": 6.198231964200663, "learning_rate": 1.9055282158975915e-05, "loss": 0.8734, "step": 6672 }, { "epoch": 0.49594946116685246, "grad_norm": 1.9461535213384542, "learning_rate": 1.9054941696173206e-05, "loss": 0.8765, "step": 6673 }, { "epoch": 0.4960237829803047, "grad_norm": 2.1222075278131607, "learning_rate": 1.9054601175075257e-05, "loss": 0.9578, "step": 6674 }, { "epoch": 0.496098104793757, "grad_norm": 2.024406687987379, "learning_rate": 1.9054260595684254e-05, "loss": 0.8278, "step": 6675 }, { "epoch": 0.4961724266072092, "grad_norm": 2.0240283407346644, "learning_rate": 1.9053919958002387e-05, "loss": 0.8988, "step": 6676 }, { "epoch": 0.49624674842066147, "grad_norm": 2.2943532465560206, "learning_rate": 1.9053579262031857e-05, "loss": 0.9329, "step": 6677 }, { "epoch": 0.4963210702341137, "grad_norm": 8.1515568419125, "learning_rate": 1.9053238507774852e-05, "loss": 0.9948, "step": 6678 }, { "epoch": 0.49639539204756594, "grad_norm": 11.061684425057257, "learning_rate": 1.905289769523357e-05, "loss": 0.8306, "step": 6679 }, { "epoch": 0.49646971386101824, "grad_norm": 2.0317542767416623, "learning_rate": 1.9052556824410202e-05, "loss": 0.8442, "step": 6680 }, { "epoch": 0.4965440356744705, "grad_norm": 5.837033074874932, "learning_rate": 1.9052215895306942e-05, "loss": 1.1992, "step": 6681 }, { "epoch": 0.4966183574879227, "grad_norm": 3.0828379003266915, "learning_rate": 1.9051874907925985e-05, "loss": 0.9186, "step": 6682 }, { "epoch": 0.49669267930137495, "grad_norm": 2.4891244904446945, "learning_rate": 1.9051533862269527e-05, "loss": 1.0133, "step": 6683 }, { "epoch": 0.4967670011148272, "grad_norm": 2.8898511626686356, "learning_rate": 1.905119275833976e-05, "loss": 0.7073, "step": 6684 }, { "epoch": 0.4968413229282794, "grad_norm": 3.390620119526111, "learning_rate": 1.905085159613889e-05, "loss": 0.8503, "step": 6685 }, { "epoch": 0.4969156447417317, "grad_norm": 2.598903204074951, "learning_rate": 1.9050510375669104e-05, "loss": 0.8938, "step": 6686 }, { "epoch": 0.49698996655518396, "grad_norm": 2.155314988133854, "learning_rate": 1.9050169096932602e-05, "loss": 0.7337, "step": 6687 }, { "epoch": 0.4970642883686362, "grad_norm": 2.0467261724447345, "learning_rate": 1.9049827759931583e-05, "loss": 0.9155, "step": 6688 }, { "epoch": 0.49713861018208844, "grad_norm": 2.5518860534998526, "learning_rate": 1.904948636466824e-05, "loss": 0.9636, "step": 6689 }, { "epoch": 0.4972129319955407, "grad_norm": 2.132144613838876, "learning_rate": 1.9049144911144773e-05, "loss": 0.9478, "step": 6690 }, { "epoch": 0.4972872538089929, "grad_norm": 2.0426017126898244, "learning_rate": 1.904880339936338e-05, "loss": 0.8031, "step": 6691 }, { "epoch": 0.4973615756224452, "grad_norm": 2.1189971028157926, "learning_rate": 1.9048461829326265e-05, "loss": 0.758, "step": 6692 }, { "epoch": 0.49743589743589745, "grad_norm": 2.7947515885087046, "learning_rate": 1.904812020103562e-05, "loss": 0.7009, "step": 6693 }, { "epoch": 0.4975102192493497, "grad_norm": 1.9931029507792402, "learning_rate": 1.9047778514493644e-05, "loss": 0.9115, "step": 6694 }, { "epoch": 0.4975845410628019, "grad_norm": 1.7742313389303905, "learning_rate": 1.904743676970254e-05, "loss": 0.8096, "step": 6695 }, { "epoch": 0.49765886287625416, "grad_norm": 2.4450238367053543, "learning_rate": 1.904709496666451e-05, "loss": 1.0015, "step": 6696 }, { "epoch": 0.49773318468970645, "grad_norm": 2.3483967948767717, "learning_rate": 1.904675310538175e-05, "loss": 0.8477, "step": 6697 }, { "epoch": 0.4978075065031587, "grad_norm": 2.2290090056653065, "learning_rate": 1.9046411185856465e-05, "loss": 0.7992, "step": 6698 }, { "epoch": 0.49788182831661093, "grad_norm": 2.0461748220855327, "learning_rate": 1.9046069208090854e-05, "loss": 0.9119, "step": 6699 }, { "epoch": 0.49795615013006317, "grad_norm": 2.3753099621015927, "learning_rate": 1.9045727172087117e-05, "loss": 0.9456, "step": 6700 }, { "epoch": 0.4980304719435154, "grad_norm": 2.1517400559725113, "learning_rate": 1.904538507784746e-05, "loss": 0.8179, "step": 6701 }, { "epoch": 0.49810479375696765, "grad_norm": 2.4552979734181744, "learning_rate": 1.904504292537408e-05, "loss": 0.9989, "step": 6702 }, { "epoch": 0.49817911557041994, "grad_norm": 2.4506711227268667, "learning_rate": 1.9044700714669183e-05, "loss": 0.8322, "step": 6703 }, { "epoch": 0.4982534373838722, "grad_norm": 2.545680607203186, "learning_rate": 1.9044358445734973e-05, "loss": 0.9078, "step": 6704 }, { "epoch": 0.4983277591973244, "grad_norm": 1.9891516278687098, "learning_rate": 1.9044016118573655e-05, "loss": 0.5392, "step": 6705 }, { "epoch": 0.49840208101077665, "grad_norm": 2.382880046449319, "learning_rate": 1.9043673733187428e-05, "loss": 0.8382, "step": 6706 }, { "epoch": 0.4984764028242289, "grad_norm": 4.7255771712211, "learning_rate": 1.90433312895785e-05, "loss": 1.2266, "step": 6707 }, { "epoch": 0.4985507246376812, "grad_norm": 3.3663879298554127, "learning_rate": 1.9042988787749075e-05, "loss": 0.8915, "step": 6708 }, { "epoch": 0.4986250464511334, "grad_norm": 2.28261166231498, "learning_rate": 1.9042646227701357e-05, "loss": 0.8331, "step": 6709 }, { "epoch": 0.49869936826458566, "grad_norm": 1.9485866831247955, "learning_rate": 1.904230360943755e-05, "loss": 0.9973, "step": 6710 }, { "epoch": 0.4987736900780379, "grad_norm": 2.0943375625249727, "learning_rate": 1.904196093295986e-05, "loss": 0.9485, "step": 6711 }, { "epoch": 0.49884801189149014, "grad_norm": 2.5591090071074216, "learning_rate": 1.90416181982705e-05, "loss": 0.8919, "step": 6712 }, { "epoch": 0.4989223337049424, "grad_norm": 2.154945096873275, "learning_rate": 1.9041275405371668e-05, "loss": 1.0564, "step": 6713 }, { "epoch": 0.49899665551839467, "grad_norm": 2.1230754976847326, "learning_rate": 1.9040932554265574e-05, "loss": 0.7412, "step": 6714 }, { "epoch": 0.4990709773318469, "grad_norm": 1.818259762359969, "learning_rate": 1.9040589644954425e-05, "loss": 0.7712, "step": 6715 }, { "epoch": 0.49914529914529915, "grad_norm": 2.5596695276595174, "learning_rate": 1.904024667744043e-05, "loss": 0.778, "step": 6716 }, { "epoch": 0.4992196209587514, "grad_norm": 2.697386101393637, "learning_rate": 1.9039903651725797e-05, "loss": 1.0076, "step": 6717 }, { "epoch": 0.4992939427722036, "grad_norm": 2.4277964958528337, "learning_rate": 1.903956056781273e-05, "loss": 0.814, "step": 6718 }, { "epoch": 0.49936826458565586, "grad_norm": 2.8148360380018085, "learning_rate": 1.9039217425703444e-05, "loss": 0.9318, "step": 6719 }, { "epoch": 0.49944258639910816, "grad_norm": 2.1821676471740172, "learning_rate": 1.9038874225400147e-05, "loss": 0.815, "step": 6720 }, { "epoch": 0.4995169082125604, "grad_norm": 2.29794529065112, "learning_rate": 1.9038530966905043e-05, "loss": 0.9304, "step": 6721 }, { "epoch": 0.49959123002601263, "grad_norm": 2.292576600341188, "learning_rate": 1.9038187650220347e-05, "loss": 0.6937, "step": 6722 }, { "epoch": 0.49966555183946487, "grad_norm": 2.031707388908194, "learning_rate": 1.9037844275348266e-05, "loss": 0.7771, "step": 6723 }, { "epoch": 0.4997398736529171, "grad_norm": 1.6434330484669062, "learning_rate": 1.9037500842291015e-05, "loss": 0.6908, "step": 6724 }, { "epoch": 0.4998141954663694, "grad_norm": 2.6544141315560017, "learning_rate": 1.90371573510508e-05, "loss": 1.062, "step": 6725 }, { "epoch": 0.49988851727982164, "grad_norm": 2.146089882642097, "learning_rate": 1.9036813801629836e-05, "loss": 0.7409, "step": 6726 }, { "epoch": 0.4999628390932739, "grad_norm": 2.2950755132979808, "learning_rate": 1.9036470194030333e-05, "loss": 0.8941, "step": 6727 }, { "epoch": 0.5000371609067261, "grad_norm": 2.3148400067396464, "learning_rate": 1.903612652825451e-05, "loss": 1.2145, "step": 6728 }, { "epoch": 0.5001114827201784, "grad_norm": 1.94226942850346, "learning_rate": 1.9035782804304566e-05, "loss": 0.8964, "step": 6729 }, { "epoch": 0.5001858045336306, "grad_norm": 2.358498455060547, "learning_rate": 1.9035439022182723e-05, "loss": 1.0068, "step": 6730 }, { "epoch": 0.5002601263470828, "grad_norm": 2.1692012776177907, "learning_rate": 1.9035095181891194e-05, "loss": 0.9497, "step": 6731 }, { "epoch": 0.5003344481605351, "grad_norm": 2.0241322807096616, "learning_rate": 1.9034751283432188e-05, "loss": 0.8901, "step": 6732 }, { "epoch": 0.5004087699739873, "grad_norm": 1.6924581667550662, "learning_rate": 1.9034407326807925e-05, "loss": 0.9092, "step": 6733 }, { "epoch": 0.5004830917874397, "grad_norm": 2.8304490312796906, "learning_rate": 1.9034063312020615e-05, "loss": 0.8341, "step": 6734 }, { "epoch": 0.5005574136008919, "grad_norm": 2.1735381346409683, "learning_rate": 1.9033719239072473e-05, "loss": 0.7093, "step": 6735 }, { "epoch": 0.5006317354143441, "grad_norm": 2.039965342731465, "learning_rate": 1.9033375107965718e-05, "loss": 0.8492, "step": 6736 }, { "epoch": 0.5007060572277964, "grad_norm": 1.95067426275514, "learning_rate": 1.903303091870256e-05, "loss": 1.0071, "step": 6737 }, { "epoch": 0.5007803790412486, "grad_norm": 2.1386836091033388, "learning_rate": 1.903268667128522e-05, "loss": 0.8097, "step": 6738 }, { "epoch": 0.5008547008547009, "grad_norm": 2.6741657483568027, "learning_rate": 1.903234236571591e-05, "loss": 0.9326, "step": 6739 }, { "epoch": 0.5009290226681531, "grad_norm": 2.3283438174848903, "learning_rate": 1.903199800199685e-05, "loss": 0.7283, "step": 6740 }, { "epoch": 0.5010033444816053, "grad_norm": 2.408162062477778, "learning_rate": 1.9031653580130254e-05, "loss": 1.0313, "step": 6741 }, { "epoch": 0.5010776662950576, "grad_norm": 2.7024904277247725, "learning_rate": 1.903130910011834e-05, "loss": 1.0496, "step": 6742 }, { "epoch": 0.5011519881085098, "grad_norm": 2.533463890308014, "learning_rate": 1.903096456196333e-05, "loss": 1.0783, "step": 6743 }, { "epoch": 0.501226309921962, "grad_norm": 2.0536622198627774, "learning_rate": 1.9030619965667436e-05, "loss": 0.9576, "step": 6744 }, { "epoch": 0.5013006317354144, "grad_norm": 3.072224150657277, "learning_rate": 1.9030275311232883e-05, "loss": 0.6884, "step": 6745 }, { "epoch": 0.5013749535488666, "grad_norm": 2.1764394707262817, "learning_rate": 1.902993059866188e-05, "loss": 0.7745, "step": 6746 }, { "epoch": 0.5014492753623189, "grad_norm": 1.7527368836031134, "learning_rate": 1.9029585827956655e-05, "loss": 0.8701, "step": 6747 }, { "epoch": 0.5015235971757711, "grad_norm": 1.867167336335091, "learning_rate": 1.902924099911943e-05, "loss": 0.8186, "step": 6748 }, { "epoch": 0.5015979189892233, "grad_norm": 1.6513724866606885, "learning_rate": 1.9028896112152413e-05, "loss": 0.7207, "step": 6749 }, { "epoch": 0.5016722408026756, "grad_norm": 1.932992499345247, "learning_rate": 1.9028551167057837e-05, "loss": 0.8672, "step": 6750 }, { "epoch": 0.5017465626161278, "grad_norm": 2.083343396758109, "learning_rate": 1.902820616383791e-05, "loss": 0.7787, "step": 6751 }, { "epoch": 0.5018208844295801, "grad_norm": 1.8514611700235355, "learning_rate": 1.9027861102494864e-05, "loss": 0.9385, "step": 6752 }, { "epoch": 0.5018952062430323, "grad_norm": 2.493840877908044, "learning_rate": 1.902751598303092e-05, "loss": 0.8211, "step": 6753 }, { "epoch": 0.5019695280564845, "grad_norm": 5.183432794434326, "learning_rate": 1.9027170805448293e-05, "loss": 0.7977, "step": 6754 }, { "epoch": 0.5020438498699368, "grad_norm": 2.403051683355525, "learning_rate": 1.9026825569749208e-05, "loss": 1.0649, "step": 6755 }, { "epoch": 0.5021181716833891, "grad_norm": 2.0487383108488184, "learning_rate": 1.902648027593589e-05, "loss": 0.8836, "step": 6756 }, { "epoch": 0.5021924934968414, "grad_norm": 2.4818455425740438, "learning_rate": 1.9026134924010556e-05, "loss": 1.0635, "step": 6757 }, { "epoch": 0.5022668153102936, "grad_norm": 1.9803368153548058, "learning_rate": 1.9025789513975442e-05, "loss": 1.0462, "step": 6758 }, { "epoch": 0.5023411371237458, "grad_norm": 2.6273229745981572, "learning_rate": 1.9025444045832756e-05, "loss": 0.9593, "step": 6759 }, { "epoch": 0.5024154589371981, "grad_norm": 2.3820428419684228, "learning_rate": 1.9025098519584734e-05, "loss": 0.8937, "step": 6760 }, { "epoch": 0.5024897807506503, "grad_norm": 2.2388901374084234, "learning_rate": 1.9024752935233593e-05, "loss": 1.0298, "step": 6761 }, { "epoch": 0.5025641025641026, "grad_norm": 2.7950113569620028, "learning_rate": 1.9024407292781562e-05, "loss": 0.927, "step": 6762 }, { "epoch": 0.5026384243775548, "grad_norm": 2.0955802696706716, "learning_rate": 1.9024061592230864e-05, "loss": 0.9456, "step": 6763 }, { "epoch": 0.502712746191007, "grad_norm": 2.0887703542667513, "learning_rate": 1.9023715833583727e-05, "loss": 0.9566, "step": 6764 }, { "epoch": 0.5027870680044593, "grad_norm": 1.9503531467947173, "learning_rate": 1.902337001684238e-05, "loss": 0.8196, "step": 6765 }, { "epoch": 0.5028613898179115, "grad_norm": 1.779137700327927, "learning_rate": 1.9023024142009037e-05, "loss": 0.7726, "step": 6766 }, { "epoch": 0.5029357116313639, "grad_norm": 2.623963098483317, "learning_rate": 1.902267820908594e-05, "loss": 1.2378, "step": 6767 }, { "epoch": 0.5030100334448161, "grad_norm": 2.321985977665309, "learning_rate": 1.9022332218075304e-05, "loss": 0.884, "step": 6768 }, { "epoch": 0.5030843552582683, "grad_norm": 2.4001527608614666, "learning_rate": 1.9021986168979363e-05, "loss": 0.9667, "step": 6769 }, { "epoch": 0.5031586770717206, "grad_norm": 3.3698186492970006, "learning_rate": 1.902164006180034e-05, "loss": 1.0485, "step": 6770 }, { "epoch": 0.5032329988851728, "grad_norm": 2.2177486859639237, "learning_rate": 1.902129389654047e-05, "loss": 0.9288, "step": 6771 }, { "epoch": 0.503307320698625, "grad_norm": 1.9703074805278153, "learning_rate": 1.9020947673201977e-05, "loss": 0.8414, "step": 6772 }, { "epoch": 0.5033816425120773, "grad_norm": 1.7444828286932088, "learning_rate": 1.9020601391787092e-05, "loss": 0.7444, "step": 6773 }, { "epoch": 0.5034559643255295, "grad_norm": 1.910524541622376, "learning_rate": 1.9020255052298042e-05, "loss": 0.841, "step": 6774 }, { "epoch": 0.5035302861389818, "grad_norm": 2.820903560759429, "learning_rate": 1.9019908654737057e-05, "loss": 1.0444, "step": 6775 }, { "epoch": 0.503604607952434, "grad_norm": 2.2813397783840457, "learning_rate": 1.9019562199106373e-05, "loss": 0.9637, "step": 6776 }, { "epoch": 0.5036789297658862, "grad_norm": 2.2232882196789947, "learning_rate": 1.901921568540821e-05, "loss": 0.9, "step": 6777 }, { "epoch": 0.5037532515793386, "grad_norm": 1.8007851836397033, "learning_rate": 1.9018869113644808e-05, "loss": 0.8966, "step": 6778 }, { "epoch": 0.5038275733927908, "grad_norm": 2.3085413700986885, "learning_rate": 1.901852248381839e-05, "loss": 1.0306, "step": 6779 }, { "epoch": 0.5039018952062431, "grad_norm": 2.322901406091313, "learning_rate": 1.9018175795931197e-05, "loss": 0.8577, "step": 6780 }, { "epoch": 0.5039762170196953, "grad_norm": 2.4329298701228526, "learning_rate": 1.9017829049985456e-05, "loss": 0.8849, "step": 6781 }, { "epoch": 0.5040505388331475, "grad_norm": 1.7653669197066144, "learning_rate": 1.9017482245983395e-05, "loss": 0.7345, "step": 6782 }, { "epoch": 0.5041248606465998, "grad_norm": 2.9622762364085267, "learning_rate": 1.9017135383927255e-05, "loss": 0.7425, "step": 6783 }, { "epoch": 0.504199182460052, "grad_norm": 2.3518439599775536, "learning_rate": 1.9016788463819265e-05, "loss": 0.7727, "step": 6784 }, { "epoch": 0.5042735042735043, "grad_norm": 2.0044079750102886, "learning_rate": 1.901644148566166e-05, "loss": 0.9341, "step": 6785 }, { "epoch": 0.5043478260869565, "grad_norm": 2.6144934251056298, "learning_rate": 1.9016094449456667e-05, "loss": 0.8158, "step": 6786 }, { "epoch": 0.5044221479004087, "grad_norm": 2.4516875445930157, "learning_rate": 1.901574735520653e-05, "loss": 1.097, "step": 6787 }, { "epoch": 0.504496469713861, "grad_norm": 1.7173206454815846, "learning_rate": 1.901540020291348e-05, "loss": 0.8704, "step": 6788 }, { "epoch": 0.5045707915273132, "grad_norm": 2.4461984718502903, "learning_rate": 1.901505299257975e-05, "loss": 1.101, "step": 6789 }, { "epoch": 0.5046451133407656, "grad_norm": 2.1526322917698, "learning_rate": 1.9014705724207572e-05, "loss": 0.9014, "step": 6790 }, { "epoch": 0.5047194351542178, "grad_norm": 2.4804455389209807, "learning_rate": 1.9014358397799192e-05, "loss": 0.9376, "step": 6791 }, { "epoch": 0.50479375696767, "grad_norm": 1.9155386667052035, "learning_rate": 1.901401101335684e-05, "loss": 0.9247, "step": 6792 }, { "epoch": 0.5048680787811223, "grad_norm": 1.7518347653020871, "learning_rate": 1.9013663570882754e-05, "loss": 0.7516, "step": 6793 }, { "epoch": 0.5049424005945745, "grad_norm": 2.1757799033488916, "learning_rate": 1.9013316070379165e-05, "loss": 0.8354, "step": 6794 }, { "epoch": 0.5050167224080268, "grad_norm": 2.5225335876446304, "learning_rate": 1.901296851184832e-05, "loss": 1.0187, "step": 6795 }, { "epoch": 0.505091044221479, "grad_norm": 2.2179707334214505, "learning_rate": 1.9012620895292446e-05, "loss": 0.9329, "step": 6796 }, { "epoch": 0.5051653660349312, "grad_norm": 2.2403344965330723, "learning_rate": 1.901227322071379e-05, "loss": 0.9978, "step": 6797 }, { "epoch": 0.5052396878483835, "grad_norm": 2.2817600085467937, "learning_rate": 1.9011925488114586e-05, "loss": 0.8816, "step": 6798 }, { "epoch": 0.5053140096618357, "grad_norm": 2.158427266399904, "learning_rate": 1.9011577697497073e-05, "loss": 0.7596, "step": 6799 }, { "epoch": 0.5053883314752879, "grad_norm": 2.3394942291506693, "learning_rate": 1.901122984886349e-05, "loss": 0.9774, "step": 6800 }, { "epoch": 0.5054626532887403, "grad_norm": 1.9520102594390396, "learning_rate": 1.901088194221608e-05, "loss": 0.8971, "step": 6801 }, { "epoch": 0.5055369751021925, "grad_norm": 2.270500016100969, "learning_rate": 1.9010533977557073e-05, "loss": 0.9714, "step": 6802 }, { "epoch": 0.5056112969156448, "grad_norm": 2.1498280789660353, "learning_rate": 1.901018595488872e-05, "loss": 0.9715, "step": 6803 }, { "epoch": 0.505685618729097, "grad_norm": 2.2903975491249087, "learning_rate": 1.900983787421326e-05, "loss": 0.8161, "step": 6804 }, { "epoch": 0.5057599405425492, "grad_norm": 2.6101440991539753, "learning_rate": 1.9009489735532923e-05, "loss": 0.8905, "step": 6805 }, { "epoch": 0.5058342623560015, "grad_norm": 1.8244120485740636, "learning_rate": 1.9009141538849967e-05, "loss": 0.794, "step": 6806 }, { "epoch": 0.5059085841694537, "grad_norm": 2.2103664566430408, "learning_rate": 1.9008793284166622e-05, "loss": 0.8708, "step": 6807 }, { "epoch": 0.505982905982906, "grad_norm": 2.0680707239971476, "learning_rate": 1.9008444971485133e-05, "loss": 0.9526, "step": 6808 }, { "epoch": 0.5060572277963582, "grad_norm": 2.030663995891736, "learning_rate": 1.900809660080774e-05, "loss": 0.7926, "step": 6809 }, { "epoch": 0.5061315496098104, "grad_norm": 2.8465063223494744, "learning_rate": 1.9007748172136694e-05, "loss": 0.8475, "step": 6810 }, { "epoch": 0.5062058714232627, "grad_norm": 2.4507427569711324, "learning_rate": 1.900739968547423e-05, "loss": 0.976, "step": 6811 }, { "epoch": 0.506280193236715, "grad_norm": 2.2026026429921672, "learning_rate": 1.900705114082259e-05, "loss": 0.9792, "step": 6812 }, { "epoch": 0.5063545150501673, "grad_norm": 2.0792631269390314, "learning_rate": 1.900670253818403e-05, "loss": 0.715, "step": 6813 }, { "epoch": 0.5064288368636195, "grad_norm": 1.7602765411296404, "learning_rate": 1.9006353877560783e-05, "loss": 0.7315, "step": 6814 }, { "epoch": 0.5065031586770717, "grad_norm": 1.880994402339418, "learning_rate": 1.9006005158955095e-05, "loss": 0.8264, "step": 6815 }, { "epoch": 0.506577480490524, "grad_norm": 2.5474237304463654, "learning_rate": 1.9005656382369215e-05, "loss": 1.1356, "step": 6816 }, { "epoch": 0.5066518023039762, "grad_norm": 1.9583629706082113, "learning_rate": 1.9005307547805384e-05, "loss": 1.025, "step": 6817 }, { "epoch": 0.5067261241174285, "grad_norm": 2.8754629327383676, "learning_rate": 1.9004958655265854e-05, "loss": 0.9299, "step": 6818 }, { "epoch": 0.5068004459308807, "grad_norm": 2.2540871682187857, "learning_rate": 1.9004609704752868e-05, "loss": 0.8655, "step": 6819 }, { "epoch": 0.5068747677443329, "grad_norm": 1.9083431252664216, "learning_rate": 1.900426069626867e-05, "loss": 1.0522, "step": 6820 }, { "epoch": 0.5069490895577852, "grad_norm": 2.0725870694973025, "learning_rate": 1.9003911629815505e-05, "loss": 1.1545, "step": 6821 }, { "epoch": 0.5070234113712374, "grad_norm": 1.9429016950982418, "learning_rate": 1.900356250539563e-05, "loss": 0.7414, "step": 6822 }, { "epoch": 0.5070977331846898, "grad_norm": 3.1638662070305945, "learning_rate": 1.9003213323011285e-05, "loss": 1.1316, "step": 6823 }, { "epoch": 0.507172054998142, "grad_norm": 2.828125311055322, "learning_rate": 1.900286408266472e-05, "loss": 0.8764, "step": 6824 }, { "epoch": 0.5072463768115942, "grad_norm": 2.793066552523638, "learning_rate": 1.9002514784358182e-05, "loss": 0.7714, "step": 6825 }, { "epoch": 0.5073206986250465, "grad_norm": 2.0117521374022656, "learning_rate": 1.900216542809392e-05, "loss": 1.1329, "step": 6826 }, { "epoch": 0.5073950204384987, "grad_norm": 2.220236888098965, "learning_rate": 1.9001816013874185e-05, "loss": 0.9059, "step": 6827 }, { "epoch": 0.507469342251951, "grad_norm": 2.104423136731124, "learning_rate": 1.9001466541701227e-05, "loss": 0.9611, "step": 6828 }, { "epoch": 0.5075436640654032, "grad_norm": 2.2804636851292286, "learning_rate": 1.9001117011577294e-05, "loss": 0.9646, "step": 6829 }, { "epoch": 0.5076179858788554, "grad_norm": 3.908814043002267, "learning_rate": 1.9000767423504635e-05, "loss": 0.9394, "step": 6830 }, { "epoch": 0.5076923076923077, "grad_norm": 2.0000265281680525, "learning_rate": 1.9000417777485506e-05, "loss": 0.9677, "step": 6831 }, { "epoch": 0.5077666295057599, "grad_norm": 2.199416478989099, "learning_rate": 1.900006807352215e-05, "loss": 1.0372, "step": 6832 }, { "epoch": 0.5078409513192121, "grad_norm": 2.526208324142902, "learning_rate": 1.8999718311616828e-05, "loss": 0.6642, "step": 6833 }, { "epoch": 0.5079152731326645, "grad_norm": 2.1432848424711968, "learning_rate": 1.899936849177178e-05, "loss": 0.8799, "step": 6834 }, { "epoch": 0.5079895949461167, "grad_norm": 2.097290986797099, "learning_rate": 1.899901861398927e-05, "loss": 0.9119, "step": 6835 }, { "epoch": 0.508063916759569, "grad_norm": 2.2685260292480938, "learning_rate": 1.899866867827154e-05, "loss": 0.9684, "step": 6836 }, { "epoch": 0.5081382385730212, "grad_norm": 1.7063108670861242, "learning_rate": 1.899831868462085e-05, "loss": 0.6362, "step": 6837 }, { "epoch": 0.5082125603864734, "grad_norm": 3.4160018437558106, "learning_rate": 1.899796863303945e-05, "loss": 0.7871, "step": 6838 }, { "epoch": 0.5082868821999257, "grad_norm": 2.2316663273439565, "learning_rate": 1.8997618523529603e-05, "loss": 0.9066, "step": 6839 }, { "epoch": 0.5083612040133779, "grad_norm": 2.2060462968351655, "learning_rate": 1.8997268356093547e-05, "loss": 0.8475, "step": 6840 }, { "epoch": 0.5084355258268302, "grad_norm": 4.569629679912385, "learning_rate": 1.8996918130733542e-05, "loss": 0.7328, "step": 6841 }, { "epoch": 0.5085098476402824, "grad_norm": 2.2925754473941575, "learning_rate": 1.8996567847451853e-05, "loss": 0.8389, "step": 6842 }, { "epoch": 0.5085841694537346, "grad_norm": 2.857045171346438, "learning_rate": 1.899621750625072e-05, "loss": 0.8776, "step": 6843 }, { "epoch": 0.5086584912671869, "grad_norm": 2.096515875868768, "learning_rate": 1.8995867107132407e-05, "loss": 0.9734, "step": 6844 }, { "epoch": 0.5087328130806391, "grad_norm": 2.0847589196276806, "learning_rate": 1.899551665009917e-05, "loss": 0.9537, "step": 6845 }, { "epoch": 0.5088071348940915, "grad_norm": 2.498318747127204, "learning_rate": 1.8995166135153266e-05, "loss": 1.0654, "step": 6846 }, { "epoch": 0.5088814567075437, "grad_norm": 1.7554715254612125, "learning_rate": 1.8994815562296944e-05, "loss": 0.8644, "step": 6847 }, { "epoch": 0.5089557785209959, "grad_norm": 1.9441660871113202, "learning_rate": 1.8994464931532467e-05, "loss": 0.8665, "step": 6848 }, { "epoch": 0.5090301003344482, "grad_norm": 2.1746219192438927, "learning_rate": 1.8994114242862093e-05, "loss": 1.0211, "step": 6849 }, { "epoch": 0.5091044221479004, "grad_norm": 2.1259758570085694, "learning_rate": 1.899376349628808e-05, "loss": 0.8554, "step": 6850 }, { "epoch": 0.5091787439613527, "grad_norm": 2.5099093694368935, "learning_rate": 1.8993412691812682e-05, "loss": 0.9766, "step": 6851 }, { "epoch": 0.5092530657748049, "grad_norm": 9.476637436726842, "learning_rate": 1.8993061829438158e-05, "loss": 1.0681, "step": 6852 }, { "epoch": 0.5093273875882571, "grad_norm": 1.6978488423690605, "learning_rate": 1.899271090916677e-05, "loss": 0.8523, "step": 6853 }, { "epoch": 0.5094017094017094, "grad_norm": 2.0869590715814295, "learning_rate": 1.899235993100078e-05, "loss": 0.6206, "step": 6854 }, { "epoch": 0.5094760312151616, "grad_norm": 2.093906359619832, "learning_rate": 1.8992008894942438e-05, "loss": 0.8033, "step": 6855 }, { "epoch": 0.5095503530286138, "grad_norm": 2.1049938926151825, "learning_rate": 1.899165780099401e-05, "loss": 1.0722, "step": 6856 }, { "epoch": 0.5096246748420662, "grad_norm": 3.3035822360817257, "learning_rate": 1.8991306649157755e-05, "loss": 1.0624, "step": 6857 }, { "epoch": 0.5096989966555184, "grad_norm": 1.7555419499136344, "learning_rate": 1.8990955439435938e-05, "loss": 0.9478, "step": 6858 }, { "epoch": 0.5097733184689707, "grad_norm": 2.0838571417516447, "learning_rate": 1.8990604171830813e-05, "loss": 0.8449, "step": 6859 }, { "epoch": 0.5098476402824229, "grad_norm": 3.21714035892863, "learning_rate": 1.8990252846344647e-05, "loss": 0.9169, "step": 6860 }, { "epoch": 0.5099219620958751, "grad_norm": 2.6400881308966375, "learning_rate": 1.89899014629797e-05, "loss": 0.801, "step": 6861 }, { "epoch": 0.5099962839093274, "grad_norm": 2.1185190772145157, "learning_rate": 1.898955002173823e-05, "loss": 0.9349, "step": 6862 }, { "epoch": 0.5100706057227796, "grad_norm": 1.8936754097606348, "learning_rate": 1.8989198522622505e-05, "loss": 0.9786, "step": 6863 }, { "epoch": 0.5101449275362319, "grad_norm": 2.542384249395219, "learning_rate": 1.8988846965634788e-05, "loss": 0.9275, "step": 6864 }, { "epoch": 0.5102192493496841, "grad_norm": 2.6776002370807457, "learning_rate": 1.8988495350777342e-05, "loss": 0.8814, "step": 6865 }, { "epoch": 0.5102935711631363, "grad_norm": 7.531269882770457, "learning_rate": 1.8988143678052425e-05, "loss": 0.8281, "step": 6866 }, { "epoch": 0.5103678929765886, "grad_norm": 2.986281886012816, "learning_rate": 1.898779194746231e-05, "loss": 1.0727, "step": 6867 }, { "epoch": 0.5104422147900409, "grad_norm": 2.947289903363235, "learning_rate": 1.8987440159009253e-05, "loss": 1.0464, "step": 6868 }, { "epoch": 0.5105165366034932, "grad_norm": 2.0762084501102933, "learning_rate": 1.898708831269552e-05, "loss": 0.8006, "step": 6869 }, { "epoch": 0.5105908584169454, "grad_norm": 1.961104281918746, "learning_rate": 1.8986736408523385e-05, "loss": 0.844, "step": 6870 }, { "epoch": 0.5106651802303976, "grad_norm": 2.4147597506434155, "learning_rate": 1.8986384446495107e-05, "loss": 1.1894, "step": 6871 }, { "epoch": 0.5107395020438499, "grad_norm": 2.23946281427569, "learning_rate": 1.8986032426612947e-05, "loss": 0.8428, "step": 6872 }, { "epoch": 0.5108138238573021, "grad_norm": 3.1538290630921564, "learning_rate": 1.8985680348879182e-05, "loss": 0.9963, "step": 6873 }, { "epoch": 0.5108881456707544, "grad_norm": 2.5844470615121753, "learning_rate": 1.8985328213296072e-05, "loss": 1.1324, "step": 6874 }, { "epoch": 0.5109624674842066, "grad_norm": 2.665091639597291, "learning_rate": 1.898497601986588e-05, "loss": 0.8143, "step": 6875 }, { "epoch": 0.5110367892976588, "grad_norm": 16.882379832267784, "learning_rate": 1.8984623768590884e-05, "loss": 1.1208, "step": 6876 }, { "epoch": 0.5111111111111111, "grad_norm": 2.239929726194949, "learning_rate": 1.8984271459473345e-05, "loss": 0.8319, "step": 6877 }, { "epoch": 0.5111854329245633, "grad_norm": 1.8757539770071838, "learning_rate": 1.8983919092515535e-05, "loss": 0.959, "step": 6878 }, { "epoch": 0.5112597547380157, "grad_norm": 2.0827649306639144, "learning_rate": 1.8983566667719714e-05, "loss": 0.8098, "step": 6879 }, { "epoch": 0.5113340765514679, "grad_norm": 1.6977399767430275, "learning_rate": 1.898321418508816e-05, "loss": 0.81, "step": 6880 }, { "epoch": 0.5114083983649201, "grad_norm": 2.2946560408843992, "learning_rate": 1.8982861644623143e-05, "loss": 0.9985, "step": 6881 }, { "epoch": 0.5114827201783724, "grad_norm": 2.0672971522902324, "learning_rate": 1.8982509046326923e-05, "loss": 1.0261, "step": 6882 }, { "epoch": 0.5115570419918246, "grad_norm": 2.3022333279710088, "learning_rate": 1.898215639020178e-05, "loss": 0.9684, "step": 6883 }, { "epoch": 0.5116313638052769, "grad_norm": 2.192692585068193, "learning_rate": 1.8981803676249977e-05, "loss": 0.7824, "step": 6884 }, { "epoch": 0.5117056856187291, "grad_norm": 2.123233279121315, "learning_rate": 1.898145090447379e-05, "loss": 1.0366, "step": 6885 }, { "epoch": 0.5117800074321813, "grad_norm": 2.1651951066598536, "learning_rate": 1.8981098074875488e-05, "loss": 0.8099, "step": 6886 }, { "epoch": 0.5118543292456336, "grad_norm": 2.0663810632371105, "learning_rate": 1.8980745187457342e-05, "loss": 0.8622, "step": 6887 }, { "epoch": 0.5119286510590858, "grad_norm": 2.468076990038276, "learning_rate": 1.8980392242221624e-05, "loss": 0.7866, "step": 6888 }, { "epoch": 0.512002972872538, "grad_norm": 2.14263998330848, "learning_rate": 1.8980039239170606e-05, "loss": 0.8291, "step": 6889 }, { "epoch": 0.5120772946859904, "grad_norm": 1.9357768963620225, "learning_rate": 1.8979686178306563e-05, "loss": 0.7384, "step": 6890 }, { "epoch": 0.5121516164994426, "grad_norm": 2.643709427923004, "learning_rate": 1.8979333059631767e-05, "loss": 1.0215, "step": 6891 }, { "epoch": 0.5122259383128949, "grad_norm": 2.0602931130223405, "learning_rate": 1.8978979883148486e-05, "loss": 0.9188, "step": 6892 }, { "epoch": 0.5123002601263471, "grad_norm": 2.3779201697543386, "learning_rate": 1.8978626648859004e-05, "loss": 1.0625, "step": 6893 }, { "epoch": 0.5123745819397993, "grad_norm": 2.0867907280217923, "learning_rate": 1.8978273356765585e-05, "loss": 1.0695, "step": 6894 }, { "epoch": 0.5124489037532516, "grad_norm": 1.8207799472895367, "learning_rate": 1.8977920006870513e-05, "loss": 0.7962, "step": 6895 }, { "epoch": 0.5125232255667038, "grad_norm": 2.121480513881684, "learning_rate": 1.8977566599176057e-05, "loss": 0.9323, "step": 6896 }, { "epoch": 0.5125975473801561, "grad_norm": 1.9619346443763117, "learning_rate": 1.897721313368449e-05, "loss": 0.8822, "step": 6897 }, { "epoch": 0.5126718691936083, "grad_norm": 2.773413972253824, "learning_rate": 1.8976859610398094e-05, "loss": 1.0709, "step": 6898 }, { "epoch": 0.5127461910070605, "grad_norm": 2.0763691294580555, "learning_rate": 1.8976506029319138e-05, "loss": 1.0515, "step": 6899 }, { "epoch": 0.5128205128205128, "grad_norm": 1.7669532138533572, "learning_rate": 1.89761523904499e-05, "loss": 0.8183, "step": 6900 }, { "epoch": 0.512894834633965, "grad_norm": 2.420014568055216, "learning_rate": 1.8975798693792663e-05, "loss": 0.9793, "step": 6901 }, { "epoch": 0.5129691564474174, "grad_norm": 2.5771481797293774, "learning_rate": 1.89754449393497e-05, "loss": 0.855, "step": 6902 }, { "epoch": 0.5130434782608696, "grad_norm": 1.8674508076229295, "learning_rate": 1.8975091127123287e-05, "loss": 0.8907, "step": 6903 }, { "epoch": 0.5131178000743218, "grad_norm": 2.141335660019834, "learning_rate": 1.8974737257115704e-05, "loss": 0.9539, "step": 6904 }, { "epoch": 0.5131921218877741, "grad_norm": 2.2500157349897694, "learning_rate": 1.8974383329329228e-05, "loss": 1.032, "step": 6905 }, { "epoch": 0.5132664437012263, "grad_norm": 1.8088460636900696, "learning_rate": 1.8974029343766137e-05, "loss": 0.6064, "step": 6906 }, { "epoch": 0.5133407655146786, "grad_norm": 2.1141599238669238, "learning_rate": 1.897367530042871e-05, "loss": 0.8368, "step": 6907 }, { "epoch": 0.5134150873281308, "grad_norm": 2.4208104030282844, "learning_rate": 1.8973321199319227e-05, "loss": 0.895, "step": 6908 }, { "epoch": 0.513489409141583, "grad_norm": 2.633046083962432, "learning_rate": 1.8972967040439968e-05, "loss": 0.9986, "step": 6909 }, { "epoch": 0.5135637309550353, "grad_norm": 2.924214633534352, "learning_rate": 1.8972612823793214e-05, "loss": 0.933, "step": 6910 }, { "epoch": 0.5136380527684875, "grad_norm": 1.9061231779267165, "learning_rate": 1.8972258549381243e-05, "loss": 0.7754, "step": 6911 }, { "epoch": 0.5137123745819397, "grad_norm": 1.6298732050104328, "learning_rate": 1.8971904217206337e-05, "loss": 0.6598, "step": 6912 }, { "epoch": 0.5137866963953921, "grad_norm": 1.8471515937144722, "learning_rate": 1.8971549827270778e-05, "loss": 0.7559, "step": 6913 }, { "epoch": 0.5138610182088443, "grad_norm": 2.361382288545101, "learning_rate": 1.8971195379576848e-05, "loss": 0.9418, "step": 6914 }, { "epoch": 0.5139353400222966, "grad_norm": 2.121572654074306, "learning_rate": 1.8970840874126825e-05, "loss": 0.9578, "step": 6915 }, { "epoch": 0.5140096618357488, "grad_norm": 2.3427756344351636, "learning_rate": 1.8970486310922992e-05, "loss": 0.8398, "step": 6916 }, { "epoch": 0.514083983649201, "grad_norm": 2.5838550980483386, "learning_rate": 1.897013168996764e-05, "loss": 0.8928, "step": 6917 }, { "epoch": 0.5141583054626533, "grad_norm": 2.2212321998081874, "learning_rate": 1.896977701126304e-05, "loss": 1.0728, "step": 6918 }, { "epoch": 0.5142326272761055, "grad_norm": 2.2519436509409254, "learning_rate": 1.8969422274811483e-05, "loss": 0.9925, "step": 6919 }, { "epoch": 0.5143069490895578, "grad_norm": 2.173129035330964, "learning_rate": 1.8969067480615253e-05, "loss": 0.8422, "step": 6920 }, { "epoch": 0.51438127090301, "grad_norm": 2.0394038054936487, "learning_rate": 1.896871262867663e-05, "loss": 1.0449, "step": 6921 }, { "epoch": 0.5144555927164622, "grad_norm": 2.1980576376456993, "learning_rate": 1.89683577189979e-05, "loss": 0.9568, "step": 6922 }, { "epoch": 0.5145299145299145, "grad_norm": 2.611615937242262, "learning_rate": 1.896800275158135e-05, "loss": 1.1093, "step": 6923 }, { "epoch": 0.5146042363433668, "grad_norm": 1.8945068204136406, "learning_rate": 1.896764772642926e-05, "loss": 0.8806, "step": 6924 }, { "epoch": 0.5146785581568191, "grad_norm": 2.2482908567685054, "learning_rate": 1.896729264354392e-05, "loss": 0.8771, "step": 6925 }, { "epoch": 0.5147528799702713, "grad_norm": 2.319469108168234, "learning_rate": 1.896693750292762e-05, "loss": 0.8907, "step": 6926 }, { "epoch": 0.5148272017837235, "grad_norm": 2.7681320883785947, "learning_rate": 1.8966582304582636e-05, "loss": 0.9375, "step": 6927 }, { "epoch": 0.5149015235971758, "grad_norm": 2.5584137342442466, "learning_rate": 1.8966227048511263e-05, "loss": 0.8113, "step": 6928 }, { "epoch": 0.514975845410628, "grad_norm": 2.6109816891331454, "learning_rate": 1.8965871734715784e-05, "loss": 0.6892, "step": 6929 }, { "epoch": 0.5150501672240803, "grad_norm": 1.8511044515065436, "learning_rate": 1.8965516363198492e-05, "loss": 0.759, "step": 6930 }, { "epoch": 0.5151244890375325, "grad_norm": 2.2996748696784794, "learning_rate": 1.8965160933961665e-05, "loss": 0.8803, "step": 6931 }, { "epoch": 0.5151988108509847, "grad_norm": 2.2939096092395537, "learning_rate": 1.8964805447007603e-05, "loss": 0.7387, "step": 6932 }, { "epoch": 0.515273132664437, "grad_norm": 2.295075409283028, "learning_rate": 1.8964449902338583e-05, "loss": 0.9635, "step": 6933 }, { "epoch": 0.5153474544778892, "grad_norm": 1.984391150719141, "learning_rate": 1.8964094299956905e-05, "loss": 0.9185, "step": 6934 }, { "epoch": 0.5154217762913416, "grad_norm": 1.8393866752525716, "learning_rate": 1.896373863986485e-05, "loss": 0.7704, "step": 6935 }, { "epoch": 0.5154960981047938, "grad_norm": 2.3834698500333875, "learning_rate": 1.896338292206471e-05, "loss": 0.9145, "step": 6936 }, { "epoch": 0.515570419918246, "grad_norm": 1.6446411324866521, "learning_rate": 1.8963027146558775e-05, "loss": 0.6304, "step": 6937 }, { "epoch": 0.5156447417316983, "grad_norm": 1.8962599049135231, "learning_rate": 1.896267131334934e-05, "loss": 0.9441, "step": 6938 }, { "epoch": 0.5157190635451505, "grad_norm": 1.6993504322484243, "learning_rate": 1.896231542243869e-05, "loss": 0.856, "step": 6939 }, { "epoch": 0.5157933853586028, "grad_norm": 2.036447491673201, "learning_rate": 1.896195947382912e-05, "loss": 0.8875, "step": 6940 }, { "epoch": 0.515867707172055, "grad_norm": 1.8921981743717595, "learning_rate": 1.8961603467522918e-05, "loss": 0.7849, "step": 6941 }, { "epoch": 0.5159420289855072, "grad_norm": 1.934719976298852, "learning_rate": 1.896124740352238e-05, "loss": 0.7518, "step": 6942 }, { "epoch": 0.5160163507989595, "grad_norm": 1.7485094627200104, "learning_rate": 1.8960891281829795e-05, "loss": 0.822, "step": 6943 }, { "epoch": 0.5160906726124117, "grad_norm": 2.6357430150367436, "learning_rate": 1.8960535102447454e-05, "loss": 0.8656, "step": 6944 }, { "epoch": 0.5161649944258639, "grad_norm": 3.2743032979059743, "learning_rate": 1.8960178865377656e-05, "loss": 0.8891, "step": 6945 }, { "epoch": 0.5162393162393163, "grad_norm": 1.9022619470262088, "learning_rate": 1.895982257062269e-05, "loss": 0.8018, "step": 6946 }, { "epoch": 0.5163136380527685, "grad_norm": 2.3252708671402464, "learning_rate": 1.8959466218184853e-05, "loss": 0.8141, "step": 6947 }, { "epoch": 0.5163879598662208, "grad_norm": 1.8067468069403214, "learning_rate": 1.8959109808066437e-05, "loss": 0.6483, "step": 6948 }, { "epoch": 0.516462281679673, "grad_norm": 1.941828063502324, "learning_rate": 1.8958753340269737e-05, "loss": 0.948, "step": 6949 }, { "epoch": 0.5165366034931252, "grad_norm": 2.6828805084136365, "learning_rate": 1.8958396814797045e-05, "loss": 1.1279, "step": 6950 }, { "epoch": 0.5166109253065775, "grad_norm": 2.6869898248874327, "learning_rate": 1.895804023165066e-05, "loss": 0.689, "step": 6951 }, { "epoch": 0.5166852471200297, "grad_norm": 3.5658659330115428, "learning_rate": 1.895768359083288e-05, "loss": 1.2555, "step": 6952 }, { "epoch": 0.516759568933482, "grad_norm": 2.491339236228958, "learning_rate": 1.8957326892345995e-05, "loss": 0.9624, "step": 6953 }, { "epoch": 0.5168338907469342, "grad_norm": 3.554158894603732, "learning_rate": 1.8956970136192306e-05, "loss": 0.7208, "step": 6954 }, { "epoch": 0.5169082125603864, "grad_norm": 2.189246635263767, "learning_rate": 1.8956613322374105e-05, "loss": 0.9275, "step": 6955 }, { "epoch": 0.5169825343738387, "grad_norm": 2.3155968553693915, "learning_rate": 1.8956256450893696e-05, "loss": 1.1743, "step": 6956 }, { "epoch": 0.517056856187291, "grad_norm": 2.0208535110339945, "learning_rate": 1.8955899521753373e-05, "loss": 0.7938, "step": 6957 }, { "epoch": 0.5171311780007433, "grad_norm": 2.183310213823414, "learning_rate": 1.895554253495543e-05, "loss": 0.8958, "step": 6958 }, { "epoch": 0.5172054998141955, "grad_norm": 1.7270933509903412, "learning_rate": 1.895518549050217e-05, "loss": 0.7495, "step": 6959 }, { "epoch": 0.5172798216276477, "grad_norm": 1.8531542441891597, "learning_rate": 1.895482838839589e-05, "loss": 0.8547, "step": 6960 }, { "epoch": 0.5173541434411, "grad_norm": 2.432559502318503, "learning_rate": 1.8954471228638892e-05, "loss": 0.9442, "step": 6961 }, { "epoch": 0.5174284652545522, "grad_norm": 1.8672232801734736, "learning_rate": 1.895411401123347e-05, "loss": 0.843, "step": 6962 }, { "epoch": 0.5175027870680045, "grad_norm": 2.6484483183136516, "learning_rate": 1.895375673618193e-05, "loss": 0.9774, "step": 6963 }, { "epoch": 0.5175771088814567, "grad_norm": 2.2891722704966053, "learning_rate": 1.8953399403486565e-05, "loss": 0.9503, "step": 6964 }, { "epoch": 0.5176514306949089, "grad_norm": 1.6469687677700084, "learning_rate": 1.895304201314968e-05, "loss": 0.8314, "step": 6965 }, { "epoch": 0.5177257525083612, "grad_norm": 2.1012650422785004, "learning_rate": 1.8952684565173574e-05, "loss": 0.8756, "step": 6966 }, { "epoch": 0.5178000743218134, "grad_norm": 1.7186439497796182, "learning_rate": 1.895232705956055e-05, "loss": 0.7429, "step": 6967 }, { "epoch": 0.5178743961352656, "grad_norm": 2.380359970118401, "learning_rate": 1.8951969496312914e-05, "loss": 0.8722, "step": 6968 }, { "epoch": 0.517948717948718, "grad_norm": 1.8368031024606328, "learning_rate": 1.895161187543296e-05, "loss": 0.7102, "step": 6969 }, { "epoch": 0.5180230397621702, "grad_norm": 1.97112739207256, "learning_rate": 1.895125419692299e-05, "loss": 0.7902, "step": 6970 }, { "epoch": 0.5180973615756225, "grad_norm": 2.352729638015567, "learning_rate": 1.8950896460785314e-05, "loss": 0.687, "step": 6971 }, { "epoch": 0.5181716833890747, "grad_norm": 2.7286187242055053, "learning_rate": 1.895053866702223e-05, "loss": 1.0919, "step": 6972 }, { "epoch": 0.518246005202527, "grad_norm": 2.081503575740949, "learning_rate": 1.895018081563604e-05, "loss": 0.903, "step": 6973 }, { "epoch": 0.5183203270159792, "grad_norm": 1.9633269841646366, "learning_rate": 1.894982290662905e-05, "loss": 0.7865, "step": 6974 }, { "epoch": 0.5183946488294314, "grad_norm": 2.048162646053898, "learning_rate": 1.8949464940003565e-05, "loss": 0.7039, "step": 6975 }, { "epoch": 0.5184689706428837, "grad_norm": 2.334305903829167, "learning_rate": 1.8949106915761893e-05, "loss": 1.0418, "step": 6976 }, { "epoch": 0.5185432924563359, "grad_norm": 1.8754277044358185, "learning_rate": 1.894874883390633e-05, "loss": 0.7443, "step": 6977 }, { "epoch": 0.5186176142697881, "grad_norm": 2.1999447767611535, "learning_rate": 1.894839069443919e-05, "loss": 0.8919, "step": 6978 }, { "epoch": 0.5186919360832404, "grad_norm": 1.7150619128501123, "learning_rate": 1.894803249736277e-05, "loss": 0.752, "step": 6979 }, { "epoch": 0.5187662578966927, "grad_norm": 1.8374938929357976, "learning_rate": 1.8947674242679385e-05, "loss": 0.8052, "step": 6980 }, { "epoch": 0.518840579710145, "grad_norm": 2.7602210670863534, "learning_rate": 1.894731593039134e-05, "loss": 0.8025, "step": 6981 }, { "epoch": 0.5189149015235972, "grad_norm": 2.200414390754891, "learning_rate": 1.8946957560500935e-05, "loss": 0.8889, "step": 6982 }, { "epoch": 0.5189892233370494, "grad_norm": 1.8523595049881298, "learning_rate": 1.8946599133010485e-05, "loss": 0.8642, "step": 6983 }, { "epoch": 0.5190635451505017, "grad_norm": 2.1521864942251545, "learning_rate": 1.894624064792229e-05, "loss": 1.0604, "step": 6984 }, { "epoch": 0.5191378669639539, "grad_norm": 2.2839938786648055, "learning_rate": 1.8945882105238665e-05, "loss": 1.1401, "step": 6985 }, { "epoch": 0.5192121887774062, "grad_norm": 2.339108121463014, "learning_rate": 1.8945523504961913e-05, "loss": 1.0687, "step": 6986 }, { "epoch": 0.5192865105908584, "grad_norm": 1.6651091208919262, "learning_rate": 1.8945164847094345e-05, "loss": 0.6628, "step": 6987 }, { "epoch": 0.5193608324043106, "grad_norm": 1.9180023135695683, "learning_rate": 1.8944806131638273e-05, "loss": 0.9008, "step": 6988 }, { "epoch": 0.5194351542177629, "grad_norm": 1.8901456995658132, "learning_rate": 1.8944447358596002e-05, "loss": 0.8012, "step": 6989 }, { "epoch": 0.5195094760312151, "grad_norm": 3.912776392562892, "learning_rate": 1.894408852796984e-05, "loss": 0.6224, "step": 6990 }, { "epoch": 0.5195837978446675, "grad_norm": 1.8386467654454473, "learning_rate": 1.8943729639762104e-05, "loss": 0.8045, "step": 6991 }, { "epoch": 0.5196581196581197, "grad_norm": 2.2158719957653616, "learning_rate": 1.8943370693975097e-05, "loss": 0.9337, "step": 6992 }, { "epoch": 0.5197324414715719, "grad_norm": 15.26897810477045, "learning_rate": 1.8943011690611134e-05, "loss": 0.9865, "step": 6993 }, { "epoch": 0.5198067632850242, "grad_norm": 2.1415092180612154, "learning_rate": 1.894265262967253e-05, "loss": 0.9579, "step": 6994 }, { "epoch": 0.5198810850984764, "grad_norm": 3.8919691461872694, "learning_rate": 1.8942293511161586e-05, "loss": 0.8229, "step": 6995 }, { "epoch": 0.5199554069119287, "grad_norm": 1.7435376016882245, "learning_rate": 1.8941934335080625e-05, "loss": 0.9129, "step": 6996 }, { "epoch": 0.5200297287253809, "grad_norm": 2.188018634095416, "learning_rate": 1.894157510143195e-05, "loss": 0.8661, "step": 6997 }, { "epoch": 0.5201040505388331, "grad_norm": 3.003392918638905, "learning_rate": 1.8941215810217882e-05, "loss": 0.8595, "step": 6998 }, { "epoch": 0.5201783723522854, "grad_norm": 1.671595815334845, "learning_rate": 1.8940856461440734e-05, "loss": 0.8644, "step": 6999 }, { "epoch": 0.5202526941657376, "grad_norm": 2.4502007734105895, "learning_rate": 1.894049705510281e-05, "loss": 1.0909, "step": 7000 }, { "epoch": 0.5203270159791898, "grad_norm": 2.392997361564937, "learning_rate": 1.8940137591206432e-05, "loss": 0.9657, "step": 7001 }, { "epoch": 0.5204013377926422, "grad_norm": 2.5138674681133577, "learning_rate": 1.8939778069753913e-05, "loss": 1.0781, "step": 7002 }, { "epoch": 0.5204756596060944, "grad_norm": 2.278777647995426, "learning_rate": 1.893941849074757e-05, "loss": 1.0509, "step": 7003 }, { "epoch": 0.5205499814195467, "grad_norm": 2.0217529504997445, "learning_rate": 1.8939058854189706e-05, "loss": 0.8786, "step": 7004 }, { "epoch": 0.5206243032329989, "grad_norm": 2.1355750856679494, "learning_rate": 1.893869916008265e-05, "loss": 0.8282, "step": 7005 }, { "epoch": 0.5206986250464511, "grad_norm": 2.333584402724089, "learning_rate": 1.893833940842871e-05, "loss": 0.8966, "step": 7006 }, { "epoch": 0.5207729468599034, "grad_norm": 2.270540367779763, "learning_rate": 1.893797959923021e-05, "loss": 0.9156, "step": 7007 }, { "epoch": 0.5208472686733556, "grad_norm": 2.177813247286207, "learning_rate": 1.8937619732489456e-05, "loss": 0.9823, "step": 7008 }, { "epoch": 0.5209215904868079, "grad_norm": 1.7273322049330737, "learning_rate": 1.8937259808208776e-05, "loss": 0.7172, "step": 7009 }, { "epoch": 0.5209959123002601, "grad_norm": 2.218153805795222, "learning_rate": 1.8936899826390472e-05, "loss": 0.8882, "step": 7010 }, { "epoch": 0.5210702341137123, "grad_norm": 2.0992903487811505, "learning_rate": 1.8936539787036876e-05, "loss": 0.9792, "step": 7011 }, { "epoch": 0.5211445559271646, "grad_norm": 1.870233492444543, "learning_rate": 1.89361796901503e-05, "loss": 0.7541, "step": 7012 }, { "epoch": 0.5212188777406169, "grad_norm": 1.703998849966506, "learning_rate": 1.8935819535733063e-05, "loss": 0.8884, "step": 7013 }, { "epoch": 0.5212931995540692, "grad_norm": 1.58856277269294, "learning_rate": 1.8935459323787485e-05, "loss": 0.7769, "step": 7014 }, { "epoch": 0.5213675213675214, "grad_norm": 1.8991157085210009, "learning_rate": 1.893509905431588e-05, "loss": 0.9922, "step": 7015 }, { "epoch": 0.5214418431809736, "grad_norm": 1.8985656441357497, "learning_rate": 1.8934738727320572e-05, "loss": 0.9138, "step": 7016 }, { "epoch": 0.5215161649944259, "grad_norm": 2.456503344808193, "learning_rate": 1.893437834280388e-05, "loss": 1.2099, "step": 7017 }, { "epoch": 0.5215904868078781, "grad_norm": 1.766557861003497, "learning_rate": 1.8934017900768123e-05, "loss": 0.9028, "step": 7018 }, { "epoch": 0.5216648086213304, "grad_norm": 1.7179803961740003, "learning_rate": 1.8933657401215625e-05, "loss": 0.8836, "step": 7019 }, { "epoch": 0.5217391304347826, "grad_norm": 2.498624855902406, "learning_rate": 1.8933296844148705e-05, "loss": 0.9955, "step": 7020 }, { "epoch": 0.5218134522482348, "grad_norm": 2.4567184462192224, "learning_rate": 1.8932936229569678e-05, "loss": 1.005, "step": 7021 }, { "epoch": 0.5218877740616871, "grad_norm": 2.0774697753801052, "learning_rate": 1.8932575557480878e-05, "loss": 0.866, "step": 7022 }, { "epoch": 0.5219620958751393, "grad_norm": 1.8845177787696084, "learning_rate": 1.8932214827884615e-05, "loss": 0.7139, "step": 7023 }, { "epoch": 0.5220364176885915, "grad_norm": 2.1610912574324437, "learning_rate": 1.8931854040783215e-05, "loss": 0.9539, "step": 7024 }, { "epoch": 0.5221107395020439, "grad_norm": 6.7366971417397075, "learning_rate": 1.8931493196179005e-05, "loss": 1.2712, "step": 7025 }, { "epoch": 0.5221850613154961, "grad_norm": 1.912122078614657, "learning_rate": 1.8931132294074305e-05, "loss": 0.7503, "step": 7026 }, { "epoch": 0.5222593831289484, "grad_norm": 2.13232914171157, "learning_rate": 1.893077133447144e-05, "loss": 0.793, "step": 7027 }, { "epoch": 0.5223337049424006, "grad_norm": 2.8326475911347444, "learning_rate": 1.893041031737273e-05, "loss": 0.9584, "step": 7028 }, { "epoch": 0.5224080267558529, "grad_norm": 3.1845715102235483, "learning_rate": 1.8930049242780504e-05, "loss": 1.0713, "step": 7029 }, { "epoch": 0.5224823485693051, "grad_norm": 2.282520799318799, "learning_rate": 1.8929688110697082e-05, "loss": 0.8553, "step": 7030 }, { "epoch": 0.5225566703827573, "grad_norm": 2.4886530027928027, "learning_rate": 1.8929326921124792e-05, "loss": 0.7786, "step": 7031 }, { "epoch": 0.5226309921962096, "grad_norm": 1.5925999627413319, "learning_rate": 1.892896567406596e-05, "loss": 0.6849, "step": 7032 }, { "epoch": 0.5227053140096618, "grad_norm": 2.3644365055303673, "learning_rate": 1.8928604369522912e-05, "loss": 0.922, "step": 7033 }, { "epoch": 0.522779635823114, "grad_norm": 2.145437658919592, "learning_rate": 1.892824300749797e-05, "loss": 0.7226, "step": 7034 }, { "epoch": 0.5228539576365663, "grad_norm": 1.713499903997565, "learning_rate": 1.892788158799346e-05, "loss": 0.7233, "step": 7035 }, { "epoch": 0.5229282794500186, "grad_norm": 2.2305310764536244, "learning_rate": 1.8927520111011714e-05, "loss": 0.7251, "step": 7036 }, { "epoch": 0.5230026012634709, "grad_norm": 2.3715912846618066, "learning_rate": 1.892715857655506e-05, "loss": 0.8779, "step": 7037 }, { "epoch": 0.5230769230769231, "grad_norm": 2.299660027934636, "learning_rate": 1.8926796984625815e-05, "loss": 0.6795, "step": 7038 }, { "epoch": 0.5231512448903753, "grad_norm": 2.287284596831531, "learning_rate": 1.892643533522632e-05, "loss": 1.0049, "step": 7039 }, { "epoch": 0.5232255667038276, "grad_norm": 1.6751537496096793, "learning_rate": 1.8926073628358894e-05, "loss": 0.6288, "step": 7040 }, { "epoch": 0.5232998885172798, "grad_norm": 2.1776094391697662, "learning_rate": 1.8925711864025872e-05, "loss": 0.8774, "step": 7041 }, { "epoch": 0.5233742103307321, "grad_norm": 2.020783264068371, "learning_rate": 1.8925350042229576e-05, "loss": 0.9444, "step": 7042 }, { "epoch": 0.5234485321441843, "grad_norm": 3.010927087117177, "learning_rate": 1.8924988162972347e-05, "loss": 1.1801, "step": 7043 }, { "epoch": 0.5235228539576365, "grad_norm": 1.8063086516359605, "learning_rate": 1.89246262262565e-05, "loss": 0.7742, "step": 7044 }, { "epoch": 0.5235971757710888, "grad_norm": 1.997630814981587, "learning_rate": 1.8924264232084377e-05, "loss": 0.8298, "step": 7045 }, { "epoch": 0.523671497584541, "grad_norm": 1.8998463819572051, "learning_rate": 1.89239021804583e-05, "loss": 1.0209, "step": 7046 }, { "epoch": 0.5237458193979934, "grad_norm": 1.8384163589692657, "learning_rate": 1.8923540071380607e-05, "loss": 0.882, "step": 7047 }, { "epoch": 0.5238201412114456, "grad_norm": 2.0121361723231384, "learning_rate": 1.8923177904853624e-05, "loss": 0.9657, "step": 7048 }, { "epoch": 0.5238944630248978, "grad_norm": 2.24238538299013, "learning_rate": 1.8922815680879688e-05, "loss": 1.1366, "step": 7049 }, { "epoch": 0.5239687848383501, "grad_norm": 1.7964322494304195, "learning_rate": 1.8922453399461126e-05, "loss": 0.6011, "step": 7050 }, { "epoch": 0.5240431066518023, "grad_norm": 1.8983803804372044, "learning_rate": 1.892209106060027e-05, "loss": 0.6226, "step": 7051 }, { "epoch": 0.5241174284652546, "grad_norm": 2.0616911760036265, "learning_rate": 1.8921728664299455e-05, "loss": 0.85, "step": 7052 }, { "epoch": 0.5241917502787068, "grad_norm": 2.0754779224820203, "learning_rate": 1.8921366210561015e-05, "loss": 1.0137, "step": 7053 }, { "epoch": 0.524266072092159, "grad_norm": 2.281569751131181, "learning_rate": 1.892100369938728e-05, "loss": 1.1157, "step": 7054 }, { "epoch": 0.5243403939056113, "grad_norm": 2.258605086686637, "learning_rate": 1.892064113078059e-05, "loss": 0.9522, "step": 7055 }, { "epoch": 0.5244147157190635, "grad_norm": 2.713123690495285, "learning_rate": 1.8920278504743273e-05, "loss": 0.8083, "step": 7056 }, { "epoch": 0.5244890375325157, "grad_norm": 2.0347064048359407, "learning_rate": 1.8919915821277666e-05, "loss": 0.9765, "step": 7057 }, { "epoch": 0.5245633593459681, "grad_norm": 2.661574879413549, "learning_rate": 1.8919553080386102e-05, "loss": 0.9713, "step": 7058 }, { "epoch": 0.5246376811594203, "grad_norm": 2.054978952400172, "learning_rate": 1.8919190282070922e-05, "loss": 1.1084, "step": 7059 }, { "epoch": 0.5247120029728726, "grad_norm": 1.7796792838938098, "learning_rate": 1.8918827426334454e-05, "loss": 0.8252, "step": 7060 }, { "epoch": 0.5247863247863248, "grad_norm": 2.285539935820866, "learning_rate": 1.891846451317904e-05, "loss": 1.1406, "step": 7061 }, { "epoch": 0.524860646599777, "grad_norm": 2.1839769510662994, "learning_rate": 1.8918101542607016e-05, "loss": 0.8969, "step": 7062 }, { "epoch": 0.5249349684132293, "grad_norm": 2.004706145747634, "learning_rate": 1.8917738514620712e-05, "loss": 0.7498, "step": 7063 }, { "epoch": 0.5250092902266815, "grad_norm": 1.8465823623584874, "learning_rate": 1.891737542922247e-05, "loss": 0.776, "step": 7064 }, { "epoch": 0.5250836120401338, "grad_norm": 1.862470388654972, "learning_rate": 1.8917012286414635e-05, "loss": 0.9668, "step": 7065 }, { "epoch": 0.525157933853586, "grad_norm": 1.7916245585054622, "learning_rate": 1.8916649086199533e-05, "loss": 0.763, "step": 7066 }, { "epoch": 0.5252322556670382, "grad_norm": 2.531474997463227, "learning_rate": 1.8916285828579507e-05, "loss": 0.8418, "step": 7067 }, { "epoch": 0.5253065774804905, "grad_norm": 1.90406432569841, "learning_rate": 1.8915922513556896e-05, "loss": 0.779, "step": 7068 }, { "epoch": 0.5253808992939428, "grad_norm": 1.7240929354502015, "learning_rate": 1.8915559141134036e-05, "loss": 0.873, "step": 7069 }, { "epoch": 0.5254552211073951, "grad_norm": 2.3793029573228663, "learning_rate": 1.891519571131327e-05, "loss": 0.9301, "step": 7070 }, { "epoch": 0.5255295429208473, "grad_norm": 1.9556854624278717, "learning_rate": 1.891483222409694e-05, "loss": 0.8658, "step": 7071 }, { "epoch": 0.5256038647342995, "grad_norm": 1.9296275671942997, "learning_rate": 1.891446867948738e-05, "loss": 0.8107, "step": 7072 }, { "epoch": 0.5256781865477518, "grad_norm": 4.575733881107005, "learning_rate": 1.891410507748693e-05, "loss": 0.9375, "step": 7073 }, { "epoch": 0.525752508361204, "grad_norm": 2.5710293678275353, "learning_rate": 1.8913741418097936e-05, "loss": 0.805, "step": 7074 }, { "epoch": 0.5258268301746563, "grad_norm": 1.8978426495775231, "learning_rate": 1.8913377701322743e-05, "loss": 0.8283, "step": 7075 }, { "epoch": 0.5259011519881085, "grad_norm": 1.4386397000525166, "learning_rate": 1.891301392716368e-05, "loss": 0.7243, "step": 7076 }, { "epoch": 0.5259754738015607, "grad_norm": 1.719987435476844, "learning_rate": 1.89126500956231e-05, "loss": 0.8721, "step": 7077 }, { "epoch": 0.526049795615013, "grad_norm": 1.8373691117715516, "learning_rate": 1.8912286206703338e-05, "loss": 0.7384, "step": 7078 }, { "epoch": 0.5261241174284652, "grad_norm": 2.6111517616520667, "learning_rate": 1.8911922260406742e-05, "loss": 0.8836, "step": 7079 }, { "epoch": 0.5261984392419174, "grad_norm": 1.9829770044050896, "learning_rate": 1.891155825673565e-05, "loss": 0.8324, "step": 7080 }, { "epoch": 0.5262727610553698, "grad_norm": 2.1047441784334087, "learning_rate": 1.891119419569241e-05, "loss": 0.765, "step": 7081 }, { "epoch": 0.526347082868822, "grad_norm": 2.0535841146443135, "learning_rate": 1.8910830077279367e-05, "loss": 0.9813, "step": 7082 }, { "epoch": 0.5264214046822743, "grad_norm": 2.191890403536218, "learning_rate": 1.891046590149886e-05, "loss": 0.9942, "step": 7083 }, { "epoch": 0.5264957264957265, "grad_norm": 2.3783969437032444, "learning_rate": 1.8910101668353234e-05, "loss": 0.8846, "step": 7084 }, { "epoch": 0.5265700483091788, "grad_norm": 2.3902773184567603, "learning_rate": 1.8909737377844837e-05, "loss": 1.0272, "step": 7085 }, { "epoch": 0.526644370122631, "grad_norm": 1.7749802907771504, "learning_rate": 1.8909373029976013e-05, "loss": 0.7854, "step": 7086 }, { "epoch": 0.5267186919360832, "grad_norm": 1.9902850306779247, "learning_rate": 1.890900862474911e-05, "loss": 0.7049, "step": 7087 }, { "epoch": 0.5267930137495355, "grad_norm": 2.0681436436198553, "learning_rate": 1.8908644162166468e-05, "loss": 0.8404, "step": 7088 }, { "epoch": 0.5268673355629877, "grad_norm": 2.1495172036244834, "learning_rate": 1.890827964223044e-05, "loss": 1.0663, "step": 7089 }, { "epoch": 0.5269416573764399, "grad_norm": 2.0307170759023676, "learning_rate": 1.8907915064943368e-05, "loss": 0.8066, "step": 7090 }, { "epoch": 0.5270159791898922, "grad_norm": 2.0267702391793527, "learning_rate": 1.8907550430307603e-05, "loss": 0.8591, "step": 7091 }, { "epoch": 0.5270903010033445, "grad_norm": 2.3609955349347085, "learning_rate": 1.8907185738325487e-05, "loss": 0.8281, "step": 7092 }, { "epoch": 0.5271646228167968, "grad_norm": 2.006940045906346, "learning_rate": 1.8906820988999377e-05, "loss": 0.9069, "step": 7093 }, { "epoch": 0.527238944630249, "grad_norm": 1.8557898389389405, "learning_rate": 1.890645618233161e-05, "loss": 1.01, "step": 7094 }, { "epoch": 0.5273132664437012, "grad_norm": 1.8938098723516075, "learning_rate": 1.8906091318324543e-05, "loss": 0.9741, "step": 7095 }, { "epoch": 0.5273875882571535, "grad_norm": 1.7851148766678084, "learning_rate": 1.8905726396980523e-05, "loss": 0.7316, "step": 7096 }, { "epoch": 0.5274619100706057, "grad_norm": 2.201829961652592, "learning_rate": 1.8905361418301895e-05, "loss": 0.9499, "step": 7097 }, { "epoch": 0.527536231884058, "grad_norm": 2.7242335927393078, "learning_rate": 1.8904996382291016e-05, "loss": 1.081, "step": 7098 }, { "epoch": 0.5276105536975102, "grad_norm": 1.9922393885063439, "learning_rate": 1.8904631288950233e-05, "loss": 1.0, "step": 7099 }, { "epoch": 0.5276848755109624, "grad_norm": 2.387047732456945, "learning_rate": 1.890426613828189e-05, "loss": 0.7015, "step": 7100 }, { "epoch": 0.5277591973244147, "grad_norm": 2.0775119092924466, "learning_rate": 1.890390093028835e-05, "loss": 0.9394, "step": 7101 }, { "epoch": 0.5278335191378669, "grad_norm": 2.073129089987186, "learning_rate": 1.8903535664971955e-05, "loss": 0.9365, "step": 7102 }, { "epoch": 0.5279078409513193, "grad_norm": 1.8162695154052981, "learning_rate": 1.890317034233506e-05, "loss": 0.6487, "step": 7103 }, { "epoch": 0.5279821627647715, "grad_norm": 1.9620574279966574, "learning_rate": 1.8902804962380016e-05, "loss": 0.9833, "step": 7104 }, { "epoch": 0.5280564845782237, "grad_norm": 2.701453629555589, "learning_rate": 1.8902439525109172e-05, "loss": 0.9372, "step": 7105 }, { "epoch": 0.528130806391676, "grad_norm": 2.0046794908379333, "learning_rate": 1.890207403052489e-05, "loss": 0.8004, "step": 7106 }, { "epoch": 0.5282051282051282, "grad_norm": 2.013200079708962, "learning_rate": 1.8901708478629513e-05, "loss": 0.7826, "step": 7107 }, { "epoch": 0.5282794500185805, "grad_norm": 2.267293382652051, "learning_rate": 1.8901342869425405e-05, "loss": 1.0595, "step": 7108 }, { "epoch": 0.5283537718320327, "grad_norm": 2.139410115628224, "learning_rate": 1.8900977202914906e-05, "loss": 0.9431, "step": 7109 }, { "epoch": 0.5284280936454849, "grad_norm": 2.5022942510242148, "learning_rate": 1.8900611479100378e-05, "loss": 0.8578, "step": 7110 }, { "epoch": 0.5285024154589372, "grad_norm": 2.2957441789906348, "learning_rate": 1.890024569798418e-05, "loss": 0.8197, "step": 7111 }, { "epoch": 0.5285767372723894, "grad_norm": 1.728143323043199, "learning_rate": 1.8899879859568663e-05, "loss": 0.6901, "step": 7112 }, { "epoch": 0.5286510590858416, "grad_norm": 2.3482456917192454, "learning_rate": 1.8899513963856175e-05, "loss": 0.8663, "step": 7113 }, { "epoch": 0.528725380899294, "grad_norm": 2.170932325917567, "learning_rate": 1.889914801084908e-05, "loss": 1.0791, "step": 7114 }, { "epoch": 0.5287997027127462, "grad_norm": 1.9232084043275823, "learning_rate": 1.8898782000549735e-05, "loss": 0.8917, "step": 7115 }, { "epoch": 0.5288740245261985, "grad_norm": 2.2681110622939165, "learning_rate": 1.889841593296049e-05, "loss": 0.943, "step": 7116 }, { "epoch": 0.5289483463396507, "grad_norm": 1.9901085264742095, "learning_rate": 1.8898049808083704e-05, "loss": 0.896, "step": 7117 }, { "epoch": 0.529022668153103, "grad_norm": 2.1631208380230076, "learning_rate": 1.889768362592174e-05, "loss": 0.9352, "step": 7118 }, { "epoch": 0.5290969899665552, "grad_norm": 2.406803419255841, "learning_rate": 1.8897317386476945e-05, "loss": 0.7808, "step": 7119 }, { "epoch": 0.5291713117800074, "grad_norm": 2.2095212070247627, "learning_rate": 1.8896951089751687e-05, "loss": 0.9115, "step": 7120 }, { "epoch": 0.5292456335934597, "grad_norm": 2.921312315129472, "learning_rate": 1.8896584735748316e-05, "loss": 0.9506, "step": 7121 }, { "epoch": 0.5293199554069119, "grad_norm": 2.0659847484695795, "learning_rate": 1.8896218324469194e-05, "loss": 0.8785, "step": 7122 }, { "epoch": 0.5293942772203641, "grad_norm": 2.3663549118085774, "learning_rate": 1.8895851855916682e-05, "loss": 0.9942, "step": 7123 }, { "epoch": 0.5294685990338164, "grad_norm": 2.1625466825774855, "learning_rate": 1.889548533009314e-05, "loss": 0.7701, "step": 7124 }, { "epoch": 0.5295429208472687, "grad_norm": 2.1797080472285075, "learning_rate": 1.8895118747000918e-05, "loss": 0.8318, "step": 7125 }, { "epoch": 0.529617242660721, "grad_norm": 2.0633117996586336, "learning_rate": 1.8894752106642386e-05, "loss": 0.7345, "step": 7126 }, { "epoch": 0.5296915644741732, "grad_norm": 1.9301665073394945, "learning_rate": 1.8894385409019903e-05, "loss": 0.6912, "step": 7127 }, { "epoch": 0.5297658862876254, "grad_norm": 2.156295869442044, "learning_rate": 1.889401865413583e-05, "loss": 0.9225, "step": 7128 }, { "epoch": 0.5298402081010777, "grad_norm": 2.1930931547008603, "learning_rate": 1.8893651841992523e-05, "loss": 0.7315, "step": 7129 }, { "epoch": 0.5299145299145299, "grad_norm": 3.0456896505995457, "learning_rate": 1.8893284972592348e-05, "loss": 0.8322, "step": 7130 }, { "epoch": 0.5299888517279822, "grad_norm": 2.3019876688436423, "learning_rate": 1.8892918045937666e-05, "loss": 0.9385, "step": 7131 }, { "epoch": 0.5300631735414344, "grad_norm": 2.034331726084151, "learning_rate": 1.8892551062030837e-05, "loss": 0.8242, "step": 7132 }, { "epoch": 0.5301374953548866, "grad_norm": 1.76318504873387, "learning_rate": 1.8892184020874227e-05, "loss": 0.8093, "step": 7133 }, { "epoch": 0.5302118171683389, "grad_norm": 2.082082447095855, "learning_rate": 1.8891816922470198e-05, "loss": 0.9043, "step": 7134 }, { "epoch": 0.5302861389817911, "grad_norm": 2.1298994374576865, "learning_rate": 1.8891449766821113e-05, "loss": 0.9378, "step": 7135 }, { "epoch": 0.5303604607952433, "grad_norm": 1.9852400689296927, "learning_rate": 1.8891082553929336e-05, "loss": 0.9354, "step": 7136 }, { "epoch": 0.5304347826086957, "grad_norm": 2.258351552765177, "learning_rate": 1.889071528379723e-05, "loss": 0.8893, "step": 7137 }, { "epoch": 0.5305091044221479, "grad_norm": 2.1020031913876296, "learning_rate": 1.889034795642716e-05, "loss": 0.8466, "step": 7138 }, { "epoch": 0.5305834262356002, "grad_norm": 2.8084666762523374, "learning_rate": 1.8889980571821493e-05, "loss": 1.076, "step": 7139 }, { "epoch": 0.5306577480490524, "grad_norm": 2.1673633847255567, "learning_rate": 1.888961312998259e-05, "loss": 0.9125, "step": 7140 }, { "epoch": 0.5307320698625047, "grad_norm": 2.236973647961902, "learning_rate": 1.888924563091282e-05, "loss": 1.038, "step": 7141 }, { "epoch": 0.5308063916759569, "grad_norm": 1.7782766013780482, "learning_rate": 1.888887807461455e-05, "loss": 0.658, "step": 7142 }, { "epoch": 0.5308807134894091, "grad_norm": 1.9568243733653565, "learning_rate": 1.888851046109014e-05, "loss": 1.015, "step": 7143 }, { "epoch": 0.5309550353028614, "grad_norm": 2.124825844307431, "learning_rate": 1.8888142790341965e-05, "loss": 0.9663, "step": 7144 }, { "epoch": 0.5310293571163136, "grad_norm": 1.7301344371282492, "learning_rate": 1.8887775062372386e-05, "loss": 0.8501, "step": 7145 }, { "epoch": 0.5311036789297658, "grad_norm": 2.2297169707283597, "learning_rate": 1.888740727718377e-05, "loss": 0.8656, "step": 7146 }, { "epoch": 0.5311780007432181, "grad_norm": 1.9967808117879324, "learning_rate": 1.8887039434778492e-05, "loss": 1.0021, "step": 7147 }, { "epoch": 0.5312523225566704, "grad_norm": 2.1698096937116804, "learning_rate": 1.888667153515891e-05, "loss": 0.9558, "step": 7148 }, { "epoch": 0.5313266443701227, "grad_norm": 2.015364062881546, "learning_rate": 1.8886303578327402e-05, "loss": 0.8858, "step": 7149 }, { "epoch": 0.5314009661835749, "grad_norm": 2.7245443896264163, "learning_rate": 1.888593556428633e-05, "loss": 0.9604, "step": 7150 }, { "epoch": 0.5314752879970271, "grad_norm": 2.1318618022180944, "learning_rate": 1.888556749303807e-05, "loss": 1.0412, "step": 7151 }, { "epoch": 0.5315496098104794, "grad_norm": 2.003857618901838, "learning_rate": 1.8885199364584984e-05, "loss": 0.7448, "step": 7152 }, { "epoch": 0.5316239316239316, "grad_norm": 1.9556327879945643, "learning_rate": 1.8884831178929443e-05, "loss": 0.9399, "step": 7153 }, { "epoch": 0.5316982534373839, "grad_norm": 2.1440848130015016, "learning_rate": 1.8884462936073824e-05, "loss": 0.8573, "step": 7154 }, { "epoch": 0.5317725752508361, "grad_norm": 1.6280832664154958, "learning_rate": 1.888409463602049e-05, "loss": 0.7072, "step": 7155 }, { "epoch": 0.5318468970642883, "grad_norm": 2.2403876061764687, "learning_rate": 1.888372627877182e-05, "loss": 0.9206, "step": 7156 }, { "epoch": 0.5319212188777406, "grad_norm": 1.8800784496949756, "learning_rate": 1.8883357864330182e-05, "loss": 0.7501, "step": 7157 }, { "epoch": 0.5319955406911928, "grad_norm": 1.9416855419828853, "learning_rate": 1.8882989392697944e-05, "loss": 0.9069, "step": 7158 }, { "epoch": 0.5320698625046452, "grad_norm": 1.9724761956898724, "learning_rate": 1.8882620863877477e-05, "loss": 0.8891, "step": 7159 }, { "epoch": 0.5321441843180974, "grad_norm": 4.085730299931032, "learning_rate": 1.8882252277871163e-05, "loss": 0.9005, "step": 7160 }, { "epoch": 0.5322185061315496, "grad_norm": 2.3714532045764662, "learning_rate": 1.888188363468137e-05, "loss": 0.6581, "step": 7161 }, { "epoch": 0.5322928279450019, "grad_norm": 1.8145798621058407, "learning_rate": 1.888151493431047e-05, "loss": 0.8009, "step": 7162 }, { "epoch": 0.5323671497584541, "grad_norm": 2.438805885826402, "learning_rate": 1.8881146176760838e-05, "loss": 0.8096, "step": 7163 }, { "epoch": 0.5324414715719064, "grad_norm": 2.3263808185595782, "learning_rate": 1.888077736203485e-05, "loss": 1.09, "step": 7164 }, { "epoch": 0.5325157933853586, "grad_norm": 1.722747870568417, "learning_rate": 1.8880408490134875e-05, "loss": 0.6984, "step": 7165 }, { "epoch": 0.5325901151988108, "grad_norm": 1.8546726458156495, "learning_rate": 1.8880039561063286e-05, "loss": 0.6885, "step": 7166 }, { "epoch": 0.5326644370122631, "grad_norm": 2.0689781391807873, "learning_rate": 1.8879670574822473e-05, "loss": 0.7043, "step": 7167 }, { "epoch": 0.5327387588257153, "grad_norm": 2.098073982446243, "learning_rate": 1.8879301531414797e-05, "loss": 0.8976, "step": 7168 }, { "epoch": 0.5328130806391675, "grad_norm": 2.461391090295643, "learning_rate": 1.8878932430842635e-05, "loss": 0.8267, "step": 7169 }, { "epoch": 0.5328874024526199, "grad_norm": 2.579240341468676, "learning_rate": 1.8878563273108374e-05, "loss": 1.0177, "step": 7170 }, { "epoch": 0.5329617242660721, "grad_norm": 1.8397225296386424, "learning_rate": 1.8878194058214377e-05, "loss": 0.7595, "step": 7171 }, { "epoch": 0.5330360460795244, "grad_norm": 2.2228791303876387, "learning_rate": 1.887782478616303e-05, "loss": 0.8771, "step": 7172 }, { "epoch": 0.5331103678929766, "grad_norm": 4.234928512002064, "learning_rate": 1.8877455456956708e-05, "loss": 0.714, "step": 7173 }, { "epoch": 0.5331846897064288, "grad_norm": 2.133629641908384, "learning_rate": 1.8877086070597788e-05, "loss": 0.8961, "step": 7174 }, { "epoch": 0.5332590115198811, "grad_norm": 2.5251655324649414, "learning_rate": 1.887671662708865e-05, "loss": 1.0025, "step": 7175 }, { "epoch": 0.5333333333333333, "grad_norm": 2.4912421076987465, "learning_rate": 1.8876347126431668e-05, "loss": 0.9348, "step": 7176 }, { "epoch": 0.5334076551467856, "grad_norm": 7.250547398823575, "learning_rate": 1.8875977568629224e-05, "loss": 0.8932, "step": 7177 }, { "epoch": 0.5334819769602378, "grad_norm": 1.8335408387722938, "learning_rate": 1.8875607953683698e-05, "loss": 0.6053, "step": 7178 }, { "epoch": 0.53355629877369, "grad_norm": 11.3988120951866, "learning_rate": 1.887523828159747e-05, "loss": 0.7947, "step": 7179 }, { "epoch": 0.5336306205871423, "grad_norm": 2.1201305082306527, "learning_rate": 1.8874868552372916e-05, "loss": 0.9363, "step": 7180 }, { "epoch": 0.5337049424005946, "grad_norm": 2.1162330780831744, "learning_rate": 1.8874498766012418e-05, "loss": 0.8004, "step": 7181 }, { "epoch": 0.5337792642140469, "grad_norm": 3.432212701359242, "learning_rate": 1.887412892251836e-05, "loss": 0.887, "step": 7182 }, { "epoch": 0.5338535860274991, "grad_norm": 2.81443935549183, "learning_rate": 1.8873759021893118e-05, "loss": 0.8471, "step": 7183 }, { "epoch": 0.5339279078409513, "grad_norm": 2.118163657947101, "learning_rate": 1.8873389064139077e-05, "loss": 0.7729, "step": 7184 }, { "epoch": 0.5340022296544036, "grad_norm": 1.6403342097912321, "learning_rate": 1.887301904925862e-05, "loss": 0.7082, "step": 7185 }, { "epoch": 0.5340765514678558, "grad_norm": 3.1263430381364334, "learning_rate": 1.8872648977254123e-05, "loss": 0.8363, "step": 7186 }, { "epoch": 0.5341508732813081, "grad_norm": 1.6858547240926491, "learning_rate": 1.8872278848127977e-05, "loss": 0.7207, "step": 7187 }, { "epoch": 0.5342251950947603, "grad_norm": 2.5615648321850575, "learning_rate": 1.8871908661882556e-05, "loss": 0.9838, "step": 7188 }, { "epoch": 0.5342995169082125, "grad_norm": 2.375367800229481, "learning_rate": 1.8871538418520248e-05, "loss": 0.7321, "step": 7189 }, { "epoch": 0.5343738387216648, "grad_norm": 2.0254772844780646, "learning_rate": 1.8871168118043435e-05, "loss": 0.6918, "step": 7190 }, { "epoch": 0.534448160535117, "grad_norm": 2.1755359071495284, "learning_rate": 1.8870797760454505e-05, "loss": 1.0123, "step": 7191 }, { "epoch": 0.5345224823485693, "grad_norm": 2.0923324559176026, "learning_rate": 1.887042734575584e-05, "loss": 0.96, "step": 7192 }, { "epoch": 0.5345968041620216, "grad_norm": 1.737959476115984, "learning_rate": 1.8870056873949822e-05, "loss": 0.909, "step": 7193 }, { "epoch": 0.5346711259754738, "grad_norm": 2.585868388667055, "learning_rate": 1.886968634503884e-05, "loss": 1.0786, "step": 7194 }, { "epoch": 0.5347454477889261, "grad_norm": 2.562730902207004, "learning_rate": 1.8869315759025272e-05, "loss": 0.7366, "step": 7195 }, { "epoch": 0.5348197696023783, "grad_norm": 2.570085171288319, "learning_rate": 1.8868945115911513e-05, "loss": 0.9251, "step": 7196 }, { "epoch": 0.5348940914158306, "grad_norm": 2.1705773821862104, "learning_rate": 1.8868574415699945e-05, "loss": 0.9777, "step": 7197 }, { "epoch": 0.5349684132292828, "grad_norm": 2.322723414326187, "learning_rate": 1.8868203658392955e-05, "loss": 0.8216, "step": 7198 }, { "epoch": 0.535042735042735, "grad_norm": 2.57501553638353, "learning_rate": 1.886783284399293e-05, "loss": 0.8495, "step": 7199 }, { "epoch": 0.5351170568561873, "grad_norm": 2.0932523656688966, "learning_rate": 1.8867461972502256e-05, "loss": 0.9056, "step": 7200 }, { "epoch": 0.5351913786696395, "grad_norm": 1.9215332659009967, "learning_rate": 1.8867091043923323e-05, "loss": 1.0045, "step": 7201 }, { "epoch": 0.5352657004830917, "grad_norm": 2.433875824893274, "learning_rate": 1.8866720058258518e-05, "loss": 1.1468, "step": 7202 }, { "epoch": 0.535340022296544, "grad_norm": 1.7807350275673297, "learning_rate": 1.8866349015510234e-05, "loss": 0.92, "step": 7203 }, { "epoch": 0.5354143441099963, "grad_norm": 2.4066300220527186, "learning_rate": 1.886597791568085e-05, "loss": 0.904, "step": 7204 }, { "epoch": 0.5354886659234486, "grad_norm": 1.9483987606207367, "learning_rate": 1.886560675877276e-05, "loss": 0.924, "step": 7205 }, { "epoch": 0.5355629877369008, "grad_norm": 1.7450182512391526, "learning_rate": 1.8865235544788355e-05, "loss": 0.8105, "step": 7206 }, { "epoch": 0.535637309550353, "grad_norm": 2.3865834248796594, "learning_rate": 1.886486427373002e-05, "loss": 1.008, "step": 7207 }, { "epoch": 0.5357116313638053, "grad_norm": 4.456422093394058, "learning_rate": 1.8864492945600156e-05, "loss": 1.1257, "step": 7208 }, { "epoch": 0.5357859531772575, "grad_norm": 2.582629857801383, "learning_rate": 1.886412156040114e-05, "loss": 0.7684, "step": 7209 }, { "epoch": 0.5358602749907098, "grad_norm": 1.8871090977691067, "learning_rate": 1.886375011813537e-05, "loss": 1.0442, "step": 7210 }, { "epoch": 0.535934596804162, "grad_norm": 2.0550564646985894, "learning_rate": 1.8863378618805242e-05, "loss": 1.0377, "step": 7211 }, { "epoch": 0.5360089186176142, "grad_norm": 2.0071213093891505, "learning_rate": 1.886300706241314e-05, "loss": 0.7925, "step": 7212 }, { "epoch": 0.5360832404310665, "grad_norm": 8.15715770968809, "learning_rate": 1.8862635448961456e-05, "loss": 0.9574, "step": 7213 }, { "epoch": 0.5361575622445187, "grad_norm": 1.9801228797310078, "learning_rate": 1.8862263778452585e-05, "loss": 0.8272, "step": 7214 }, { "epoch": 0.5362318840579711, "grad_norm": 1.9578760927441472, "learning_rate": 1.886189205088892e-05, "loss": 1.0239, "step": 7215 }, { "epoch": 0.5363062058714233, "grad_norm": 1.9314433516821135, "learning_rate": 1.8861520266272854e-05, "loss": 0.8299, "step": 7216 }, { "epoch": 0.5363805276848755, "grad_norm": 2.242969993588612, "learning_rate": 1.886114842460678e-05, "loss": 0.7384, "step": 7217 }, { "epoch": 0.5364548494983278, "grad_norm": 2.120050042793388, "learning_rate": 1.8860776525893096e-05, "loss": 1.0146, "step": 7218 }, { "epoch": 0.53652917131178, "grad_norm": 2.5045068378638535, "learning_rate": 1.8860404570134188e-05, "loss": 1.098, "step": 7219 }, { "epoch": 0.5366034931252323, "grad_norm": 2.142723792603309, "learning_rate": 1.8860032557332456e-05, "loss": 0.968, "step": 7220 }, { "epoch": 0.5366778149386845, "grad_norm": 3.71992806796839, "learning_rate": 1.8859660487490296e-05, "loss": 1.1204, "step": 7221 }, { "epoch": 0.5367521367521367, "grad_norm": 1.9708721579568629, "learning_rate": 1.8859288360610096e-05, "loss": 0.8675, "step": 7222 }, { "epoch": 0.536826458565589, "grad_norm": 1.9587402473730928, "learning_rate": 1.8858916176694265e-05, "loss": 0.8402, "step": 7223 }, { "epoch": 0.5369007803790412, "grad_norm": 1.8152877450089242, "learning_rate": 1.8858543935745185e-05, "loss": 0.8347, "step": 7224 }, { "epoch": 0.5369751021924934, "grad_norm": 2.327766315466355, "learning_rate": 1.885817163776526e-05, "loss": 0.8469, "step": 7225 }, { "epoch": 0.5370494240059458, "grad_norm": 1.8309720190835206, "learning_rate": 1.885779928275689e-05, "loss": 0.8516, "step": 7226 }, { "epoch": 0.537123745819398, "grad_norm": 2.3496967709863394, "learning_rate": 1.8857426870722464e-05, "loss": 0.9644, "step": 7227 }, { "epoch": 0.5371980676328503, "grad_norm": 2.022528994753628, "learning_rate": 1.8857054401664386e-05, "loss": 0.7764, "step": 7228 }, { "epoch": 0.5372723894463025, "grad_norm": 1.923227324687608, "learning_rate": 1.885668187558505e-05, "loss": 0.9585, "step": 7229 }, { "epoch": 0.5373467112597548, "grad_norm": 2.2076773410430515, "learning_rate": 1.8856309292486852e-05, "loss": 0.8411, "step": 7230 }, { "epoch": 0.537421033073207, "grad_norm": 2.2120908286431797, "learning_rate": 1.8855936652372198e-05, "loss": 1.0354, "step": 7231 }, { "epoch": 0.5374953548866592, "grad_norm": 1.845407005426152, "learning_rate": 1.8855563955243485e-05, "loss": 0.8434, "step": 7232 }, { "epoch": 0.5375696767001115, "grad_norm": 1.7872762124265726, "learning_rate": 1.885519120110311e-05, "loss": 0.6802, "step": 7233 }, { "epoch": 0.5376439985135637, "grad_norm": 2.063390786165446, "learning_rate": 1.8854818389953468e-05, "loss": 0.7563, "step": 7234 }, { "epoch": 0.5377183203270159, "grad_norm": 2.0154638701828955, "learning_rate": 1.8854445521796968e-05, "loss": 0.8703, "step": 7235 }, { "epoch": 0.5377926421404682, "grad_norm": 2.051665644742011, "learning_rate": 1.8854072596636008e-05, "loss": 0.8931, "step": 7236 }, { "epoch": 0.5378669639539205, "grad_norm": 2.1891348348696487, "learning_rate": 1.885369961447299e-05, "loss": 0.8752, "step": 7237 }, { "epoch": 0.5379412857673728, "grad_norm": 2.151031509812944, "learning_rate": 1.885332657531031e-05, "loss": 0.7738, "step": 7238 }, { "epoch": 0.538015607580825, "grad_norm": 2.2826608859613353, "learning_rate": 1.8852953479150376e-05, "loss": 0.949, "step": 7239 }, { "epoch": 0.5380899293942772, "grad_norm": 1.849442552584588, "learning_rate": 1.8852580325995583e-05, "loss": 0.7243, "step": 7240 }, { "epoch": 0.5381642512077295, "grad_norm": 1.8758136164258234, "learning_rate": 1.885220711584834e-05, "loss": 0.8542, "step": 7241 }, { "epoch": 0.5382385730211817, "grad_norm": 3.1501749618470662, "learning_rate": 1.8851833848711047e-05, "loss": 0.9055, "step": 7242 }, { "epoch": 0.538312894834634, "grad_norm": 2.4708359350176443, "learning_rate": 1.8851460524586106e-05, "loss": 1.1223, "step": 7243 }, { "epoch": 0.5383872166480862, "grad_norm": 2.0541228626637382, "learning_rate": 1.885108714347592e-05, "loss": 0.7835, "step": 7244 }, { "epoch": 0.5384615384615384, "grad_norm": 2.1678791130814323, "learning_rate": 1.8850713705382893e-05, "loss": 0.9375, "step": 7245 }, { "epoch": 0.5385358602749907, "grad_norm": 2.102254829568552, "learning_rate": 1.8850340210309435e-05, "loss": 1.0144, "step": 7246 }, { "epoch": 0.5386101820884429, "grad_norm": 2.379427226117749, "learning_rate": 1.8849966658257944e-05, "loss": 0.9349, "step": 7247 }, { "epoch": 0.5386845039018952, "grad_norm": 2.6995942907458574, "learning_rate": 1.8849593049230826e-05, "loss": 0.8835, "step": 7248 }, { "epoch": 0.5387588257153475, "grad_norm": 1.7894013567421219, "learning_rate": 1.884921938323049e-05, "loss": 0.7282, "step": 7249 }, { "epoch": 0.5388331475287997, "grad_norm": 2.87343452511886, "learning_rate": 1.8848845660259334e-05, "loss": 0.8824, "step": 7250 }, { "epoch": 0.538907469342252, "grad_norm": 2.16898466314454, "learning_rate": 1.8848471880319775e-05, "loss": 0.7234, "step": 7251 }, { "epoch": 0.5389817911557042, "grad_norm": 2.3406450603585403, "learning_rate": 1.8848098043414207e-05, "loss": 0.8997, "step": 7252 }, { "epoch": 0.5390561129691565, "grad_norm": 2.050413270567349, "learning_rate": 1.8847724149545047e-05, "loss": 0.9125, "step": 7253 }, { "epoch": 0.5391304347826087, "grad_norm": 7.525297077803702, "learning_rate": 1.8847350198714697e-05, "loss": 1.0012, "step": 7254 }, { "epoch": 0.5392047565960609, "grad_norm": 2.2298102815523926, "learning_rate": 1.8846976190925562e-05, "loss": 0.7389, "step": 7255 }, { "epoch": 0.5392790784095132, "grad_norm": 2.122213834373994, "learning_rate": 1.8846602126180058e-05, "loss": 0.8594, "step": 7256 }, { "epoch": 0.5393534002229654, "grad_norm": 2.551782417759579, "learning_rate": 1.8846228004480583e-05, "loss": 1.0418, "step": 7257 }, { "epoch": 0.5394277220364176, "grad_norm": 2.5386433006439377, "learning_rate": 1.8845853825829558e-05, "loss": 0.9607, "step": 7258 }, { "epoch": 0.5395020438498699, "grad_norm": 1.8690916207510877, "learning_rate": 1.884547959022938e-05, "loss": 0.8834, "step": 7259 }, { "epoch": 0.5395763656633222, "grad_norm": 2.0487900937675456, "learning_rate": 1.884510529768247e-05, "loss": 0.9544, "step": 7260 }, { "epoch": 0.5396506874767745, "grad_norm": 1.9309118100404086, "learning_rate": 1.8844730948191224e-05, "loss": 0.644, "step": 7261 }, { "epoch": 0.5397250092902267, "grad_norm": 2.8893163541605653, "learning_rate": 1.884435654175806e-05, "loss": 0.6976, "step": 7262 }, { "epoch": 0.539799331103679, "grad_norm": 2.1821512092656072, "learning_rate": 1.884398207838539e-05, "loss": 0.8572, "step": 7263 }, { "epoch": 0.5398736529171312, "grad_norm": 2.888196008430356, "learning_rate": 1.8843607558075622e-05, "loss": 0.7063, "step": 7264 }, { "epoch": 0.5399479747305834, "grad_norm": 2.1771996077998463, "learning_rate": 1.8843232980831166e-05, "loss": 0.8768, "step": 7265 }, { "epoch": 0.5400222965440357, "grad_norm": 2.0147411522278635, "learning_rate": 1.8842858346654435e-05, "loss": 0.9435, "step": 7266 }, { "epoch": 0.5400966183574879, "grad_norm": 2.20208468101258, "learning_rate": 1.8842483655547842e-05, "loss": 0.7664, "step": 7267 }, { "epoch": 0.5401709401709401, "grad_norm": 1.6355792806228229, "learning_rate": 1.8842108907513796e-05, "loss": 0.8204, "step": 7268 }, { "epoch": 0.5402452619843924, "grad_norm": 2.41410646519753, "learning_rate": 1.8841734102554713e-05, "loss": 0.8841, "step": 7269 }, { "epoch": 0.5403195837978446, "grad_norm": 2.707209201857159, "learning_rate": 1.8841359240673004e-05, "loss": 1.0865, "step": 7270 }, { "epoch": 0.540393905611297, "grad_norm": 2.6944057594406634, "learning_rate": 1.8840984321871084e-05, "loss": 0.8544, "step": 7271 }, { "epoch": 0.5404682274247492, "grad_norm": 2.261686115513396, "learning_rate": 1.8840609346151363e-05, "loss": 0.9224, "step": 7272 }, { "epoch": 0.5405425492382014, "grad_norm": 1.6724577219170647, "learning_rate": 1.884023431351626e-05, "loss": 0.9672, "step": 7273 }, { "epoch": 0.5406168710516537, "grad_norm": 1.8169637739802054, "learning_rate": 1.883985922396819e-05, "loss": 0.9252, "step": 7274 }, { "epoch": 0.5406911928651059, "grad_norm": 1.8959228626385163, "learning_rate": 1.883948407750956e-05, "loss": 0.9961, "step": 7275 }, { "epoch": 0.5407655146785582, "grad_norm": 3.8885625903528243, "learning_rate": 1.883910887414279e-05, "loss": 0.6708, "step": 7276 }, { "epoch": 0.5408398364920104, "grad_norm": 2.474693810531541, "learning_rate": 1.8838733613870303e-05, "loss": 0.8005, "step": 7277 }, { "epoch": 0.5409141583054626, "grad_norm": 2.0817006290000166, "learning_rate": 1.88383582966945e-05, "loss": 0.8457, "step": 7278 }, { "epoch": 0.5409884801189149, "grad_norm": 2.16160820593859, "learning_rate": 1.883798292261781e-05, "loss": 0.8442, "step": 7279 }, { "epoch": 0.5410628019323671, "grad_norm": 2.3753064229728316, "learning_rate": 1.883760749164264e-05, "loss": 0.9653, "step": 7280 }, { "epoch": 0.5411371237458193, "grad_norm": 2.3531466685917097, "learning_rate": 1.8837232003771415e-05, "loss": 0.8371, "step": 7281 }, { "epoch": 0.5412114455592717, "grad_norm": 2.128928909916049, "learning_rate": 1.8836856459006546e-05, "loss": 0.7803, "step": 7282 }, { "epoch": 0.5412857673727239, "grad_norm": 2.0186712604334778, "learning_rate": 1.8836480857350456e-05, "loss": 0.9452, "step": 7283 }, { "epoch": 0.5413600891861762, "grad_norm": 2.7369624992215904, "learning_rate": 1.883610519880556e-05, "loss": 0.8557, "step": 7284 }, { "epoch": 0.5414344109996284, "grad_norm": 2.5153932825890184, "learning_rate": 1.8835729483374275e-05, "loss": 1.0836, "step": 7285 }, { "epoch": 0.5415087328130807, "grad_norm": 2.180938902289722, "learning_rate": 1.8835353711059023e-05, "loss": 0.845, "step": 7286 }, { "epoch": 0.5415830546265329, "grad_norm": 2.0560120060497185, "learning_rate": 1.8834977881862223e-05, "loss": 0.8793, "step": 7287 }, { "epoch": 0.5416573764399851, "grad_norm": 2.181694343631923, "learning_rate": 1.8834601995786295e-05, "loss": 0.8516, "step": 7288 }, { "epoch": 0.5417316982534374, "grad_norm": 3.9422380605435596, "learning_rate": 1.8834226052833658e-05, "loss": 0.7245, "step": 7289 }, { "epoch": 0.5418060200668896, "grad_norm": 2.34722797534247, "learning_rate": 1.883385005300673e-05, "loss": 0.99, "step": 7290 }, { "epoch": 0.5418803418803418, "grad_norm": 4.079881393237557, "learning_rate": 1.8833473996307937e-05, "loss": 0.8082, "step": 7291 }, { "epoch": 0.5419546636937941, "grad_norm": 2.089892166311305, "learning_rate": 1.8833097882739692e-05, "loss": 1.0635, "step": 7292 }, { "epoch": 0.5420289855072464, "grad_norm": 1.8684755814739424, "learning_rate": 1.8832721712304423e-05, "loss": 0.8115, "step": 7293 }, { "epoch": 0.5421033073206987, "grad_norm": 2.778993142391531, "learning_rate": 1.883234548500455e-05, "loss": 0.7877, "step": 7294 }, { "epoch": 0.5421776291341509, "grad_norm": 2.0908794253561527, "learning_rate": 1.8831969200842497e-05, "loss": 0.8632, "step": 7295 }, { "epoch": 0.5422519509476031, "grad_norm": 2.186775851056708, "learning_rate": 1.883159285982068e-05, "loss": 0.8971, "step": 7296 }, { "epoch": 0.5423262727610554, "grad_norm": 4.01484661135029, "learning_rate": 1.883121646194153e-05, "loss": 0.7079, "step": 7297 }, { "epoch": 0.5424005945745076, "grad_norm": 2.0166013140729007, "learning_rate": 1.8830840007207466e-05, "loss": 0.8849, "step": 7298 }, { "epoch": 0.5424749163879599, "grad_norm": 2.1325461425838004, "learning_rate": 1.8830463495620913e-05, "loss": 0.771, "step": 7299 }, { "epoch": 0.5425492382014121, "grad_norm": 2.155970882571743, "learning_rate": 1.8830086927184293e-05, "loss": 0.6851, "step": 7300 }, { "epoch": 0.5426235600148643, "grad_norm": 1.855468156277008, "learning_rate": 1.8829710301900033e-05, "loss": 0.7962, "step": 7301 }, { "epoch": 0.5426978818283166, "grad_norm": 3.430456610492657, "learning_rate": 1.8829333619770555e-05, "loss": 0.9275, "step": 7302 }, { "epoch": 0.5427722036417688, "grad_norm": 3.064674881601132, "learning_rate": 1.8828956880798284e-05, "loss": 0.878, "step": 7303 }, { "epoch": 0.5428465254552212, "grad_norm": 1.9841272022406438, "learning_rate": 1.8828580084985648e-05, "loss": 0.7556, "step": 7304 }, { "epoch": 0.5429208472686734, "grad_norm": 2.17867946839991, "learning_rate": 1.8828203232335075e-05, "loss": 0.9987, "step": 7305 }, { "epoch": 0.5429951690821256, "grad_norm": 1.960453193265653, "learning_rate": 1.882782632284898e-05, "loss": 0.8772, "step": 7306 }, { "epoch": 0.5430694908955779, "grad_norm": 2.0825042095271664, "learning_rate": 1.8827449356529804e-05, "loss": 0.971, "step": 7307 }, { "epoch": 0.5431438127090301, "grad_norm": 2.155261144431709, "learning_rate": 1.8827072333379965e-05, "loss": 0.973, "step": 7308 }, { "epoch": 0.5432181345224824, "grad_norm": 4.029740387507929, "learning_rate": 1.8826695253401894e-05, "loss": 0.813, "step": 7309 }, { "epoch": 0.5432924563359346, "grad_norm": 2.071776876623726, "learning_rate": 1.882631811659801e-05, "loss": 0.8892, "step": 7310 }, { "epoch": 0.5433667781493868, "grad_norm": 1.8846480705213047, "learning_rate": 1.8825940922970757e-05, "loss": 0.8967, "step": 7311 }, { "epoch": 0.5434410999628391, "grad_norm": 2.0323727097371775, "learning_rate": 1.8825563672522547e-05, "loss": 1.0331, "step": 7312 }, { "epoch": 0.5435154217762913, "grad_norm": 5.978665603335166, "learning_rate": 1.882518636525582e-05, "loss": 0.8845, "step": 7313 }, { "epoch": 0.5435897435897435, "grad_norm": 1.8443561834647537, "learning_rate": 1.8824809001173003e-05, "loss": 0.7855, "step": 7314 }, { "epoch": 0.5436640654031958, "grad_norm": 2.117357398310885, "learning_rate": 1.882443158027652e-05, "loss": 0.8531, "step": 7315 }, { "epoch": 0.5437383872166481, "grad_norm": 3.0243096179417326, "learning_rate": 1.8824054102568804e-05, "loss": 0.7459, "step": 7316 }, { "epoch": 0.5438127090301004, "grad_norm": 2.845294218149243, "learning_rate": 1.8823676568052284e-05, "loss": 0.952, "step": 7317 }, { "epoch": 0.5438870308435526, "grad_norm": 2.2265629666373834, "learning_rate": 1.8823298976729397e-05, "loss": 0.8618, "step": 7318 }, { "epoch": 0.5439613526570048, "grad_norm": 2.0735618094373587, "learning_rate": 1.8822921328602562e-05, "loss": 0.8792, "step": 7319 }, { "epoch": 0.5440356744704571, "grad_norm": 1.6736845969651177, "learning_rate": 1.8822543623674224e-05, "loss": 0.8595, "step": 7320 }, { "epoch": 0.5441099962839093, "grad_norm": 2.157535164815683, "learning_rate": 1.8822165861946807e-05, "loss": 0.86, "step": 7321 }, { "epoch": 0.5441843180973616, "grad_norm": 2.6490488959550986, "learning_rate": 1.8821788043422745e-05, "loss": 0.9176, "step": 7322 }, { "epoch": 0.5442586399108138, "grad_norm": 2.0468430846890824, "learning_rate": 1.8821410168104462e-05, "loss": 0.9236, "step": 7323 }, { "epoch": 0.544332961724266, "grad_norm": 2.0798949560406554, "learning_rate": 1.8821032235994407e-05, "loss": 0.9787, "step": 7324 }, { "epoch": 0.5444072835377183, "grad_norm": 2.448409418391004, "learning_rate": 1.8820654247095e-05, "loss": 0.7822, "step": 7325 }, { "epoch": 0.5444816053511705, "grad_norm": 2.0526887846623083, "learning_rate": 1.8820276201408674e-05, "loss": 0.7937, "step": 7326 }, { "epoch": 0.5445559271646229, "grad_norm": 2.0336384063821655, "learning_rate": 1.8819898098937877e-05, "loss": 0.9435, "step": 7327 }, { "epoch": 0.5446302489780751, "grad_norm": 2.56637164668971, "learning_rate": 1.8819519939685026e-05, "loss": 1.0072, "step": 7328 }, { "epoch": 0.5447045707915273, "grad_norm": 2.62834100569846, "learning_rate": 1.881914172365257e-05, "loss": 0.7972, "step": 7329 }, { "epoch": 0.5447788926049796, "grad_norm": 5.128588516220733, "learning_rate": 1.881876345084293e-05, "loss": 0.7844, "step": 7330 }, { "epoch": 0.5448532144184318, "grad_norm": 2.0054904058117633, "learning_rate": 1.8818385121258555e-05, "loss": 0.8844, "step": 7331 }, { "epoch": 0.5449275362318841, "grad_norm": 8.962083804264637, "learning_rate": 1.881800673490187e-05, "loss": 0.7082, "step": 7332 }, { "epoch": 0.5450018580453363, "grad_norm": 2.465077273944676, "learning_rate": 1.8817628291775316e-05, "loss": 0.8165, "step": 7333 }, { "epoch": 0.5450761798587885, "grad_norm": 4.491910571096366, "learning_rate": 1.881724979188133e-05, "loss": 0.9217, "step": 7334 }, { "epoch": 0.5451505016722408, "grad_norm": 2.7847165917697825, "learning_rate": 1.8816871235222344e-05, "loss": 0.8036, "step": 7335 }, { "epoch": 0.545224823485693, "grad_norm": 2.108329944640377, "learning_rate": 1.88164926218008e-05, "loss": 0.8863, "step": 7336 }, { "epoch": 0.5452991452991452, "grad_norm": 2.808700294054659, "learning_rate": 1.8816113951619134e-05, "loss": 1.0427, "step": 7337 }, { "epoch": 0.5453734671125976, "grad_norm": 3.9334381177631106, "learning_rate": 1.8815735224679788e-05, "loss": 0.9562, "step": 7338 }, { "epoch": 0.5454477889260498, "grad_norm": 11.043014147681989, "learning_rate": 1.8815356440985192e-05, "loss": 0.8429, "step": 7339 }, { "epoch": 0.5455221107395021, "grad_norm": 2.9837701351376573, "learning_rate": 1.8814977600537787e-05, "loss": 0.9736, "step": 7340 }, { "epoch": 0.5455964325529543, "grad_norm": 6.100399517392862, "learning_rate": 1.8814598703340017e-05, "loss": 0.855, "step": 7341 }, { "epoch": 0.5456707543664066, "grad_norm": 4.32185311939642, "learning_rate": 1.8814219749394316e-05, "loss": 0.9722, "step": 7342 }, { "epoch": 0.5457450761798588, "grad_norm": 5.319597006055528, "learning_rate": 1.8813840738703127e-05, "loss": 1.0158, "step": 7343 }, { "epoch": 0.545819397993311, "grad_norm": 5.5522783121423815, "learning_rate": 1.8813461671268886e-05, "loss": 0.8683, "step": 7344 }, { "epoch": 0.5458937198067633, "grad_norm": 5.246779778265469, "learning_rate": 1.881308254709404e-05, "loss": 1.0199, "step": 7345 }, { "epoch": 0.5459680416202155, "grad_norm": 3.599128570186665, "learning_rate": 1.8812703366181022e-05, "loss": 0.823, "step": 7346 }, { "epoch": 0.5460423634336677, "grad_norm": 5.620156749256087, "learning_rate": 1.881232412853228e-05, "loss": 0.9202, "step": 7347 }, { "epoch": 0.54611668524712, "grad_norm": 2.600295059317016, "learning_rate": 1.881194483415025e-05, "loss": 0.7817, "step": 7348 }, { "epoch": 0.5461910070605723, "grad_norm": 8.208479576237483, "learning_rate": 1.8811565483037376e-05, "loss": 0.966, "step": 7349 }, { "epoch": 0.5462653288740246, "grad_norm": 3.866044870977027, "learning_rate": 1.8811186075196104e-05, "loss": 0.9364, "step": 7350 }, { "epoch": 0.5463396506874768, "grad_norm": 3.433917797089394, "learning_rate": 1.881080661062887e-05, "loss": 0.8262, "step": 7351 }, { "epoch": 0.546413972500929, "grad_norm": 19.14729692927291, "learning_rate": 1.8810427089338123e-05, "loss": 0.7973, "step": 7352 }, { "epoch": 0.5464882943143813, "grad_norm": 9.183928216700298, "learning_rate": 1.8810047511326302e-05, "loss": 1.0437, "step": 7353 }, { "epoch": 0.5465626161278335, "grad_norm": 20.06659037308556, "learning_rate": 1.880966787659585e-05, "loss": 0.8795, "step": 7354 }, { "epoch": 0.5466369379412858, "grad_norm": 4.308194673287166, "learning_rate": 1.880928818514922e-05, "loss": 0.8015, "step": 7355 }, { "epoch": 0.546711259754738, "grad_norm": 3.4402111859460205, "learning_rate": 1.8808908436988845e-05, "loss": 0.9396, "step": 7356 }, { "epoch": 0.5467855815681902, "grad_norm": 4.726928561323862, "learning_rate": 1.8808528632117173e-05, "loss": 1.0262, "step": 7357 }, { "epoch": 0.5468599033816425, "grad_norm": 2.8797126894189513, "learning_rate": 1.8808148770536655e-05, "loss": 1.0449, "step": 7358 }, { "epoch": 0.5469342251950947, "grad_norm": 2.7910323388779443, "learning_rate": 1.8807768852249727e-05, "loss": 0.9689, "step": 7359 }, { "epoch": 0.5470085470085471, "grad_norm": 8.310939262808738, "learning_rate": 1.8807388877258846e-05, "loss": 0.894, "step": 7360 }, { "epoch": 0.5470828688219993, "grad_norm": 2.757374267599142, "learning_rate": 1.880700884556645e-05, "loss": 1.1392, "step": 7361 }, { "epoch": 0.5471571906354515, "grad_norm": 2.8495930345293607, "learning_rate": 1.8806628757174987e-05, "loss": 0.8321, "step": 7362 }, { "epoch": 0.5472315124489038, "grad_norm": 2.5114311992549467, "learning_rate": 1.8806248612086903e-05, "loss": 1.0007, "step": 7363 }, { "epoch": 0.547305834262356, "grad_norm": 2.50824001483747, "learning_rate": 1.880586841030465e-05, "loss": 0.8035, "step": 7364 }, { "epoch": 0.5473801560758083, "grad_norm": 2.941745159266238, "learning_rate": 1.8805488151830675e-05, "loss": 0.9084, "step": 7365 }, { "epoch": 0.5474544778892605, "grad_norm": 2.33303233313073, "learning_rate": 1.880510783666742e-05, "loss": 0.7255, "step": 7366 }, { "epoch": 0.5475287997027127, "grad_norm": 2.78371792607104, "learning_rate": 1.880472746481734e-05, "loss": 0.9089, "step": 7367 }, { "epoch": 0.547603121516165, "grad_norm": 6.856605212962846, "learning_rate": 1.880434703628288e-05, "loss": 0.7917, "step": 7368 }, { "epoch": 0.5476774433296172, "grad_norm": 2.421021503728671, "learning_rate": 1.880396655106649e-05, "loss": 0.9728, "step": 7369 }, { "epoch": 0.5477517651430694, "grad_norm": 2.8454990257034334, "learning_rate": 1.8803586009170625e-05, "loss": 0.7924, "step": 7370 }, { "epoch": 0.5478260869565217, "grad_norm": 1.718762773119982, "learning_rate": 1.8803205410597727e-05, "loss": 0.8114, "step": 7371 }, { "epoch": 0.547900408769974, "grad_norm": 2.849160925876267, "learning_rate": 1.880282475535025e-05, "loss": 0.9685, "step": 7372 }, { "epoch": 0.5479747305834263, "grad_norm": 3.026433877209289, "learning_rate": 1.8802444043430644e-05, "loss": 1.0265, "step": 7373 }, { "epoch": 0.5480490523968785, "grad_norm": 2.296847936103941, "learning_rate": 1.8802063274841357e-05, "loss": 0.8905, "step": 7374 }, { "epoch": 0.5481233742103307, "grad_norm": 2.3803579967319424, "learning_rate": 1.8801682449584845e-05, "loss": 0.8558, "step": 7375 }, { "epoch": 0.548197696023783, "grad_norm": 3.2850678219050553, "learning_rate": 1.880130156766356e-05, "loss": 0.7342, "step": 7376 }, { "epoch": 0.5482720178372352, "grad_norm": 2.4738593696894626, "learning_rate": 1.880092062907995e-05, "loss": 0.7788, "step": 7377 }, { "epoch": 0.5483463396506875, "grad_norm": 3.780470124603266, "learning_rate": 1.8800539633836473e-05, "loss": 0.862, "step": 7378 }, { "epoch": 0.5484206614641397, "grad_norm": 3.4622048394129044, "learning_rate": 1.8800158581935574e-05, "loss": 0.9071, "step": 7379 }, { "epoch": 0.5484949832775919, "grad_norm": 4.6443073364643395, "learning_rate": 1.8799777473379714e-05, "loss": 1.1499, "step": 7380 }, { "epoch": 0.5485693050910442, "grad_norm": 2.5856075997612784, "learning_rate": 1.879939630817134e-05, "loss": 1.0368, "step": 7381 }, { "epoch": 0.5486436269044964, "grad_norm": 3.693777948554352, "learning_rate": 1.8799015086312913e-05, "loss": 0.8501, "step": 7382 }, { "epoch": 0.5487179487179488, "grad_norm": 8.083654758547269, "learning_rate": 1.879863380780688e-05, "loss": 0.8163, "step": 7383 }, { "epoch": 0.548792270531401, "grad_norm": 4.340834609819724, "learning_rate": 1.87982524726557e-05, "loss": 0.9003, "step": 7384 }, { "epoch": 0.5488665923448532, "grad_norm": 2.173155124450752, "learning_rate": 1.8797871080861827e-05, "loss": 0.9438, "step": 7385 }, { "epoch": 0.5489409141583055, "grad_norm": 2.4381778861793606, "learning_rate": 1.8797489632427718e-05, "loss": 0.8066, "step": 7386 }, { "epoch": 0.5490152359717577, "grad_norm": 2.907669921491276, "learning_rate": 1.8797108127355827e-05, "loss": 0.9096, "step": 7387 }, { "epoch": 0.54908955778521, "grad_norm": 2.0456502947632393, "learning_rate": 1.879672656564861e-05, "loss": 0.918, "step": 7388 }, { "epoch": 0.5491638795986622, "grad_norm": 1.7793161491620304, "learning_rate": 1.879634494730852e-05, "loss": 0.8868, "step": 7389 }, { "epoch": 0.5492382014121144, "grad_norm": 2.067041689884122, "learning_rate": 1.8795963272338023e-05, "loss": 0.8459, "step": 7390 }, { "epoch": 0.5493125232255667, "grad_norm": 1.9023323981855549, "learning_rate": 1.879558154073957e-05, "loss": 0.9201, "step": 7391 }, { "epoch": 0.5493868450390189, "grad_norm": 2.5538756083895056, "learning_rate": 1.8795199752515617e-05, "loss": 0.8601, "step": 7392 }, { "epoch": 0.5494611668524712, "grad_norm": 3.421214970669309, "learning_rate": 1.8794817907668626e-05, "loss": 0.9679, "step": 7393 }, { "epoch": 0.5495354886659235, "grad_norm": 1.8802263308339158, "learning_rate": 1.8794436006201053e-05, "loss": 0.9063, "step": 7394 }, { "epoch": 0.5496098104793757, "grad_norm": 2.7637470767503536, "learning_rate": 1.879405404811536e-05, "loss": 1.2049, "step": 7395 }, { "epoch": 0.549684132292828, "grad_norm": 2.1555424601239896, "learning_rate": 1.8793672033414e-05, "loss": 0.903, "step": 7396 }, { "epoch": 0.5497584541062802, "grad_norm": 2.166699027727339, "learning_rate": 1.8793289962099434e-05, "loss": 0.8744, "step": 7397 }, { "epoch": 0.5498327759197325, "grad_norm": 2.7390024630653755, "learning_rate": 1.8792907834174127e-05, "loss": 0.833, "step": 7398 }, { "epoch": 0.5499070977331847, "grad_norm": 2.940445294310892, "learning_rate": 1.8792525649640537e-05, "loss": 0.8832, "step": 7399 }, { "epoch": 0.5499814195466369, "grad_norm": 2.4970645552925737, "learning_rate": 1.879214340850112e-05, "loss": 1.0725, "step": 7400 }, { "epoch": 0.5500557413600892, "grad_norm": 3.8606643880247935, "learning_rate": 1.879176111075834e-05, "loss": 0.6624, "step": 7401 }, { "epoch": 0.5501300631735414, "grad_norm": 1.801743785609917, "learning_rate": 1.8791378756414656e-05, "loss": 0.8873, "step": 7402 }, { "epoch": 0.5502043849869936, "grad_norm": 2.159803426140166, "learning_rate": 1.8790996345472538e-05, "loss": 1.0404, "step": 7403 }, { "epoch": 0.5502787068004459, "grad_norm": 2.3918043584680277, "learning_rate": 1.879061387793444e-05, "loss": 1.0252, "step": 7404 }, { "epoch": 0.5503530286138982, "grad_norm": 2.2225271605279997, "learning_rate": 1.8790231353802823e-05, "loss": 0.9581, "step": 7405 }, { "epoch": 0.5504273504273505, "grad_norm": 2.370503857562098, "learning_rate": 1.8789848773080153e-05, "loss": 0.8516, "step": 7406 }, { "epoch": 0.5505016722408027, "grad_norm": 2.018333607861357, "learning_rate": 1.8789466135768892e-05, "loss": 0.8981, "step": 7407 }, { "epoch": 0.550575994054255, "grad_norm": 2.4078570685229104, "learning_rate": 1.8789083441871504e-05, "loss": 0.8593, "step": 7408 }, { "epoch": 0.5506503158677072, "grad_norm": 3.3607650090858003, "learning_rate": 1.878870069139046e-05, "loss": 0.8591, "step": 7409 }, { "epoch": 0.5507246376811594, "grad_norm": 2.5625828386228293, "learning_rate": 1.878831788432821e-05, "loss": 0.6818, "step": 7410 }, { "epoch": 0.5507989594946117, "grad_norm": 2.208688992790944, "learning_rate": 1.8787935020687224e-05, "loss": 0.949, "step": 7411 }, { "epoch": 0.5508732813080639, "grad_norm": 1.8658208932090101, "learning_rate": 1.878755210046997e-05, "loss": 0.8818, "step": 7412 }, { "epoch": 0.5509476031215161, "grad_norm": 2.4950314347368385, "learning_rate": 1.8787169123678913e-05, "loss": 0.8437, "step": 7413 }, { "epoch": 0.5510219249349684, "grad_norm": 2.473438208969553, "learning_rate": 1.8786786090316517e-05, "loss": 0.9435, "step": 7414 }, { "epoch": 0.5510962467484206, "grad_norm": 3.731255767983079, "learning_rate": 1.878640300038525e-05, "loss": 0.8717, "step": 7415 }, { "epoch": 0.551170568561873, "grad_norm": 5.060050738577203, "learning_rate": 1.8786019853887573e-05, "loss": 0.9269, "step": 7416 }, { "epoch": 0.5512448903753252, "grad_norm": 2.470649307773822, "learning_rate": 1.8785636650825954e-05, "loss": 0.7867, "step": 7417 }, { "epoch": 0.5513192121887774, "grad_norm": 2.640842827386612, "learning_rate": 1.8785253391202867e-05, "loss": 0.8398, "step": 7418 }, { "epoch": 0.5513935340022297, "grad_norm": 3.755284579696781, "learning_rate": 1.8784870075020775e-05, "loss": 1.0166, "step": 7419 }, { "epoch": 0.5514678558156819, "grad_norm": 4.070103004240843, "learning_rate": 1.8784486702282143e-05, "loss": 1.0163, "step": 7420 }, { "epoch": 0.5515421776291342, "grad_norm": 3.031466461792034, "learning_rate": 1.8784103272989442e-05, "loss": 0.8715, "step": 7421 }, { "epoch": 0.5516164994425864, "grad_norm": 3.1248112786640743, "learning_rate": 1.878371978714514e-05, "loss": 1.0674, "step": 7422 }, { "epoch": 0.5516908212560386, "grad_norm": 2.178303837486059, "learning_rate": 1.8783336244751704e-05, "loss": 0.7516, "step": 7423 }, { "epoch": 0.5517651430694909, "grad_norm": 2.4365200796234014, "learning_rate": 1.878295264581161e-05, "loss": 1.0298, "step": 7424 }, { "epoch": 0.5518394648829431, "grad_norm": 2.779264744634782, "learning_rate": 1.8782568990327315e-05, "loss": 1.1039, "step": 7425 }, { "epoch": 0.5519137866963953, "grad_norm": 1.8838746554869188, "learning_rate": 1.8782185278301298e-05, "loss": 0.6874, "step": 7426 }, { "epoch": 0.5519881085098476, "grad_norm": 13.275724146654603, "learning_rate": 1.878180150973603e-05, "loss": 0.8851, "step": 7427 }, { "epoch": 0.5520624303232999, "grad_norm": 2.917411799104095, "learning_rate": 1.8781417684633983e-05, "loss": 1.0055, "step": 7428 }, { "epoch": 0.5521367521367522, "grad_norm": 3.7810633935560394, "learning_rate": 1.878103380299762e-05, "loss": 0.9893, "step": 7429 }, { "epoch": 0.5522110739502044, "grad_norm": 4.340362417985451, "learning_rate": 1.878064986482942e-05, "loss": 0.8462, "step": 7430 }, { "epoch": 0.5522853957636567, "grad_norm": 6.589198575009479, "learning_rate": 1.8780265870131852e-05, "loss": 0.8678, "step": 7431 }, { "epoch": 0.5523597175771089, "grad_norm": 6.128383452790435, "learning_rate": 1.8779881818907385e-05, "loss": 0.8739, "step": 7432 }, { "epoch": 0.5524340393905611, "grad_norm": 2.46447077220274, "learning_rate": 1.8779497711158492e-05, "loss": 0.9751, "step": 7433 }, { "epoch": 0.5525083612040134, "grad_norm": 2.21075619798583, "learning_rate": 1.8779113546887653e-05, "loss": 0.9641, "step": 7434 }, { "epoch": 0.5525826830174656, "grad_norm": 2.5066883384276877, "learning_rate": 1.8778729326097337e-05, "loss": 0.8761, "step": 7435 }, { "epoch": 0.5526570048309178, "grad_norm": 4.296091478124249, "learning_rate": 1.8778345048790015e-05, "loss": 1.0573, "step": 7436 }, { "epoch": 0.5527313266443701, "grad_norm": 2.0269781511373286, "learning_rate": 1.877796071496816e-05, "loss": 0.8935, "step": 7437 }, { "epoch": 0.5528056484578223, "grad_norm": 8.048230404744494, "learning_rate": 1.8777576324634256e-05, "loss": 0.9221, "step": 7438 }, { "epoch": 0.5528799702712747, "grad_norm": 2.2062185462281114, "learning_rate": 1.8777191877790766e-05, "loss": 1.0156, "step": 7439 }, { "epoch": 0.5529542920847269, "grad_norm": 2.0514004909053347, "learning_rate": 1.8776807374440173e-05, "loss": 0.8542, "step": 7440 }, { "epoch": 0.5530286138981791, "grad_norm": 2.182482064526701, "learning_rate": 1.8776422814584947e-05, "loss": 0.8666, "step": 7441 }, { "epoch": 0.5531029357116314, "grad_norm": 2.151978364190192, "learning_rate": 1.8776038198227568e-05, "loss": 1.1284, "step": 7442 }, { "epoch": 0.5531772575250836, "grad_norm": 4.383662080179946, "learning_rate": 1.877565352537051e-05, "loss": 0.9851, "step": 7443 }, { "epoch": 0.5532515793385359, "grad_norm": 5.917529700760702, "learning_rate": 1.877526879601625e-05, "loss": 1.0383, "step": 7444 }, { "epoch": 0.5533259011519881, "grad_norm": 2.045533036924658, "learning_rate": 1.8774884010167263e-05, "loss": 0.8209, "step": 7445 }, { "epoch": 0.5534002229654403, "grad_norm": 3.853740914545488, "learning_rate": 1.877449916782603e-05, "loss": 0.8619, "step": 7446 }, { "epoch": 0.5534745447788926, "grad_norm": 2.39348117743197, "learning_rate": 1.877411426899503e-05, "loss": 0.8011, "step": 7447 }, { "epoch": 0.5535488665923448, "grad_norm": 1.7472349137820118, "learning_rate": 1.8773729313676733e-05, "loss": 0.7442, "step": 7448 }, { "epoch": 0.553623188405797, "grad_norm": 2.957972644123395, "learning_rate": 1.877334430187362e-05, "loss": 0.8005, "step": 7449 }, { "epoch": 0.5536975102192494, "grad_norm": 1.7093407661991467, "learning_rate": 1.8772959233588176e-05, "loss": 0.7964, "step": 7450 }, { "epoch": 0.5537718320327016, "grad_norm": 2.2584254973372686, "learning_rate": 1.8772574108822876e-05, "loss": 0.8825, "step": 7451 }, { "epoch": 0.5538461538461539, "grad_norm": 1.7423106802482866, "learning_rate": 1.8772188927580197e-05, "loss": 0.8108, "step": 7452 }, { "epoch": 0.5539204756596061, "grad_norm": 2.8032051021524365, "learning_rate": 1.8771803689862623e-05, "loss": 0.8472, "step": 7453 }, { "epoch": 0.5539947974730584, "grad_norm": 2.5272036388504056, "learning_rate": 1.877141839567263e-05, "loss": 1.0396, "step": 7454 }, { "epoch": 0.5540691192865106, "grad_norm": 2.3014983831458893, "learning_rate": 1.87710330450127e-05, "loss": 0.9147, "step": 7455 }, { "epoch": 0.5541434410999628, "grad_norm": 1.799798135836757, "learning_rate": 1.8770647637885315e-05, "loss": 0.8344, "step": 7456 }, { "epoch": 0.5542177629134151, "grad_norm": 2.0905026726668594, "learning_rate": 1.8770262174292955e-05, "loss": 0.9783, "step": 7457 }, { "epoch": 0.5542920847268673, "grad_norm": 2.18921072063198, "learning_rate": 1.8769876654238103e-05, "loss": 0.7846, "step": 7458 }, { "epoch": 0.5543664065403195, "grad_norm": 2.3948544783001764, "learning_rate": 1.8769491077723238e-05, "loss": 1.0251, "step": 7459 }, { "epoch": 0.5544407283537718, "grad_norm": 2.1493788724786453, "learning_rate": 1.8769105444750848e-05, "loss": 0.8928, "step": 7460 }, { "epoch": 0.5545150501672241, "grad_norm": 2.359207826556475, "learning_rate": 1.876871975532341e-05, "loss": 0.9427, "step": 7461 }, { "epoch": 0.5545893719806764, "grad_norm": 2.3173912944493242, "learning_rate": 1.876833400944341e-05, "loss": 0.8549, "step": 7462 }, { "epoch": 0.5546636937941286, "grad_norm": 1.4323414774387067, "learning_rate": 1.876794820711333e-05, "loss": 0.6187, "step": 7463 }, { "epoch": 0.5547380156075808, "grad_norm": 2.2154831254241136, "learning_rate": 1.8767562348335654e-05, "loss": 0.8235, "step": 7464 }, { "epoch": 0.5548123374210331, "grad_norm": 2.052426509084044, "learning_rate": 1.876717643311287e-05, "loss": 0.8784, "step": 7465 }, { "epoch": 0.5548866592344853, "grad_norm": 2.2180757022200837, "learning_rate": 1.8766790461447457e-05, "loss": 0.7974, "step": 7466 }, { "epoch": 0.5549609810479376, "grad_norm": 3.4406635685758293, "learning_rate": 1.87664044333419e-05, "loss": 0.8677, "step": 7467 }, { "epoch": 0.5550353028613898, "grad_norm": 2.376161043485439, "learning_rate": 1.8766018348798686e-05, "loss": 1.0209, "step": 7468 }, { "epoch": 0.555109624674842, "grad_norm": 2.174573675213599, "learning_rate": 1.8765632207820302e-05, "loss": 0.8481, "step": 7469 }, { "epoch": 0.5551839464882943, "grad_norm": 1.9741447890403887, "learning_rate": 1.8765246010409232e-05, "loss": 0.8474, "step": 7470 }, { "epoch": 0.5552582683017465, "grad_norm": 2.86518523208311, "learning_rate": 1.8764859756567967e-05, "loss": 0.8833, "step": 7471 }, { "epoch": 0.5553325901151989, "grad_norm": 2.339180696191847, "learning_rate": 1.8764473446298982e-05, "loss": 0.8663, "step": 7472 }, { "epoch": 0.5554069119286511, "grad_norm": 1.695901050732535, "learning_rate": 1.876408707960478e-05, "loss": 0.6853, "step": 7473 }, { "epoch": 0.5554812337421033, "grad_norm": 2.08325221263501, "learning_rate": 1.8763700656487832e-05, "loss": 0.9089, "step": 7474 }, { "epoch": 0.5555555555555556, "grad_norm": 1.7925908033908984, "learning_rate": 1.876331417695064e-05, "loss": 0.6623, "step": 7475 }, { "epoch": 0.5556298773690078, "grad_norm": 1.9292960770071288, "learning_rate": 1.8762927640995686e-05, "loss": 0.8564, "step": 7476 }, { "epoch": 0.5557041991824601, "grad_norm": 2.276805872811306, "learning_rate": 1.8762541048625452e-05, "loss": 0.826, "step": 7477 }, { "epoch": 0.5557785209959123, "grad_norm": 2.9147084976787236, "learning_rate": 1.876215439984244e-05, "loss": 0.8584, "step": 7478 }, { "epoch": 0.5558528428093645, "grad_norm": 1.9176126840654213, "learning_rate": 1.876176769464913e-05, "loss": 1.0044, "step": 7479 }, { "epoch": 0.5559271646228168, "grad_norm": 1.8257504800760416, "learning_rate": 1.8761380933048013e-05, "loss": 0.7939, "step": 7480 }, { "epoch": 0.556001486436269, "grad_norm": 1.8015341973452492, "learning_rate": 1.8760994115041585e-05, "loss": 0.9872, "step": 7481 }, { "epoch": 0.5560758082497212, "grad_norm": 2.694400931065231, "learning_rate": 1.876060724063233e-05, "loss": 0.8885, "step": 7482 }, { "epoch": 0.5561501300631735, "grad_norm": 2.0592825110693016, "learning_rate": 1.8760220309822736e-05, "loss": 0.6815, "step": 7483 }, { "epoch": 0.5562244518766258, "grad_norm": 3.00125353778443, "learning_rate": 1.8759833322615303e-05, "loss": 0.6016, "step": 7484 }, { "epoch": 0.5562987736900781, "grad_norm": 2.1592262059548597, "learning_rate": 1.8759446279012512e-05, "loss": 1.0361, "step": 7485 }, { "epoch": 0.5563730955035303, "grad_norm": 2.894903779858591, "learning_rate": 1.8759059179016865e-05, "loss": 0.8656, "step": 7486 }, { "epoch": 0.5564474173169826, "grad_norm": 2.0674147268889063, "learning_rate": 1.8758672022630847e-05, "loss": 1.0086, "step": 7487 }, { "epoch": 0.5565217391304348, "grad_norm": 1.8843165242199753, "learning_rate": 1.875828480985696e-05, "loss": 0.7176, "step": 7488 }, { "epoch": 0.556596060943887, "grad_norm": 2.228445440356736, "learning_rate": 1.875789754069768e-05, "loss": 0.9591, "step": 7489 }, { "epoch": 0.5566703827573393, "grad_norm": 2.1105136703297434, "learning_rate": 1.8757510215155512e-05, "loss": 0.6615, "step": 7490 }, { "epoch": 0.5567447045707915, "grad_norm": 2.262358726684356, "learning_rate": 1.875712283323295e-05, "loss": 0.7609, "step": 7491 }, { "epoch": 0.5568190263842437, "grad_norm": 2.376856551651302, "learning_rate": 1.8756735394932485e-05, "loss": 0.7622, "step": 7492 }, { "epoch": 0.556893348197696, "grad_norm": 1.912761511568948, "learning_rate": 1.8756347900256612e-05, "loss": 0.8628, "step": 7493 }, { "epoch": 0.5569676700111482, "grad_norm": 2.211011635654337, "learning_rate": 1.8755960349207827e-05, "loss": 0.8371, "step": 7494 }, { "epoch": 0.5570419918246006, "grad_norm": 1.9068572732794375, "learning_rate": 1.8755572741788623e-05, "loss": 0.9481, "step": 7495 }, { "epoch": 0.5571163136380528, "grad_norm": 2.1961028430872247, "learning_rate": 1.8755185078001494e-05, "loss": 0.7133, "step": 7496 }, { "epoch": 0.557190635451505, "grad_norm": 2.002186808506803, "learning_rate": 1.875479735784894e-05, "loss": 0.8758, "step": 7497 }, { "epoch": 0.5572649572649573, "grad_norm": 1.8613079152011718, "learning_rate": 1.8754409581333453e-05, "loss": 1.0049, "step": 7498 }, { "epoch": 0.5573392790784095, "grad_norm": 2.006050706670903, "learning_rate": 1.8754021748457534e-05, "loss": 0.8027, "step": 7499 }, { "epoch": 0.5574136008918618, "grad_norm": 1.8575258590436283, "learning_rate": 1.8753633859223672e-05, "loss": 0.8666, "step": 7500 }, { "epoch": 0.557487922705314, "grad_norm": 1.4052411627224684, "learning_rate": 1.8753245913634374e-05, "loss": 0.6235, "step": 7501 }, { "epoch": 0.5575622445187662, "grad_norm": 2.793851855127334, "learning_rate": 1.8752857911692133e-05, "loss": 0.8372, "step": 7502 }, { "epoch": 0.5576365663322185, "grad_norm": 1.7196232136743885, "learning_rate": 1.8752469853399446e-05, "loss": 1.0113, "step": 7503 }, { "epoch": 0.5577108881456707, "grad_norm": 1.8489325670375227, "learning_rate": 1.875208173875881e-05, "loss": 0.8248, "step": 7504 }, { "epoch": 0.557785209959123, "grad_norm": 2.462676039223598, "learning_rate": 1.875169356777273e-05, "loss": 0.8725, "step": 7505 }, { "epoch": 0.5578595317725753, "grad_norm": 1.8743322445864354, "learning_rate": 1.87513053404437e-05, "loss": 0.9314, "step": 7506 }, { "epoch": 0.5579338535860275, "grad_norm": 1.4730862657206536, "learning_rate": 1.8750917056774213e-05, "loss": 0.8354, "step": 7507 }, { "epoch": 0.5580081753994798, "grad_norm": 2.162469933350679, "learning_rate": 1.8750528716766784e-05, "loss": 0.8963, "step": 7508 }, { "epoch": 0.558082497212932, "grad_norm": 2.6336686833124427, "learning_rate": 1.8750140320423905e-05, "loss": 0.856, "step": 7509 }, { "epoch": 0.5581568190263843, "grad_norm": 1.8256091612165446, "learning_rate": 1.8749751867748074e-05, "loss": 0.788, "step": 7510 }, { "epoch": 0.5582311408398365, "grad_norm": 2.0589554093432687, "learning_rate": 1.8749363358741793e-05, "loss": 0.7749, "step": 7511 }, { "epoch": 0.5583054626532887, "grad_norm": 2.341921656323879, "learning_rate": 1.8748974793407568e-05, "loss": 0.8328, "step": 7512 }, { "epoch": 0.558379784466741, "grad_norm": 2.243286768035327, "learning_rate": 1.8748586171747897e-05, "loss": 0.8588, "step": 7513 }, { "epoch": 0.5584541062801932, "grad_norm": 2.1343580166909595, "learning_rate": 1.874819749376528e-05, "loss": 0.9371, "step": 7514 }, { "epoch": 0.5585284280936454, "grad_norm": 1.9404663676858789, "learning_rate": 1.874780875946222e-05, "loss": 0.862, "step": 7515 }, { "epoch": 0.5586027499070977, "grad_norm": 2.0266192355723356, "learning_rate": 1.8747419968841226e-05, "loss": 0.9411, "step": 7516 }, { "epoch": 0.55867707172055, "grad_norm": 2.0599126243541845, "learning_rate": 1.8747031121904795e-05, "loss": 0.9318, "step": 7517 }, { "epoch": 0.5587513935340023, "grad_norm": 2.2248048411910717, "learning_rate": 1.8746642218655432e-05, "loss": 1.0732, "step": 7518 }, { "epoch": 0.5588257153474545, "grad_norm": 3.313079873897903, "learning_rate": 1.874625325909564e-05, "loss": 1.0274, "step": 7519 }, { "epoch": 0.5589000371609067, "grad_norm": 4.058256457387384, "learning_rate": 1.874586424322792e-05, "loss": 0.9521, "step": 7520 }, { "epoch": 0.558974358974359, "grad_norm": 1.8967355014524971, "learning_rate": 1.874547517105478e-05, "loss": 0.6949, "step": 7521 }, { "epoch": 0.5590486807878112, "grad_norm": 2.424042363835771, "learning_rate": 1.874508604257873e-05, "loss": 0.9753, "step": 7522 }, { "epoch": 0.5591230026012635, "grad_norm": 2.2385173440093054, "learning_rate": 1.8744696857802266e-05, "loss": 1.1247, "step": 7523 }, { "epoch": 0.5591973244147157, "grad_norm": 2.1531532333191694, "learning_rate": 1.8744307616727896e-05, "loss": 1.1055, "step": 7524 }, { "epoch": 0.5592716462281679, "grad_norm": 2.155738342204632, "learning_rate": 1.874391831935813e-05, "loss": 1.0299, "step": 7525 }, { "epoch": 0.5593459680416202, "grad_norm": 1.9711003380167917, "learning_rate": 1.8743528965695468e-05, "loss": 0.9087, "step": 7526 }, { "epoch": 0.5594202898550724, "grad_norm": 1.7115490750385698, "learning_rate": 1.8743139555742423e-05, "loss": 0.9453, "step": 7527 }, { "epoch": 0.5594946116685248, "grad_norm": 2.083209020273084, "learning_rate": 1.87427500895015e-05, "loss": 1.0172, "step": 7528 }, { "epoch": 0.559568933481977, "grad_norm": 2.510979103731685, "learning_rate": 1.8742360566975204e-05, "loss": 1.044, "step": 7529 }, { "epoch": 0.5596432552954292, "grad_norm": 2.339555153403699, "learning_rate": 1.8741970988166044e-05, "loss": 0.679, "step": 7530 }, { "epoch": 0.5597175771088815, "grad_norm": 1.8513360372686551, "learning_rate": 1.8741581353076528e-05, "loss": 0.834, "step": 7531 }, { "epoch": 0.5597918989223337, "grad_norm": 3.004624343959901, "learning_rate": 1.8741191661709167e-05, "loss": 0.7759, "step": 7532 }, { "epoch": 0.559866220735786, "grad_norm": 2.322187780407411, "learning_rate": 1.8740801914066465e-05, "loss": 0.8323, "step": 7533 }, { "epoch": 0.5599405425492382, "grad_norm": 2.199590820184311, "learning_rate": 1.8740412110150936e-05, "loss": 0.7739, "step": 7534 }, { "epoch": 0.5600148643626904, "grad_norm": 2.44840011320003, "learning_rate": 1.8740022249965088e-05, "loss": 1.049, "step": 7535 }, { "epoch": 0.5600891861761427, "grad_norm": 1.9415803055209047, "learning_rate": 1.8739632333511427e-05, "loss": 1.0224, "step": 7536 }, { "epoch": 0.5601635079895949, "grad_norm": 2.1560291349774854, "learning_rate": 1.873924236079247e-05, "loss": 0.8294, "step": 7537 }, { "epoch": 0.5602378298030471, "grad_norm": 1.90803001676093, "learning_rate": 1.8738852331810723e-05, "loss": 0.8206, "step": 7538 }, { "epoch": 0.5603121516164994, "grad_norm": 2.4188976432475116, "learning_rate": 1.8738462246568695e-05, "loss": 0.6102, "step": 7539 }, { "epoch": 0.5603864734299517, "grad_norm": 1.8158261349832896, "learning_rate": 1.8738072105068904e-05, "loss": 0.8015, "step": 7540 }, { "epoch": 0.560460795243404, "grad_norm": 1.8353060985168712, "learning_rate": 1.873768190731386e-05, "loss": 0.7601, "step": 7541 }, { "epoch": 0.5605351170568562, "grad_norm": 1.9740882383653737, "learning_rate": 1.873729165330607e-05, "loss": 0.7308, "step": 7542 }, { "epoch": 0.5606094388703085, "grad_norm": 1.7985294017180513, "learning_rate": 1.8736901343048052e-05, "loss": 0.7914, "step": 7543 }, { "epoch": 0.5606837606837607, "grad_norm": 1.8585617708312896, "learning_rate": 1.8736510976542313e-05, "loss": 0.6875, "step": 7544 }, { "epoch": 0.5607580824972129, "grad_norm": 1.8804586156835295, "learning_rate": 1.873612055379137e-05, "loss": 0.8959, "step": 7545 }, { "epoch": 0.5608324043106652, "grad_norm": 2.183779037453098, "learning_rate": 1.873573007479774e-05, "loss": 0.9129, "step": 7546 }, { "epoch": 0.5609067261241174, "grad_norm": 2.3938891989145525, "learning_rate": 1.8735339539563928e-05, "loss": 0.8614, "step": 7547 }, { "epoch": 0.5609810479375696, "grad_norm": 1.765531505471091, "learning_rate": 1.8734948948092455e-05, "loss": 0.7642, "step": 7548 }, { "epoch": 0.5610553697510219, "grad_norm": 2.1037001351282627, "learning_rate": 1.8734558300385835e-05, "loss": 0.8088, "step": 7549 }, { "epoch": 0.5611296915644741, "grad_norm": 1.9400851567278963, "learning_rate": 1.8734167596446582e-05, "loss": 0.945, "step": 7550 }, { "epoch": 0.5612040133779265, "grad_norm": 1.870860184571783, "learning_rate": 1.8733776836277212e-05, "loss": 0.8238, "step": 7551 }, { "epoch": 0.5612783351913787, "grad_norm": 2.2981383620390092, "learning_rate": 1.8733386019880237e-05, "loss": 0.9044, "step": 7552 }, { "epoch": 0.561352657004831, "grad_norm": 2.009079407669215, "learning_rate": 1.873299514725818e-05, "loss": 0.9388, "step": 7553 }, { "epoch": 0.5614269788182832, "grad_norm": 2.029743739557029, "learning_rate": 1.873260421841355e-05, "loss": 0.8988, "step": 7554 }, { "epoch": 0.5615013006317354, "grad_norm": 2.0255460308068085, "learning_rate": 1.8732213233348865e-05, "loss": 0.8693, "step": 7555 }, { "epoch": 0.5615756224451877, "grad_norm": 1.848743117377499, "learning_rate": 1.8731822192066648e-05, "loss": 0.6996, "step": 7556 }, { "epoch": 0.5616499442586399, "grad_norm": 2.3070534889084735, "learning_rate": 1.8731431094569412e-05, "loss": 0.8873, "step": 7557 }, { "epoch": 0.5617242660720921, "grad_norm": 1.9185640518067353, "learning_rate": 1.8731039940859675e-05, "loss": 0.9175, "step": 7558 }, { "epoch": 0.5617985878855444, "grad_norm": 1.6830052260357944, "learning_rate": 1.873064873093996e-05, "loss": 0.7547, "step": 7559 }, { "epoch": 0.5618729096989966, "grad_norm": 2.1983024140680087, "learning_rate": 1.8730257464812777e-05, "loss": 0.6986, "step": 7560 }, { "epoch": 0.5619472315124489, "grad_norm": 2.1464475374662118, "learning_rate": 1.872986614248065e-05, "loss": 0.9499, "step": 7561 }, { "epoch": 0.5620215533259012, "grad_norm": 1.6909551691037252, "learning_rate": 1.87294747639461e-05, "loss": 0.8891, "step": 7562 }, { "epoch": 0.5620958751393534, "grad_norm": 1.9402427191649048, "learning_rate": 1.8729083329211644e-05, "loss": 1.0006, "step": 7563 }, { "epoch": 0.5621701969528057, "grad_norm": 2.4926759853937845, "learning_rate": 1.8728691838279798e-05, "loss": 0.9261, "step": 7564 }, { "epoch": 0.5622445187662579, "grad_norm": 1.908102282240194, "learning_rate": 1.8728300291153093e-05, "loss": 0.9377, "step": 7565 }, { "epoch": 0.5623188405797102, "grad_norm": 1.7913447741369748, "learning_rate": 1.872790868783404e-05, "loss": 0.9004, "step": 7566 }, { "epoch": 0.5623931623931624, "grad_norm": 1.8096080276983688, "learning_rate": 1.8727517028325164e-05, "loss": 0.9037, "step": 7567 }, { "epoch": 0.5624674842066146, "grad_norm": 2.5388391024673456, "learning_rate": 1.8727125312628986e-05, "loss": 0.9655, "step": 7568 }, { "epoch": 0.5625418060200669, "grad_norm": 1.986002560983923, "learning_rate": 1.8726733540748027e-05, "loss": 0.8882, "step": 7569 }, { "epoch": 0.5626161278335191, "grad_norm": 1.8474270399695185, "learning_rate": 1.8726341712684814e-05, "loss": 0.8546, "step": 7570 }, { "epoch": 0.5626904496469713, "grad_norm": 2.248202919227927, "learning_rate": 1.8725949828441865e-05, "loss": 0.7639, "step": 7571 }, { "epoch": 0.5627647714604236, "grad_norm": 1.864352911045909, "learning_rate": 1.8725557888021703e-05, "loss": 0.7528, "step": 7572 }, { "epoch": 0.5628390932738759, "grad_norm": 1.8245005408136017, "learning_rate": 1.872516589142685e-05, "loss": 0.715, "step": 7573 }, { "epoch": 0.5629134150873282, "grad_norm": 2.1235289496039296, "learning_rate": 1.8724773838659834e-05, "loss": 1.0281, "step": 7574 }, { "epoch": 0.5629877369007804, "grad_norm": 2.9720523441664715, "learning_rate": 1.8724381729723175e-05, "loss": 0.8759, "step": 7575 }, { "epoch": 0.5630620587142326, "grad_norm": 1.8287890576047836, "learning_rate": 1.8723989564619402e-05, "loss": 0.8019, "step": 7576 }, { "epoch": 0.5631363805276849, "grad_norm": 1.985114369493288, "learning_rate": 1.8723597343351036e-05, "loss": 0.8977, "step": 7577 }, { "epoch": 0.5632107023411371, "grad_norm": 1.6191548180023865, "learning_rate": 1.8723205065920598e-05, "loss": 0.8615, "step": 7578 }, { "epoch": 0.5632850241545894, "grad_norm": 1.8301716408964892, "learning_rate": 1.872281273233062e-05, "loss": 0.8704, "step": 7579 }, { "epoch": 0.5633593459680416, "grad_norm": 1.9734710033234748, "learning_rate": 1.872242034258363e-05, "loss": 1.0384, "step": 7580 }, { "epoch": 0.5634336677814938, "grad_norm": 1.8665372600139765, "learning_rate": 1.872202789668215e-05, "loss": 0.8863, "step": 7581 }, { "epoch": 0.5635079895949461, "grad_norm": 1.859995590651216, "learning_rate": 1.8721635394628706e-05, "loss": 0.7875, "step": 7582 }, { "epoch": 0.5635823114083983, "grad_norm": 2.3340665110595804, "learning_rate": 1.872124283642583e-05, "loss": 0.843, "step": 7583 }, { "epoch": 0.5636566332218507, "grad_norm": 2.263770655137954, "learning_rate": 1.8720850222076035e-05, "loss": 0.7384, "step": 7584 }, { "epoch": 0.5637309550353029, "grad_norm": 1.8571429994168356, "learning_rate": 1.872045755158187e-05, "loss": 0.6553, "step": 7585 }, { "epoch": 0.5638052768487551, "grad_norm": 2.027671079513543, "learning_rate": 1.8720064824945844e-05, "loss": 0.7987, "step": 7586 }, { "epoch": 0.5638795986622074, "grad_norm": 4.611494784336361, "learning_rate": 1.8719672042170494e-05, "loss": 1.1611, "step": 7587 }, { "epoch": 0.5639539204756596, "grad_norm": 1.7959408356081312, "learning_rate": 1.871927920325835e-05, "loss": 0.8285, "step": 7588 }, { "epoch": 0.5640282422891119, "grad_norm": 1.9896723146857112, "learning_rate": 1.8718886308211938e-05, "loss": 0.8212, "step": 7589 }, { "epoch": 0.5641025641025641, "grad_norm": 3.0337224513125447, "learning_rate": 1.871849335703379e-05, "loss": 1.1978, "step": 7590 }, { "epoch": 0.5641768859160163, "grad_norm": 2.0097809075271695, "learning_rate": 1.8718100349726432e-05, "loss": 1.0558, "step": 7591 }, { "epoch": 0.5642512077294686, "grad_norm": 2.28017374267836, "learning_rate": 1.8717707286292397e-05, "loss": 0.829, "step": 7592 }, { "epoch": 0.5643255295429208, "grad_norm": 2.1846648100291097, "learning_rate": 1.8717314166734214e-05, "loss": 0.8452, "step": 7593 }, { "epoch": 0.564399851356373, "grad_norm": 1.8423353813980736, "learning_rate": 1.871692099105442e-05, "loss": 0.6913, "step": 7594 }, { "epoch": 0.5644741731698253, "grad_norm": 1.7071004407730552, "learning_rate": 1.8716527759255533e-05, "loss": 0.7839, "step": 7595 }, { "epoch": 0.5645484949832776, "grad_norm": 1.9527165348458921, "learning_rate": 1.8716134471340094e-05, "loss": 0.7596, "step": 7596 }, { "epoch": 0.5646228167967299, "grad_norm": 2.4625607028148466, "learning_rate": 1.8715741127310637e-05, "loss": 1.0855, "step": 7597 }, { "epoch": 0.5646971386101821, "grad_norm": 2.1186643964980627, "learning_rate": 1.871534772716969e-05, "loss": 0.968, "step": 7598 }, { "epoch": 0.5647714604236344, "grad_norm": 2.4355632961868388, "learning_rate": 1.871495427091979e-05, "loss": 0.7895, "step": 7599 }, { "epoch": 0.5648457822370866, "grad_norm": 2.057342777670234, "learning_rate": 1.871456075856346e-05, "loss": 0.8416, "step": 7600 }, { "epoch": 0.5649201040505388, "grad_norm": 1.7969428783571157, "learning_rate": 1.871416719010324e-05, "loss": 0.8203, "step": 7601 }, { "epoch": 0.5649944258639911, "grad_norm": 2.3779248607964245, "learning_rate": 1.8713773565541666e-05, "loss": 0.9629, "step": 7602 }, { "epoch": 0.5650687476774433, "grad_norm": 1.9245330121496307, "learning_rate": 1.871337988488127e-05, "loss": 0.7763, "step": 7603 }, { "epoch": 0.5651430694908955, "grad_norm": 2.5996909142907922, "learning_rate": 1.871298614812459e-05, "loss": 1.005, "step": 7604 }, { "epoch": 0.5652173913043478, "grad_norm": 2.4537262643216047, "learning_rate": 1.8712592355274152e-05, "loss": 0.8522, "step": 7605 }, { "epoch": 0.5652917131178, "grad_norm": 1.9305920287967058, "learning_rate": 1.87121985063325e-05, "loss": 0.9593, "step": 7606 }, { "epoch": 0.5653660349312524, "grad_norm": 4.214141613760018, "learning_rate": 1.871180460130216e-05, "loss": 1.0146, "step": 7607 }, { "epoch": 0.5654403567447046, "grad_norm": 1.7675231031106438, "learning_rate": 1.871141064018568e-05, "loss": 0.7722, "step": 7608 }, { "epoch": 0.5655146785581568, "grad_norm": 1.849168293253513, "learning_rate": 1.8711016622985592e-05, "loss": 0.9104, "step": 7609 }, { "epoch": 0.5655890003716091, "grad_norm": 2.1615747834760812, "learning_rate": 1.8710622549704424e-05, "loss": 0.8205, "step": 7610 }, { "epoch": 0.5656633221850613, "grad_norm": 1.7777224245731502, "learning_rate": 1.8710228420344724e-05, "loss": 0.7718, "step": 7611 }, { "epoch": 0.5657376439985136, "grad_norm": 2.1896892755922166, "learning_rate": 1.8709834234909028e-05, "loss": 1.0337, "step": 7612 }, { "epoch": 0.5658119658119658, "grad_norm": 2.1082899323785473, "learning_rate": 1.870943999339987e-05, "loss": 0.9652, "step": 7613 }, { "epoch": 0.565886287625418, "grad_norm": 1.6193913355770577, "learning_rate": 1.8709045695819785e-05, "loss": 0.714, "step": 7614 }, { "epoch": 0.5659606094388703, "grad_norm": 2.1717042021486863, "learning_rate": 1.8708651342171318e-05, "loss": 0.8788, "step": 7615 }, { "epoch": 0.5660349312523225, "grad_norm": 6.223916077841438, "learning_rate": 1.8708256932457004e-05, "loss": 1.1051, "step": 7616 }, { "epoch": 0.5661092530657748, "grad_norm": 2.356772669004329, "learning_rate": 1.8707862466679386e-05, "loss": 1.0409, "step": 7617 }, { "epoch": 0.5661835748792271, "grad_norm": 1.9026190278962156, "learning_rate": 1.8707467944841e-05, "loss": 0.9944, "step": 7618 }, { "epoch": 0.5662578966926793, "grad_norm": 1.7009644272028464, "learning_rate": 1.8707073366944392e-05, "loss": 0.7273, "step": 7619 }, { "epoch": 0.5663322185061316, "grad_norm": 1.992489508424294, "learning_rate": 1.8706678732992092e-05, "loss": 0.8077, "step": 7620 }, { "epoch": 0.5664065403195838, "grad_norm": 1.8912137114075807, "learning_rate": 1.8706284042986645e-05, "loss": 0.705, "step": 7621 }, { "epoch": 0.5664808621330361, "grad_norm": 1.8782229674919835, "learning_rate": 1.8705889296930598e-05, "loss": 0.7044, "step": 7622 }, { "epoch": 0.5665551839464883, "grad_norm": 2.2318852894437917, "learning_rate": 1.8705494494826483e-05, "loss": 0.9344, "step": 7623 }, { "epoch": 0.5666295057599405, "grad_norm": 2.1239294463917515, "learning_rate": 1.870509963667685e-05, "loss": 0.917, "step": 7624 }, { "epoch": 0.5667038275733928, "grad_norm": 2.9092219667724195, "learning_rate": 1.870470472248423e-05, "loss": 0.7952, "step": 7625 }, { "epoch": 0.566778149386845, "grad_norm": 2.273606984762585, "learning_rate": 1.870430975225118e-05, "loss": 0.926, "step": 7626 }, { "epoch": 0.5668524712002972, "grad_norm": 2.1918685647910836, "learning_rate": 1.8703914725980234e-05, "loss": 0.9822, "step": 7627 }, { "epoch": 0.5669267930137495, "grad_norm": 2.1747287603838976, "learning_rate": 1.8703519643673937e-05, "loss": 0.8994, "step": 7628 }, { "epoch": 0.5670011148272018, "grad_norm": 2.9876224285995505, "learning_rate": 1.8703124505334828e-05, "loss": 1.0475, "step": 7629 }, { "epoch": 0.5670754366406541, "grad_norm": 1.5757879063029758, "learning_rate": 1.8702729310965462e-05, "loss": 0.7491, "step": 7630 }, { "epoch": 0.5671497584541063, "grad_norm": 2.1773041195369935, "learning_rate": 1.870233406056837e-05, "loss": 0.8619, "step": 7631 }, { "epoch": 0.5672240802675586, "grad_norm": 2.388497568647616, "learning_rate": 1.8701938754146107e-05, "loss": 0.9108, "step": 7632 }, { "epoch": 0.5672984020810108, "grad_norm": 1.8297427520807725, "learning_rate": 1.8701543391701213e-05, "loss": 0.8365, "step": 7633 }, { "epoch": 0.567372723894463, "grad_norm": 2.4647304712494877, "learning_rate": 1.870114797323623e-05, "loss": 0.8582, "step": 7634 }, { "epoch": 0.5674470457079153, "grad_norm": 2.1107133386026584, "learning_rate": 1.8700752498753712e-05, "loss": 0.9258, "step": 7635 }, { "epoch": 0.5675213675213675, "grad_norm": 1.4596226528975669, "learning_rate": 1.87003569682562e-05, "loss": 0.5907, "step": 7636 }, { "epoch": 0.5675956893348197, "grad_norm": 2.302103186565771, "learning_rate": 1.869996138174624e-05, "loss": 0.7862, "step": 7637 }, { "epoch": 0.567670011148272, "grad_norm": 2.0202648692627436, "learning_rate": 1.8699565739226382e-05, "loss": 0.8885, "step": 7638 }, { "epoch": 0.5677443329617242, "grad_norm": 2.212392532273336, "learning_rate": 1.869917004069917e-05, "loss": 1.0575, "step": 7639 }, { "epoch": 0.5678186547751766, "grad_norm": 1.9873927583047377, "learning_rate": 1.8698774286167154e-05, "loss": 0.9605, "step": 7640 }, { "epoch": 0.5678929765886288, "grad_norm": 2.331930161553364, "learning_rate": 1.8698378475632876e-05, "loss": 0.9175, "step": 7641 }, { "epoch": 0.567967298402081, "grad_norm": 2.017470479436134, "learning_rate": 1.8697982609098892e-05, "loss": 0.853, "step": 7642 }, { "epoch": 0.5680416202155333, "grad_norm": 2.0841869700678317, "learning_rate": 1.869758668656775e-05, "loss": 0.8164, "step": 7643 }, { "epoch": 0.5681159420289855, "grad_norm": 1.8337975583841981, "learning_rate": 1.8697190708041993e-05, "loss": 0.8303, "step": 7644 }, { "epoch": 0.5681902638424378, "grad_norm": 2.082618904888319, "learning_rate": 1.8696794673524174e-05, "loss": 1.0084, "step": 7645 }, { "epoch": 0.56826458565589, "grad_norm": 1.9122761877871735, "learning_rate": 1.8696398583016844e-05, "loss": 0.8396, "step": 7646 }, { "epoch": 0.5683389074693422, "grad_norm": 2.1525308466893294, "learning_rate": 1.8696002436522547e-05, "loss": 0.8706, "step": 7647 }, { "epoch": 0.5684132292827945, "grad_norm": 2.051694907530577, "learning_rate": 1.869560623404384e-05, "loss": 0.8431, "step": 7648 }, { "epoch": 0.5684875510962467, "grad_norm": 2.064826012459849, "learning_rate": 1.8695209975583273e-05, "loss": 1.012, "step": 7649 }, { "epoch": 0.568561872909699, "grad_norm": 1.7212414633529525, "learning_rate": 1.8694813661143393e-05, "loss": 0.6237, "step": 7650 }, { "epoch": 0.5686361947231513, "grad_norm": 2.158418460525989, "learning_rate": 1.8694417290726754e-05, "loss": 0.8917, "step": 7651 }, { "epoch": 0.5687105165366035, "grad_norm": 1.617543421106243, "learning_rate": 1.8694020864335908e-05, "loss": 0.8934, "step": 7652 }, { "epoch": 0.5687848383500558, "grad_norm": 2.3707169474307, "learning_rate": 1.8693624381973406e-05, "loss": 0.8552, "step": 7653 }, { "epoch": 0.568859160163508, "grad_norm": 2.388394453354632, "learning_rate": 1.86932278436418e-05, "loss": 1.0475, "step": 7654 }, { "epoch": 0.5689334819769603, "grad_norm": 2.074246117035011, "learning_rate": 1.8692831249343647e-05, "loss": 0.6979, "step": 7655 }, { "epoch": 0.5690078037904125, "grad_norm": 1.475460901141998, "learning_rate": 1.8692434599081498e-05, "loss": 0.6443, "step": 7656 }, { "epoch": 0.5690821256038647, "grad_norm": 1.9015086928934521, "learning_rate": 1.8692037892857905e-05, "loss": 0.7059, "step": 7657 }, { "epoch": 0.569156447417317, "grad_norm": 1.8150496698025969, "learning_rate": 1.8691641130675423e-05, "loss": 0.7041, "step": 7658 }, { "epoch": 0.5692307692307692, "grad_norm": 1.7271631050299157, "learning_rate": 1.8691244312536603e-05, "loss": 0.7609, "step": 7659 }, { "epoch": 0.5693050910442214, "grad_norm": 2.2807709431897982, "learning_rate": 1.869084743844401e-05, "loss": 0.9887, "step": 7660 }, { "epoch": 0.5693794128576737, "grad_norm": 4.682380469242391, "learning_rate": 1.8690450508400186e-05, "loss": 0.7186, "step": 7661 }, { "epoch": 0.5694537346711259, "grad_norm": 4.516144364524842, "learning_rate": 1.8690053522407696e-05, "loss": 0.8627, "step": 7662 }, { "epoch": 0.5695280564845783, "grad_norm": 2.3324749385245642, "learning_rate": 1.868965648046909e-05, "loss": 0.8933, "step": 7663 }, { "epoch": 0.5696023782980305, "grad_norm": 1.957708168161397, "learning_rate": 1.8689259382586925e-05, "loss": 0.897, "step": 7664 }, { "epoch": 0.5696767001114827, "grad_norm": 1.8260896503899362, "learning_rate": 1.8688862228763764e-05, "loss": 0.9207, "step": 7665 }, { "epoch": 0.569751021924935, "grad_norm": 2.3212223935493537, "learning_rate": 1.8688465019002158e-05, "loss": 0.825, "step": 7666 }, { "epoch": 0.5698253437383872, "grad_norm": 2.382842959271066, "learning_rate": 1.868806775330466e-05, "loss": 0.9022, "step": 7667 }, { "epoch": 0.5698996655518395, "grad_norm": 1.8642383411328076, "learning_rate": 1.8687670431673837e-05, "loss": 0.7757, "step": 7668 }, { "epoch": 0.5699739873652917, "grad_norm": 2.3067602313932087, "learning_rate": 1.868727305411224e-05, "loss": 0.6553, "step": 7669 }, { "epoch": 0.5700483091787439, "grad_norm": 1.7432045806579, "learning_rate": 1.8686875620622434e-05, "loss": 0.8874, "step": 7670 }, { "epoch": 0.5701226309921962, "grad_norm": 2.2986315121011414, "learning_rate": 1.868647813120697e-05, "loss": 1.0717, "step": 7671 }, { "epoch": 0.5701969528056484, "grad_norm": 1.575931427622672, "learning_rate": 1.8686080585868413e-05, "loss": 0.7683, "step": 7672 }, { "epoch": 0.5702712746191007, "grad_norm": 1.6615238079453203, "learning_rate": 1.8685682984609316e-05, "loss": 0.7149, "step": 7673 }, { "epoch": 0.570345596432553, "grad_norm": 1.5634720532049036, "learning_rate": 1.8685285327432245e-05, "loss": 0.8281, "step": 7674 }, { "epoch": 0.5704199182460052, "grad_norm": 1.9972855704420238, "learning_rate": 1.8684887614339758e-05, "loss": 0.7737, "step": 7675 }, { "epoch": 0.5704942400594575, "grad_norm": 2.0098916053237272, "learning_rate": 1.8684489845334416e-05, "loss": 0.9996, "step": 7676 }, { "epoch": 0.5705685618729097, "grad_norm": 2.809542554691965, "learning_rate": 1.8684092020418777e-05, "loss": 0.8909, "step": 7677 }, { "epoch": 0.570642883686362, "grad_norm": 1.8729996587761846, "learning_rate": 1.8683694139595406e-05, "loss": 0.8513, "step": 7678 }, { "epoch": 0.5707172054998142, "grad_norm": 1.9951131076515656, "learning_rate": 1.8683296202866858e-05, "loss": 0.7063, "step": 7679 }, { "epoch": 0.5707915273132664, "grad_norm": 2.2422601321874605, "learning_rate": 1.8682898210235703e-05, "loss": 0.8453, "step": 7680 }, { "epoch": 0.5708658491267187, "grad_norm": 1.7293374908062067, "learning_rate": 1.86825001617045e-05, "loss": 0.7668, "step": 7681 }, { "epoch": 0.5709401709401709, "grad_norm": 8.585898257505724, "learning_rate": 1.8682102057275813e-05, "loss": 1.0805, "step": 7682 }, { "epoch": 0.5710144927536231, "grad_norm": 1.925835164113339, "learning_rate": 1.8681703896952203e-05, "loss": 0.9094, "step": 7683 }, { "epoch": 0.5710888145670754, "grad_norm": 1.9167106372240488, "learning_rate": 1.8681305680736233e-05, "loss": 0.9583, "step": 7684 }, { "epoch": 0.5711631363805277, "grad_norm": 1.8925375320570346, "learning_rate": 1.8680907408630468e-05, "loss": 0.8838, "step": 7685 }, { "epoch": 0.57123745819398, "grad_norm": 1.8215557087680563, "learning_rate": 1.868050908063747e-05, "loss": 0.9159, "step": 7686 }, { "epoch": 0.5713117800074322, "grad_norm": 2.190108640095158, "learning_rate": 1.8680110696759804e-05, "loss": 0.9576, "step": 7687 }, { "epoch": 0.5713861018208845, "grad_norm": 1.8877069087997345, "learning_rate": 1.8679712257000035e-05, "loss": 0.8176, "step": 7688 }, { "epoch": 0.5714604236343367, "grad_norm": 1.8614100404039093, "learning_rate": 1.8679313761360733e-05, "loss": 0.9951, "step": 7689 }, { "epoch": 0.5715347454477889, "grad_norm": 1.9511350912574525, "learning_rate": 1.8678915209844457e-05, "loss": 0.9747, "step": 7690 }, { "epoch": 0.5716090672612412, "grad_norm": 7.598749968573099, "learning_rate": 1.8678516602453774e-05, "loss": 1.1685, "step": 7691 }, { "epoch": 0.5716833890746934, "grad_norm": 2.053717205242491, "learning_rate": 1.8678117939191257e-05, "loss": 0.869, "step": 7692 }, { "epoch": 0.5717577108881456, "grad_norm": 1.8643695988652447, "learning_rate": 1.8677719220059462e-05, "loss": 1.0023, "step": 7693 }, { "epoch": 0.5718320327015979, "grad_norm": 2.2835000187001584, "learning_rate": 1.867732044506096e-05, "loss": 1.1306, "step": 7694 }, { "epoch": 0.5719063545150501, "grad_norm": 1.6177018678145354, "learning_rate": 1.8676921614198323e-05, "loss": 0.7761, "step": 7695 }, { "epoch": 0.5719806763285025, "grad_norm": 2.725203032068611, "learning_rate": 1.867652272747411e-05, "loss": 1.1064, "step": 7696 }, { "epoch": 0.5720549981419547, "grad_norm": 1.9429520019726843, "learning_rate": 1.8676123784890902e-05, "loss": 0.7963, "step": 7697 }, { "epoch": 0.572129319955407, "grad_norm": 3.5503493181355936, "learning_rate": 1.8675724786451254e-05, "loss": 0.9725, "step": 7698 }, { "epoch": 0.5722036417688592, "grad_norm": 2.19065340899121, "learning_rate": 1.8675325732157738e-05, "loss": 0.677, "step": 7699 }, { "epoch": 0.5722779635823114, "grad_norm": 1.7555896398077504, "learning_rate": 1.8674926622012926e-05, "loss": 0.6665, "step": 7700 }, { "epoch": 0.5723522853957637, "grad_norm": 2.004334016676659, "learning_rate": 1.867452745601939e-05, "loss": 0.7534, "step": 7701 }, { "epoch": 0.5724266072092159, "grad_norm": 2.2688616968762885, "learning_rate": 1.8674128234179695e-05, "loss": 0.7406, "step": 7702 }, { "epoch": 0.5725009290226681, "grad_norm": 2.4103416209573125, "learning_rate": 1.867372895649641e-05, "loss": 1.129, "step": 7703 }, { "epoch": 0.5725752508361204, "grad_norm": 1.6072922816217698, "learning_rate": 1.8673329622972108e-05, "loss": 0.8643, "step": 7704 }, { "epoch": 0.5726495726495726, "grad_norm": 2.0974467127440306, "learning_rate": 1.867293023360936e-05, "loss": 1.0918, "step": 7705 }, { "epoch": 0.5727238944630249, "grad_norm": 2.5838114441212188, "learning_rate": 1.8672530788410742e-05, "loss": 0.9183, "step": 7706 }, { "epoch": 0.5727982162764772, "grad_norm": 1.9086134771375038, "learning_rate": 1.867213128737882e-05, "loss": 0.8168, "step": 7707 }, { "epoch": 0.5728725380899294, "grad_norm": 1.834086320041802, "learning_rate": 1.867173173051616e-05, "loss": 0.887, "step": 7708 }, { "epoch": 0.5729468599033817, "grad_norm": 1.8025803513281706, "learning_rate": 1.867133211782535e-05, "loss": 0.8223, "step": 7709 }, { "epoch": 0.5730211817168339, "grad_norm": 2.5322741143857304, "learning_rate": 1.8670932449308944e-05, "loss": 1.0049, "step": 7710 }, { "epoch": 0.5730955035302862, "grad_norm": 1.6180084621060795, "learning_rate": 1.867053272496953e-05, "loss": 0.732, "step": 7711 }, { "epoch": 0.5731698253437384, "grad_norm": 2.4041279921087004, "learning_rate": 1.8670132944809677e-05, "loss": 1.0022, "step": 7712 }, { "epoch": 0.5732441471571906, "grad_norm": 2.1491165533484766, "learning_rate": 1.8669733108831955e-05, "loss": 0.9024, "step": 7713 }, { "epoch": 0.5733184689706429, "grad_norm": 5.423390029386804, "learning_rate": 1.8669333217038946e-05, "loss": 0.9925, "step": 7714 }, { "epoch": 0.5733927907840951, "grad_norm": 1.8641113609855955, "learning_rate": 1.8668933269433215e-05, "loss": 0.8184, "step": 7715 }, { "epoch": 0.5734671125975473, "grad_norm": 2.2162013777758496, "learning_rate": 1.8668533266017344e-05, "loss": 0.9673, "step": 7716 }, { "epoch": 0.5735414344109996, "grad_norm": 2.7831110476849905, "learning_rate": 1.8668133206793904e-05, "loss": 1.0378, "step": 7717 }, { "epoch": 0.5736157562244518, "grad_norm": 2.113207986911034, "learning_rate": 1.8667733091765474e-05, "loss": 1.041, "step": 7718 }, { "epoch": 0.5736900780379042, "grad_norm": 1.774057675608535, "learning_rate": 1.8667332920934628e-05, "loss": 0.8118, "step": 7719 }, { "epoch": 0.5737643998513564, "grad_norm": 2.3067926734140642, "learning_rate": 1.866693269430394e-05, "loss": 0.7486, "step": 7720 }, { "epoch": 0.5738387216648086, "grad_norm": 1.9471643214038517, "learning_rate": 1.8666532411875987e-05, "loss": 0.7984, "step": 7721 }, { "epoch": 0.5739130434782609, "grad_norm": 2.1078318721199545, "learning_rate": 1.8666132073653355e-05, "loss": 0.6885, "step": 7722 }, { "epoch": 0.5739873652917131, "grad_norm": 1.806770555798295, "learning_rate": 1.866573167963861e-05, "loss": 0.6966, "step": 7723 }, { "epoch": 0.5740616871051654, "grad_norm": 1.9783462507560083, "learning_rate": 1.8665331229834334e-05, "loss": 0.9093, "step": 7724 }, { "epoch": 0.5741360089186176, "grad_norm": 2.059432436339958, "learning_rate": 1.8664930724243108e-05, "loss": 0.8335, "step": 7725 }, { "epoch": 0.5742103307320698, "grad_norm": 1.896438497300626, "learning_rate": 1.8664530162867505e-05, "loss": 0.8642, "step": 7726 }, { "epoch": 0.5742846525455221, "grad_norm": 2.1285041375495894, "learning_rate": 1.8664129545710106e-05, "loss": 1.015, "step": 7727 }, { "epoch": 0.5743589743589743, "grad_norm": 2.167444868321741, "learning_rate": 1.866372887277349e-05, "loss": 0.7797, "step": 7728 }, { "epoch": 0.5744332961724266, "grad_norm": 1.8315651137057982, "learning_rate": 1.866332814406024e-05, "loss": 0.9592, "step": 7729 }, { "epoch": 0.5745076179858789, "grad_norm": 2.1178778936246934, "learning_rate": 1.866292735957293e-05, "loss": 1.0103, "step": 7730 }, { "epoch": 0.5745819397993311, "grad_norm": 2.229247293857492, "learning_rate": 1.8662526519314146e-05, "loss": 0.8244, "step": 7731 }, { "epoch": 0.5746562616127834, "grad_norm": 2.8921043042896994, "learning_rate": 1.8662125623286464e-05, "loss": 0.9808, "step": 7732 }, { "epoch": 0.5747305834262356, "grad_norm": 1.9825020989329847, "learning_rate": 1.8661724671492467e-05, "loss": 0.9629, "step": 7733 }, { "epoch": 0.5748049052396879, "grad_norm": 2.1045755897818914, "learning_rate": 1.8661323663934738e-05, "loss": 0.7242, "step": 7734 }, { "epoch": 0.5748792270531401, "grad_norm": 2.1787782857142406, "learning_rate": 1.8660922600615853e-05, "loss": 0.7937, "step": 7735 }, { "epoch": 0.5749535488665923, "grad_norm": 2.202000668675846, "learning_rate": 1.86605214815384e-05, "loss": 1.047, "step": 7736 }, { "epoch": 0.5750278706800446, "grad_norm": 1.8551427309162043, "learning_rate": 1.8660120306704958e-05, "loss": 0.8248, "step": 7737 }, { "epoch": 0.5751021924934968, "grad_norm": 2.1839886144784924, "learning_rate": 1.865971907611811e-05, "loss": 0.9978, "step": 7738 }, { "epoch": 0.575176514306949, "grad_norm": 2.63649821380613, "learning_rate": 1.865931778978044e-05, "loss": 1.0267, "step": 7739 }, { "epoch": 0.5752508361204013, "grad_norm": 1.7173815922455011, "learning_rate": 1.8658916447694533e-05, "loss": 0.9089, "step": 7740 }, { "epoch": 0.5753251579338536, "grad_norm": 1.8877615074627994, "learning_rate": 1.865851504986297e-05, "loss": 0.8354, "step": 7741 }, { "epoch": 0.5753994797473059, "grad_norm": 6.550082675379409, "learning_rate": 1.8658113596288338e-05, "loss": 1.0551, "step": 7742 }, { "epoch": 0.5754738015607581, "grad_norm": 2.1248610513898467, "learning_rate": 1.865771208697322e-05, "loss": 0.8661, "step": 7743 }, { "epoch": 0.5755481233742104, "grad_norm": 2.047368097233105, "learning_rate": 1.86573105219202e-05, "loss": 0.8215, "step": 7744 }, { "epoch": 0.5756224451876626, "grad_norm": 2.494022271831478, "learning_rate": 1.8656908901131864e-05, "loss": 0.9855, "step": 7745 }, { "epoch": 0.5756967670011148, "grad_norm": 2.63585848215808, "learning_rate": 1.8656507224610795e-05, "loss": 1.0651, "step": 7746 }, { "epoch": 0.5757710888145671, "grad_norm": 1.651486404173591, "learning_rate": 1.8656105492359584e-05, "loss": 0.5565, "step": 7747 }, { "epoch": 0.5758454106280193, "grad_norm": 1.9686920814955182, "learning_rate": 1.8655703704380815e-05, "loss": 0.7102, "step": 7748 }, { "epoch": 0.5759197324414715, "grad_norm": 2.5219635929355864, "learning_rate": 1.8655301860677076e-05, "loss": 0.7877, "step": 7749 }, { "epoch": 0.5759940542549238, "grad_norm": 1.9182545900130274, "learning_rate": 1.8654899961250953e-05, "loss": 0.87, "step": 7750 }, { "epoch": 0.576068376068376, "grad_norm": 2.069460482616961, "learning_rate": 1.8654498006105027e-05, "loss": 0.8652, "step": 7751 }, { "epoch": 0.5761426978818284, "grad_norm": 2.0836748323787013, "learning_rate": 1.86540959952419e-05, "loss": 0.9613, "step": 7752 }, { "epoch": 0.5762170196952806, "grad_norm": 2.792748566770869, "learning_rate": 1.8653693928664146e-05, "loss": 0.8888, "step": 7753 }, { "epoch": 0.5762913415087328, "grad_norm": 1.8316038167366542, "learning_rate": 1.865329180637436e-05, "loss": 0.864, "step": 7754 }, { "epoch": 0.5763656633221851, "grad_norm": 1.9701270777796924, "learning_rate": 1.8652889628375135e-05, "loss": 0.8186, "step": 7755 }, { "epoch": 0.5764399851356373, "grad_norm": 2.198678204167234, "learning_rate": 1.8652487394669055e-05, "loss": 0.9424, "step": 7756 }, { "epoch": 0.5765143069490896, "grad_norm": 1.8973933391077034, "learning_rate": 1.8652085105258707e-05, "loss": 0.8009, "step": 7757 }, { "epoch": 0.5765886287625418, "grad_norm": 2.0038232453254836, "learning_rate": 1.8651682760146683e-05, "loss": 0.7746, "step": 7758 }, { "epoch": 0.576662950575994, "grad_norm": 1.8858668779513796, "learning_rate": 1.865128035933558e-05, "loss": 0.7322, "step": 7759 }, { "epoch": 0.5767372723894463, "grad_norm": 2.2931537231475763, "learning_rate": 1.8650877902827978e-05, "loss": 0.8, "step": 7760 }, { "epoch": 0.5768115942028985, "grad_norm": 1.8706973015292456, "learning_rate": 1.865047539062647e-05, "loss": 1.0704, "step": 7761 }, { "epoch": 0.5768859160163508, "grad_norm": 2.3628785970430286, "learning_rate": 1.865007282273366e-05, "loss": 1.1025, "step": 7762 }, { "epoch": 0.5769602378298031, "grad_norm": 2.2290584497480506, "learning_rate": 1.8649670199152123e-05, "loss": 0.9144, "step": 7763 }, { "epoch": 0.5770345596432553, "grad_norm": 2.4699035159251146, "learning_rate": 1.864926751988446e-05, "loss": 0.7732, "step": 7764 }, { "epoch": 0.5771088814567076, "grad_norm": 1.620659247670381, "learning_rate": 1.8648864784933257e-05, "loss": 0.6661, "step": 7765 }, { "epoch": 0.5771832032701598, "grad_norm": 2.2888850632280935, "learning_rate": 1.8648461994301117e-05, "loss": 0.8613, "step": 7766 }, { "epoch": 0.577257525083612, "grad_norm": 1.9406934461213854, "learning_rate": 1.8648059147990625e-05, "loss": 0.8756, "step": 7767 }, { "epoch": 0.5773318468970643, "grad_norm": 2.166711179201923, "learning_rate": 1.864765624600438e-05, "loss": 0.9757, "step": 7768 }, { "epoch": 0.5774061687105165, "grad_norm": 1.6806329421508341, "learning_rate": 1.864725328834497e-05, "loss": 0.9395, "step": 7769 }, { "epoch": 0.5774804905239688, "grad_norm": 2.1041253720358775, "learning_rate": 1.8646850275014992e-05, "loss": 1.1417, "step": 7770 }, { "epoch": 0.577554812337421, "grad_norm": 2.628677645150381, "learning_rate": 1.864644720601704e-05, "loss": 0.743, "step": 7771 }, { "epoch": 0.5776291341508732, "grad_norm": 2.017951139389138, "learning_rate": 1.864604408135371e-05, "loss": 0.906, "step": 7772 }, { "epoch": 0.5777034559643255, "grad_norm": 2.0467772141414575, "learning_rate": 1.8645640901027597e-05, "loss": 0.8587, "step": 7773 }, { "epoch": 0.5777777777777777, "grad_norm": 2.0251917898108043, "learning_rate": 1.86452376650413e-05, "loss": 0.785, "step": 7774 }, { "epoch": 0.5778520995912301, "grad_norm": 2.2205872605252743, "learning_rate": 1.8644834373397403e-05, "loss": 0.9766, "step": 7775 }, { "epoch": 0.5779264214046823, "grad_norm": 1.9444875984544632, "learning_rate": 1.864443102609852e-05, "loss": 0.9672, "step": 7776 }, { "epoch": 0.5780007432181345, "grad_norm": 2.1433854306025046, "learning_rate": 1.8644027623147235e-05, "loss": 0.9958, "step": 7777 }, { "epoch": 0.5780750650315868, "grad_norm": 1.835394535166944, "learning_rate": 1.864362416454615e-05, "loss": 0.8132, "step": 7778 }, { "epoch": 0.578149386845039, "grad_norm": 3.1377414539194914, "learning_rate": 1.864322065029786e-05, "loss": 0.9649, "step": 7779 }, { "epoch": 0.5782237086584913, "grad_norm": 2.0040247395683686, "learning_rate": 1.864281708040496e-05, "loss": 0.8197, "step": 7780 }, { "epoch": 0.5782980304719435, "grad_norm": 1.70122630145207, "learning_rate": 1.8642413454870054e-05, "loss": 0.5, "step": 7781 }, { "epoch": 0.5783723522853957, "grad_norm": 1.920478925440888, "learning_rate": 1.864200977369574e-05, "loss": 0.9243, "step": 7782 }, { "epoch": 0.578446674098848, "grad_norm": 2.125557453937681, "learning_rate": 1.8641606036884616e-05, "loss": 0.8478, "step": 7783 }, { "epoch": 0.5785209959123002, "grad_norm": 2.322937108395699, "learning_rate": 1.864120224443928e-05, "loss": 1.0057, "step": 7784 }, { "epoch": 0.5785953177257525, "grad_norm": 1.876855063688452, "learning_rate": 1.8640798396362333e-05, "loss": 0.7777, "step": 7785 }, { "epoch": 0.5786696395392048, "grad_norm": 1.9448133111539236, "learning_rate": 1.8640394492656374e-05, "loss": 0.9199, "step": 7786 }, { "epoch": 0.578743961352657, "grad_norm": 3.177013386433427, "learning_rate": 1.8639990533324002e-05, "loss": 0.8768, "step": 7787 }, { "epoch": 0.5788182831661093, "grad_norm": 2.484962315289833, "learning_rate": 1.863958651836782e-05, "loss": 1.0306, "step": 7788 }, { "epoch": 0.5788926049795615, "grad_norm": 2.1003202141165, "learning_rate": 1.863918244779043e-05, "loss": 0.9277, "step": 7789 }, { "epoch": 0.5789669267930138, "grad_norm": 2.2407198099201073, "learning_rate": 1.8638778321594434e-05, "loss": 0.785, "step": 7790 }, { "epoch": 0.579041248606466, "grad_norm": 3.139432158354742, "learning_rate": 1.863837413978243e-05, "loss": 0.963, "step": 7791 }, { "epoch": 0.5791155704199182, "grad_norm": 1.790169750254057, "learning_rate": 1.8637969902357015e-05, "loss": 0.8634, "step": 7792 }, { "epoch": 0.5791898922333705, "grad_norm": 2.7513549980566125, "learning_rate": 1.86375656093208e-05, "loss": 0.885, "step": 7793 }, { "epoch": 0.5792642140468227, "grad_norm": 5.048200468353978, "learning_rate": 1.863716126067639e-05, "loss": 0.9366, "step": 7794 }, { "epoch": 0.579338535860275, "grad_norm": 2.8490550013819735, "learning_rate": 1.863675685642638e-05, "loss": 1.0868, "step": 7795 }, { "epoch": 0.5794128576737272, "grad_norm": 1.944689394062809, "learning_rate": 1.8636352396573384e-05, "loss": 0.9188, "step": 7796 }, { "epoch": 0.5794871794871795, "grad_norm": 2.2211524307539254, "learning_rate": 1.8635947881119993e-05, "loss": 0.9438, "step": 7797 }, { "epoch": 0.5795615013006318, "grad_norm": 1.9759747635763478, "learning_rate": 1.863554331006882e-05, "loss": 0.8213, "step": 7798 }, { "epoch": 0.579635823114084, "grad_norm": 1.880424307658006, "learning_rate": 1.8635138683422466e-05, "loss": 0.7143, "step": 7799 }, { "epoch": 0.5797101449275363, "grad_norm": 2.756165668718882, "learning_rate": 1.8634734001183538e-05, "loss": 0.7781, "step": 7800 }, { "epoch": 0.5797844667409885, "grad_norm": 1.8025996972390113, "learning_rate": 1.863432926335464e-05, "loss": 0.7849, "step": 7801 }, { "epoch": 0.5798587885544407, "grad_norm": 1.9150998332184586, "learning_rate": 1.863392446993838e-05, "loss": 1.0185, "step": 7802 }, { "epoch": 0.579933110367893, "grad_norm": 2.263050611409121, "learning_rate": 1.8633519620937362e-05, "loss": 1.003, "step": 7803 }, { "epoch": 0.5800074321813452, "grad_norm": 1.6837267724085787, "learning_rate": 1.8633114716354193e-05, "loss": 0.6877, "step": 7804 }, { "epoch": 0.5800817539947974, "grad_norm": 2.0324973365244565, "learning_rate": 1.8632709756191476e-05, "loss": 0.8672, "step": 7805 }, { "epoch": 0.5801560758082497, "grad_norm": 1.8289000220660905, "learning_rate": 1.8632304740451827e-05, "loss": 0.7768, "step": 7806 }, { "epoch": 0.5802303976217019, "grad_norm": 1.671512112322443, "learning_rate": 1.8631899669137842e-05, "loss": 0.6908, "step": 7807 }, { "epoch": 0.5803047194351543, "grad_norm": 2.1888287287739914, "learning_rate": 1.8631494542252136e-05, "loss": 1.0908, "step": 7808 }, { "epoch": 0.5803790412486065, "grad_norm": 3.3769838911126153, "learning_rate": 1.8631089359797317e-05, "loss": 0.8494, "step": 7809 }, { "epoch": 0.5804533630620587, "grad_norm": 2.2400278564230898, "learning_rate": 1.8630684121775996e-05, "loss": 0.794, "step": 7810 }, { "epoch": 0.580527684875511, "grad_norm": 2.0955709902678628, "learning_rate": 1.8630278828190774e-05, "loss": 0.966, "step": 7811 }, { "epoch": 0.5806020066889632, "grad_norm": 2.1744535558114224, "learning_rate": 1.8629873479044266e-05, "loss": 0.7275, "step": 7812 }, { "epoch": 0.5806763285024155, "grad_norm": 2.8373270634075634, "learning_rate": 1.862946807433908e-05, "loss": 1.0069, "step": 7813 }, { "epoch": 0.5807506503158677, "grad_norm": 1.885950570598611, "learning_rate": 1.8629062614077826e-05, "loss": 0.7957, "step": 7814 }, { "epoch": 0.5808249721293199, "grad_norm": 2.091939946036972, "learning_rate": 1.8628657098263116e-05, "loss": 0.9141, "step": 7815 }, { "epoch": 0.5808992939427722, "grad_norm": 2.244682940434041, "learning_rate": 1.8628251526897558e-05, "loss": 0.693, "step": 7816 }, { "epoch": 0.5809736157562244, "grad_norm": 2.026363077516272, "learning_rate": 1.8627845899983766e-05, "loss": 0.8447, "step": 7817 }, { "epoch": 0.5810479375696767, "grad_norm": 1.66921365484392, "learning_rate": 1.862744021752435e-05, "loss": 0.8408, "step": 7818 }, { "epoch": 0.581122259383129, "grad_norm": 1.5931505984626868, "learning_rate": 1.862703447952192e-05, "loss": 0.7333, "step": 7819 }, { "epoch": 0.5811965811965812, "grad_norm": 1.8728192547436124, "learning_rate": 1.8626628685979087e-05, "loss": 0.8347, "step": 7820 }, { "epoch": 0.5812709030100335, "grad_norm": 2.4135780339907225, "learning_rate": 1.862622283689847e-05, "loss": 1.0923, "step": 7821 }, { "epoch": 0.5813452248234857, "grad_norm": 2.134103167800807, "learning_rate": 1.8625816932282677e-05, "loss": 1.0605, "step": 7822 }, { "epoch": 0.581419546636938, "grad_norm": 3.1059875522427554, "learning_rate": 1.8625410972134323e-05, "loss": 0.75, "step": 7823 }, { "epoch": 0.5814938684503902, "grad_norm": 1.8442932978816655, "learning_rate": 1.8625004956456017e-05, "loss": 0.9212, "step": 7824 }, { "epoch": 0.5815681902638424, "grad_norm": 1.9196465260793716, "learning_rate": 1.8624598885250382e-05, "loss": 0.9028, "step": 7825 }, { "epoch": 0.5816425120772947, "grad_norm": 2.14166800877978, "learning_rate": 1.8624192758520022e-05, "loss": 0.8285, "step": 7826 }, { "epoch": 0.5817168338907469, "grad_norm": 2.7435958548047727, "learning_rate": 1.862378657626756e-05, "loss": 0.9728, "step": 7827 }, { "epoch": 0.5817911557041991, "grad_norm": 1.967404319829604, "learning_rate": 1.862338033849561e-05, "loss": 0.9088, "step": 7828 }, { "epoch": 0.5818654775176514, "grad_norm": 2.5637203185701534, "learning_rate": 1.862297404520678e-05, "loss": 1.0314, "step": 7829 }, { "epoch": 0.5819397993311036, "grad_norm": 2.1226697485823074, "learning_rate": 1.862256769640369e-05, "loss": 0.855, "step": 7830 }, { "epoch": 0.582014121144556, "grad_norm": 2.2783423186333813, "learning_rate": 1.8622161292088957e-05, "loss": 0.8855, "step": 7831 }, { "epoch": 0.5820884429580082, "grad_norm": 2.0659699679931913, "learning_rate": 1.8621754832265198e-05, "loss": 0.7967, "step": 7832 }, { "epoch": 0.5821627647714605, "grad_norm": 2.0405584580250897, "learning_rate": 1.862134831693503e-05, "loss": 0.853, "step": 7833 }, { "epoch": 0.5822370865849127, "grad_norm": 2.420451568834085, "learning_rate": 1.862094174610107e-05, "loss": 1.0132, "step": 7834 }, { "epoch": 0.5823114083983649, "grad_norm": 2.3537245721048, "learning_rate": 1.862053511976593e-05, "loss": 0.819, "step": 7835 }, { "epoch": 0.5823857302118172, "grad_norm": 1.708406421800925, "learning_rate": 1.8620128437932234e-05, "loss": 0.9511, "step": 7836 }, { "epoch": 0.5824600520252694, "grad_norm": 2.5844814812084262, "learning_rate": 1.86197217006026e-05, "loss": 0.7202, "step": 7837 }, { "epoch": 0.5825343738387216, "grad_norm": 2.7507587315089297, "learning_rate": 1.8619314907779644e-05, "loss": 0.8185, "step": 7838 }, { "epoch": 0.5826086956521739, "grad_norm": 1.9264156209589944, "learning_rate": 1.8618908059465986e-05, "loss": 0.8399, "step": 7839 }, { "epoch": 0.5826830174656261, "grad_norm": 1.7344056283532703, "learning_rate": 1.8618501155664243e-05, "loss": 0.7661, "step": 7840 }, { "epoch": 0.5827573392790784, "grad_norm": 1.8616498255847893, "learning_rate": 1.8618094196377037e-05, "loss": 0.8551, "step": 7841 }, { "epoch": 0.5828316610925307, "grad_norm": 2.461577202123277, "learning_rate": 1.8617687181606992e-05, "loss": 0.8724, "step": 7842 }, { "epoch": 0.582905982905983, "grad_norm": 1.982131530109085, "learning_rate": 1.861728011135672e-05, "loss": 0.8387, "step": 7843 }, { "epoch": 0.5829803047194352, "grad_norm": 2.476979725398071, "learning_rate": 1.8616872985628847e-05, "loss": 0.9879, "step": 7844 }, { "epoch": 0.5830546265328874, "grad_norm": 2.7542522534861114, "learning_rate": 1.8616465804425995e-05, "loss": 1.1371, "step": 7845 }, { "epoch": 0.5831289483463397, "grad_norm": 2.0007010455862924, "learning_rate": 1.861605856775078e-05, "loss": 1.0231, "step": 7846 }, { "epoch": 0.5832032701597919, "grad_norm": 1.8850165117517281, "learning_rate": 1.8615651275605827e-05, "loss": 0.8455, "step": 7847 }, { "epoch": 0.5832775919732441, "grad_norm": 1.741883571200573, "learning_rate": 1.861524392799376e-05, "loss": 0.6218, "step": 7848 }, { "epoch": 0.5833519137866964, "grad_norm": 1.9605224991805321, "learning_rate": 1.86148365249172e-05, "loss": 1.0276, "step": 7849 }, { "epoch": 0.5834262356001486, "grad_norm": 1.977589766560329, "learning_rate": 1.861442906637877e-05, "loss": 1.0426, "step": 7850 }, { "epoch": 0.5835005574136009, "grad_norm": 1.8834284388163487, "learning_rate": 1.8614021552381087e-05, "loss": 0.6912, "step": 7851 }, { "epoch": 0.5835748792270531, "grad_norm": 2.057810562930838, "learning_rate": 1.8613613982926783e-05, "loss": 0.9859, "step": 7852 }, { "epoch": 0.5836492010405054, "grad_norm": 1.9452870801993378, "learning_rate": 1.8613206358018482e-05, "loss": 0.8578, "step": 7853 }, { "epoch": 0.5837235228539577, "grad_norm": 1.8066430852464184, "learning_rate": 1.8612798677658803e-05, "loss": 0.8004, "step": 7854 }, { "epoch": 0.5837978446674099, "grad_norm": 1.8973559978677434, "learning_rate": 1.8612390941850372e-05, "loss": 0.9341, "step": 7855 }, { "epoch": 0.5838721664808622, "grad_norm": 2.129404620726251, "learning_rate": 1.8611983150595815e-05, "loss": 0.9974, "step": 7856 }, { "epoch": 0.5839464882943144, "grad_norm": 1.9712522437325626, "learning_rate": 1.861157530389776e-05, "loss": 0.8563, "step": 7857 }, { "epoch": 0.5840208101077666, "grad_norm": 2.2744936947500776, "learning_rate": 1.861116740175883e-05, "loss": 0.8591, "step": 7858 }, { "epoch": 0.5840951319212189, "grad_norm": 2.5863363822471195, "learning_rate": 1.861075944418165e-05, "loss": 0.9004, "step": 7859 }, { "epoch": 0.5841694537346711, "grad_norm": 2.0549200126224667, "learning_rate": 1.8610351431168844e-05, "loss": 0.9761, "step": 7860 }, { "epoch": 0.5842437755481233, "grad_norm": 3.6070894323735656, "learning_rate": 1.860994336272305e-05, "loss": 0.8667, "step": 7861 }, { "epoch": 0.5843180973615756, "grad_norm": 2.020759773522934, "learning_rate": 1.860953523884688e-05, "loss": 0.7156, "step": 7862 }, { "epoch": 0.5843924191750278, "grad_norm": 1.847571668854979, "learning_rate": 1.8609127059542972e-05, "loss": 0.914, "step": 7863 }, { "epoch": 0.5844667409884802, "grad_norm": 2.18861294412994, "learning_rate": 1.860871882481395e-05, "loss": 0.8735, "step": 7864 }, { "epoch": 0.5845410628019324, "grad_norm": 2.0238229207885814, "learning_rate": 1.8608310534662446e-05, "loss": 0.8099, "step": 7865 }, { "epoch": 0.5846153846153846, "grad_norm": 2.213119924216907, "learning_rate": 1.860790218909108e-05, "loss": 0.976, "step": 7866 }, { "epoch": 0.5846897064288369, "grad_norm": 2.1468001816121296, "learning_rate": 1.8607493788102492e-05, "loss": 0.9418, "step": 7867 }, { "epoch": 0.5847640282422891, "grad_norm": 1.8773621373622333, "learning_rate": 1.86070853316993e-05, "loss": 0.7898, "step": 7868 }, { "epoch": 0.5848383500557414, "grad_norm": 2.4726223378826115, "learning_rate": 1.8606676819884146e-05, "loss": 1.2833, "step": 7869 }, { "epoch": 0.5849126718691936, "grad_norm": 2.2927676245498616, "learning_rate": 1.8606268252659646e-05, "loss": 0.6762, "step": 7870 }, { "epoch": 0.5849869936826458, "grad_norm": 2.038474751093578, "learning_rate": 1.860585963002844e-05, "loss": 0.9947, "step": 7871 }, { "epoch": 0.5850613154960981, "grad_norm": 2.5568540228857994, "learning_rate": 1.8605450951993157e-05, "loss": 0.8902, "step": 7872 }, { "epoch": 0.5851356373095503, "grad_norm": 1.8790541113360584, "learning_rate": 1.860504221855643e-05, "loss": 0.8388, "step": 7873 }, { "epoch": 0.5852099591230026, "grad_norm": 2.348511416429267, "learning_rate": 1.8604633429720883e-05, "loss": 0.9705, "step": 7874 }, { "epoch": 0.5852842809364549, "grad_norm": 1.774210901634638, "learning_rate": 1.8604224585489156e-05, "loss": 0.8036, "step": 7875 }, { "epoch": 0.5853586027499071, "grad_norm": 2.391568539368978, "learning_rate": 1.8603815685863876e-05, "loss": 0.7839, "step": 7876 }, { "epoch": 0.5854329245633594, "grad_norm": 1.9862694468521969, "learning_rate": 1.8603406730847676e-05, "loss": 0.9523, "step": 7877 }, { "epoch": 0.5855072463768116, "grad_norm": 1.9325827637740736, "learning_rate": 1.8602997720443192e-05, "loss": 0.7768, "step": 7878 }, { "epoch": 0.5855815681902639, "grad_norm": 2.3357391749556453, "learning_rate": 1.8602588654653053e-05, "loss": 0.9959, "step": 7879 }, { "epoch": 0.5856558900037161, "grad_norm": 1.894955902161283, "learning_rate": 1.8602179533479896e-05, "loss": 0.9505, "step": 7880 }, { "epoch": 0.5857302118171683, "grad_norm": 6.535776764784442, "learning_rate": 1.860177035692635e-05, "loss": 1.0903, "step": 7881 }, { "epoch": 0.5858045336306206, "grad_norm": 1.7509073394378873, "learning_rate": 1.8601361124995057e-05, "loss": 0.8074, "step": 7882 }, { "epoch": 0.5858788554440728, "grad_norm": 2.2097255822947677, "learning_rate": 1.8600951837688645e-05, "loss": 0.9009, "step": 7883 }, { "epoch": 0.585953177257525, "grad_norm": 1.822878113551938, "learning_rate": 1.8600542495009755e-05, "loss": 0.8283, "step": 7884 }, { "epoch": 0.5860274990709773, "grad_norm": 1.6293769518431769, "learning_rate": 1.860013309696102e-05, "loss": 0.8261, "step": 7885 }, { "epoch": 0.5861018208844295, "grad_norm": 2.1539529249549894, "learning_rate": 1.8599723643545066e-05, "loss": 0.983, "step": 7886 }, { "epoch": 0.5861761426978819, "grad_norm": 1.7480979201923892, "learning_rate": 1.8599314134764543e-05, "loss": 0.8292, "step": 7887 }, { "epoch": 0.5862504645113341, "grad_norm": 2.1200453200964677, "learning_rate": 1.8598904570622078e-05, "loss": 0.959, "step": 7888 }, { "epoch": 0.5863247863247864, "grad_norm": 2.0067106333062537, "learning_rate": 1.8598494951120315e-05, "loss": 0.7394, "step": 7889 }, { "epoch": 0.5863991081382386, "grad_norm": 2.2955968925437817, "learning_rate": 1.859808527626189e-05, "loss": 0.8737, "step": 7890 }, { "epoch": 0.5864734299516908, "grad_norm": 2.00361163821493, "learning_rate": 1.859767554604943e-05, "loss": 0.8959, "step": 7891 }, { "epoch": 0.5865477517651431, "grad_norm": 2.136822858023701, "learning_rate": 1.859726576048559e-05, "loss": 0.9467, "step": 7892 }, { "epoch": 0.5866220735785953, "grad_norm": 2.0837041514683907, "learning_rate": 1.8596855919572993e-05, "loss": 0.9498, "step": 7893 }, { "epoch": 0.5866963953920475, "grad_norm": 2.3221030283661745, "learning_rate": 1.8596446023314287e-05, "loss": 0.6932, "step": 7894 }, { "epoch": 0.5867707172054998, "grad_norm": 2.011952899144879, "learning_rate": 1.8596036071712108e-05, "loss": 0.7804, "step": 7895 }, { "epoch": 0.586845039018952, "grad_norm": 1.984596603784611, "learning_rate": 1.8595626064769092e-05, "loss": 1.0237, "step": 7896 }, { "epoch": 0.5869193608324043, "grad_norm": 1.8773088833536353, "learning_rate": 1.8595216002487883e-05, "loss": 0.9011, "step": 7897 }, { "epoch": 0.5869936826458566, "grad_norm": 2.235115748320171, "learning_rate": 1.8594805884871116e-05, "loss": 1.0378, "step": 7898 }, { "epoch": 0.5870680044593088, "grad_norm": 1.5573029344654061, "learning_rate": 1.8594395711921437e-05, "loss": 0.7932, "step": 7899 }, { "epoch": 0.5871423262727611, "grad_norm": 2.612593651491983, "learning_rate": 1.8593985483641485e-05, "loss": 0.9908, "step": 7900 }, { "epoch": 0.5872166480862133, "grad_norm": 5.331668294740916, "learning_rate": 1.85935752000339e-05, "loss": 0.979, "step": 7901 }, { "epoch": 0.5872909698996656, "grad_norm": 2.189723355575397, "learning_rate": 1.8593164861101326e-05, "loss": 1.0384, "step": 7902 }, { "epoch": 0.5873652917131178, "grad_norm": 1.9810444887904408, "learning_rate": 1.85927544668464e-05, "loss": 0.9809, "step": 7903 }, { "epoch": 0.58743961352657, "grad_norm": 1.977045081049851, "learning_rate": 1.8592344017271766e-05, "loss": 0.9607, "step": 7904 }, { "epoch": 0.5875139353400223, "grad_norm": 1.8192452540825281, "learning_rate": 1.8591933512380066e-05, "loss": 0.7674, "step": 7905 }, { "epoch": 0.5875882571534745, "grad_norm": 2.2183813879977734, "learning_rate": 1.8591522952173948e-05, "loss": 0.6757, "step": 7906 }, { "epoch": 0.5876625789669268, "grad_norm": 1.4667123586194928, "learning_rate": 1.859111233665605e-05, "loss": 0.7667, "step": 7907 }, { "epoch": 0.587736900780379, "grad_norm": 2.1111792906068656, "learning_rate": 1.8590701665829012e-05, "loss": 0.7984, "step": 7908 }, { "epoch": 0.5878112225938313, "grad_norm": 1.8822628489610571, "learning_rate": 1.859029093969549e-05, "loss": 0.8161, "step": 7909 }, { "epoch": 0.5878855444072836, "grad_norm": 2.3646276609107204, "learning_rate": 1.8589880158258116e-05, "loss": 0.7533, "step": 7910 }, { "epoch": 0.5879598662207358, "grad_norm": 1.9304771687314002, "learning_rate": 1.8589469321519538e-05, "loss": 0.8871, "step": 7911 }, { "epoch": 0.588034188034188, "grad_norm": 2.1148438464522274, "learning_rate": 1.8589058429482404e-05, "loss": 0.9248, "step": 7912 }, { "epoch": 0.5881085098476403, "grad_norm": 1.773919903557096, "learning_rate": 1.8588647482149357e-05, "loss": 0.9368, "step": 7913 }, { "epoch": 0.5881828316610925, "grad_norm": 2.4422766843673, "learning_rate": 1.8588236479523045e-05, "loss": 0.9536, "step": 7914 }, { "epoch": 0.5882571534745448, "grad_norm": 2.028245464527816, "learning_rate": 1.858782542160611e-05, "loss": 0.9779, "step": 7915 }, { "epoch": 0.588331475287997, "grad_norm": 1.9572635538171461, "learning_rate": 1.85874143084012e-05, "loss": 1.0199, "step": 7916 }, { "epoch": 0.5884057971014492, "grad_norm": 1.8637889350131092, "learning_rate": 1.8587003139910962e-05, "loss": 0.8032, "step": 7917 }, { "epoch": 0.5884801189149015, "grad_norm": 1.7250103301644377, "learning_rate": 1.8586591916138047e-05, "loss": 0.6274, "step": 7918 }, { "epoch": 0.5885544407283537, "grad_norm": 2.0488679402939405, "learning_rate": 1.8586180637085095e-05, "loss": 0.9424, "step": 7919 }, { "epoch": 0.5886287625418061, "grad_norm": 1.9526650792726479, "learning_rate": 1.8585769302754762e-05, "loss": 0.7459, "step": 7920 }, { "epoch": 0.5887030843552583, "grad_norm": 2.9175548892422727, "learning_rate": 1.8585357913149685e-05, "loss": 0.7754, "step": 7921 }, { "epoch": 0.5887774061687105, "grad_norm": 2.3766763159382243, "learning_rate": 1.8584946468272527e-05, "loss": 0.9499, "step": 7922 }, { "epoch": 0.5888517279821628, "grad_norm": 2.077015062696005, "learning_rate": 1.8584534968125924e-05, "loss": 0.7905, "step": 7923 }, { "epoch": 0.588926049795615, "grad_norm": 1.9914149762076006, "learning_rate": 1.858412341271253e-05, "loss": 0.868, "step": 7924 }, { "epoch": 0.5890003716090673, "grad_norm": 2.113391905765577, "learning_rate": 1.8583711802035e-05, "loss": 0.7298, "step": 7925 }, { "epoch": 0.5890746934225195, "grad_norm": 1.9097929016424966, "learning_rate": 1.858330013609597e-05, "loss": 0.8326, "step": 7926 }, { "epoch": 0.5891490152359717, "grad_norm": 1.8586625264602967, "learning_rate": 1.8582888414898108e-05, "loss": 0.8025, "step": 7927 }, { "epoch": 0.589223337049424, "grad_norm": 1.9143815754065305, "learning_rate": 1.8582476638444048e-05, "loss": 0.7425, "step": 7928 }, { "epoch": 0.5892976588628762, "grad_norm": 1.8904840894687565, "learning_rate": 1.8582064806736454e-05, "loss": 0.6708, "step": 7929 }, { "epoch": 0.5893719806763285, "grad_norm": 1.9694286925829663, "learning_rate": 1.8581652919777974e-05, "loss": 0.6649, "step": 7930 }, { "epoch": 0.5894463024897808, "grad_norm": 1.9938323154345525, "learning_rate": 1.858124097757125e-05, "loss": 0.989, "step": 7931 }, { "epoch": 0.589520624303233, "grad_norm": 1.782419855968972, "learning_rate": 1.8580828980118947e-05, "loss": 0.904, "step": 7932 }, { "epoch": 0.5895949461166853, "grad_norm": 1.683055260049155, "learning_rate": 1.8580416927423712e-05, "loss": 0.7814, "step": 7933 }, { "epoch": 0.5896692679301375, "grad_norm": 1.8620657141566206, "learning_rate": 1.8580004819488197e-05, "loss": 0.6955, "step": 7934 }, { "epoch": 0.5897435897435898, "grad_norm": 1.8787868106521, "learning_rate": 1.857959265631506e-05, "loss": 0.8818, "step": 7935 }, { "epoch": 0.589817911557042, "grad_norm": 1.8984276125284774, "learning_rate": 1.8579180437906945e-05, "loss": 0.9847, "step": 7936 }, { "epoch": 0.5898922333704942, "grad_norm": 1.838960654292746, "learning_rate": 1.8578768164266514e-05, "loss": 0.7916, "step": 7937 }, { "epoch": 0.5899665551839465, "grad_norm": 1.6446296509346825, "learning_rate": 1.857835583539642e-05, "loss": 0.6551, "step": 7938 }, { "epoch": 0.5900408769973987, "grad_norm": 2.1099137662761205, "learning_rate": 1.8577943451299315e-05, "loss": 0.8593, "step": 7939 }, { "epoch": 0.590115198810851, "grad_norm": 1.5941175346780403, "learning_rate": 1.8577531011977857e-05, "loss": 0.6746, "step": 7940 }, { "epoch": 0.5901895206243032, "grad_norm": 1.9692956813510492, "learning_rate": 1.8577118517434692e-05, "loss": 0.9398, "step": 7941 }, { "epoch": 0.5902638424377554, "grad_norm": 1.630449701167879, "learning_rate": 1.8576705967672494e-05, "loss": 0.767, "step": 7942 }, { "epoch": 0.5903381642512078, "grad_norm": 2.1414110374806095, "learning_rate": 1.85762933626939e-05, "loss": 0.8598, "step": 7943 }, { "epoch": 0.59041248606466, "grad_norm": 1.7712017518971173, "learning_rate": 1.8575880702501576e-05, "loss": 0.8271, "step": 7944 }, { "epoch": 0.5904868078781123, "grad_norm": 1.5891945787305446, "learning_rate": 1.8575467987098178e-05, "loss": 0.8727, "step": 7945 }, { "epoch": 0.5905611296915645, "grad_norm": 1.9703563697707445, "learning_rate": 1.8575055216486363e-05, "loss": 0.8363, "step": 7946 }, { "epoch": 0.5906354515050167, "grad_norm": 1.7696475398954237, "learning_rate": 1.8574642390668785e-05, "loss": 0.9178, "step": 7947 }, { "epoch": 0.590709773318469, "grad_norm": 2.233696310018043, "learning_rate": 1.8574229509648105e-05, "loss": 0.8583, "step": 7948 }, { "epoch": 0.5907840951319212, "grad_norm": 1.690946159556128, "learning_rate": 1.8573816573426982e-05, "loss": 0.7741, "step": 7949 }, { "epoch": 0.5908584169453734, "grad_norm": 1.6232794322144277, "learning_rate": 1.857340358200807e-05, "loss": 0.7155, "step": 7950 }, { "epoch": 0.5909327387588257, "grad_norm": 1.8254171762156817, "learning_rate": 1.8572990535394035e-05, "loss": 0.8049, "step": 7951 }, { "epoch": 0.5910070605722779, "grad_norm": 2.199823721238366, "learning_rate": 1.8572577433587525e-05, "loss": 0.8757, "step": 7952 }, { "epoch": 0.5910813823857302, "grad_norm": 2.2852527067114674, "learning_rate": 1.8572164276591212e-05, "loss": 1.0302, "step": 7953 }, { "epoch": 0.5911557041991825, "grad_norm": 2.4201882022969294, "learning_rate": 1.8571751064407748e-05, "loss": 0.9179, "step": 7954 }, { "epoch": 0.5912300260126347, "grad_norm": 1.9916733715730437, "learning_rate": 1.8571337797039793e-05, "loss": 0.8842, "step": 7955 }, { "epoch": 0.591304347826087, "grad_norm": 2.0044928150689416, "learning_rate": 1.8570924474490016e-05, "loss": 1.028, "step": 7956 }, { "epoch": 0.5913786696395392, "grad_norm": 2.5294497174899053, "learning_rate": 1.8570511096761067e-05, "loss": 0.8899, "step": 7957 }, { "epoch": 0.5914529914529915, "grad_norm": 1.7987004363743684, "learning_rate": 1.8570097663855617e-05, "loss": 0.6802, "step": 7958 }, { "epoch": 0.5915273132664437, "grad_norm": 1.9946264751423648, "learning_rate": 1.8569684175776317e-05, "loss": 1.1238, "step": 7959 }, { "epoch": 0.5916016350798959, "grad_norm": 1.9250695318337199, "learning_rate": 1.8569270632525838e-05, "loss": 0.8036, "step": 7960 }, { "epoch": 0.5916759568933482, "grad_norm": 2.6174755899870585, "learning_rate": 1.856885703410684e-05, "loss": 1.1121, "step": 7961 }, { "epoch": 0.5917502787068004, "grad_norm": 1.9051779584129425, "learning_rate": 1.856844338052198e-05, "loss": 0.8645, "step": 7962 }, { "epoch": 0.5918246005202527, "grad_norm": 1.9219887510564255, "learning_rate": 1.856802967177393e-05, "loss": 0.9587, "step": 7963 }, { "epoch": 0.5918989223337049, "grad_norm": 2.322945510767914, "learning_rate": 1.8567615907865352e-05, "loss": 1.1269, "step": 7964 }, { "epoch": 0.5919732441471572, "grad_norm": 2.208295691072697, "learning_rate": 1.8567202088798905e-05, "loss": 0.845, "step": 7965 }, { "epoch": 0.5920475659606095, "grad_norm": 1.6620223128169012, "learning_rate": 1.8566788214577258e-05, "loss": 0.7254, "step": 7966 }, { "epoch": 0.5921218877740617, "grad_norm": 2.1755636541936747, "learning_rate": 1.856637428520307e-05, "loss": 1.0236, "step": 7967 }, { "epoch": 0.592196209587514, "grad_norm": 1.7661101543174336, "learning_rate": 1.856596030067901e-05, "loss": 0.8639, "step": 7968 }, { "epoch": 0.5922705314009662, "grad_norm": 2.7441490021048733, "learning_rate": 1.8565546261007737e-05, "loss": 0.8284, "step": 7969 }, { "epoch": 0.5923448532144184, "grad_norm": 2.02654038607027, "learning_rate": 1.8565132166191927e-05, "loss": 0.7146, "step": 7970 }, { "epoch": 0.5924191750278707, "grad_norm": 2.015338962616432, "learning_rate": 1.856471801623424e-05, "loss": 0.8116, "step": 7971 }, { "epoch": 0.5924934968413229, "grad_norm": 2.108712997942582, "learning_rate": 1.856430381113734e-05, "loss": 0.9545, "step": 7972 }, { "epoch": 0.5925678186547751, "grad_norm": 1.844058532265697, "learning_rate": 1.85638895509039e-05, "loss": 0.7114, "step": 7973 }, { "epoch": 0.5926421404682274, "grad_norm": 2.19013107686593, "learning_rate": 1.8563475235536584e-05, "loss": 1.019, "step": 7974 }, { "epoch": 0.5927164622816796, "grad_norm": 1.7955787533892291, "learning_rate": 1.8563060865038055e-05, "loss": 0.8868, "step": 7975 }, { "epoch": 0.592790784095132, "grad_norm": 1.9700706242787636, "learning_rate": 1.8562646439410986e-05, "loss": 0.9375, "step": 7976 }, { "epoch": 0.5928651059085842, "grad_norm": 2.9549133117482373, "learning_rate": 1.8562231958658047e-05, "loss": 0.8405, "step": 7977 }, { "epoch": 0.5929394277220364, "grad_norm": 2.436751063054815, "learning_rate": 1.85618174227819e-05, "loss": 0.9946, "step": 7978 }, { "epoch": 0.5930137495354887, "grad_norm": 1.6221580041334671, "learning_rate": 1.8561402831785213e-05, "loss": 0.7887, "step": 7979 }, { "epoch": 0.5930880713489409, "grad_norm": 4.824963903201231, "learning_rate": 1.8560988185670663e-05, "loss": 0.994, "step": 7980 }, { "epoch": 0.5931623931623932, "grad_norm": 2.4386628137356947, "learning_rate": 1.8560573484440913e-05, "loss": 0.7433, "step": 7981 }, { "epoch": 0.5932367149758454, "grad_norm": 2.7624527257134748, "learning_rate": 1.856015872809864e-05, "loss": 0.9464, "step": 7982 }, { "epoch": 0.5933110367892976, "grad_norm": 2.2925224828989212, "learning_rate": 1.8559743916646505e-05, "loss": 0.9699, "step": 7983 }, { "epoch": 0.5933853586027499, "grad_norm": 2.4173490943247513, "learning_rate": 1.8559329050087182e-05, "loss": 0.9803, "step": 7984 }, { "epoch": 0.5934596804162021, "grad_norm": 2.021866477994739, "learning_rate": 1.8558914128423345e-05, "loss": 0.8889, "step": 7985 }, { "epoch": 0.5935340022296544, "grad_norm": 4.155187185358311, "learning_rate": 1.8558499151657664e-05, "loss": 0.7835, "step": 7986 }, { "epoch": 0.5936083240431067, "grad_norm": 2.0671835754414296, "learning_rate": 1.8558084119792804e-05, "loss": 0.8028, "step": 7987 }, { "epoch": 0.5936826458565589, "grad_norm": 1.8951565269959618, "learning_rate": 1.855766903283145e-05, "loss": 0.7538, "step": 7988 }, { "epoch": 0.5937569676700112, "grad_norm": 2.022715075853934, "learning_rate": 1.855725389077626e-05, "loss": 0.9042, "step": 7989 }, { "epoch": 0.5938312894834634, "grad_norm": 1.902217655460444, "learning_rate": 1.8556838693629916e-05, "loss": 0.9974, "step": 7990 }, { "epoch": 0.5939056112969157, "grad_norm": 2.2373741108648026, "learning_rate": 1.855642344139509e-05, "loss": 0.9503, "step": 7991 }, { "epoch": 0.5939799331103679, "grad_norm": 2.1296748845855586, "learning_rate": 1.8556008134074455e-05, "loss": 0.8729, "step": 7992 }, { "epoch": 0.5940542549238201, "grad_norm": 2.4254556644376692, "learning_rate": 1.855559277167068e-05, "loss": 1.0936, "step": 7993 }, { "epoch": 0.5941285767372724, "grad_norm": 2.0362614662492495, "learning_rate": 1.8555177354186446e-05, "loss": 1.1217, "step": 7994 }, { "epoch": 0.5942028985507246, "grad_norm": 2.20904509580429, "learning_rate": 1.855476188162442e-05, "loss": 0.8108, "step": 7995 }, { "epoch": 0.5942772203641769, "grad_norm": 2.214144815579981, "learning_rate": 1.855434635398728e-05, "loss": 0.9759, "step": 7996 }, { "epoch": 0.5943515421776291, "grad_norm": 1.8348211626900996, "learning_rate": 1.855393077127771e-05, "loss": 0.8034, "step": 7997 }, { "epoch": 0.5944258639910813, "grad_norm": 2.1201358488753086, "learning_rate": 1.855351513349837e-05, "loss": 1.0794, "step": 7998 }, { "epoch": 0.5945001858045337, "grad_norm": 2.032612490225727, "learning_rate": 1.8553099440651948e-05, "loss": 1.1444, "step": 7999 }, { "epoch": 0.5945745076179859, "grad_norm": 2.2288784526738548, "learning_rate": 1.855268369274111e-05, "loss": 0.9007, "step": 8000 }, { "epoch": 0.5946488294314382, "grad_norm": 1.9827213786817488, "learning_rate": 1.8552267889768543e-05, "loss": 0.8516, "step": 8001 }, { "epoch": 0.5947231512448904, "grad_norm": 2.1220764250158792, "learning_rate": 1.8551852031736918e-05, "loss": 0.9961, "step": 8002 }, { "epoch": 0.5947974730583426, "grad_norm": 2.125904169742027, "learning_rate": 1.8551436118648914e-05, "loss": 0.7658, "step": 8003 }, { "epoch": 0.5948717948717949, "grad_norm": 2.1867101589880433, "learning_rate": 1.8551020150507208e-05, "loss": 0.8549, "step": 8004 }, { "epoch": 0.5949461166852471, "grad_norm": 1.8315119711573102, "learning_rate": 1.855060412731448e-05, "loss": 1.0451, "step": 8005 }, { "epoch": 0.5950204384986993, "grad_norm": 2.0390666205160364, "learning_rate": 1.8550188049073397e-05, "loss": 1.0594, "step": 8006 }, { "epoch": 0.5950947603121516, "grad_norm": 1.9555798798280146, "learning_rate": 1.8549771915786653e-05, "loss": 0.9553, "step": 8007 }, { "epoch": 0.5951690821256038, "grad_norm": 1.6845560042844352, "learning_rate": 1.8549355727456926e-05, "loss": 0.8534, "step": 8008 }, { "epoch": 0.5952434039390561, "grad_norm": 1.8770439743667582, "learning_rate": 1.8548939484086883e-05, "loss": 0.8475, "step": 8009 }, { "epoch": 0.5953177257525084, "grad_norm": 3.7383952602587947, "learning_rate": 1.8548523185679214e-05, "loss": 0.8091, "step": 8010 }, { "epoch": 0.5953920475659606, "grad_norm": 1.6365399772221392, "learning_rate": 1.8548106832236595e-05, "loss": 0.7488, "step": 8011 }, { "epoch": 0.5954663693794129, "grad_norm": 1.8215712506611268, "learning_rate": 1.854769042376171e-05, "loss": 0.6707, "step": 8012 }, { "epoch": 0.5955406911928651, "grad_norm": 1.9030132483643303, "learning_rate": 1.8547273960257238e-05, "loss": 0.8651, "step": 8013 }, { "epoch": 0.5956150130063174, "grad_norm": 2.0178807306838866, "learning_rate": 1.854685744172586e-05, "loss": 0.8158, "step": 8014 }, { "epoch": 0.5956893348197696, "grad_norm": 1.7824864599026042, "learning_rate": 1.8546440868170253e-05, "loss": 0.8706, "step": 8015 }, { "epoch": 0.5957636566332218, "grad_norm": 1.7619746135700878, "learning_rate": 1.854602423959311e-05, "loss": 0.7151, "step": 8016 }, { "epoch": 0.5958379784466741, "grad_norm": 2.508087720961467, "learning_rate": 1.85456075559971e-05, "loss": 1.0604, "step": 8017 }, { "epoch": 0.5959123002601263, "grad_norm": 1.9791284650712855, "learning_rate": 1.854519081738491e-05, "loss": 0.9524, "step": 8018 }, { "epoch": 0.5959866220735786, "grad_norm": 1.9744192925480681, "learning_rate": 1.854477402375923e-05, "loss": 0.6844, "step": 8019 }, { "epoch": 0.5960609438870308, "grad_norm": 1.7933226309719779, "learning_rate": 1.8544357175122738e-05, "loss": 0.6932, "step": 8020 }, { "epoch": 0.5961352657004831, "grad_norm": 1.9870667694158834, "learning_rate": 1.854394027147812e-05, "loss": 0.8522, "step": 8021 }, { "epoch": 0.5962095875139354, "grad_norm": 1.9997179369421747, "learning_rate": 1.854352331282805e-05, "loss": 1.0221, "step": 8022 }, { "epoch": 0.5962839093273876, "grad_norm": 2.1803617778298117, "learning_rate": 1.8543106299175227e-05, "loss": 0.8997, "step": 8023 }, { "epoch": 0.5963582311408399, "grad_norm": 2.482743369807451, "learning_rate": 1.8542689230522325e-05, "loss": 1.1903, "step": 8024 }, { "epoch": 0.5964325529542921, "grad_norm": 2.1429661785951795, "learning_rate": 1.8542272106872038e-05, "loss": 0.7187, "step": 8025 }, { "epoch": 0.5965068747677443, "grad_norm": 1.7253506204123814, "learning_rate": 1.8541854928227042e-05, "loss": 0.741, "step": 8026 }, { "epoch": 0.5965811965811966, "grad_norm": 2.047582147397813, "learning_rate": 1.854143769459003e-05, "loss": 0.9227, "step": 8027 }, { "epoch": 0.5966555183946488, "grad_norm": 2.0930328899709547, "learning_rate": 1.854102040596368e-05, "loss": 0.7625, "step": 8028 }, { "epoch": 0.596729840208101, "grad_norm": 2.0291577822184843, "learning_rate": 1.854060306235069e-05, "loss": 0.9937, "step": 8029 }, { "epoch": 0.5968041620215533, "grad_norm": 1.7894041801481955, "learning_rate": 1.854018566375374e-05, "loss": 0.7394, "step": 8030 }, { "epoch": 0.5968784838350055, "grad_norm": 1.7521268984246257, "learning_rate": 1.8539768210175512e-05, "loss": 0.7582, "step": 8031 }, { "epoch": 0.5969528056484579, "grad_norm": 1.7231219145084704, "learning_rate": 1.8539350701618706e-05, "loss": 0.7756, "step": 8032 }, { "epoch": 0.5970271274619101, "grad_norm": 2.26686140244219, "learning_rate": 1.8538933138085998e-05, "loss": 1.0011, "step": 8033 }, { "epoch": 0.5971014492753624, "grad_norm": 1.9040933947031577, "learning_rate": 1.8538515519580086e-05, "loss": 0.8671, "step": 8034 }, { "epoch": 0.5971757710888146, "grad_norm": 2.2824421466519667, "learning_rate": 1.853809784610365e-05, "loss": 1.1382, "step": 8035 }, { "epoch": 0.5972500929022668, "grad_norm": 1.8659146096658774, "learning_rate": 1.8537680117659387e-05, "loss": 0.8135, "step": 8036 }, { "epoch": 0.5973244147157191, "grad_norm": 1.7321711360257281, "learning_rate": 1.853726233424998e-05, "loss": 0.9356, "step": 8037 }, { "epoch": 0.5973987365291713, "grad_norm": 1.8535230733839614, "learning_rate": 1.8536844495878124e-05, "loss": 0.8433, "step": 8038 }, { "epoch": 0.5974730583426235, "grad_norm": 1.7173128746226034, "learning_rate": 1.85364266025465e-05, "loss": 0.6316, "step": 8039 }, { "epoch": 0.5975473801560758, "grad_norm": 1.934837816925932, "learning_rate": 1.8536008654257812e-05, "loss": 0.7299, "step": 8040 }, { "epoch": 0.597621701969528, "grad_norm": 1.6836324689371474, "learning_rate": 1.853559065101474e-05, "loss": 0.681, "step": 8041 }, { "epoch": 0.5976960237829803, "grad_norm": 1.8611193255963965, "learning_rate": 1.8535172592819974e-05, "loss": 0.6873, "step": 8042 }, { "epoch": 0.5977703455964326, "grad_norm": 2.0674699142990307, "learning_rate": 1.8534754479676217e-05, "loss": 0.553, "step": 8043 }, { "epoch": 0.5978446674098848, "grad_norm": 2.0486853254490778, "learning_rate": 1.8534336311586147e-05, "loss": 0.8854, "step": 8044 }, { "epoch": 0.5979189892233371, "grad_norm": 2.3175705839890086, "learning_rate": 1.8533918088552468e-05, "loss": 0.8689, "step": 8045 }, { "epoch": 0.5979933110367893, "grad_norm": 2.121206529314942, "learning_rate": 1.8533499810577864e-05, "loss": 0.7988, "step": 8046 }, { "epoch": 0.5980676328502416, "grad_norm": 2.0304275978099215, "learning_rate": 1.8533081477665032e-05, "loss": 0.8093, "step": 8047 }, { "epoch": 0.5981419546636938, "grad_norm": 2.155675251412351, "learning_rate": 1.853266308981666e-05, "loss": 0.8819, "step": 8048 }, { "epoch": 0.598216276477146, "grad_norm": 2.3306052904016274, "learning_rate": 1.8532244647035453e-05, "loss": 0.7191, "step": 8049 }, { "epoch": 0.5982905982905983, "grad_norm": 1.90118362409637, "learning_rate": 1.8531826149324094e-05, "loss": 0.7871, "step": 8050 }, { "epoch": 0.5983649201040505, "grad_norm": 1.7764319172582033, "learning_rate": 1.853140759668528e-05, "loss": 0.8152, "step": 8051 }, { "epoch": 0.5984392419175028, "grad_norm": 1.6599339125985297, "learning_rate": 1.8530988989121708e-05, "loss": 1.0097, "step": 8052 }, { "epoch": 0.598513563730955, "grad_norm": 1.956433663840369, "learning_rate": 1.8530570326636074e-05, "loss": 1.0008, "step": 8053 }, { "epoch": 0.5985878855444073, "grad_norm": 3.17034029921448, "learning_rate": 1.8530151609231067e-05, "loss": 0.9012, "step": 8054 }, { "epoch": 0.5986622073578596, "grad_norm": 1.5459175325326069, "learning_rate": 1.852973283690939e-05, "loss": 0.7195, "step": 8055 }, { "epoch": 0.5987365291713118, "grad_norm": 1.53860367580317, "learning_rate": 1.852931400967373e-05, "loss": 0.8003, "step": 8056 }, { "epoch": 0.598810850984764, "grad_norm": 1.7865603695862853, "learning_rate": 1.8528895127526795e-05, "loss": 0.9242, "step": 8057 }, { "epoch": 0.5988851727982163, "grad_norm": 1.826031343859808, "learning_rate": 1.852847619047127e-05, "loss": 0.797, "step": 8058 }, { "epoch": 0.5989594946116685, "grad_norm": 1.7811433040474045, "learning_rate": 1.852805719850986e-05, "loss": 0.9108, "step": 8059 }, { "epoch": 0.5990338164251208, "grad_norm": 1.62963575517028, "learning_rate": 1.8527638151645263e-05, "loss": 0.8505, "step": 8060 }, { "epoch": 0.599108138238573, "grad_norm": 2.2020524552617844, "learning_rate": 1.8527219049880174e-05, "loss": 0.8384, "step": 8061 }, { "epoch": 0.5991824600520252, "grad_norm": 2.1392415941808194, "learning_rate": 1.8526799893217287e-05, "loss": 0.9633, "step": 8062 }, { "epoch": 0.5992567818654775, "grad_norm": 1.8312086516365695, "learning_rate": 1.852638068165931e-05, "loss": 0.8358, "step": 8063 }, { "epoch": 0.5993311036789297, "grad_norm": 2.07070563068986, "learning_rate": 1.8525961415208932e-05, "loss": 0.7934, "step": 8064 }, { "epoch": 0.599405425492382, "grad_norm": 2.5536951599304953, "learning_rate": 1.8525542093868858e-05, "loss": 0.9154, "step": 8065 }, { "epoch": 0.5994797473058343, "grad_norm": 12.966816919542458, "learning_rate": 1.8525122717641787e-05, "loss": 0.9525, "step": 8066 }, { "epoch": 0.5995540691192865, "grad_norm": 2.1474746933790776, "learning_rate": 1.8524703286530416e-05, "loss": 0.926, "step": 8067 }, { "epoch": 0.5996283909327388, "grad_norm": 1.5284779703303515, "learning_rate": 1.8524283800537454e-05, "loss": 0.6751, "step": 8068 }, { "epoch": 0.599702712746191, "grad_norm": 2.2344908007581297, "learning_rate": 1.8523864259665586e-05, "loss": 0.888, "step": 8069 }, { "epoch": 0.5997770345596433, "grad_norm": 1.9260710672216113, "learning_rate": 1.852344466391753e-05, "loss": 0.9113, "step": 8070 }, { "epoch": 0.5998513563730955, "grad_norm": 2.065893447025103, "learning_rate": 1.8523025013295975e-05, "loss": 1.0483, "step": 8071 }, { "epoch": 0.5999256781865477, "grad_norm": 2.4924941742400666, "learning_rate": 1.852260530780363e-05, "loss": 0.774, "step": 8072 }, { "epoch": 0.6, "grad_norm": 2.18236190513426, "learning_rate": 1.8522185547443194e-05, "loss": 0.9534, "step": 8073 }, { "epoch": 0.6000743218134522, "grad_norm": 2.2721170899421756, "learning_rate": 1.8521765732217367e-05, "loss": 1.1423, "step": 8074 }, { "epoch": 0.6001486436269045, "grad_norm": 2.0993377953305825, "learning_rate": 1.8521345862128855e-05, "loss": 0.7798, "step": 8075 }, { "epoch": 0.6002229654403567, "grad_norm": 2.0044276875742852, "learning_rate": 1.8520925937180362e-05, "loss": 0.7232, "step": 8076 }, { "epoch": 0.600297287253809, "grad_norm": 3.9080116850770166, "learning_rate": 1.8520505957374588e-05, "loss": 0.7702, "step": 8077 }, { "epoch": 0.6003716090672613, "grad_norm": 1.9769184087793474, "learning_rate": 1.852008592271424e-05, "loss": 0.9416, "step": 8078 }, { "epoch": 0.6004459308807135, "grad_norm": 1.643963376868727, "learning_rate": 1.8519665833202023e-05, "loss": 0.7906, "step": 8079 }, { "epoch": 0.6005202526941658, "grad_norm": 2.258731512915822, "learning_rate": 1.8519245688840637e-05, "loss": 0.8455, "step": 8080 }, { "epoch": 0.600594574507618, "grad_norm": 2.4215247519074548, "learning_rate": 1.8518825489632787e-05, "loss": 0.8743, "step": 8081 }, { "epoch": 0.6006688963210702, "grad_norm": 2.1601831296975247, "learning_rate": 1.8518405235581187e-05, "loss": 0.7298, "step": 8082 }, { "epoch": 0.6007432181345225, "grad_norm": 2.807221266884651, "learning_rate": 1.851798492668853e-05, "loss": 0.7629, "step": 8083 }, { "epoch": 0.6008175399479747, "grad_norm": 2.060279686469096, "learning_rate": 1.8517564562957533e-05, "loss": 0.7789, "step": 8084 }, { "epoch": 0.600891861761427, "grad_norm": 2.2432169918570484, "learning_rate": 1.8517144144390894e-05, "loss": 0.8567, "step": 8085 }, { "epoch": 0.6009661835748792, "grad_norm": 1.9556298334750075, "learning_rate": 1.8516723670991323e-05, "loss": 0.7259, "step": 8086 }, { "epoch": 0.6010405053883314, "grad_norm": 1.951884329464526, "learning_rate": 1.8516303142761527e-05, "loss": 0.9343, "step": 8087 }, { "epoch": 0.6011148272017838, "grad_norm": 2.913109146887854, "learning_rate": 1.8515882559704214e-05, "loss": 1.0509, "step": 8088 }, { "epoch": 0.601189149015236, "grad_norm": 2.8114141950530858, "learning_rate": 1.851546192182209e-05, "loss": 0.7214, "step": 8089 }, { "epoch": 0.6012634708286883, "grad_norm": 1.9809836517913688, "learning_rate": 1.8515041229117866e-05, "loss": 0.7546, "step": 8090 }, { "epoch": 0.6013377926421405, "grad_norm": 2.239899584700751, "learning_rate": 1.8514620481594245e-05, "loss": 0.8608, "step": 8091 }, { "epoch": 0.6014121144555927, "grad_norm": 1.6756527901915819, "learning_rate": 1.8514199679253942e-05, "loss": 0.8584, "step": 8092 }, { "epoch": 0.601486436269045, "grad_norm": 1.9260253138101582, "learning_rate": 1.8513778822099664e-05, "loss": 0.9143, "step": 8093 }, { "epoch": 0.6015607580824972, "grad_norm": 1.8057889429010447, "learning_rate": 1.8513357910134114e-05, "loss": 0.9964, "step": 8094 }, { "epoch": 0.6016350798959494, "grad_norm": 2.1770431897630207, "learning_rate": 1.8512936943360013e-05, "loss": 0.9614, "step": 8095 }, { "epoch": 0.6017094017094017, "grad_norm": 1.598316848300951, "learning_rate": 1.8512515921780065e-05, "loss": 0.7809, "step": 8096 }, { "epoch": 0.6017837235228539, "grad_norm": 1.8763662247639044, "learning_rate": 1.851209484539698e-05, "loss": 0.8858, "step": 8097 }, { "epoch": 0.6018580453363062, "grad_norm": 2.2525235067773828, "learning_rate": 1.8511673714213472e-05, "loss": 0.9489, "step": 8098 }, { "epoch": 0.6019323671497585, "grad_norm": 2.058864436797719, "learning_rate": 1.851125252823225e-05, "loss": 0.8684, "step": 8099 }, { "epoch": 0.6020066889632107, "grad_norm": 2.050461064238279, "learning_rate": 1.8510831287456023e-05, "loss": 0.8835, "step": 8100 }, { "epoch": 0.602081010776663, "grad_norm": 1.9973934979539605, "learning_rate": 1.8510409991887507e-05, "loss": 1.0523, "step": 8101 }, { "epoch": 0.6021553325901152, "grad_norm": 2.4453455906781563, "learning_rate": 1.8509988641529413e-05, "loss": 0.9694, "step": 8102 }, { "epoch": 0.6022296544035675, "grad_norm": 1.9632276545539455, "learning_rate": 1.8509567236384452e-05, "loss": 0.8983, "step": 8103 }, { "epoch": 0.6023039762170197, "grad_norm": 1.9365557043766977, "learning_rate": 1.850914577645534e-05, "loss": 0.7609, "step": 8104 }, { "epoch": 0.6023782980304719, "grad_norm": 1.64834185092583, "learning_rate": 1.8508724261744792e-05, "loss": 0.7984, "step": 8105 }, { "epoch": 0.6024526198439242, "grad_norm": 1.5386182656715628, "learning_rate": 1.8508302692255514e-05, "loss": 0.6285, "step": 8106 }, { "epoch": 0.6025269416573764, "grad_norm": 1.7715289784367971, "learning_rate": 1.850788106799023e-05, "loss": 0.716, "step": 8107 }, { "epoch": 0.6026012634708287, "grad_norm": 1.6560577330222241, "learning_rate": 1.8507459388951644e-05, "loss": 0.7486, "step": 8108 }, { "epoch": 0.6026755852842809, "grad_norm": 2.121176180745117, "learning_rate": 1.850703765514248e-05, "loss": 0.8869, "step": 8109 }, { "epoch": 0.6027499070977332, "grad_norm": 1.9498872730260246, "learning_rate": 1.8506615866565447e-05, "loss": 0.9289, "step": 8110 }, { "epoch": 0.6028242289111855, "grad_norm": 5.030644231763999, "learning_rate": 1.8506194023223264e-05, "loss": 0.7906, "step": 8111 }, { "epoch": 0.6028985507246377, "grad_norm": 2.6271604110287563, "learning_rate": 1.8505772125118643e-05, "loss": 0.9298, "step": 8112 }, { "epoch": 0.60297287253809, "grad_norm": 1.7114137243930883, "learning_rate": 1.8505350172254305e-05, "loss": 0.8756, "step": 8113 }, { "epoch": 0.6030471943515422, "grad_norm": 1.9083335372713182, "learning_rate": 1.8504928164632962e-05, "loss": 0.9471, "step": 8114 }, { "epoch": 0.6031215161649944, "grad_norm": 1.8111285451004604, "learning_rate": 1.8504506102257333e-05, "loss": 0.9032, "step": 8115 }, { "epoch": 0.6031958379784467, "grad_norm": 2.089115907396008, "learning_rate": 1.850408398513013e-05, "loss": 0.9128, "step": 8116 }, { "epoch": 0.6032701597918989, "grad_norm": 2.1592531967001762, "learning_rate": 1.8503661813254083e-05, "loss": 0.959, "step": 8117 }, { "epoch": 0.6033444816053511, "grad_norm": 1.821986224814446, "learning_rate": 1.85032395866319e-05, "loss": 0.8247, "step": 8118 }, { "epoch": 0.6034188034188034, "grad_norm": 2.1752636712347724, "learning_rate": 1.8502817305266302e-05, "loss": 0.8088, "step": 8119 }, { "epoch": 0.6034931252322556, "grad_norm": 1.968089978215165, "learning_rate": 1.8502394969160005e-05, "loss": 0.9072, "step": 8120 }, { "epoch": 0.6035674470457079, "grad_norm": 2.2498846607902956, "learning_rate": 1.8501972578315733e-05, "loss": 1.0441, "step": 8121 }, { "epoch": 0.6036417688591602, "grad_norm": 3.0086035329468896, "learning_rate": 1.85015501327362e-05, "loss": 1.1999, "step": 8122 }, { "epoch": 0.6037160906726124, "grad_norm": 3.064117676046765, "learning_rate": 1.850112763242413e-05, "loss": 0.9703, "step": 8123 }, { "epoch": 0.6037904124860647, "grad_norm": 2.2865560951721426, "learning_rate": 1.8500705077382248e-05, "loss": 1.0656, "step": 8124 }, { "epoch": 0.6038647342995169, "grad_norm": 2.2487003922743867, "learning_rate": 1.8500282467613257e-05, "loss": 0.8851, "step": 8125 }, { "epoch": 0.6039390561129692, "grad_norm": 2.5850384267338704, "learning_rate": 1.8499859803119893e-05, "loss": 0.9514, "step": 8126 }, { "epoch": 0.6040133779264214, "grad_norm": 2.6342008202876386, "learning_rate": 1.8499437083904874e-05, "loss": 0.9228, "step": 8127 }, { "epoch": 0.6040876997398736, "grad_norm": 2.1207274791048896, "learning_rate": 1.849901430997092e-05, "loss": 0.9243, "step": 8128 }, { "epoch": 0.6041620215533259, "grad_norm": 1.9327804845625294, "learning_rate": 1.849859148132075e-05, "loss": 0.9257, "step": 8129 }, { "epoch": 0.6042363433667781, "grad_norm": 1.881681595479724, "learning_rate": 1.8498168597957093e-05, "loss": 0.8743, "step": 8130 }, { "epoch": 0.6043106651802304, "grad_norm": 1.7556201443262822, "learning_rate": 1.849774565988266e-05, "loss": 0.8644, "step": 8131 }, { "epoch": 0.6043849869936826, "grad_norm": 1.9259957144508837, "learning_rate": 1.849732266710019e-05, "loss": 0.7521, "step": 8132 }, { "epoch": 0.6044593088071349, "grad_norm": 1.9287248636675227, "learning_rate": 1.8496899619612393e-05, "loss": 0.7916, "step": 8133 }, { "epoch": 0.6045336306205872, "grad_norm": 2.014999613243075, "learning_rate": 1.8496476517422e-05, "loss": 0.9013, "step": 8134 }, { "epoch": 0.6046079524340394, "grad_norm": 1.7136863404441356, "learning_rate": 1.849605336053173e-05, "loss": 0.7789, "step": 8135 }, { "epoch": 0.6046822742474917, "grad_norm": 1.8485642088487764, "learning_rate": 1.8495630148944308e-05, "loss": 0.6959, "step": 8136 }, { "epoch": 0.6047565960609439, "grad_norm": 1.8952435118716213, "learning_rate": 1.8495206882662458e-05, "loss": 0.5831, "step": 8137 }, { "epoch": 0.6048309178743961, "grad_norm": 2.3521957580263595, "learning_rate": 1.8494783561688914e-05, "loss": 0.9411, "step": 8138 }, { "epoch": 0.6049052396878484, "grad_norm": 2.0051792468717258, "learning_rate": 1.849436018602639e-05, "loss": 0.8206, "step": 8139 }, { "epoch": 0.6049795615013006, "grad_norm": 2.3838634313909304, "learning_rate": 1.8493936755677616e-05, "loss": 1.1137, "step": 8140 }, { "epoch": 0.6050538833147528, "grad_norm": 2.1790676817215875, "learning_rate": 1.849351327064532e-05, "loss": 1.0685, "step": 8141 }, { "epoch": 0.6051282051282051, "grad_norm": 1.9657356150305696, "learning_rate": 1.8493089730932222e-05, "loss": 0.791, "step": 8142 }, { "epoch": 0.6052025269416573, "grad_norm": 1.7924226457383796, "learning_rate": 1.8492666136541054e-05, "loss": 0.8326, "step": 8143 }, { "epoch": 0.6052768487551097, "grad_norm": 1.8752251562421458, "learning_rate": 1.8492242487474546e-05, "loss": 0.8333, "step": 8144 }, { "epoch": 0.6053511705685619, "grad_norm": 1.847521765069797, "learning_rate": 1.849181878373542e-05, "loss": 0.6812, "step": 8145 }, { "epoch": 0.6054254923820142, "grad_norm": 1.910851867272811, "learning_rate": 1.84913950253264e-05, "loss": 0.8835, "step": 8146 }, { "epoch": 0.6054998141954664, "grad_norm": 1.7164572368960427, "learning_rate": 1.8490971212250224e-05, "loss": 0.8216, "step": 8147 }, { "epoch": 0.6055741360089186, "grad_norm": 1.7233406540516603, "learning_rate": 1.8490547344509618e-05, "loss": 0.9461, "step": 8148 }, { "epoch": 0.6056484578223709, "grad_norm": 2.373990075383031, "learning_rate": 1.8490123422107306e-05, "loss": 0.8208, "step": 8149 }, { "epoch": 0.6057227796358231, "grad_norm": 1.7117703761545136, "learning_rate": 1.8489699445046022e-05, "loss": 0.7513, "step": 8150 }, { "epoch": 0.6057971014492753, "grad_norm": 1.9475640363855118, "learning_rate": 1.848927541332849e-05, "loss": 0.9876, "step": 8151 }, { "epoch": 0.6058714232627276, "grad_norm": 2.281389096961777, "learning_rate": 1.8488851326957446e-05, "loss": 1.0455, "step": 8152 }, { "epoch": 0.6059457450761798, "grad_norm": 1.6089420715356433, "learning_rate": 1.8488427185935615e-05, "loss": 0.8234, "step": 8153 }, { "epoch": 0.6060200668896321, "grad_norm": 2.1696915069776264, "learning_rate": 1.8488002990265733e-05, "loss": 0.9028, "step": 8154 }, { "epoch": 0.6060943887030844, "grad_norm": 2.3229947722384625, "learning_rate": 1.848757873995053e-05, "loss": 0.8222, "step": 8155 }, { "epoch": 0.6061687105165366, "grad_norm": 2.074905092720464, "learning_rate": 1.848715443499273e-05, "loss": 0.8125, "step": 8156 }, { "epoch": 0.6062430323299889, "grad_norm": 2.0778615729380148, "learning_rate": 1.8486730075395073e-05, "loss": 1.0374, "step": 8157 }, { "epoch": 0.6063173541434411, "grad_norm": 2.0201195617506817, "learning_rate": 1.8486305661160288e-05, "loss": 0.9859, "step": 8158 }, { "epoch": 0.6063916759568934, "grad_norm": 1.9547587694215527, "learning_rate": 1.8485881192291106e-05, "loss": 0.7632, "step": 8159 }, { "epoch": 0.6064659977703456, "grad_norm": 2.270550395680032, "learning_rate": 1.8485456668790263e-05, "loss": 0.9591, "step": 8160 }, { "epoch": 0.6065403195837978, "grad_norm": 1.7008497365270878, "learning_rate": 1.8485032090660492e-05, "loss": 0.7016, "step": 8161 }, { "epoch": 0.6066146413972501, "grad_norm": 2.3953388299323994, "learning_rate": 1.848460745790452e-05, "loss": 0.9298, "step": 8162 }, { "epoch": 0.6066889632107023, "grad_norm": 1.895610993629884, "learning_rate": 1.848418277052509e-05, "loss": 0.8027, "step": 8163 }, { "epoch": 0.6067632850241546, "grad_norm": 2.2223218969188405, "learning_rate": 1.8483758028524928e-05, "loss": 1.0572, "step": 8164 }, { "epoch": 0.6068376068376068, "grad_norm": 2.063562098423118, "learning_rate": 1.8483333231906772e-05, "loss": 1.0253, "step": 8165 }, { "epoch": 0.6069119286510591, "grad_norm": 1.8635601787064275, "learning_rate": 1.848290838067336e-05, "loss": 0.7625, "step": 8166 }, { "epoch": 0.6069862504645114, "grad_norm": 2.019923785193811, "learning_rate": 1.848248347482742e-05, "loss": 1.0479, "step": 8167 }, { "epoch": 0.6070605722779636, "grad_norm": 1.8400106057843582, "learning_rate": 1.8482058514371693e-05, "loss": 0.9046, "step": 8168 }, { "epoch": 0.6071348940914159, "grad_norm": 3.192063835274897, "learning_rate": 1.8481633499308914e-05, "loss": 0.6984, "step": 8169 }, { "epoch": 0.6072092159048681, "grad_norm": 1.6995649556818286, "learning_rate": 1.848120842964182e-05, "loss": 0.6106, "step": 8170 }, { "epoch": 0.6072835377183203, "grad_norm": 1.6730396650007473, "learning_rate": 1.8480783305373146e-05, "loss": 0.604, "step": 8171 }, { "epoch": 0.6073578595317726, "grad_norm": 2.8802419120615403, "learning_rate": 1.848035812650563e-05, "loss": 0.8996, "step": 8172 }, { "epoch": 0.6074321813452248, "grad_norm": 1.7454939096796769, "learning_rate": 1.8479932893042005e-05, "loss": 0.9579, "step": 8173 }, { "epoch": 0.607506503158677, "grad_norm": 2.0674454692831405, "learning_rate": 1.8479507604985013e-05, "loss": 0.8393, "step": 8174 }, { "epoch": 0.6075808249721293, "grad_norm": 2.3594925002796554, "learning_rate": 1.847908226233739e-05, "loss": 0.8183, "step": 8175 }, { "epoch": 0.6076551467855815, "grad_norm": 1.9761296828018662, "learning_rate": 1.8478656865101877e-05, "loss": 0.9943, "step": 8176 }, { "epoch": 0.6077294685990338, "grad_norm": 2.2786025888068586, "learning_rate": 1.8478231413281212e-05, "loss": 0.9138, "step": 8177 }, { "epoch": 0.6078037904124861, "grad_norm": 2.030040506237559, "learning_rate": 1.8477805906878134e-05, "loss": 1.0766, "step": 8178 }, { "epoch": 0.6078781122259383, "grad_norm": 1.9189253224907243, "learning_rate": 1.8477380345895376e-05, "loss": 0.8226, "step": 8179 }, { "epoch": 0.6079524340393906, "grad_norm": 1.8368744088866422, "learning_rate": 1.8476954730335687e-05, "loss": 0.74, "step": 8180 }, { "epoch": 0.6080267558528428, "grad_norm": 1.8904407371308194, "learning_rate": 1.8476529060201804e-05, "loss": 0.9577, "step": 8181 }, { "epoch": 0.6081010776662951, "grad_norm": 1.9738178921847815, "learning_rate": 1.8476103335496465e-05, "loss": 0.8488, "step": 8182 }, { "epoch": 0.6081753994797473, "grad_norm": 2.1068964610651193, "learning_rate": 1.8475677556222413e-05, "loss": 0.8647, "step": 8183 }, { "epoch": 0.6082497212931995, "grad_norm": 1.9089375028887976, "learning_rate": 1.8475251722382388e-05, "loss": 0.7062, "step": 8184 }, { "epoch": 0.6083240431066518, "grad_norm": 1.6366070859355737, "learning_rate": 1.8474825833979133e-05, "loss": 0.7099, "step": 8185 }, { "epoch": 0.608398364920104, "grad_norm": 2.16645938597757, "learning_rate": 1.8474399891015388e-05, "loss": 0.7808, "step": 8186 }, { "epoch": 0.6084726867335563, "grad_norm": 3.535612756206541, "learning_rate": 1.8473973893493896e-05, "loss": 0.7412, "step": 8187 }, { "epoch": 0.6085470085470085, "grad_norm": 1.9823746286705157, "learning_rate": 1.8473547841417404e-05, "loss": 0.9345, "step": 8188 }, { "epoch": 0.6086213303604608, "grad_norm": 1.861342890273968, "learning_rate": 1.847312173478865e-05, "loss": 0.8993, "step": 8189 }, { "epoch": 0.6086956521739131, "grad_norm": 2.5764313428677683, "learning_rate": 1.8472695573610373e-05, "loss": 0.8665, "step": 8190 }, { "epoch": 0.6087699739873653, "grad_norm": 1.7586414213183383, "learning_rate": 1.8472269357885323e-05, "loss": 0.7517, "step": 8191 }, { "epoch": 0.6088442958008176, "grad_norm": 2.170641581874018, "learning_rate": 1.8471843087616244e-05, "loss": 0.845, "step": 8192 }, { "epoch": 0.6089186176142698, "grad_norm": 2.0999636790699343, "learning_rate": 1.847141676280588e-05, "loss": 0.9864, "step": 8193 }, { "epoch": 0.608992939427722, "grad_norm": 1.8487710292693083, "learning_rate": 1.847099038345697e-05, "loss": 0.7966, "step": 8194 }, { "epoch": 0.6090672612411743, "grad_norm": 2.2474814794142457, "learning_rate": 1.8470563949572267e-05, "loss": 0.9086, "step": 8195 }, { "epoch": 0.6091415830546265, "grad_norm": 1.669138959900009, "learning_rate": 1.847013746115451e-05, "loss": 0.8552, "step": 8196 }, { "epoch": 0.6092159048680788, "grad_norm": 1.9730624691550624, "learning_rate": 1.846971091820645e-05, "loss": 1.0221, "step": 8197 }, { "epoch": 0.609290226681531, "grad_norm": 2.655049801698421, "learning_rate": 1.846928432073083e-05, "loss": 0.9395, "step": 8198 }, { "epoch": 0.6093645484949832, "grad_norm": 2.04115142456957, "learning_rate": 1.84688576687304e-05, "loss": 0.7407, "step": 8199 }, { "epoch": 0.6094388703084356, "grad_norm": 2.6187484728678077, "learning_rate": 1.84684309622079e-05, "loss": 1.1367, "step": 8200 }, { "epoch": 0.6095131921218878, "grad_norm": 2.016896059812822, "learning_rate": 1.8468004201166077e-05, "loss": 0.8932, "step": 8201 }, { "epoch": 0.60958751393534, "grad_norm": 1.932522066362567, "learning_rate": 1.846757738560769e-05, "loss": 0.7914, "step": 8202 }, { "epoch": 0.6096618357487923, "grad_norm": 2.075841235629345, "learning_rate": 1.8467150515535475e-05, "loss": 0.8721, "step": 8203 }, { "epoch": 0.6097361575622445, "grad_norm": 2.0324942396359127, "learning_rate": 1.8466723590952184e-05, "loss": 0.7537, "step": 8204 }, { "epoch": 0.6098104793756968, "grad_norm": 1.6715203242962768, "learning_rate": 1.8466296611860567e-05, "loss": 0.815, "step": 8205 }, { "epoch": 0.609884801189149, "grad_norm": 1.9217619957401855, "learning_rate": 1.8465869578263373e-05, "loss": 0.9668, "step": 8206 }, { "epoch": 0.6099591230026012, "grad_norm": 1.8570848227276004, "learning_rate": 1.846544249016335e-05, "loss": 0.8718, "step": 8207 }, { "epoch": 0.6100334448160535, "grad_norm": 1.8397422872747313, "learning_rate": 1.8465015347563247e-05, "loss": 0.7844, "step": 8208 }, { "epoch": 0.6101077666295057, "grad_norm": 1.928744262294584, "learning_rate": 1.846458815046581e-05, "loss": 0.852, "step": 8209 }, { "epoch": 0.610182088442958, "grad_norm": 2.124850988764125, "learning_rate": 1.84641608988738e-05, "loss": 1.0201, "step": 8210 }, { "epoch": 0.6102564102564103, "grad_norm": 1.8642293058718067, "learning_rate": 1.8463733592789958e-05, "loss": 1.02, "step": 8211 }, { "epoch": 0.6103307320698625, "grad_norm": 2.180175523631687, "learning_rate": 1.846330623221704e-05, "loss": 1.0208, "step": 8212 }, { "epoch": 0.6104050538833148, "grad_norm": 1.9173856797381634, "learning_rate": 1.8462878817157796e-05, "loss": 0.9662, "step": 8213 }, { "epoch": 0.610479375696767, "grad_norm": 1.8362658559305138, "learning_rate": 1.8462451347614974e-05, "loss": 0.8963, "step": 8214 }, { "epoch": 0.6105536975102193, "grad_norm": 2.338548753156981, "learning_rate": 1.846202382359133e-05, "loss": 1.02, "step": 8215 }, { "epoch": 0.6106280193236715, "grad_norm": 1.8200432197605936, "learning_rate": 1.8461596245089615e-05, "loss": 0.7791, "step": 8216 }, { "epoch": 0.6107023411371237, "grad_norm": 1.7909837627557315, "learning_rate": 1.846116861211259e-05, "loss": 0.7514, "step": 8217 }, { "epoch": 0.610776662950576, "grad_norm": 2.0597450012109992, "learning_rate": 1.8460740924662992e-05, "loss": 0.8877, "step": 8218 }, { "epoch": 0.6108509847640282, "grad_norm": 2.049299115108208, "learning_rate": 1.8460313182743586e-05, "loss": 0.8958, "step": 8219 }, { "epoch": 0.6109253065774805, "grad_norm": 1.816106447528521, "learning_rate": 1.8459885386357123e-05, "loss": 0.8657, "step": 8220 }, { "epoch": 0.6109996283909327, "grad_norm": 1.6699871302046552, "learning_rate": 1.8459457535506356e-05, "loss": 0.6523, "step": 8221 }, { "epoch": 0.611073950204385, "grad_norm": 1.6188010835092834, "learning_rate": 1.845902963019404e-05, "loss": 0.744, "step": 8222 }, { "epoch": 0.6111482720178373, "grad_norm": 2.9595247286774833, "learning_rate": 1.845860167042293e-05, "loss": 1.0717, "step": 8223 }, { "epoch": 0.6112225938312895, "grad_norm": 1.8469869699784798, "learning_rate": 1.8458173656195782e-05, "loss": 0.9435, "step": 8224 }, { "epoch": 0.6112969156447418, "grad_norm": 2.1484712576272704, "learning_rate": 1.845774558751535e-05, "loss": 0.938, "step": 8225 }, { "epoch": 0.611371237458194, "grad_norm": 1.6703277306357796, "learning_rate": 1.8457317464384394e-05, "loss": 0.6252, "step": 8226 }, { "epoch": 0.6114455592716462, "grad_norm": 1.7213746533955636, "learning_rate": 1.8456889286805664e-05, "loss": 0.7987, "step": 8227 }, { "epoch": 0.6115198810850985, "grad_norm": 2.3694701749721916, "learning_rate": 1.845646105478192e-05, "loss": 1.0326, "step": 8228 }, { "epoch": 0.6115942028985507, "grad_norm": 2.21788845847881, "learning_rate": 1.8456032768315917e-05, "loss": 0.966, "step": 8229 }, { "epoch": 0.611668524712003, "grad_norm": 2.0622398174914807, "learning_rate": 1.8455604427410417e-05, "loss": 0.816, "step": 8230 }, { "epoch": 0.6117428465254552, "grad_norm": 2.311987231469313, "learning_rate": 1.8455176032068173e-05, "loss": 0.7674, "step": 8231 }, { "epoch": 0.6118171683389074, "grad_norm": 1.6852739310683054, "learning_rate": 1.8454747582291946e-05, "loss": 0.7878, "step": 8232 }, { "epoch": 0.6118914901523597, "grad_norm": 1.6507166559507331, "learning_rate": 1.8454319078084487e-05, "loss": 0.8777, "step": 8233 }, { "epoch": 0.611965811965812, "grad_norm": 2.420461049673829, "learning_rate": 1.8453890519448568e-05, "loss": 0.8026, "step": 8234 }, { "epoch": 0.6120401337792643, "grad_norm": 1.9780842472376474, "learning_rate": 1.8453461906386937e-05, "loss": 0.8991, "step": 8235 }, { "epoch": 0.6121144555927165, "grad_norm": 1.8199912405413226, "learning_rate": 1.8453033238902354e-05, "loss": 0.7821, "step": 8236 }, { "epoch": 0.6121887774061687, "grad_norm": 1.9168040230339227, "learning_rate": 1.845260451699759e-05, "loss": 0.9392, "step": 8237 }, { "epoch": 0.612263099219621, "grad_norm": 1.9544473441489039, "learning_rate": 1.8452175740675385e-05, "loss": 0.8127, "step": 8238 }, { "epoch": 0.6123374210330732, "grad_norm": 2.3533602411081787, "learning_rate": 1.845174690993852e-05, "loss": 0.8243, "step": 8239 }, { "epoch": 0.6124117428465254, "grad_norm": 1.6245793199924046, "learning_rate": 1.8451318024789744e-05, "loss": 0.7888, "step": 8240 }, { "epoch": 0.6124860646599777, "grad_norm": 9.324330368620824, "learning_rate": 1.845088908523182e-05, "loss": 0.7518, "step": 8241 }, { "epoch": 0.6125603864734299, "grad_norm": 1.6906390138024319, "learning_rate": 1.8450460091267515e-05, "loss": 0.6503, "step": 8242 }, { "epoch": 0.6126347082868822, "grad_norm": 1.9520253800867542, "learning_rate": 1.8450031042899582e-05, "loss": 0.9364, "step": 8243 }, { "epoch": 0.6127090301003344, "grad_norm": 1.7777284111060414, "learning_rate": 1.844960194013079e-05, "loss": 0.8445, "step": 8244 }, { "epoch": 0.6127833519137867, "grad_norm": 1.987161496994776, "learning_rate": 1.8449172782963896e-05, "loss": 0.8101, "step": 8245 }, { "epoch": 0.612857673727239, "grad_norm": 2.5313258664643223, "learning_rate": 1.844874357140167e-05, "loss": 0.879, "step": 8246 }, { "epoch": 0.6129319955406912, "grad_norm": 1.9567764090439022, "learning_rate": 1.844831430544687e-05, "loss": 1.026, "step": 8247 }, { "epoch": 0.6130063173541435, "grad_norm": 1.8260420091348284, "learning_rate": 1.8447884985102258e-05, "loss": 0.9126, "step": 8248 }, { "epoch": 0.6130806391675957, "grad_norm": 32.226063494163135, "learning_rate": 1.8447455610370603e-05, "loss": 0.9935, "step": 8249 }, { "epoch": 0.6131549609810479, "grad_norm": 1.7740037694410755, "learning_rate": 1.8447026181254665e-05, "loss": 0.992, "step": 8250 }, { "epoch": 0.6132292827945002, "grad_norm": 1.967125285692935, "learning_rate": 1.8446596697757213e-05, "loss": 0.8326, "step": 8251 }, { "epoch": 0.6133036046079524, "grad_norm": 3.418880581819041, "learning_rate": 1.844616715988101e-05, "loss": 0.7322, "step": 8252 }, { "epoch": 0.6133779264214047, "grad_norm": 1.8888523448320136, "learning_rate": 1.844573756762882e-05, "loss": 1.0411, "step": 8253 }, { "epoch": 0.6134522482348569, "grad_norm": 2.015954120778418, "learning_rate": 1.8445307921003413e-05, "loss": 0.9263, "step": 8254 }, { "epoch": 0.6135265700483091, "grad_norm": 2.121937288907278, "learning_rate": 1.844487822000755e-05, "loss": 0.7013, "step": 8255 }, { "epoch": 0.6136008918617615, "grad_norm": 2.271482859024307, "learning_rate": 1.8444448464643996e-05, "loss": 0.9617, "step": 8256 }, { "epoch": 0.6136752136752137, "grad_norm": 2.1926493303839396, "learning_rate": 1.844401865491552e-05, "loss": 0.9072, "step": 8257 }, { "epoch": 0.613749535488666, "grad_norm": 1.9711770376059943, "learning_rate": 1.8443588790824896e-05, "loss": 0.8997, "step": 8258 }, { "epoch": 0.6138238573021182, "grad_norm": 2.2640018406598985, "learning_rate": 1.8443158872374884e-05, "loss": 0.9306, "step": 8259 }, { "epoch": 0.6138981791155704, "grad_norm": 2.34174140906725, "learning_rate": 1.844272889956825e-05, "loss": 0.8118, "step": 8260 }, { "epoch": 0.6139725009290227, "grad_norm": 2.037474673621085, "learning_rate": 1.844229887240777e-05, "loss": 0.9517, "step": 8261 }, { "epoch": 0.6140468227424749, "grad_norm": 4.577690716229319, "learning_rate": 1.8441868790896204e-05, "loss": 1.0675, "step": 8262 }, { "epoch": 0.6141211445559271, "grad_norm": 1.989285933501258, "learning_rate": 1.8441438655036324e-05, "loss": 0.7872, "step": 8263 }, { "epoch": 0.6141954663693794, "grad_norm": 2.502635184182781, "learning_rate": 1.8441008464830902e-05, "loss": 0.9465, "step": 8264 }, { "epoch": 0.6142697881828316, "grad_norm": 1.7624837985859385, "learning_rate": 1.844057822028271e-05, "loss": 0.6313, "step": 8265 }, { "epoch": 0.6143441099962839, "grad_norm": 1.7943842257087346, "learning_rate": 1.8440147921394507e-05, "loss": 0.7906, "step": 8266 }, { "epoch": 0.6144184318097362, "grad_norm": 2.4657259417720025, "learning_rate": 1.843971756816907e-05, "loss": 0.7028, "step": 8267 }, { "epoch": 0.6144927536231884, "grad_norm": 2.4339889413214584, "learning_rate": 1.843928716060917e-05, "loss": 1.0323, "step": 8268 }, { "epoch": 0.6145670754366407, "grad_norm": 1.6617657180020873, "learning_rate": 1.843885669871758e-05, "loss": 0.7055, "step": 8269 }, { "epoch": 0.6146413972500929, "grad_norm": 2.2484241727442154, "learning_rate": 1.8438426182497064e-05, "loss": 0.9095, "step": 8270 }, { "epoch": 0.6147157190635452, "grad_norm": 2.0661986583952507, "learning_rate": 1.84379956119504e-05, "loss": 1.0113, "step": 8271 }, { "epoch": 0.6147900408769974, "grad_norm": 2.430190796428967, "learning_rate": 1.8437564987080358e-05, "loss": 0.838, "step": 8272 }, { "epoch": 0.6148643626904496, "grad_norm": 2.240574941551259, "learning_rate": 1.843713430788971e-05, "loss": 0.931, "step": 8273 }, { "epoch": 0.6149386845039019, "grad_norm": 2.373482053175323, "learning_rate": 1.843670357438123e-05, "loss": 0.9705, "step": 8274 }, { "epoch": 0.6150130063173541, "grad_norm": 1.7551666977134681, "learning_rate": 1.8436272786557693e-05, "loss": 1.0191, "step": 8275 }, { "epoch": 0.6150873281308064, "grad_norm": 2.005405559575848, "learning_rate": 1.8435841944421864e-05, "loss": 0.8109, "step": 8276 }, { "epoch": 0.6151616499442586, "grad_norm": 2.1007353665294004, "learning_rate": 1.8435411047976525e-05, "loss": 0.9835, "step": 8277 }, { "epoch": 0.6152359717577109, "grad_norm": 2.931186780230786, "learning_rate": 1.8434980097224445e-05, "loss": 0.8085, "step": 8278 }, { "epoch": 0.6153102935711632, "grad_norm": 2.475691256320812, "learning_rate": 1.8434549092168405e-05, "loss": 0.8711, "step": 8279 }, { "epoch": 0.6153846153846154, "grad_norm": 1.8467897563313884, "learning_rate": 1.8434118032811176e-05, "loss": 0.9157, "step": 8280 }, { "epoch": 0.6154589371980677, "grad_norm": 3.084330101046411, "learning_rate": 1.843368691915553e-05, "loss": 0.9803, "step": 8281 }, { "epoch": 0.6155332590115199, "grad_norm": 2.1745056966999146, "learning_rate": 1.8433255751204243e-05, "loss": 0.6842, "step": 8282 }, { "epoch": 0.6156075808249721, "grad_norm": 1.88802165015517, "learning_rate": 1.8432824528960095e-05, "loss": 0.9492, "step": 8283 }, { "epoch": 0.6156819026384244, "grad_norm": 2.2249051102007753, "learning_rate": 1.843239325242586e-05, "loss": 1.0744, "step": 8284 }, { "epoch": 0.6157562244518766, "grad_norm": 1.897227321158089, "learning_rate": 1.8431961921604317e-05, "loss": 0.8889, "step": 8285 }, { "epoch": 0.6158305462653288, "grad_norm": 2.7339725955789533, "learning_rate": 1.8431530536498238e-05, "loss": 0.8649, "step": 8286 }, { "epoch": 0.6159048680787811, "grad_norm": 2.3006983275614084, "learning_rate": 1.8431099097110404e-05, "loss": 1.0125, "step": 8287 }, { "epoch": 0.6159791898922333, "grad_norm": 2.0186236643541142, "learning_rate": 1.8430667603443594e-05, "loss": 1.0042, "step": 8288 }, { "epoch": 0.6160535117056856, "grad_norm": 1.9099842651636991, "learning_rate": 1.8430236055500577e-05, "loss": 0.7385, "step": 8289 }, { "epoch": 0.6161278335191379, "grad_norm": 2.1303322978241424, "learning_rate": 1.8429804453284147e-05, "loss": 0.7897, "step": 8290 }, { "epoch": 0.6162021553325902, "grad_norm": 2.1089208400196497, "learning_rate": 1.8429372796797067e-05, "loss": 0.8599, "step": 8291 }, { "epoch": 0.6162764771460424, "grad_norm": 1.7217295446083474, "learning_rate": 1.8428941086042123e-05, "loss": 0.7892, "step": 8292 }, { "epoch": 0.6163507989594946, "grad_norm": 1.806040230586614, "learning_rate": 1.842850932102209e-05, "loss": 0.9765, "step": 8293 }, { "epoch": 0.6164251207729469, "grad_norm": 2.183819386164002, "learning_rate": 1.8428077501739757e-05, "loss": 0.7039, "step": 8294 }, { "epoch": 0.6164994425863991, "grad_norm": 2.463681951519806, "learning_rate": 1.84276456281979e-05, "loss": 1.0531, "step": 8295 }, { "epoch": 0.6165737643998513, "grad_norm": 3.2241435385893387, "learning_rate": 1.842721370039929e-05, "loss": 0.5863, "step": 8296 }, { "epoch": 0.6166480862133036, "grad_norm": 2.2979369493662256, "learning_rate": 1.8426781718346724e-05, "loss": 0.8742, "step": 8297 }, { "epoch": 0.6167224080267558, "grad_norm": 1.9358503581481143, "learning_rate": 1.8426349682042968e-05, "loss": 0.7224, "step": 8298 }, { "epoch": 0.6167967298402081, "grad_norm": 2.0936557009618944, "learning_rate": 1.842591759149081e-05, "loss": 0.565, "step": 8299 }, { "epoch": 0.6168710516536603, "grad_norm": 3.28920664939613, "learning_rate": 1.8425485446693037e-05, "loss": 0.9764, "step": 8300 }, { "epoch": 0.6169453734671126, "grad_norm": 3.178847506732401, "learning_rate": 1.842505324765242e-05, "loss": 0.689, "step": 8301 }, { "epoch": 0.6170196952805649, "grad_norm": 1.9942408151034807, "learning_rate": 1.8424620994371755e-05, "loss": 0.752, "step": 8302 }, { "epoch": 0.6170940170940171, "grad_norm": 2.682560068651225, "learning_rate": 1.842418868685381e-05, "loss": 0.9338, "step": 8303 }, { "epoch": 0.6171683389074694, "grad_norm": 2.338995958780215, "learning_rate": 1.842375632510138e-05, "loss": 0.9352, "step": 8304 }, { "epoch": 0.6172426607209216, "grad_norm": 2.173221240274235, "learning_rate": 1.842332390911724e-05, "loss": 1.098, "step": 8305 }, { "epoch": 0.6173169825343738, "grad_norm": 2.308947655155742, "learning_rate": 1.8422891438904177e-05, "loss": 0.9628, "step": 8306 }, { "epoch": 0.6173913043478261, "grad_norm": 3.1927924433690222, "learning_rate": 1.8422458914464976e-05, "loss": 0.9959, "step": 8307 }, { "epoch": 0.6174656261612783, "grad_norm": 2.0183837028159464, "learning_rate": 1.8422026335802426e-05, "loss": 0.9035, "step": 8308 }, { "epoch": 0.6175399479747306, "grad_norm": 2.0218156783560275, "learning_rate": 1.8421593702919308e-05, "loss": 1.0561, "step": 8309 }, { "epoch": 0.6176142697881828, "grad_norm": 2.6624492676050022, "learning_rate": 1.8421161015818404e-05, "loss": 0.9502, "step": 8310 }, { "epoch": 0.617688591601635, "grad_norm": 2.7640972200974208, "learning_rate": 1.84207282745025e-05, "loss": 0.8526, "step": 8311 }, { "epoch": 0.6177629134150874, "grad_norm": 1.64758457515704, "learning_rate": 1.8420295478974387e-05, "loss": 0.662, "step": 8312 }, { "epoch": 0.6178372352285396, "grad_norm": 2.1360173704684, "learning_rate": 1.8419862629236845e-05, "loss": 0.871, "step": 8313 }, { "epoch": 0.6179115570419919, "grad_norm": 1.7568903169349281, "learning_rate": 1.841942972529267e-05, "loss": 0.8492, "step": 8314 }, { "epoch": 0.6179858788554441, "grad_norm": 2.237214653228342, "learning_rate": 1.841899676714464e-05, "loss": 0.9955, "step": 8315 }, { "epoch": 0.6180602006688963, "grad_norm": 1.9142815391981949, "learning_rate": 1.8418563754795547e-05, "loss": 0.8139, "step": 8316 }, { "epoch": 0.6181345224823486, "grad_norm": 3.24733675873529, "learning_rate": 1.8418130688248176e-05, "loss": 0.9668, "step": 8317 }, { "epoch": 0.6182088442958008, "grad_norm": 2.3246751614703443, "learning_rate": 1.841769756750532e-05, "loss": 0.844, "step": 8318 }, { "epoch": 0.618283166109253, "grad_norm": 2.1602169897732173, "learning_rate": 1.8417264392569758e-05, "loss": 0.8581, "step": 8319 }, { "epoch": 0.6183574879227053, "grad_norm": 1.746327316839413, "learning_rate": 1.8416831163444285e-05, "loss": 0.9112, "step": 8320 }, { "epoch": 0.6184318097361575, "grad_norm": 4.205206562095933, "learning_rate": 1.8416397880131695e-05, "loss": 0.9019, "step": 8321 }, { "epoch": 0.6185061315496098, "grad_norm": 2.219673748462072, "learning_rate": 1.8415964542634767e-05, "loss": 0.7591, "step": 8322 }, { "epoch": 0.6185804533630621, "grad_norm": 2.5201489325861357, "learning_rate": 1.84155311509563e-05, "loss": 0.6819, "step": 8323 }, { "epoch": 0.6186547751765143, "grad_norm": 1.9093751494962317, "learning_rate": 1.841509770509908e-05, "loss": 0.7333, "step": 8324 }, { "epoch": 0.6187290969899666, "grad_norm": 1.959756480230889, "learning_rate": 1.8414664205065894e-05, "loss": 0.9887, "step": 8325 }, { "epoch": 0.6188034188034188, "grad_norm": 2.3275840768254716, "learning_rate": 1.841423065085954e-05, "loss": 0.9654, "step": 8326 }, { "epoch": 0.6188777406168711, "grad_norm": 1.9699637143125774, "learning_rate": 1.8413797042482804e-05, "loss": 0.8195, "step": 8327 }, { "epoch": 0.6189520624303233, "grad_norm": 2.2168655216627924, "learning_rate": 1.841336337993848e-05, "loss": 1.0204, "step": 8328 }, { "epoch": 0.6190263842437755, "grad_norm": 1.943862133361809, "learning_rate": 1.8412929663229356e-05, "loss": 0.914, "step": 8329 }, { "epoch": 0.6191007060572278, "grad_norm": 2.2480376809003193, "learning_rate": 1.8412495892358232e-05, "loss": 0.8702, "step": 8330 }, { "epoch": 0.61917502787068, "grad_norm": 2.718584363386391, "learning_rate": 1.8412062067327893e-05, "loss": 1.0437, "step": 8331 }, { "epoch": 0.6192493496841323, "grad_norm": 2.229598257901448, "learning_rate": 1.8411628188141134e-05, "loss": 0.9059, "step": 8332 }, { "epoch": 0.6193236714975845, "grad_norm": 2.0118957762980525, "learning_rate": 1.8411194254800752e-05, "loss": 1.0012, "step": 8333 }, { "epoch": 0.6193979933110368, "grad_norm": 2.558758677421563, "learning_rate": 1.8410760267309538e-05, "loss": 0.8716, "step": 8334 }, { "epoch": 0.6194723151244891, "grad_norm": 1.8071424818182926, "learning_rate": 1.8410326225670282e-05, "loss": 0.8315, "step": 8335 }, { "epoch": 0.6195466369379413, "grad_norm": 2.1301344342165063, "learning_rate": 1.8409892129885783e-05, "loss": 0.8578, "step": 8336 }, { "epoch": 0.6196209587513936, "grad_norm": 1.666643600858038, "learning_rate": 1.8409457979958837e-05, "loss": 0.8279, "step": 8337 }, { "epoch": 0.6196952805648458, "grad_norm": 2.258981982645278, "learning_rate": 1.8409023775892238e-05, "loss": 0.9868, "step": 8338 }, { "epoch": 0.619769602378298, "grad_norm": 1.768775902994469, "learning_rate": 1.840858951768878e-05, "loss": 0.8161, "step": 8339 }, { "epoch": 0.6198439241917503, "grad_norm": 2.3017964792158674, "learning_rate": 1.8408155205351254e-05, "loss": 0.9258, "step": 8340 }, { "epoch": 0.6199182460052025, "grad_norm": 1.7321940409184744, "learning_rate": 1.8407720838882466e-05, "loss": 0.9469, "step": 8341 }, { "epoch": 0.6199925678186547, "grad_norm": 1.9305276756569343, "learning_rate": 1.8407286418285206e-05, "loss": 0.8547, "step": 8342 }, { "epoch": 0.620066889632107, "grad_norm": 2.506789842562446, "learning_rate": 1.8406851943562273e-05, "loss": 0.9616, "step": 8343 }, { "epoch": 0.6201412114455592, "grad_norm": 1.6464203260550196, "learning_rate": 1.8406417414716462e-05, "loss": 0.8492, "step": 8344 }, { "epoch": 0.6202155332590115, "grad_norm": 1.9619222395326696, "learning_rate": 1.8405982831750573e-05, "loss": 0.8494, "step": 8345 }, { "epoch": 0.6202898550724638, "grad_norm": 2.1670864992731467, "learning_rate": 1.84055481946674e-05, "loss": 1.0879, "step": 8346 }, { "epoch": 0.620364176885916, "grad_norm": 1.9408510729761534, "learning_rate": 1.8405113503469746e-05, "loss": 0.9091, "step": 8347 }, { "epoch": 0.6204384986993683, "grad_norm": 1.6410223945465243, "learning_rate": 1.8404678758160407e-05, "loss": 0.867, "step": 8348 }, { "epoch": 0.6205128205128205, "grad_norm": 1.954534052666838, "learning_rate": 1.8404243958742183e-05, "loss": 1.0819, "step": 8349 }, { "epoch": 0.6205871423262728, "grad_norm": 1.6345428771692916, "learning_rate": 1.840380910521787e-05, "loss": 0.6735, "step": 8350 }, { "epoch": 0.620661464139725, "grad_norm": 4.843499231591167, "learning_rate": 1.8403374197590275e-05, "loss": 0.7257, "step": 8351 }, { "epoch": 0.6207357859531772, "grad_norm": 2.1589010942512363, "learning_rate": 1.840293923586219e-05, "loss": 0.8619, "step": 8352 }, { "epoch": 0.6208101077666295, "grad_norm": 1.7736679142759164, "learning_rate": 1.8402504220036418e-05, "loss": 0.6814, "step": 8353 }, { "epoch": 0.6208844295800817, "grad_norm": 2.602726054345394, "learning_rate": 1.840206915011576e-05, "loss": 0.8601, "step": 8354 }, { "epoch": 0.620958751393534, "grad_norm": 2.0642923147762295, "learning_rate": 1.8401634026103018e-05, "loss": 0.9082, "step": 8355 }, { "epoch": 0.6210330732069862, "grad_norm": 1.834288288279409, "learning_rate": 1.840119884800099e-05, "loss": 0.8145, "step": 8356 }, { "epoch": 0.6211073950204385, "grad_norm": 2.519803415822464, "learning_rate": 1.840076361581248e-05, "loss": 1.1169, "step": 8357 }, { "epoch": 0.6211817168338908, "grad_norm": 1.6302596424212845, "learning_rate": 1.840032832954029e-05, "loss": 0.7767, "step": 8358 }, { "epoch": 0.621256038647343, "grad_norm": 2.0080178651850815, "learning_rate": 1.8399892989187222e-05, "loss": 0.8321, "step": 8359 }, { "epoch": 0.6213303604607953, "grad_norm": 1.9999389184321095, "learning_rate": 1.839945759475608e-05, "loss": 1.0151, "step": 8360 }, { "epoch": 0.6214046822742475, "grad_norm": 2.592180104409173, "learning_rate": 1.8399022146249662e-05, "loss": 0.787, "step": 8361 }, { "epoch": 0.6214790040876997, "grad_norm": 2.6530839479521564, "learning_rate": 1.839858664367078e-05, "loss": 0.8122, "step": 8362 }, { "epoch": 0.621553325901152, "grad_norm": 1.917341299452187, "learning_rate": 1.8398151087022233e-05, "loss": 0.9173, "step": 8363 }, { "epoch": 0.6216276477146042, "grad_norm": 2.1435150900920372, "learning_rate": 1.8397715476306822e-05, "loss": 0.9963, "step": 8364 }, { "epoch": 0.6217019695280565, "grad_norm": 7.773902759772412, "learning_rate": 1.8397279811527356e-05, "loss": 0.9002, "step": 8365 }, { "epoch": 0.6217762913415087, "grad_norm": 2.116228957684783, "learning_rate": 1.8396844092686638e-05, "loss": 0.8778, "step": 8366 }, { "epoch": 0.6218506131549609, "grad_norm": 2.4696748064673453, "learning_rate": 1.8396408319787475e-05, "loss": 0.8597, "step": 8367 }, { "epoch": 0.6219249349684133, "grad_norm": 2.126483367471428, "learning_rate": 1.8395972492832668e-05, "loss": 1.0511, "step": 8368 }, { "epoch": 0.6219992567818655, "grad_norm": 2.2548618003109233, "learning_rate": 1.8395536611825026e-05, "loss": 0.821, "step": 8369 }, { "epoch": 0.6220735785953178, "grad_norm": 1.8973936901824033, "learning_rate": 1.8395100676767357e-05, "loss": 0.715, "step": 8370 }, { "epoch": 0.62214790040877, "grad_norm": 2.3077487271893484, "learning_rate": 1.839466468766246e-05, "loss": 0.9549, "step": 8371 }, { "epoch": 0.6222222222222222, "grad_norm": 1.7769054933309902, "learning_rate": 1.8394228644513154e-05, "loss": 0.7723, "step": 8372 }, { "epoch": 0.6222965440356745, "grad_norm": 3.8532365301083877, "learning_rate": 1.839379254732224e-05, "loss": 0.7979, "step": 8373 }, { "epoch": 0.6223708658491267, "grad_norm": 2.1731006934028145, "learning_rate": 1.839335639609252e-05, "loss": 0.942, "step": 8374 }, { "epoch": 0.622445187662579, "grad_norm": 2.029490447820236, "learning_rate": 1.8392920190826812e-05, "loss": 0.7807, "step": 8375 }, { "epoch": 0.6225195094760312, "grad_norm": 1.6799138861405578, "learning_rate": 1.8392483931527917e-05, "loss": 0.782, "step": 8376 }, { "epoch": 0.6225938312894834, "grad_norm": 2.2427032653301016, "learning_rate": 1.8392047618198643e-05, "loss": 0.8037, "step": 8377 }, { "epoch": 0.6226681531029357, "grad_norm": 1.9737624884696117, "learning_rate": 1.8391611250841806e-05, "loss": 1.0179, "step": 8378 }, { "epoch": 0.622742474916388, "grad_norm": 1.801933057355304, "learning_rate": 1.839117482946021e-05, "loss": 0.7653, "step": 8379 }, { "epoch": 0.6228167967298402, "grad_norm": 1.9128590724435939, "learning_rate": 1.8390738354056666e-05, "loss": 0.9267, "step": 8380 }, { "epoch": 0.6228911185432925, "grad_norm": 7.017185500610903, "learning_rate": 1.839030182463398e-05, "loss": 0.8988, "step": 8381 }, { "epoch": 0.6229654403567447, "grad_norm": 2.2513534889483617, "learning_rate": 1.8389865241194972e-05, "loss": 0.8195, "step": 8382 }, { "epoch": 0.623039762170197, "grad_norm": 2.1898860514007037, "learning_rate": 1.8389428603742442e-05, "loss": 1.017, "step": 8383 }, { "epoch": 0.6231140839836492, "grad_norm": 2.8401059142186904, "learning_rate": 1.838899191227921e-05, "loss": 0.9127, "step": 8384 }, { "epoch": 0.6231884057971014, "grad_norm": 3.336139252308016, "learning_rate": 1.838855516680808e-05, "loss": 0.9467, "step": 8385 }, { "epoch": 0.6232627276105537, "grad_norm": 2.410090825817728, "learning_rate": 1.838811836733187e-05, "loss": 0.9999, "step": 8386 }, { "epoch": 0.6233370494240059, "grad_norm": 2.2393005202295533, "learning_rate": 1.838768151385339e-05, "loss": 0.9671, "step": 8387 }, { "epoch": 0.6234113712374582, "grad_norm": 2.147589020143292, "learning_rate": 1.8387244606375447e-05, "loss": 0.7223, "step": 8388 }, { "epoch": 0.6234856930509104, "grad_norm": 1.8847207564627764, "learning_rate": 1.838680764490086e-05, "loss": 0.9547, "step": 8389 }, { "epoch": 0.6235600148643627, "grad_norm": 2.429677183109924, "learning_rate": 1.8386370629432445e-05, "loss": 0.6811, "step": 8390 }, { "epoch": 0.623634336677815, "grad_norm": 2.490976967202928, "learning_rate": 1.8385933559973007e-05, "loss": 0.7732, "step": 8391 }, { "epoch": 0.6237086584912672, "grad_norm": 1.8089734317607997, "learning_rate": 1.8385496436525363e-05, "loss": 0.8854, "step": 8392 }, { "epoch": 0.6237829803047195, "grad_norm": 2.189195978452768, "learning_rate": 1.838505925909233e-05, "loss": 0.9038, "step": 8393 }, { "epoch": 0.6238573021181717, "grad_norm": 1.83107540788222, "learning_rate": 1.838462202767672e-05, "loss": 0.8739, "step": 8394 }, { "epoch": 0.6239316239316239, "grad_norm": 2.3529747293179892, "learning_rate": 1.8384184742281347e-05, "loss": 0.9363, "step": 8395 }, { "epoch": 0.6240059457450762, "grad_norm": 2.0682074245791635, "learning_rate": 1.838374740290903e-05, "loss": 0.8321, "step": 8396 }, { "epoch": 0.6240802675585284, "grad_norm": 2.2854098190763095, "learning_rate": 1.838331000956258e-05, "loss": 0.9051, "step": 8397 }, { "epoch": 0.6241545893719807, "grad_norm": 4.347361766245048, "learning_rate": 1.8382872562244817e-05, "loss": 1.0003, "step": 8398 }, { "epoch": 0.6242289111854329, "grad_norm": 2.710267686556115, "learning_rate": 1.8382435060958553e-05, "loss": 0.7327, "step": 8399 }, { "epoch": 0.6243032329988851, "grad_norm": 2.1531164013336905, "learning_rate": 1.838199750570661e-05, "loss": 1.0828, "step": 8400 }, { "epoch": 0.6243775548123375, "grad_norm": 2.0773285874668606, "learning_rate": 1.8381559896491802e-05, "loss": 0.9591, "step": 8401 }, { "epoch": 0.6244518766257897, "grad_norm": 1.8891263504684328, "learning_rate": 1.8381122233316943e-05, "loss": 0.9583, "step": 8402 }, { "epoch": 0.624526198439242, "grad_norm": 4.979796271770509, "learning_rate": 1.8380684516184858e-05, "loss": 0.9171, "step": 8403 }, { "epoch": 0.6246005202526942, "grad_norm": 1.970655182152921, "learning_rate": 1.838024674509836e-05, "loss": 0.9176, "step": 8404 }, { "epoch": 0.6246748420661464, "grad_norm": 2.2903860900688136, "learning_rate": 1.8379808920060264e-05, "loss": 0.8282, "step": 8405 }, { "epoch": 0.6247491638795987, "grad_norm": 1.7280198783497707, "learning_rate": 1.83793710410734e-05, "loss": 0.6891, "step": 8406 }, { "epoch": 0.6248234856930509, "grad_norm": 2.3535316639909554, "learning_rate": 1.8378933108140576e-05, "loss": 0.8381, "step": 8407 }, { "epoch": 0.6248978075065031, "grad_norm": 1.7028860786400342, "learning_rate": 1.837849512126461e-05, "loss": 0.8662, "step": 8408 }, { "epoch": 0.6249721293199554, "grad_norm": 1.9722707116397684, "learning_rate": 1.837805708044834e-05, "loss": 0.7625, "step": 8409 }, { "epoch": 0.6250464511334076, "grad_norm": 1.7781815952537097, "learning_rate": 1.8377618985694563e-05, "loss": 0.8215, "step": 8410 }, { "epoch": 0.6251207729468599, "grad_norm": 1.7852382107051559, "learning_rate": 1.8377180837006113e-05, "loss": 0.7406, "step": 8411 }, { "epoch": 0.6251950947603121, "grad_norm": 1.9686345739916562, "learning_rate": 1.8376742634385808e-05, "loss": 0.8516, "step": 8412 }, { "epoch": 0.6252694165737644, "grad_norm": 2.133667499067631, "learning_rate": 1.8376304377836468e-05, "loss": 0.8698, "step": 8413 }, { "epoch": 0.6253437383872167, "grad_norm": 1.8384817411867544, "learning_rate": 1.8375866067360913e-05, "loss": 0.8653, "step": 8414 }, { "epoch": 0.6254180602006689, "grad_norm": 1.761148319397033, "learning_rate": 1.8375427702961968e-05, "loss": 0.7502, "step": 8415 }, { "epoch": 0.6254923820141212, "grad_norm": 2.103265679064994, "learning_rate": 1.8374989284642453e-05, "loss": 1.0147, "step": 8416 }, { "epoch": 0.6255667038275734, "grad_norm": 2.1670090234046753, "learning_rate": 1.8374550812405195e-05, "loss": 1.0488, "step": 8417 }, { "epoch": 0.6256410256410256, "grad_norm": 2.0093363536213147, "learning_rate": 1.837411228625301e-05, "loss": 0.8827, "step": 8418 }, { "epoch": 0.6257153474544779, "grad_norm": 2.150212659635922, "learning_rate": 1.8373673706188725e-05, "loss": 0.8363, "step": 8419 }, { "epoch": 0.6257896692679301, "grad_norm": 2.7620712087229164, "learning_rate": 1.8373235072215164e-05, "loss": 0.7284, "step": 8420 }, { "epoch": 0.6258639910813824, "grad_norm": 2.0746401440287414, "learning_rate": 1.8372796384335152e-05, "loss": 0.9818, "step": 8421 }, { "epoch": 0.6259383128948346, "grad_norm": 1.7504428738390458, "learning_rate": 1.837235764255151e-05, "loss": 0.8159, "step": 8422 }, { "epoch": 0.6260126347082868, "grad_norm": 4.18827767997942, "learning_rate": 1.8371918846867066e-05, "loss": 0.7639, "step": 8423 }, { "epoch": 0.6260869565217392, "grad_norm": 1.9962451229181508, "learning_rate": 1.8371479997284636e-05, "loss": 0.9075, "step": 8424 }, { "epoch": 0.6261612783351914, "grad_norm": 1.8710411199009658, "learning_rate": 1.837104109380706e-05, "loss": 0.8345, "step": 8425 }, { "epoch": 0.6262356001486437, "grad_norm": 2.362412691477328, "learning_rate": 1.837060213643715e-05, "loss": 1.0154, "step": 8426 }, { "epoch": 0.6263099219620959, "grad_norm": 1.7558313642363095, "learning_rate": 1.8370163125177742e-05, "loss": 0.9693, "step": 8427 }, { "epoch": 0.6263842437755481, "grad_norm": 1.9882526525425162, "learning_rate": 1.8369724060031655e-05, "loss": 0.7679, "step": 8428 }, { "epoch": 0.6264585655890004, "grad_norm": 1.6899546642568137, "learning_rate": 1.836928494100172e-05, "loss": 0.8972, "step": 8429 }, { "epoch": 0.6265328874024526, "grad_norm": 1.8284734113313525, "learning_rate": 1.8368845768090763e-05, "loss": 0.8704, "step": 8430 }, { "epoch": 0.6266072092159048, "grad_norm": 1.9866499741071302, "learning_rate": 1.836840654130161e-05, "loss": 0.8287, "step": 8431 }, { "epoch": 0.6266815310293571, "grad_norm": 1.6133106807767623, "learning_rate": 1.836796726063709e-05, "loss": 0.6838, "step": 8432 }, { "epoch": 0.6267558528428093, "grad_norm": 2.043963477761852, "learning_rate": 1.8367527926100034e-05, "loss": 0.9274, "step": 8433 }, { "epoch": 0.6268301746562616, "grad_norm": 2.105738612939749, "learning_rate": 1.8367088537693262e-05, "loss": 0.9814, "step": 8434 }, { "epoch": 0.6269044964697139, "grad_norm": 2.0461796029526442, "learning_rate": 1.8366649095419612e-05, "loss": 0.8909, "step": 8435 }, { "epoch": 0.6269788182831662, "grad_norm": 2.3457720860303777, "learning_rate": 1.836620959928191e-05, "loss": 0.9866, "step": 8436 }, { "epoch": 0.6270531400966184, "grad_norm": 2.7401282481068887, "learning_rate": 1.8365770049282987e-05, "loss": 0.9682, "step": 8437 }, { "epoch": 0.6271274619100706, "grad_norm": 1.9651132616552567, "learning_rate": 1.8365330445425666e-05, "loss": 0.8364, "step": 8438 }, { "epoch": 0.6272017837235229, "grad_norm": 2.817321199599997, "learning_rate": 1.836489078771278e-05, "loss": 1.0502, "step": 8439 }, { "epoch": 0.6272761055369751, "grad_norm": 2.0941490211392133, "learning_rate": 1.8364451076147164e-05, "loss": 0.9693, "step": 8440 }, { "epoch": 0.6273504273504273, "grad_norm": 2.3365577219050224, "learning_rate": 1.8364011310731644e-05, "loss": 0.7742, "step": 8441 }, { "epoch": 0.6274247491638796, "grad_norm": 2.1588304747457276, "learning_rate": 1.8363571491469054e-05, "loss": 0.7397, "step": 8442 }, { "epoch": 0.6274990709773318, "grad_norm": 2.48538933776712, "learning_rate": 1.8363131618362226e-05, "loss": 0.7952, "step": 8443 }, { "epoch": 0.6275733927907841, "grad_norm": 2.1148950251598566, "learning_rate": 1.836269169141399e-05, "loss": 0.9006, "step": 8444 }, { "epoch": 0.6276477146042363, "grad_norm": 1.9529624851764402, "learning_rate": 1.8362251710627176e-05, "loss": 0.9391, "step": 8445 }, { "epoch": 0.6277220364176886, "grad_norm": 1.7816056558814672, "learning_rate": 1.8361811676004623e-05, "loss": 0.7546, "step": 8446 }, { "epoch": 0.6277963582311409, "grad_norm": 1.7546085212812395, "learning_rate": 1.8361371587549158e-05, "loss": 0.7852, "step": 8447 }, { "epoch": 0.6278706800445931, "grad_norm": 2.04492541002591, "learning_rate": 1.836093144526362e-05, "loss": 0.9164, "step": 8448 }, { "epoch": 0.6279450018580454, "grad_norm": 2.3387849400065654, "learning_rate": 1.836049124915084e-05, "loss": 1.0169, "step": 8449 }, { "epoch": 0.6280193236714976, "grad_norm": 2.7126175045211114, "learning_rate": 1.8360050999213645e-05, "loss": 0.9159, "step": 8450 }, { "epoch": 0.6280936454849498, "grad_norm": 2.2155338792796915, "learning_rate": 1.835961069545488e-05, "loss": 0.9967, "step": 8451 }, { "epoch": 0.6281679672984021, "grad_norm": 2.146710531738461, "learning_rate": 1.8359170337877373e-05, "loss": 0.8792, "step": 8452 }, { "epoch": 0.6282422891118543, "grad_norm": 2.1273134436935255, "learning_rate": 1.8358729926483964e-05, "loss": 0.8266, "step": 8453 }, { "epoch": 0.6283166109253066, "grad_norm": 1.6691058126975642, "learning_rate": 1.835828946127748e-05, "loss": 0.8718, "step": 8454 }, { "epoch": 0.6283909327387588, "grad_norm": 1.9838978704709482, "learning_rate": 1.835784894226077e-05, "loss": 0.9655, "step": 8455 }, { "epoch": 0.628465254552211, "grad_norm": 1.901967928135701, "learning_rate": 1.8357408369436656e-05, "loss": 0.7178, "step": 8456 }, { "epoch": 0.6285395763656634, "grad_norm": 1.9341910073980944, "learning_rate": 1.835696774280798e-05, "loss": 0.7862, "step": 8457 }, { "epoch": 0.6286138981791156, "grad_norm": 1.8130163464578457, "learning_rate": 1.8356527062377583e-05, "loss": 0.73, "step": 8458 }, { "epoch": 0.6286882199925679, "grad_norm": 2.147390902487825, "learning_rate": 1.8356086328148296e-05, "loss": 0.8771, "step": 8459 }, { "epoch": 0.6287625418060201, "grad_norm": 2.079691758236367, "learning_rate": 1.835564554012296e-05, "loss": 1.057, "step": 8460 }, { "epoch": 0.6288368636194723, "grad_norm": 1.6611141296392928, "learning_rate": 1.8355204698304412e-05, "loss": 0.6553, "step": 8461 }, { "epoch": 0.6289111854329246, "grad_norm": 1.9373592184153423, "learning_rate": 1.8354763802695487e-05, "loss": 0.9695, "step": 8462 }, { "epoch": 0.6289855072463768, "grad_norm": 1.9144305441036236, "learning_rate": 1.8354322853299028e-05, "loss": 1.0113, "step": 8463 }, { "epoch": 0.629059829059829, "grad_norm": 1.9115488149658635, "learning_rate": 1.835388185011787e-05, "loss": 0.8754, "step": 8464 }, { "epoch": 0.6291341508732813, "grad_norm": 5.359306581221279, "learning_rate": 1.835344079315486e-05, "loss": 1.0566, "step": 8465 }, { "epoch": 0.6292084726867335, "grad_norm": 1.8557697005994565, "learning_rate": 1.8352999682412825e-05, "loss": 0.8957, "step": 8466 }, { "epoch": 0.6292827945001858, "grad_norm": 1.6897634446005414, "learning_rate": 1.8352558517894615e-05, "loss": 0.5609, "step": 8467 }, { "epoch": 0.629357116313638, "grad_norm": 1.8409680184460213, "learning_rate": 1.8352117299603068e-05, "loss": 0.836, "step": 8468 }, { "epoch": 0.6294314381270903, "grad_norm": 2.0384467467042637, "learning_rate": 1.8351676027541016e-05, "loss": 1.1158, "step": 8469 }, { "epoch": 0.6295057599405426, "grad_norm": 2.0672102961960213, "learning_rate": 1.8351234701711314e-05, "loss": 0.8812, "step": 8470 }, { "epoch": 0.6295800817539948, "grad_norm": 2.3008802822888175, "learning_rate": 1.8350793322116796e-05, "loss": 0.7348, "step": 8471 }, { "epoch": 0.6296544035674471, "grad_norm": 2.4620254236754406, "learning_rate": 1.83503518887603e-05, "loss": 0.7186, "step": 8472 }, { "epoch": 0.6297287253808993, "grad_norm": 2.9084829557732066, "learning_rate": 1.8349910401644676e-05, "loss": 0.6694, "step": 8473 }, { "epoch": 0.6298030471943515, "grad_norm": 1.9017525666901025, "learning_rate": 1.834946886077276e-05, "loss": 0.766, "step": 8474 }, { "epoch": 0.6298773690078038, "grad_norm": 1.8762050387075102, "learning_rate": 1.8349027266147395e-05, "loss": 0.7941, "step": 8475 }, { "epoch": 0.629951690821256, "grad_norm": 2.122619261313899, "learning_rate": 1.834858561777143e-05, "loss": 0.8174, "step": 8476 }, { "epoch": 0.6300260126347083, "grad_norm": 3.199075692776841, "learning_rate": 1.8348143915647697e-05, "loss": 0.7842, "step": 8477 }, { "epoch": 0.6301003344481605, "grad_norm": 4.099268240667031, "learning_rate": 1.8347702159779053e-05, "loss": 0.8359, "step": 8478 }, { "epoch": 0.6301746562616127, "grad_norm": 1.5832782261686837, "learning_rate": 1.8347260350168333e-05, "loss": 0.7936, "step": 8479 }, { "epoch": 0.6302489780750651, "grad_norm": 2.456136780901748, "learning_rate": 1.8346818486818384e-05, "loss": 1.1146, "step": 8480 }, { "epoch": 0.6303232998885173, "grad_norm": 1.9033955787209826, "learning_rate": 1.8346376569732052e-05, "loss": 0.7531, "step": 8481 }, { "epoch": 0.6303976217019696, "grad_norm": 2.1097723510926403, "learning_rate": 1.8345934598912182e-05, "loss": 0.8819, "step": 8482 }, { "epoch": 0.6304719435154218, "grad_norm": 1.6815543170036982, "learning_rate": 1.8345492574361614e-05, "loss": 0.8229, "step": 8483 }, { "epoch": 0.630546265328874, "grad_norm": 2.1332925908375784, "learning_rate": 1.8345050496083197e-05, "loss": 0.996, "step": 8484 }, { "epoch": 0.6306205871423263, "grad_norm": 1.7910546562481855, "learning_rate": 1.8344608364079785e-05, "loss": 0.8894, "step": 8485 }, { "epoch": 0.6306949089557785, "grad_norm": 2.435143063396472, "learning_rate": 1.8344166178354212e-05, "loss": 0.9117, "step": 8486 }, { "epoch": 0.6307692307692307, "grad_norm": 3.2889113842289093, "learning_rate": 1.8343723938909332e-05, "loss": 1.0355, "step": 8487 }, { "epoch": 0.630843552582683, "grad_norm": 2.4688326067769477, "learning_rate": 1.834328164574799e-05, "loss": 0.6661, "step": 8488 }, { "epoch": 0.6309178743961352, "grad_norm": 2.0286444431305144, "learning_rate": 1.834283929887303e-05, "loss": 0.6862, "step": 8489 }, { "epoch": 0.6309921962095875, "grad_norm": 2.4860869029345336, "learning_rate": 1.834239689828731e-05, "loss": 0.8496, "step": 8490 }, { "epoch": 0.6310665180230398, "grad_norm": 1.9116086231080833, "learning_rate": 1.8341954443993667e-05, "loss": 0.8386, "step": 8491 }, { "epoch": 0.631140839836492, "grad_norm": 1.8102382573739901, "learning_rate": 1.8341511935994955e-05, "loss": 0.8562, "step": 8492 }, { "epoch": 0.6312151616499443, "grad_norm": 4.1829647825778435, "learning_rate": 1.8341069374294022e-05, "loss": 0.8364, "step": 8493 }, { "epoch": 0.6312894834633965, "grad_norm": 2.1647478792444392, "learning_rate": 1.8340626758893717e-05, "loss": 0.8546, "step": 8494 }, { "epoch": 0.6313638052768488, "grad_norm": 1.7284792525768833, "learning_rate": 1.834018408979689e-05, "loss": 0.8514, "step": 8495 }, { "epoch": 0.631438127090301, "grad_norm": 3.2318236860721457, "learning_rate": 1.833974136700639e-05, "loss": 0.8435, "step": 8496 }, { "epoch": 0.6315124489037532, "grad_norm": 1.913591933889278, "learning_rate": 1.833929859052507e-05, "loss": 0.6193, "step": 8497 }, { "epoch": 0.6315867707172055, "grad_norm": 1.8914383623154274, "learning_rate": 1.8338855760355776e-05, "loss": 0.6918, "step": 8498 }, { "epoch": 0.6316610925306577, "grad_norm": 2.3308762911976477, "learning_rate": 1.8338412876501362e-05, "loss": 0.9273, "step": 8499 }, { "epoch": 0.63173541434411, "grad_norm": 2.1841187760519105, "learning_rate": 1.833796993896468e-05, "loss": 0.8548, "step": 8500 }, { "epoch": 0.6318097361575622, "grad_norm": 2.253660701154368, "learning_rate": 1.833752694774858e-05, "loss": 0.9581, "step": 8501 }, { "epoch": 0.6318840579710145, "grad_norm": 1.8254310767796986, "learning_rate": 1.8337083902855913e-05, "loss": 0.8207, "step": 8502 }, { "epoch": 0.6319583797844668, "grad_norm": 2.0139314692297567, "learning_rate": 1.8336640804289533e-05, "loss": 0.8559, "step": 8503 }, { "epoch": 0.632032701597919, "grad_norm": 2.059667249003157, "learning_rate": 1.833619765205229e-05, "loss": 0.9931, "step": 8504 }, { "epoch": 0.6321070234113713, "grad_norm": 1.6286510310292666, "learning_rate": 1.833575444614704e-05, "loss": 0.6653, "step": 8505 }, { "epoch": 0.6321813452248235, "grad_norm": 2.117204606485417, "learning_rate": 1.8335311186576636e-05, "loss": 0.7707, "step": 8506 }, { "epoch": 0.6322556670382757, "grad_norm": 2.170901629377671, "learning_rate": 1.833486787334393e-05, "loss": 0.9222, "step": 8507 }, { "epoch": 0.632329988851728, "grad_norm": 1.7215698102380215, "learning_rate": 1.833442450645178e-05, "loss": 0.8713, "step": 8508 }, { "epoch": 0.6324043106651802, "grad_norm": 1.96799979680147, "learning_rate": 1.8333981085903035e-05, "loss": 0.8825, "step": 8509 }, { "epoch": 0.6324786324786325, "grad_norm": 2.297475912179897, "learning_rate": 1.833353761170055e-05, "loss": 0.7575, "step": 8510 }, { "epoch": 0.6325529542920847, "grad_norm": 1.645984464747266, "learning_rate": 1.8333094083847188e-05, "loss": 0.9425, "step": 8511 }, { "epoch": 0.6326272761055369, "grad_norm": 1.6950041613590456, "learning_rate": 1.8332650502345796e-05, "loss": 0.893, "step": 8512 }, { "epoch": 0.6327015979189893, "grad_norm": 1.9059278073491015, "learning_rate": 1.833220686719923e-05, "loss": 0.969, "step": 8513 }, { "epoch": 0.6327759197324415, "grad_norm": 1.8455655110176463, "learning_rate": 1.833176317841035e-05, "loss": 0.7552, "step": 8514 }, { "epoch": 0.6328502415458938, "grad_norm": 1.6702803710869596, "learning_rate": 1.8331319435982014e-05, "loss": 0.8913, "step": 8515 }, { "epoch": 0.632924563359346, "grad_norm": 2.345496744391073, "learning_rate": 1.833087563991707e-05, "loss": 0.9873, "step": 8516 }, { "epoch": 0.6329988851727982, "grad_norm": 1.8350219386295639, "learning_rate": 1.8330431790218386e-05, "loss": 0.7869, "step": 8517 }, { "epoch": 0.6330732069862505, "grad_norm": 2.483656530222113, "learning_rate": 1.8329987886888812e-05, "loss": 1.0164, "step": 8518 }, { "epoch": 0.6331475287997027, "grad_norm": 1.813313440240759, "learning_rate": 1.8329543929931207e-05, "loss": 0.9842, "step": 8519 }, { "epoch": 0.633221850613155, "grad_norm": 2.3480330205383786, "learning_rate": 1.8329099919348434e-05, "loss": 0.9395, "step": 8520 }, { "epoch": 0.6332961724266072, "grad_norm": 2.1920622464594395, "learning_rate": 1.8328655855143345e-05, "loss": 0.9451, "step": 8521 }, { "epoch": 0.6333704942400594, "grad_norm": 2.3121923396397364, "learning_rate": 1.8328211737318808e-05, "loss": 1.0463, "step": 8522 }, { "epoch": 0.6334448160535117, "grad_norm": 2.6990757722779475, "learning_rate": 1.8327767565877668e-05, "loss": 0.9172, "step": 8523 }, { "epoch": 0.6335191378669639, "grad_norm": 2.2522041695357826, "learning_rate": 1.8327323340822794e-05, "loss": 0.8413, "step": 8524 }, { "epoch": 0.6335934596804162, "grad_norm": 1.8407456623285336, "learning_rate": 1.8326879062157044e-05, "loss": 0.8813, "step": 8525 }, { "epoch": 0.6336677814938685, "grad_norm": 2.035904527853917, "learning_rate": 1.8326434729883284e-05, "loss": 0.9054, "step": 8526 }, { "epoch": 0.6337421033073207, "grad_norm": 1.666330879643762, "learning_rate": 1.8325990344004366e-05, "loss": 0.818, "step": 8527 }, { "epoch": 0.633816425120773, "grad_norm": 2.0412317394656845, "learning_rate": 1.8325545904523153e-05, "loss": 1.0191, "step": 8528 }, { "epoch": 0.6338907469342252, "grad_norm": 5.052450654617647, "learning_rate": 1.8325101411442508e-05, "loss": 0.9201, "step": 8529 }, { "epoch": 0.6339650687476774, "grad_norm": 2.061276659265749, "learning_rate": 1.832465686476529e-05, "loss": 0.7281, "step": 8530 }, { "epoch": 0.6340393905611297, "grad_norm": 2.3208805598493516, "learning_rate": 1.832421226449437e-05, "loss": 0.863, "step": 8531 }, { "epoch": 0.6341137123745819, "grad_norm": 2.449987019970749, "learning_rate": 1.8323767610632596e-05, "loss": 0.7338, "step": 8532 }, { "epoch": 0.6341880341880342, "grad_norm": 1.9562885514373103, "learning_rate": 1.8323322903182837e-05, "loss": 0.8709, "step": 8533 }, { "epoch": 0.6342623560014864, "grad_norm": 1.9889225716256338, "learning_rate": 1.832287814214796e-05, "loss": 0.9706, "step": 8534 }, { "epoch": 0.6343366778149386, "grad_norm": 1.8969519156305685, "learning_rate": 1.832243332753083e-05, "loss": 0.7607, "step": 8535 }, { "epoch": 0.634410999628391, "grad_norm": 2.2535615833351743, "learning_rate": 1.83219884593343e-05, "loss": 1.1297, "step": 8536 }, { "epoch": 0.6344853214418432, "grad_norm": 1.9626152323674906, "learning_rate": 1.8321543537561242e-05, "loss": 0.791, "step": 8537 }, { "epoch": 0.6345596432552955, "grad_norm": 1.9674224588632856, "learning_rate": 1.832109856221452e-05, "loss": 0.72, "step": 8538 }, { "epoch": 0.6346339650687477, "grad_norm": 2.0807630251185576, "learning_rate": 1.8320653533296996e-05, "loss": 0.7697, "step": 8539 }, { "epoch": 0.6347082868821999, "grad_norm": 2.4392716089622724, "learning_rate": 1.8320208450811538e-05, "loss": 0.7791, "step": 8540 }, { "epoch": 0.6347826086956522, "grad_norm": 2.1013810869692913, "learning_rate": 1.8319763314761007e-05, "loss": 0.8764, "step": 8541 }, { "epoch": 0.6348569305091044, "grad_norm": 1.9131536745686475, "learning_rate": 1.831931812514827e-05, "loss": 0.9205, "step": 8542 }, { "epoch": 0.6349312523225566, "grad_norm": 1.6790103542845887, "learning_rate": 1.83188728819762e-05, "loss": 0.8213, "step": 8543 }, { "epoch": 0.6350055741360089, "grad_norm": 1.9609599634590167, "learning_rate": 1.8318427585247655e-05, "loss": 0.839, "step": 8544 }, { "epoch": 0.6350798959494611, "grad_norm": 2.5303741546676997, "learning_rate": 1.8317982234965504e-05, "loss": 0.9733, "step": 8545 }, { "epoch": 0.6351542177629134, "grad_norm": 1.9750475616177017, "learning_rate": 1.8317536831132615e-05, "loss": 0.7967, "step": 8546 }, { "epoch": 0.6352285395763657, "grad_norm": 2.488949459081916, "learning_rate": 1.8317091373751855e-05, "loss": 0.975, "step": 8547 }, { "epoch": 0.635302861389818, "grad_norm": 2.6243417644014606, "learning_rate": 1.8316645862826093e-05, "loss": 0.7644, "step": 8548 }, { "epoch": 0.6353771832032702, "grad_norm": 1.8912140021541655, "learning_rate": 1.8316200298358197e-05, "loss": 0.9239, "step": 8549 }, { "epoch": 0.6354515050167224, "grad_norm": 1.9280238230875504, "learning_rate": 1.8315754680351035e-05, "loss": 0.9267, "step": 8550 }, { "epoch": 0.6355258268301747, "grad_norm": 1.6285452901321515, "learning_rate": 1.8315309008807474e-05, "loss": 0.6548, "step": 8551 }, { "epoch": 0.6356001486436269, "grad_norm": 1.828656668424478, "learning_rate": 1.8314863283730388e-05, "loss": 0.8392, "step": 8552 }, { "epoch": 0.6356744704570791, "grad_norm": 1.7801388921236831, "learning_rate": 1.8314417505122645e-05, "loss": 0.9476, "step": 8553 }, { "epoch": 0.6357487922705314, "grad_norm": 1.9343293241245758, "learning_rate": 1.831397167298711e-05, "loss": 0.8384, "step": 8554 }, { "epoch": 0.6358231140839836, "grad_norm": 1.7667858931608904, "learning_rate": 1.8313525787326657e-05, "loss": 0.8275, "step": 8555 }, { "epoch": 0.6358974358974359, "grad_norm": 13.38749858540213, "learning_rate": 1.8313079848144158e-05, "loss": 0.9389, "step": 8556 }, { "epoch": 0.6359717577108881, "grad_norm": 2.0033191356217364, "learning_rate": 1.8312633855442482e-05, "loss": 0.781, "step": 8557 }, { "epoch": 0.6360460795243404, "grad_norm": 1.9230430863457266, "learning_rate": 1.83121878092245e-05, "loss": 0.8015, "step": 8558 }, { "epoch": 0.6361204013377927, "grad_norm": 2.0403784631345334, "learning_rate": 1.831174170949308e-05, "loss": 0.7135, "step": 8559 }, { "epoch": 0.6361947231512449, "grad_norm": 2.4941812887754757, "learning_rate": 1.8311295556251102e-05, "loss": 0.8606, "step": 8560 }, { "epoch": 0.6362690449646972, "grad_norm": 1.8165383977533545, "learning_rate": 1.8310849349501435e-05, "loss": 0.7894, "step": 8561 }, { "epoch": 0.6363433667781494, "grad_norm": 3.574586551738516, "learning_rate": 1.831040308924695e-05, "loss": 0.8972, "step": 8562 }, { "epoch": 0.6364176885916016, "grad_norm": 2.338131505451679, "learning_rate": 1.8309956775490524e-05, "loss": 1.0538, "step": 8563 }, { "epoch": 0.6364920104050539, "grad_norm": 2.0160891348911085, "learning_rate": 1.8309510408235026e-05, "loss": 0.765, "step": 8564 }, { "epoch": 0.6365663322185061, "grad_norm": 2.4294501439079883, "learning_rate": 1.830906398748333e-05, "loss": 0.7924, "step": 8565 }, { "epoch": 0.6366406540319584, "grad_norm": 2.05209101393187, "learning_rate": 1.830861751323831e-05, "loss": 0.9148, "step": 8566 }, { "epoch": 0.6367149758454106, "grad_norm": 1.9665496247739378, "learning_rate": 1.830817098550284e-05, "loss": 0.8984, "step": 8567 }, { "epoch": 0.6367892976588628, "grad_norm": 2.0849887755618477, "learning_rate": 1.8307724404279803e-05, "loss": 0.9992, "step": 8568 }, { "epoch": 0.6368636194723152, "grad_norm": 2.7848845570731915, "learning_rate": 1.8307277769572063e-05, "loss": 0.9852, "step": 8569 }, { "epoch": 0.6369379412857674, "grad_norm": 1.7447026113641666, "learning_rate": 1.8306831081382502e-05, "loss": 0.9673, "step": 8570 }, { "epoch": 0.6370122630992197, "grad_norm": 2.357015475033521, "learning_rate": 1.8306384339713993e-05, "loss": 0.9656, "step": 8571 }, { "epoch": 0.6370865849126719, "grad_norm": 2.2426354153171744, "learning_rate": 1.830593754456941e-05, "loss": 0.9947, "step": 8572 }, { "epoch": 0.6371609067261241, "grad_norm": 1.8553044895122717, "learning_rate": 1.8305490695951633e-05, "loss": 0.9588, "step": 8573 }, { "epoch": 0.6372352285395764, "grad_norm": 2.2347530266677453, "learning_rate": 1.830504379386354e-05, "loss": 0.9105, "step": 8574 }, { "epoch": 0.6373095503530286, "grad_norm": 2.7989686350464607, "learning_rate": 1.8304596838308003e-05, "loss": 1.0387, "step": 8575 }, { "epoch": 0.6373838721664808, "grad_norm": 2.016404673004519, "learning_rate": 1.8304149829287902e-05, "loss": 0.8659, "step": 8576 }, { "epoch": 0.6374581939799331, "grad_norm": 2.4103843293063516, "learning_rate": 1.830370276680612e-05, "loss": 0.7969, "step": 8577 }, { "epoch": 0.6375325157933853, "grad_norm": 1.8082477197408584, "learning_rate": 1.8303255650865526e-05, "loss": 0.8923, "step": 8578 }, { "epoch": 0.6376068376068376, "grad_norm": 2.450551521798649, "learning_rate": 1.8302808481469006e-05, "loss": 0.8972, "step": 8579 }, { "epoch": 0.6376811594202898, "grad_norm": 2.097088400418445, "learning_rate": 1.8302361258619433e-05, "loss": 0.8976, "step": 8580 }, { "epoch": 0.6377554812337421, "grad_norm": 2.2825200207355203, "learning_rate": 1.830191398231969e-05, "loss": 0.9885, "step": 8581 }, { "epoch": 0.6378298030471944, "grad_norm": 2.3406241808354857, "learning_rate": 1.8301466652572656e-05, "loss": 0.9568, "step": 8582 }, { "epoch": 0.6379041248606466, "grad_norm": 2.0156567655417406, "learning_rate": 1.830101926938121e-05, "loss": 0.9576, "step": 8583 }, { "epoch": 0.6379784466740989, "grad_norm": 2.246066642414518, "learning_rate": 1.8300571832748234e-05, "loss": 1.0347, "step": 8584 }, { "epoch": 0.6380527684875511, "grad_norm": 1.796570995277814, "learning_rate": 1.8300124342676605e-05, "loss": 0.8454, "step": 8585 }, { "epoch": 0.6381270903010033, "grad_norm": 1.9573005329524162, "learning_rate": 1.8299676799169207e-05, "loss": 0.8218, "step": 8586 }, { "epoch": 0.6382014121144556, "grad_norm": 1.359453014691422, "learning_rate": 1.829922920222892e-05, "loss": 0.6989, "step": 8587 }, { "epoch": 0.6382757339279078, "grad_norm": 2.0773070789956893, "learning_rate": 1.8298781551858628e-05, "loss": 1.0647, "step": 8588 }, { "epoch": 0.6383500557413601, "grad_norm": 2.2994836466568933, "learning_rate": 1.8298333848061212e-05, "loss": 0.8802, "step": 8589 }, { "epoch": 0.6384243775548123, "grad_norm": 1.7030587426208272, "learning_rate": 1.8297886090839552e-05, "loss": 0.9284, "step": 8590 }, { "epoch": 0.6384986993682645, "grad_norm": 1.9007112035660823, "learning_rate": 1.829743828019653e-05, "loss": 0.9783, "step": 8591 }, { "epoch": 0.6385730211817169, "grad_norm": 2.1008384371616105, "learning_rate": 1.8296990416135033e-05, "loss": 0.9396, "step": 8592 }, { "epoch": 0.6386473429951691, "grad_norm": 2.251050319799583, "learning_rate": 1.829654249865794e-05, "loss": 0.7293, "step": 8593 }, { "epoch": 0.6387216648086214, "grad_norm": 1.935229868505358, "learning_rate": 1.8296094527768136e-05, "loss": 0.9563, "step": 8594 }, { "epoch": 0.6387959866220736, "grad_norm": 2.426827066627422, "learning_rate": 1.8295646503468507e-05, "loss": 1.0456, "step": 8595 }, { "epoch": 0.6388703084355258, "grad_norm": 1.7320151060285565, "learning_rate": 1.8295198425761936e-05, "loss": 0.6043, "step": 8596 }, { "epoch": 0.6389446302489781, "grad_norm": 1.9594407281368644, "learning_rate": 1.829475029465131e-05, "loss": 0.8129, "step": 8597 }, { "epoch": 0.6390189520624303, "grad_norm": 1.8249397517909147, "learning_rate": 1.8294302110139512e-05, "loss": 0.8433, "step": 8598 }, { "epoch": 0.6390932738758826, "grad_norm": 1.9200369934132102, "learning_rate": 1.8293853872229425e-05, "loss": 0.8824, "step": 8599 }, { "epoch": 0.6391675956893348, "grad_norm": 2.134400529022642, "learning_rate": 1.829340558092394e-05, "loss": 0.8262, "step": 8600 }, { "epoch": 0.639241917502787, "grad_norm": 2.1889029377460476, "learning_rate": 1.829295723622594e-05, "loss": 0.801, "step": 8601 }, { "epoch": 0.6393162393162393, "grad_norm": 1.7312152527771825, "learning_rate": 1.829250883813831e-05, "loss": 0.9429, "step": 8602 }, { "epoch": 0.6393905611296916, "grad_norm": 1.9281972972883814, "learning_rate": 1.8292060386663938e-05, "loss": 0.868, "step": 8603 }, { "epoch": 0.6394648829431439, "grad_norm": 1.96022779062861, "learning_rate": 1.829161188180571e-05, "loss": 0.9669, "step": 8604 }, { "epoch": 0.6395392047565961, "grad_norm": 1.8754788095077664, "learning_rate": 1.8291163323566515e-05, "loss": 0.6824, "step": 8605 }, { "epoch": 0.6396135265700483, "grad_norm": 2.498213657129403, "learning_rate": 1.8290714711949243e-05, "loss": 1.0325, "step": 8606 }, { "epoch": 0.6396878483835006, "grad_norm": 1.790055201165432, "learning_rate": 1.8290266046956783e-05, "loss": 0.9284, "step": 8607 }, { "epoch": 0.6397621701969528, "grad_norm": 1.7755268186500444, "learning_rate": 1.8289817328592018e-05, "loss": 0.814, "step": 8608 }, { "epoch": 0.639836492010405, "grad_norm": 2.078374646267156, "learning_rate": 1.8289368556857837e-05, "loss": 0.808, "step": 8609 }, { "epoch": 0.6399108138238573, "grad_norm": 2.311886597943285, "learning_rate": 1.8288919731757132e-05, "loss": 0.9719, "step": 8610 }, { "epoch": 0.6399851356373095, "grad_norm": 2.202541794431641, "learning_rate": 1.828847085329279e-05, "loss": 0.8632, "step": 8611 }, { "epoch": 0.6400594574507618, "grad_norm": 1.8159089871351277, "learning_rate": 1.8288021921467708e-05, "loss": 0.9257, "step": 8612 }, { "epoch": 0.640133779264214, "grad_norm": 1.8932062558611216, "learning_rate": 1.828757293628477e-05, "loss": 0.9167, "step": 8613 }, { "epoch": 0.6402081010776663, "grad_norm": 1.7984920715758774, "learning_rate": 1.8287123897746862e-05, "loss": 0.7235, "step": 8614 }, { "epoch": 0.6402824228911186, "grad_norm": 1.6969823928715553, "learning_rate": 1.8286674805856885e-05, "loss": 0.6999, "step": 8615 }, { "epoch": 0.6403567447045708, "grad_norm": 1.5985169453239978, "learning_rate": 1.8286225660617722e-05, "loss": 0.7086, "step": 8616 }, { "epoch": 0.6404310665180231, "grad_norm": 1.939305791202095, "learning_rate": 1.8285776462032273e-05, "loss": 1.0846, "step": 8617 }, { "epoch": 0.6405053883314753, "grad_norm": 1.9678728122491003, "learning_rate": 1.8285327210103422e-05, "loss": 0.8688, "step": 8618 }, { "epoch": 0.6405797101449275, "grad_norm": 1.7093928114895032, "learning_rate": 1.8284877904834064e-05, "loss": 0.7223, "step": 8619 }, { "epoch": 0.6406540319583798, "grad_norm": 1.7845302784688257, "learning_rate": 1.828442854622709e-05, "loss": 0.8128, "step": 8620 }, { "epoch": 0.640728353771832, "grad_norm": 2.1796928920279472, "learning_rate": 1.8283979134285397e-05, "loss": 1.0697, "step": 8621 }, { "epoch": 0.6408026755852843, "grad_norm": 1.7153324572688071, "learning_rate": 1.8283529669011878e-05, "loss": 0.8262, "step": 8622 }, { "epoch": 0.6408769973987365, "grad_norm": 1.8119218504160453, "learning_rate": 1.8283080150409426e-05, "loss": 0.6553, "step": 8623 }, { "epoch": 0.6409513192121887, "grad_norm": 2.102942461624941, "learning_rate": 1.828263057848093e-05, "loss": 0.9335, "step": 8624 }, { "epoch": 0.6410256410256411, "grad_norm": 2.114077812563624, "learning_rate": 1.828218095322929e-05, "loss": 1.0222, "step": 8625 }, { "epoch": 0.6410999628390933, "grad_norm": 2.0584296425138766, "learning_rate": 1.82817312746574e-05, "loss": 0.8288, "step": 8626 }, { "epoch": 0.6411742846525456, "grad_norm": 1.9875055564951003, "learning_rate": 1.8281281542768147e-05, "loss": 0.8963, "step": 8627 }, { "epoch": 0.6412486064659978, "grad_norm": 1.4606638219354757, "learning_rate": 1.828083175756444e-05, "loss": 0.7841, "step": 8628 }, { "epoch": 0.64132292827945, "grad_norm": 1.8918027753040525, "learning_rate": 1.8280381919049162e-05, "loss": 0.8585, "step": 8629 }, { "epoch": 0.6413972500929023, "grad_norm": 1.9238238483580852, "learning_rate": 1.8279932027225223e-05, "loss": 0.8569, "step": 8630 }, { "epoch": 0.6414715719063545, "grad_norm": 2.119474012232285, "learning_rate": 1.8279482082095505e-05, "loss": 0.9359, "step": 8631 }, { "epoch": 0.6415458937198067, "grad_norm": 2.5695883149399132, "learning_rate": 1.8279032083662915e-05, "loss": 0.9803, "step": 8632 }, { "epoch": 0.641620215533259, "grad_norm": 1.7963925192515007, "learning_rate": 1.8278582031930342e-05, "loss": 0.805, "step": 8633 }, { "epoch": 0.6416945373467112, "grad_norm": 2.3229912122900402, "learning_rate": 1.827813192690069e-05, "loss": 1.0393, "step": 8634 }, { "epoch": 0.6417688591601635, "grad_norm": 2.278990811168879, "learning_rate": 1.8277681768576853e-05, "loss": 1.0871, "step": 8635 }, { "epoch": 0.6418431809736157, "grad_norm": 1.9324094081502805, "learning_rate": 1.827723155696173e-05, "loss": 0.7734, "step": 8636 }, { "epoch": 0.641917502787068, "grad_norm": 1.843199280872243, "learning_rate": 1.827678129205822e-05, "loss": 1.0102, "step": 8637 }, { "epoch": 0.6419918246005203, "grad_norm": 2.046161167771309, "learning_rate": 1.8276330973869225e-05, "loss": 0.8718, "step": 8638 }, { "epoch": 0.6420661464139725, "grad_norm": 2.0485470873431004, "learning_rate": 1.8275880602397636e-05, "loss": 0.9931, "step": 8639 }, { "epoch": 0.6421404682274248, "grad_norm": 1.858485742383656, "learning_rate": 1.827543017764636e-05, "loss": 0.8716, "step": 8640 }, { "epoch": 0.642214790040877, "grad_norm": 2.579780366998534, "learning_rate": 1.827497969961829e-05, "loss": 0.9765, "step": 8641 }, { "epoch": 0.6422891118543292, "grad_norm": 1.8196870985689673, "learning_rate": 1.827452916831633e-05, "loss": 0.8231, "step": 8642 }, { "epoch": 0.6423634336677815, "grad_norm": 2.240368212819447, "learning_rate": 1.8274078583743386e-05, "loss": 1.2327, "step": 8643 }, { "epoch": 0.6424377554812337, "grad_norm": 2.117330812349378, "learning_rate": 1.827362794590235e-05, "loss": 0.7723, "step": 8644 }, { "epoch": 0.642512077294686, "grad_norm": 1.925647276340052, "learning_rate": 1.8273177254796127e-05, "loss": 0.7874, "step": 8645 }, { "epoch": 0.6425863991081382, "grad_norm": 2.476323825228105, "learning_rate": 1.8272726510427617e-05, "loss": 0.9648, "step": 8646 }, { "epoch": 0.6426607209215904, "grad_norm": 1.9042704752970974, "learning_rate": 1.8272275712799726e-05, "loss": 0.9848, "step": 8647 }, { "epoch": 0.6427350427350428, "grad_norm": 1.8047338123357268, "learning_rate": 1.827182486191535e-05, "loss": 0.9241, "step": 8648 }, { "epoch": 0.642809364548495, "grad_norm": 2.0093336336383643, "learning_rate": 1.8271373957777398e-05, "loss": 0.8586, "step": 8649 }, { "epoch": 0.6428836863619473, "grad_norm": 2.199973792508748, "learning_rate": 1.8270923000388766e-05, "loss": 0.8224, "step": 8650 }, { "epoch": 0.6429580081753995, "grad_norm": 2.2636625057374182, "learning_rate": 1.8270471989752362e-05, "loss": 1.0102, "step": 8651 }, { "epoch": 0.6430323299888517, "grad_norm": 2.4696091123802213, "learning_rate": 1.827002092587109e-05, "loss": 0.9478, "step": 8652 }, { "epoch": 0.643106651802304, "grad_norm": 1.7383715442070937, "learning_rate": 1.826956980874785e-05, "loss": 0.689, "step": 8653 }, { "epoch": 0.6431809736157562, "grad_norm": 2.2516333985150254, "learning_rate": 1.826911863838555e-05, "loss": 0.7624, "step": 8654 }, { "epoch": 0.6432552954292085, "grad_norm": 1.9149171500492503, "learning_rate": 1.8268667414787094e-05, "loss": 0.7167, "step": 8655 }, { "epoch": 0.6433296172426607, "grad_norm": 2.1519162639860125, "learning_rate": 1.8268216137955385e-05, "loss": 0.8093, "step": 8656 }, { "epoch": 0.6434039390561129, "grad_norm": 2.4031531287772463, "learning_rate": 1.826776480789333e-05, "loss": 1.0951, "step": 8657 }, { "epoch": 0.6434782608695652, "grad_norm": 2.2509615088756068, "learning_rate": 1.8267313424603833e-05, "loss": 1.0001, "step": 8658 }, { "epoch": 0.6435525826830175, "grad_norm": 2.3024967876180606, "learning_rate": 1.8266861988089804e-05, "loss": 1.0763, "step": 8659 }, { "epoch": 0.6436269044964698, "grad_norm": 1.749721151500666, "learning_rate": 1.8266410498354144e-05, "loss": 0.788, "step": 8660 }, { "epoch": 0.643701226309922, "grad_norm": 1.6711710547035212, "learning_rate": 1.8265958955399764e-05, "loss": 0.8032, "step": 8661 }, { "epoch": 0.6437755481233742, "grad_norm": 1.91176981383198, "learning_rate": 1.8265507359229565e-05, "loss": 0.5245, "step": 8662 }, { "epoch": 0.6438498699368265, "grad_norm": 1.6949461137011543, "learning_rate": 1.8265055709846464e-05, "loss": 0.7725, "step": 8663 }, { "epoch": 0.6439241917502787, "grad_norm": 1.8726341093639565, "learning_rate": 1.8264604007253362e-05, "loss": 0.8679, "step": 8664 }, { "epoch": 0.643998513563731, "grad_norm": 2.5546331402219784, "learning_rate": 1.8264152251453168e-05, "loss": 0.8804, "step": 8665 }, { "epoch": 0.6440728353771832, "grad_norm": 4.809839793566641, "learning_rate": 1.8263700442448788e-05, "loss": 0.9624, "step": 8666 }, { "epoch": 0.6441471571906354, "grad_norm": 2.930986075911138, "learning_rate": 1.8263248580243135e-05, "loss": 0.6926, "step": 8667 }, { "epoch": 0.6442214790040877, "grad_norm": 2.4523815912405733, "learning_rate": 1.8262796664839115e-05, "loss": 1.0972, "step": 8668 }, { "epoch": 0.6442958008175399, "grad_norm": 4.586364460939751, "learning_rate": 1.8262344696239643e-05, "loss": 0.8572, "step": 8669 }, { "epoch": 0.6443701226309922, "grad_norm": 2.271684996102799, "learning_rate": 1.826189267444762e-05, "loss": 0.799, "step": 8670 }, { "epoch": 0.6444444444444445, "grad_norm": 2.110642822726691, "learning_rate": 1.826144059946596e-05, "loss": 0.8504, "step": 8671 }, { "epoch": 0.6445187662578967, "grad_norm": 2.150159308318996, "learning_rate": 1.826098847129758e-05, "loss": 0.9734, "step": 8672 }, { "epoch": 0.644593088071349, "grad_norm": 2.1924931617742587, "learning_rate": 1.826053628994538e-05, "loss": 0.8898, "step": 8673 }, { "epoch": 0.6446674098848012, "grad_norm": 2.0375255063681457, "learning_rate": 1.8260084055412277e-05, "loss": 0.9774, "step": 8674 }, { "epoch": 0.6447417316982534, "grad_norm": 2.0173436481762326, "learning_rate": 1.8259631767701182e-05, "loss": 0.9679, "step": 8675 }, { "epoch": 0.6448160535117057, "grad_norm": 2.3549740719026504, "learning_rate": 1.8259179426815005e-05, "loss": 0.8559, "step": 8676 }, { "epoch": 0.6448903753251579, "grad_norm": 2.6121982415930773, "learning_rate": 1.825872703275666e-05, "loss": 0.7761, "step": 8677 }, { "epoch": 0.6449646971386102, "grad_norm": 2.0446264214391587, "learning_rate": 1.8258274585529058e-05, "loss": 1.0063, "step": 8678 }, { "epoch": 0.6450390189520624, "grad_norm": 2.5125424527359486, "learning_rate": 1.8257822085135112e-05, "loss": 1.0807, "step": 8679 }, { "epoch": 0.6451133407655146, "grad_norm": 2.245858896129188, "learning_rate": 1.8257369531577743e-05, "loss": 0.9536, "step": 8680 }, { "epoch": 0.645187662578967, "grad_norm": 2.8147316137816314, "learning_rate": 1.8256916924859847e-05, "loss": 0.8128, "step": 8681 }, { "epoch": 0.6452619843924192, "grad_norm": 2.417893612431931, "learning_rate": 1.8256464264984356e-05, "loss": 1.1096, "step": 8682 }, { "epoch": 0.6453363062058715, "grad_norm": 2.3281826303618507, "learning_rate": 1.8256011551954173e-05, "loss": 0.7748, "step": 8683 }, { "epoch": 0.6454106280193237, "grad_norm": 1.828786595543649, "learning_rate": 1.8255558785772218e-05, "loss": 0.7867, "step": 8684 }, { "epoch": 0.6454849498327759, "grad_norm": 1.932134600470843, "learning_rate": 1.8255105966441402e-05, "loss": 0.971, "step": 8685 }, { "epoch": 0.6455592716462282, "grad_norm": 1.7954875632985259, "learning_rate": 1.8254653093964643e-05, "loss": 0.8098, "step": 8686 }, { "epoch": 0.6456335934596804, "grad_norm": 2.246824529480039, "learning_rate": 1.8254200168344856e-05, "loss": 0.9375, "step": 8687 }, { "epoch": 0.6457079152731326, "grad_norm": 2.341363763865642, "learning_rate": 1.8253747189584957e-05, "loss": 0.7582, "step": 8688 }, { "epoch": 0.6457822370865849, "grad_norm": 2.24203076689822, "learning_rate": 1.825329415768786e-05, "loss": 1.0559, "step": 8689 }, { "epoch": 0.6458565589000371, "grad_norm": 2.088951817017911, "learning_rate": 1.8252841072656485e-05, "loss": 0.8903, "step": 8690 }, { "epoch": 0.6459308807134894, "grad_norm": 2.4275735324932097, "learning_rate": 1.8252387934493744e-05, "loss": 0.8592, "step": 8691 }, { "epoch": 0.6460052025269416, "grad_norm": 1.9729395174388753, "learning_rate": 1.825193474320256e-05, "loss": 0.5561, "step": 8692 }, { "epoch": 0.646079524340394, "grad_norm": 2.268705045276039, "learning_rate": 1.8251481498785852e-05, "loss": 0.8749, "step": 8693 }, { "epoch": 0.6461538461538462, "grad_norm": 2.4772067040914374, "learning_rate": 1.825102820124653e-05, "loss": 0.9593, "step": 8694 }, { "epoch": 0.6462281679672984, "grad_norm": 2.1900558746833796, "learning_rate": 1.8250574850587518e-05, "loss": 1.0238, "step": 8695 }, { "epoch": 0.6463024897807507, "grad_norm": 1.983976418139638, "learning_rate": 1.825012144681173e-05, "loss": 0.7761, "step": 8696 }, { "epoch": 0.6463768115942029, "grad_norm": 2.577441669424053, "learning_rate": 1.824966798992209e-05, "loss": 0.8964, "step": 8697 }, { "epoch": 0.6464511334076551, "grad_norm": 2.3057986988090207, "learning_rate": 1.824921447992152e-05, "loss": 0.7349, "step": 8698 }, { "epoch": 0.6465254552211074, "grad_norm": 1.689075635463398, "learning_rate": 1.8248760916812932e-05, "loss": 0.8176, "step": 8699 }, { "epoch": 0.6465997770345596, "grad_norm": 2.029618750753229, "learning_rate": 1.824830730059925e-05, "loss": 0.9132, "step": 8700 }, { "epoch": 0.6466740988480119, "grad_norm": 1.8941134158903992, "learning_rate": 1.824785363128339e-05, "loss": 0.7964, "step": 8701 }, { "epoch": 0.6467484206614641, "grad_norm": 2.080095098554892, "learning_rate": 1.8247399908868278e-05, "loss": 0.817, "step": 8702 }, { "epoch": 0.6468227424749163, "grad_norm": 2.358874452679622, "learning_rate": 1.8246946133356836e-05, "loss": 1.0095, "step": 8703 }, { "epoch": 0.6468970642883687, "grad_norm": 1.9520841550992576, "learning_rate": 1.8246492304751982e-05, "loss": 0.9013, "step": 8704 }, { "epoch": 0.6469713861018209, "grad_norm": 1.8104169149451497, "learning_rate": 1.8246038423056635e-05, "loss": 0.9403, "step": 8705 }, { "epoch": 0.6470457079152732, "grad_norm": 1.6495900318147658, "learning_rate": 1.824558448827372e-05, "loss": 0.7205, "step": 8706 }, { "epoch": 0.6471200297287254, "grad_norm": 2.356790250152273, "learning_rate": 1.8245130500406165e-05, "loss": 0.8092, "step": 8707 }, { "epoch": 0.6471943515421776, "grad_norm": 2.6208510198993595, "learning_rate": 1.8244676459456884e-05, "loss": 0.8894, "step": 8708 }, { "epoch": 0.6472686733556299, "grad_norm": 1.8412278854109823, "learning_rate": 1.8244222365428808e-05, "loss": 0.871, "step": 8709 }, { "epoch": 0.6473429951690821, "grad_norm": 2.217159123243948, "learning_rate": 1.8243768218324852e-05, "loss": 0.9586, "step": 8710 }, { "epoch": 0.6474173169825344, "grad_norm": 2.7116822969381116, "learning_rate": 1.8243314018147945e-05, "loss": 1.2157, "step": 8711 }, { "epoch": 0.6474916387959866, "grad_norm": 1.8773796392561497, "learning_rate": 1.824285976490101e-05, "loss": 0.8073, "step": 8712 }, { "epoch": 0.6475659606094388, "grad_norm": 1.6762531241821426, "learning_rate": 1.8242405458586972e-05, "loss": 0.7438, "step": 8713 }, { "epoch": 0.6476402824228911, "grad_norm": 5.2733079016601305, "learning_rate": 1.8241951099208756e-05, "loss": 1.0174, "step": 8714 }, { "epoch": 0.6477146042363434, "grad_norm": 1.9151929728405652, "learning_rate": 1.8241496686769283e-05, "loss": 0.7861, "step": 8715 }, { "epoch": 0.6477889260497957, "grad_norm": 1.8831855234831254, "learning_rate": 1.8241042221271486e-05, "loss": 0.7979, "step": 8716 }, { "epoch": 0.6478632478632479, "grad_norm": 1.8102738766285396, "learning_rate": 1.8240587702718283e-05, "loss": 0.8051, "step": 8717 }, { "epoch": 0.6479375696767001, "grad_norm": 2.12201256999209, "learning_rate": 1.8240133131112608e-05, "loss": 0.9457, "step": 8718 }, { "epoch": 0.6480118914901524, "grad_norm": 2.5465518569259573, "learning_rate": 1.823967850645738e-05, "loss": 0.8683, "step": 8719 }, { "epoch": 0.6480862133036046, "grad_norm": 2.1428104698240307, "learning_rate": 1.823922382875553e-05, "loss": 1.1311, "step": 8720 }, { "epoch": 0.6481605351170568, "grad_norm": 1.7902329134877366, "learning_rate": 1.8238769098009987e-05, "loss": 0.71, "step": 8721 }, { "epoch": 0.6482348569305091, "grad_norm": 2.214104879360451, "learning_rate": 1.8238314314223676e-05, "loss": 0.932, "step": 8722 }, { "epoch": 0.6483091787439613, "grad_norm": 5.226147198415624, "learning_rate": 1.823785947739952e-05, "loss": 1.2497, "step": 8723 }, { "epoch": 0.6483835005574136, "grad_norm": 1.9762258760048768, "learning_rate": 1.8237404587540457e-05, "loss": 0.7969, "step": 8724 }, { "epoch": 0.6484578223708658, "grad_norm": 4.672149832893305, "learning_rate": 1.8236949644649412e-05, "loss": 1.0581, "step": 8725 }, { "epoch": 0.6485321441843181, "grad_norm": 2.116996771730508, "learning_rate": 1.8236494648729305e-05, "loss": 1.0117, "step": 8726 }, { "epoch": 0.6486064659977704, "grad_norm": 1.916997044146022, "learning_rate": 1.823603959978308e-05, "loss": 0.83, "step": 8727 }, { "epoch": 0.6486807878112226, "grad_norm": 2.2827752987829517, "learning_rate": 1.8235584497813655e-05, "loss": 0.9199, "step": 8728 }, { "epoch": 0.6487551096246749, "grad_norm": 1.732601789190134, "learning_rate": 1.8235129342823968e-05, "loss": 0.8478, "step": 8729 }, { "epoch": 0.6488294314381271, "grad_norm": 1.9415283042638345, "learning_rate": 1.8234674134816944e-05, "loss": 1.0514, "step": 8730 }, { "epoch": 0.6489037532515793, "grad_norm": 1.7979803555459728, "learning_rate": 1.8234218873795514e-05, "loss": 0.9835, "step": 8731 }, { "epoch": 0.6489780750650316, "grad_norm": 2.631656125736971, "learning_rate": 1.823376355976261e-05, "loss": 0.9442, "step": 8732 }, { "epoch": 0.6490523968784838, "grad_norm": 2.5436867445321654, "learning_rate": 1.8233308192721167e-05, "loss": 0.935, "step": 8733 }, { "epoch": 0.649126718691936, "grad_norm": 2.017226149485794, "learning_rate": 1.823285277267411e-05, "loss": 0.8327, "step": 8734 }, { "epoch": 0.6492010405053883, "grad_norm": 1.9634030895639063, "learning_rate": 1.8232397299624373e-05, "loss": 0.7509, "step": 8735 }, { "epoch": 0.6492753623188405, "grad_norm": 2.3072685975782132, "learning_rate": 1.8231941773574892e-05, "loss": 1.1214, "step": 8736 }, { "epoch": 0.6493496841322929, "grad_norm": 1.7121551064470757, "learning_rate": 1.8231486194528596e-05, "loss": 0.8255, "step": 8737 }, { "epoch": 0.6494240059457451, "grad_norm": 1.750907537669832, "learning_rate": 1.8231030562488418e-05, "loss": 0.9379, "step": 8738 }, { "epoch": 0.6494983277591974, "grad_norm": 1.9241816769461024, "learning_rate": 1.8230574877457293e-05, "loss": 0.8315, "step": 8739 }, { "epoch": 0.6495726495726496, "grad_norm": 1.9594418908179663, "learning_rate": 1.8230119139438156e-05, "loss": 1.0136, "step": 8740 }, { "epoch": 0.6496469713861018, "grad_norm": 1.808218798107913, "learning_rate": 1.8229663348433936e-05, "loss": 0.9673, "step": 8741 }, { "epoch": 0.6497212931995541, "grad_norm": 1.9323997415094913, "learning_rate": 1.8229207504447572e-05, "loss": 0.9174, "step": 8742 }, { "epoch": 0.6497956150130063, "grad_norm": 2.006482177091382, "learning_rate": 1.8228751607481998e-05, "loss": 0.8554, "step": 8743 }, { "epoch": 0.6498699368264585, "grad_norm": 2.0738078305606735, "learning_rate": 1.8228295657540146e-05, "loss": 0.9802, "step": 8744 }, { "epoch": 0.6499442586399108, "grad_norm": 1.7887188718383993, "learning_rate": 1.8227839654624956e-05, "loss": 0.7077, "step": 8745 }, { "epoch": 0.650018580453363, "grad_norm": 1.957654813614072, "learning_rate": 1.8227383598739358e-05, "loss": 0.9705, "step": 8746 }, { "epoch": 0.6500929022668153, "grad_norm": 1.8040780869662056, "learning_rate": 1.8226927489886293e-05, "loss": 0.843, "step": 8747 }, { "epoch": 0.6501672240802676, "grad_norm": 2.2618051548822287, "learning_rate": 1.8226471328068694e-05, "loss": 0.9259, "step": 8748 }, { "epoch": 0.6502415458937199, "grad_norm": 1.9235725018677479, "learning_rate": 1.82260151132895e-05, "loss": 0.9754, "step": 8749 }, { "epoch": 0.6503158677071721, "grad_norm": 1.8243350417590547, "learning_rate": 1.8225558845551648e-05, "loss": 0.7213, "step": 8750 }, { "epoch": 0.6503901895206243, "grad_norm": 1.9387407200618463, "learning_rate": 1.822510252485808e-05, "loss": 1.0205, "step": 8751 }, { "epoch": 0.6504645113340766, "grad_norm": 2.208520523270123, "learning_rate": 1.8224646151211723e-05, "loss": 0.9403, "step": 8752 }, { "epoch": 0.6505388331475288, "grad_norm": 2.30282866044825, "learning_rate": 1.822418972461552e-05, "loss": 0.8861, "step": 8753 }, { "epoch": 0.650613154960981, "grad_norm": 1.7754784890469573, "learning_rate": 1.8223733245072415e-05, "loss": 0.9453, "step": 8754 }, { "epoch": 0.6506874767744333, "grad_norm": 2.7815820023021662, "learning_rate": 1.822327671258534e-05, "loss": 1.1976, "step": 8755 }, { "epoch": 0.6507617985878855, "grad_norm": 2.35707683464943, "learning_rate": 1.8222820127157232e-05, "loss": 0.887, "step": 8756 }, { "epoch": 0.6508361204013378, "grad_norm": 12.534893473609449, "learning_rate": 1.822236348879104e-05, "loss": 1.1937, "step": 8757 }, { "epoch": 0.65091044221479, "grad_norm": 2.009150705547187, "learning_rate": 1.8221906797489698e-05, "loss": 0.9161, "step": 8758 }, { "epoch": 0.6509847640282422, "grad_norm": 2.369088128313471, "learning_rate": 1.822145005325614e-05, "loss": 0.9699, "step": 8759 }, { "epoch": 0.6510590858416946, "grad_norm": 1.8131313134381837, "learning_rate": 1.822099325609332e-05, "loss": 0.897, "step": 8760 }, { "epoch": 0.6511334076551468, "grad_norm": 2.0918442621660582, "learning_rate": 1.8220536406004174e-05, "loss": 0.8521, "step": 8761 }, { "epoch": 0.6512077294685991, "grad_norm": 2.4684084055817985, "learning_rate": 1.8220079502991638e-05, "loss": 0.8818, "step": 8762 }, { "epoch": 0.6512820512820513, "grad_norm": 1.6323884402087856, "learning_rate": 1.8219622547058653e-05, "loss": 0.5946, "step": 8763 }, { "epoch": 0.6513563730955035, "grad_norm": 1.840022820300009, "learning_rate": 1.8219165538208168e-05, "loss": 0.834, "step": 8764 }, { "epoch": 0.6514306949089558, "grad_norm": 1.5689827454313843, "learning_rate": 1.8218708476443125e-05, "loss": 0.809, "step": 8765 }, { "epoch": 0.651505016722408, "grad_norm": 1.9931190130568188, "learning_rate": 1.8218251361766458e-05, "loss": 1.0585, "step": 8766 }, { "epoch": 0.6515793385358603, "grad_norm": 2.4337326971108086, "learning_rate": 1.8217794194181114e-05, "loss": 0.936, "step": 8767 }, { "epoch": 0.6516536603493125, "grad_norm": 2.0649410391081475, "learning_rate": 1.8217336973690042e-05, "loss": 0.6798, "step": 8768 }, { "epoch": 0.6517279821627647, "grad_norm": 2.1965369934035732, "learning_rate": 1.821687970029618e-05, "loss": 0.8051, "step": 8769 }, { "epoch": 0.651802303976217, "grad_norm": 1.896453609131283, "learning_rate": 1.8216422374002472e-05, "loss": 0.6992, "step": 8770 }, { "epoch": 0.6518766257896693, "grad_norm": 2.365956603456306, "learning_rate": 1.8215964994811863e-05, "loss": 0.8478, "step": 8771 }, { "epoch": 0.6519509476031216, "grad_norm": 1.5438085510465545, "learning_rate": 1.82155075627273e-05, "loss": 0.7443, "step": 8772 }, { "epoch": 0.6520252694165738, "grad_norm": 2.790810282625457, "learning_rate": 1.821505007775172e-05, "loss": 1.0141, "step": 8773 }, { "epoch": 0.652099591230026, "grad_norm": 2.6519861960364324, "learning_rate": 1.821459253988808e-05, "loss": 0.7049, "step": 8774 }, { "epoch": 0.6521739130434783, "grad_norm": 2.553143128768377, "learning_rate": 1.8214134949139315e-05, "loss": 1.1454, "step": 8775 }, { "epoch": 0.6522482348569305, "grad_norm": 2.289091318766381, "learning_rate": 1.8213677305508377e-05, "loss": 0.9225, "step": 8776 }, { "epoch": 0.6523225566703827, "grad_norm": 2.193305085683747, "learning_rate": 1.821321960899821e-05, "loss": 1.027, "step": 8777 }, { "epoch": 0.652396878483835, "grad_norm": 2.041753741447593, "learning_rate": 1.8212761859611762e-05, "loss": 0.8946, "step": 8778 }, { "epoch": 0.6524712002972872, "grad_norm": 2.0041904837974482, "learning_rate": 1.821230405735198e-05, "loss": 0.8725, "step": 8779 }, { "epoch": 0.6525455221107395, "grad_norm": 1.7613348518922158, "learning_rate": 1.821184620222181e-05, "loss": 0.8826, "step": 8780 }, { "epoch": 0.6526198439241917, "grad_norm": 1.833008746698467, "learning_rate": 1.82113882942242e-05, "loss": 0.6768, "step": 8781 }, { "epoch": 0.652694165737644, "grad_norm": 2.7392998968431748, "learning_rate": 1.82109303333621e-05, "loss": 0.9281, "step": 8782 }, { "epoch": 0.6527684875510963, "grad_norm": 1.8979561433860441, "learning_rate": 1.821047231963845e-05, "loss": 0.7018, "step": 8783 }, { "epoch": 0.6528428093645485, "grad_norm": 2.263932585577155, "learning_rate": 1.8210014253056214e-05, "loss": 0.8887, "step": 8784 }, { "epoch": 0.6529171311780008, "grad_norm": 1.9135476977084391, "learning_rate": 1.8209556133618328e-05, "loss": 0.8507, "step": 8785 }, { "epoch": 0.652991452991453, "grad_norm": 2.183261418700894, "learning_rate": 1.8209097961327745e-05, "loss": 0.8128, "step": 8786 }, { "epoch": 0.6530657748049052, "grad_norm": 2.048492628084692, "learning_rate": 1.8208639736187418e-05, "loss": 0.7662, "step": 8787 }, { "epoch": 0.6531400966183575, "grad_norm": 1.7981856091833288, "learning_rate": 1.8208181458200292e-05, "loss": 0.8099, "step": 8788 }, { "epoch": 0.6532144184318097, "grad_norm": 2.0518907389543957, "learning_rate": 1.8207723127369318e-05, "loss": 0.7173, "step": 8789 }, { "epoch": 0.653288740245262, "grad_norm": 1.929042486663449, "learning_rate": 1.820726474369745e-05, "loss": 0.8807, "step": 8790 }, { "epoch": 0.6533630620587142, "grad_norm": 1.8970312915661296, "learning_rate": 1.8206806307187638e-05, "loss": 0.6991, "step": 8791 }, { "epoch": 0.6534373838721664, "grad_norm": 2.039291951467072, "learning_rate": 1.8206347817842834e-05, "loss": 0.9769, "step": 8792 }, { "epoch": 0.6535117056856188, "grad_norm": 2.1508181376960636, "learning_rate": 1.8205889275665986e-05, "loss": 1.0742, "step": 8793 }, { "epoch": 0.653586027499071, "grad_norm": 2.090268149400985, "learning_rate": 1.8205430680660053e-05, "loss": 0.7579, "step": 8794 }, { "epoch": 0.6536603493125233, "grad_norm": 1.7206589198030913, "learning_rate": 1.8204972032827978e-05, "loss": 0.7186, "step": 8795 }, { "epoch": 0.6537346711259755, "grad_norm": 2.063193705220904, "learning_rate": 1.820451333217272e-05, "loss": 0.8823, "step": 8796 }, { "epoch": 0.6538089929394277, "grad_norm": 2.2129182134722636, "learning_rate": 1.820405457869723e-05, "loss": 1.087, "step": 8797 }, { "epoch": 0.65388331475288, "grad_norm": 2.18143909028036, "learning_rate": 1.8203595772404465e-05, "loss": 0.6515, "step": 8798 }, { "epoch": 0.6539576365663322, "grad_norm": 1.8635946358924482, "learning_rate": 1.8203136913297375e-05, "loss": 0.9151, "step": 8799 }, { "epoch": 0.6540319583797845, "grad_norm": 2.0483218158678413, "learning_rate": 1.8202678001378916e-05, "loss": 0.9844, "step": 8800 }, { "epoch": 0.6541062801932367, "grad_norm": 1.9350096628730555, "learning_rate": 1.8202219036652043e-05, "loss": 0.704, "step": 8801 }, { "epoch": 0.6541806020066889, "grad_norm": 1.8714397490112076, "learning_rate": 1.8201760019119705e-05, "loss": 1.0059, "step": 8802 }, { "epoch": 0.6542549238201412, "grad_norm": 1.6602408206940324, "learning_rate": 1.8201300948784865e-05, "loss": 0.7893, "step": 8803 }, { "epoch": 0.6543292456335935, "grad_norm": 2.0309125223930815, "learning_rate": 1.8200841825650474e-05, "loss": 0.7974, "step": 8804 }, { "epoch": 0.6544035674470458, "grad_norm": 2.0469136705165396, "learning_rate": 1.820038264971949e-05, "loss": 0.9691, "step": 8805 }, { "epoch": 0.654477889260498, "grad_norm": 2.278661352383346, "learning_rate": 1.8199923420994865e-05, "loss": 0.9198, "step": 8806 }, { "epoch": 0.6545522110739502, "grad_norm": 1.8449799616823883, "learning_rate": 1.819946413947956e-05, "loss": 0.8495, "step": 8807 }, { "epoch": 0.6546265328874025, "grad_norm": 1.9321498463786213, "learning_rate": 1.819900480517653e-05, "loss": 0.7291, "step": 8808 }, { "epoch": 0.6547008547008547, "grad_norm": 2.4903936794514574, "learning_rate": 1.8198545418088732e-05, "loss": 0.987, "step": 8809 }, { "epoch": 0.654775176514307, "grad_norm": 2.2001254549055558, "learning_rate": 1.8198085978219127e-05, "loss": 0.9375, "step": 8810 }, { "epoch": 0.6548494983277592, "grad_norm": 1.8240464934828025, "learning_rate": 1.8197626485570668e-05, "loss": 0.7903, "step": 8811 }, { "epoch": 0.6549238201412114, "grad_norm": 1.910345660376715, "learning_rate": 1.8197166940146313e-05, "loss": 0.9531, "step": 8812 }, { "epoch": 0.6549981419546637, "grad_norm": 1.7165985061141473, "learning_rate": 1.8196707341949024e-05, "loss": 0.7973, "step": 8813 }, { "epoch": 0.6550724637681159, "grad_norm": 1.9914865961053079, "learning_rate": 1.8196247690981762e-05, "loss": 0.834, "step": 8814 }, { "epoch": 0.6551467855815681, "grad_norm": 1.7360013890710224, "learning_rate": 1.819578798724748e-05, "loss": 0.6781, "step": 8815 }, { "epoch": 0.6552211073950205, "grad_norm": 2.161298726758248, "learning_rate": 1.8195328230749137e-05, "loss": 0.9026, "step": 8816 }, { "epoch": 0.6552954292084727, "grad_norm": 1.7773902395804027, "learning_rate": 1.8194868421489697e-05, "loss": 0.8082, "step": 8817 }, { "epoch": 0.655369751021925, "grad_norm": 2.1470329138269695, "learning_rate": 1.8194408559472124e-05, "loss": 0.9849, "step": 8818 }, { "epoch": 0.6554440728353772, "grad_norm": 2.2734379889327627, "learning_rate": 1.819394864469937e-05, "loss": 0.8039, "step": 8819 }, { "epoch": 0.6555183946488294, "grad_norm": 1.9154356204888605, "learning_rate": 1.81934886771744e-05, "loss": 0.9553, "step": 8820 }, { "epoch": 0.6555927164622817, "grad_norm": 1.8211982760808785, "learning_rate": 1.8193028656900177e-05, "loss": 0.92, "step": 8821 }, { "epoch": 0.6556670382757339, "grad_norm": 1.8231670361119756, "learning_rate": 1.819256858387966e-05, "loss": 0.7424, "step": 8822 }, { "epoch": 0.6557413600891862, "grad_norm": 1.9996697026450376, "learning_rate": 1.8192108458115808e-05, "loss": 0.7788, "step": 8823 }, { "epoch": 0.6558156819026384, "grad_norm": 1.739782205610799, "learning_rate": 1.819164827961159e-05, "loss": 0.922, "step": 8824 }, { "epoch": 0.6558900037160906, "grad_norm": 2.191326240120712, "learning_rate": 1.819118804836997e-05, "loss": 0.842, "step": 8825 }, { "epoch": 0.6559643255295429, "grad_norm": 2.370432755333951, "learning_rate": 1.8190727764393895e-05, "loss": 1.0654, "step": 8826 }, { "epoch": 0.6560386473429952, "grad_norm": 2.666408809014825, "learning_rate": 1.8190267427686346e-05, "loss": 0.8762, "step": 8827 }, { "epoch": 0.6561129691564475, "grad_norm": 2.31959255531323, "learning_rate": 1.8189807038250282e-05, "loss": 1.0029, "step": 8828 }, { "epoch": 0.6561872909698997, "grad_norm": 2.30707826918037, "learning_rate": 1.8189346596088664e-05, "loss": 0.9129, "step": 8829 }, { "epoch": 0.6562616127833519, "grad_norm": 1.8255379873005606, "learning_rate": 1.818888610120446e-05, "loss": 0.608, "step": 8830 }, { "epoch": 0.6563359345968042, "grad_norm": 1.5566921408417442, "learning_rate": 1.8188425553600624e-05, "loss": 0.7442, "step": 8831 }, { "epoch": 0.6564102564102564, "grad_norm": 1.6736351534062783, "learning_rate": 1.8187964953280136e-05, "loss": 0.7696, "step": 8832 }, { "epoch": 0.6564845782237086, "grad_norm": 1.961453585363854, "learning_rate": 1.8187504300245952e-05, "loss": 0.8952, "step": 8833 }, { "epoch": 0.6565589000371609, "grad_norm": 1.7464013112697303, "learning_rate": 1.8187043594501043e-05, "loss": 0.7595, "step": 8834 }, { "epoch": 0.6566332218506131, "grad_norm": 2.350986337908048, "learning_rate": 1.8186582836048374e-05, "loss": 0.7411, "step": 8835 }, { "epoch": 0.6567075436640654, "grad_norm": 2.7314917542894577, "learning_rate": 1.8186122024890902e-05, "loss": 0.8047, "step": 8836 }, { "epoch": 0.6567818654775176, "grad_norm": 1.9043794736174315, "learning_rate": 1.8185661161031603e-05, "loss": 0.9248, "step": 8837 }, { "epoch": 0.65685618729097, "grad_norm": 2.263336611755368, "learning_rate": 1.8185200244473448e-05, "loss": 1.0778, "step": 8838 }, { "epoch": 0.6569305091044222, "grad_norm": 2.1688842084920066, "learning_rate": 1.818473927521939e-05, "loss": 1.0004, "step": 8839 }, { "epoch": 0.6570048309178744, "grad_norm": 1.9715917381688717, "learning_rate": 1.818427825327241e-05, "loss": 0.9068, "step": 8840 }, { "epoch": 0.6570791527313267, "grad_norm": 1.9282500038430395, "learning_rate": 1.8183817178635474e-05, "loss": 0.9042, "step": 8841 }, { "epoch": 0.6571534745447789, "grad_norm": 2.0030000643505645, "learning_rate": 1.8183356051311544e-05, "loss": 0.7655, "step": 8842 }, { "epoch": 0.6572277963582311, "grad_norm": 2.092658258098213, "learning_rate": 1.8182894871303594e-05, "loss": 0.7157, "step": 8843 }, { "epoch": 0.6573021181716834, "grad_norm": 2.2175298107033656, "learning_rate": 1.8182433638614586e-05, "loss": 0.8676, "step": 8844 }, { "epoch": 0.6573764399851356, "grad_norm": 2.0508426516739813, "learning_rate": 1.81819723532475e-05, "loss": 0.6465, "step": 8845 }, { "epoch": 0.6574507617985879, "grad_norm": 2.003889301850104, "learning_rate": 1.81815110152053e-05, "loss": 0.9563, "step": 8846 }, { "epoch": 0.6575250836120401, "grad_norm": 2.3098423192501265, "learning_rate": 1.8181049624490956e-05, "loss": 0.9782, "step": 8847 }, { "epoch": 0.6575994054254923, "grad_norm": 1.7530420352612992, "learning_rate": 1.818058818110744e-05, "loss": 0.8862, "step": 8848 }, { "epoch": 0.6576737272389447, "grad_norm": 1.9255161392004096, "learning_rate": 1.818012668505772e-05, "loss": 0.8087, "step": 8849 }, { "epoch": 0.6577480490523969, "grad_norm": 2.024074417817197, "learning_rate": 1.817966513634477e-05, "loss": 0.7957, "step": 8850 }, { "epoch": 0.6578223708658492, "grad_norm": 2.198173232393219, "learning_rate": 1.817920353497156e-05, "loss": 0.8725, "step": 8851 }, { "epoch": 0.6578966926793014, "grad_norm": 1.9279880781697436, "learning_rate": 1.8178741880941063e-05, "loss": 0.8269, "step": 8852 }, { "epoch": 0.6579710144927536, "grad_norm": 1.9228247509402256, "learning_rate": 1.817828017425625e-05, "loss": 0.7103, "step": 8853 }, { "epoch": 0.6580453363062059, "grad_norm": 1.7503550123899478, "learning_rate": 1.8177818414920092e-05, "loss": 0.9093, "step": 8854 }, { "epoch": 0.6581196581196581, "grad_norm": 2.2100320816236465, "learning_rate": 1.8177356602935565e-05, "loss": 0.9427, "step": 8855 }, { "epoch": 0.6581939799331104, "grad_norm": 2.005063551513228, "learning_rate": 1.817689473830564e-05, "loss": 0.9391, "step": 8856 }, { "epoch": 0.6582683017465626, "grad_norm": 1.4167423883215495, "learning_rate": 1.8176432821033293e-05, "loss": 0.6178, "step": 8857 }, { "epoch": 0.6583426235600148, "grad_norm": 2.289514523583979, "learning_rate": 1.8175970851121492e-05, "loss": 0.9674, "step": 8858 }, { "epoch": 0.6584169453734671, "grad_norm": 1.909475959799384, "learning_rate": 1.8175508828573214e-05, "loss": 0.915, "step": 8859 }, { "epoch": 0.6584912671869194, "grad_norm": 2.5545850258761753, "learning_rate": 1.817504675339144e-05, "loss": 0.8594, "step": 8860 }, { "epoch": 0.6585655890003717, "grad_norm": 2.2632262010165167, "learning_rate": 1.817458462557913e-05, "loss": 0.9853, "step": 8861 }, { "epoch": 0.6586399108138239, "grad_norm": 2.168193298352833, "learning_rate": 1.817412244513928e-05, "loss": 1.0602, "step": 8862 }, { "epoch": 0.6587142326272761, "grad_norm": 1.8607853110643315, "learning_rate": 1.8173660212074847e-05, "loss": 0.8938, "step": 8863 }, { "epoch": 0.6587885544407284, "grad_norm": 1.702651147382036, "learning_rate": 1.8173197926388814e-05, "loss": 0.779, "step": 8864 }, { "epoch": 0.6588628762541806, "grad_norm": 2.5612720518402985, "learning_rate": 1.817273558808416e-05, "loss": 1.0544, "step": 8865 }, { "epoch": 0.6589371980676328, "grad_norm": 1.9309948126488323, "learning_rate": 1.8172273197163854e-05, "loss": 0.8192, "step": 8866 }, { "epoch": 0.6590115198810851, "grad_norm": 1.5202870803122672, "learning_rate": 1.8171810753630877e-05, "loss": 0.78, "step": 8867 }, { "epoch": 0.6590858416945373, "grad_norm": 2.236540116874405, "learning_rate": 1.817134825748821e-05, "loss": 0.9541, "step": 8868 }, { "epoch": 0.6591601635079896, "grad_norm": 3.0154119303485563, "learning_rate": 1.8170885708738825e-05, "loss": 0.8508, "step": 8869 }, { "epoch": 0.6592344853214418, "grad_norm": 1.867867961699934, "learning_rate": 1.8170423107385703e-05, "loss": 0.7694, "step": 8870 }, { "epoch": 0.659308807134894, "grad_norm": 1.7708469007192515, "learning_rate": 1.816996045343182e-05, "loss": 0.9385, "step": 8871 }, { "epoch": 0.6593831289483464, "grad_norm": 2.054776115946192, "learning_rate": 1.8169497746880152e-05, "loss": 0.9686, "step": 8872 }, { "epoch": 0.6594574507617986, "grad_norm": 2.008426386173021, "learning_rate": 1.8169034987733686e-05, "loss": 0.871, "step": 8873 }, { "epoch": 0.6595317725752509, "grad_norm": 4.050468487845314, "learning_rate": 1.8168572175995395e-05, "loss": 0.8968, "step": 8874 }, { "epoch": 0.6596060943887031, "grad_norm": 1.8088806610674775, "learning_rate": 1.8168109311668264e-05, "loss": 0.9319, "step": 8875 }, { "epoch": 0.6596804162021553, "grad_norm": 1.9018639713211714, "learning_rate": 1.8167646394755264e-05, "loss": 0.7812, "step": 8876 }, { "epoch": 0.6597547380156076, "grad_norm": 2.0059672739210814, "learning_rate": 1.8167183425259384e-05, "loss": 0.7786, "step": 8877 }, { "epoch": 0.6598290598290598, "grad_norm": 1.9424186320942485, "learning_rate": 1.8166720403183595e-05, "loss": 0.7492, "step": 8878 }, { "epoch": 0.659903381642512, "grad_norm": 1.9173569906188646, "learning_rate": 1.816625732853089e-05, "loss": 0.8912, "step": 8879 }, { "epoch": 0.6599777034559643, "grad_norm": 2.3348630083414323, "learning_rate": 1.8165794201304243e-05, "loss": 0.9143, "step": 8880 }, { "epoch": 0.6600520252694165, "grad_norm": 2.055023348285221, "learning_rate": 1.8165331021506633e-05, "loss": 0.9048, "step": 8881 }, { "epoch": 0.6601263470828688, "grad_norm": 2.057882091777585, "learning_rate": 1.8164867789141047e-05, "loss": 1.064, "step": 8882 }, { "epoch": 0.6602006688963211, "grad_norm": 1.5560540244615952, "learning_rate": 1.816440450421047e-05, "loss": 0.7569, "step": 8883 }, { "epoch": 0.6602749907097734, "grad_norm": 1.8902075601876196, "learning_rate": 1.8163941166717876e-05, "loss": 0.8027, "step": 8884 }, { "epoch": 0.6603493125232256, "grad_norm": 2.1843263785604803, "learning_rate": 1.8163477776666255e-05, "loss": 0.7491, "step": 8885 }, { "epoch": 0.6604236343366778, "grad_norm": 1.9399988412694973, "learning_rate": 1.8163014334058583e-05, "loss": 0.924, "step": 8886 }, { "epoch": 0.6604979561501301, "grad_norm": 1.997486994994591, "learning_rate": 1.8162550838897854e-05, "loss": 0.8095, "step": 8887 }, { "epoch": 0.6605722779635823, "grad_norm": 1.797945006968942, "learning_rate": 1.8162087291187042e-05, "loss": 0.729, "step": 8888 }, { "epoch": 0.6606465997770345, "grad_norm": 2.1140991799359106, "learning_rate": 1.8161623690929138e-05, "loss": 0.7757, "step": 8889 }, { "epoch": 0.6607209215904868, "grad_norm": 1.7679974985118643, "learning_rate": 1.8161160038127124e-05, "loss": 0.8635, "step": 8890 }, { "epoch": 0.660795243403939, "grad_norm": 2.236883880678701, "learning_rate": 1.8160696332783984e-05, "loss": 0.8121, "step": 8891 }, { "epoch": 0.6608695652173913, "grad_norm": 1.9628080903111063, "learning_rate": 1.8160232574902705e-05, "loss": 0.7706, "step": 8892 }, { "epoch": 0.6609438870308435, "grad_norm": 2.2634984114472365, "learning_rate": 1.815976876448627e-05, "loss": 0.8388, "step": 8893 }, { "epoch": 0.6610182088442959, "grad_norm": 2.5273981157729732, "learning_rate": 1.815930490153767e-05, "loss": 0.7656, "step": 8894 }, { "epoch": 0.6610925306577481, "grad_norm": 2.0058054060656656, "learning_rate": 1.815884098605989e-05, "loss": 1.0125, "step": 8895 }, { "epoch": 0.6611668524712003, "grad_norm": 2.2460341196442095, "learning_rate": 1.815837701805591e-05, "loss": 0.9757, "step": 8896 }, { "epoch": 0.6612411742846526, "grad_norm": 3.1490833910600045, "learning_rate": 1.8157912997528724e-05, "loss": 1.0887, "step": 8897 }, { "epoch": 0.6613154960981048, "grad_norm": 1.8608786660946988, "learning_rate": 1.815744892448132e-05, "loss": 0.7529, "step": 8898 }, { "epoch": 0.661389817911557, "grad_norm": 2.114191894962351, "learning_rate": 1.815698479891668e-05, "loss": 0.9864, "step": 8899 }, { "epoch": 0.6614641397250093, "grad_norm": 1.8730073885335485, "learning_rate": 1.8156520620837796e-05, "loss": 0.8251, "step": 8900 }, { "epoch": 0.6615384615384615, "grad_norm": 2.383639761195007, "learning_rate": 1.8156056390247656e-05, "loss": 0.8808, "step": 8901 }, { "epoch": 0.6616127833519138, "grad_norm": 1.9342764026927335, "learning_rate": 1.8155592107149245e-05, "loss": 0.9617, "step": 8902 }, { "epoch": 0.661687105165366, "grad_norm": 1.7928031116807948, "learning_rate": 1.815512777154556e-05, "loss": 0.6651, "step": 8903 }, { "epoch": 0.6617614269788182, "grad_norm": 1.8511836064966967, "learning_rate": 1.8154663383439587e-05, "loss": 0.7119, "step": 8904 }, { "epoch": 0.6618357487922706, "grad_norm": 1.9645511057733753, "learning_rate": 1.815419894283431e-05, "loss": 0.9272, "step": 8905 }, { "epoch": 0.6619100706057228, "grad_norm": 1.983839246530184, "learning_rate": 1.8153734449732724e-05, "loss": 0.9821, "step": 8906 }, { "epoch": 0.6619843924191751, "grad_norm": 1.8776036528713962, "learning_rate": 1.8153269904137814e-05, "loss": 0.8088, "step": 8907 }, { "epoch": 0.6620587142326273, "grad_norm": 1.9977114850202673, "learning_rate": 1.8152805306052584e-05, "loss": 0.8355, "step": 8908 }, { "epoch": 0.6621330360460795, "grad_norm": 2.0785599461923665, "learning_rate": 1.8152340655480012e-05, "loss": 1.0753, "step": 8909 }, { "epoch": 0.6622073578595318, "grad_norm": 1.5915113063958872, "learning_rate": 1.8151875952423094e-05, "loss": 0.7222, "step": 8910 }, { "epoch": 0.662281679672984, "grad_norm": 1.6097760068507345, "learning_rate": 1.8151411196884823e-05, "loss": 0.8235, "step": 8911 }, { "epoch": 0.6623560014864363, "grad_norm": 1.6217435222434409, "learning_rate": 1.815094638886819e-05, "loss": 0.6849, "step": 8912 }, { "epoch": 0.6624303232998885, "grad_norm": 1.5345400721179627, "learning_rate": 1.815048152837618e-05, "loss": 0.6709, "step": 8913 }, { "epoch": 0.6625046451133407, "grad_norm": 2.201692218507341, "learning_rate": 1.8150016615411803e-05, "loss": 0.8636, "step": 8914 }, { "epoch": 0.662578966926793, "grad_norm": 1.5738818935940802, "learning_rate": 1.814955164997804e-05, "loss": 0.8956, "step": 8915 }, { "epoch": 0.6626532887402453, "grad_norm": 1.9579926463950732, "learning_rate": 1.814908663207788e-05, "loss": 0.8942, "step": 8916 }, { "epoch": 0.6627276105536976, "grad_norm": 2.6562173864360665, "learning_rate": 1.814862156171433e-05, "loss": 0.7553, "step": 8917 }, { "epoch": 0.6628019323671498, "grad_norm": 2.3933552786779804, "learning_rate": 1.8148156438890372e-05, "loss": 0.9509, "step": 8918 }, { "epoch": 0.662876254180602, "grad_norm": 2.237495507949092, "learning_rate": 1.8147691263609007e-05, "loss": 1.0053, "step": 8919 }, { "epoch": 0.6629505759940543, "grad_norm": 2.540260049753061, "learning_rate": 1.814722603587323e-05, "loss": 1.1003, "step": 8920 }, { "epoch": 0.6630248978075065, "grad_norm": 1.9630477827805002, "learning_rate": 1.8146760755686035e-05, "loss": 0.7967, "step": 8921 }, { "epoch": 0.6630992196209587, "grad_norm": 1.9883575117373062, "learning_rate": 1.8146295423050415e-05, "loss": 0.5409, "step": 8922 }, { "epoch": 0.663173541434411, "grad_norm": 1.9415920135569256, "learning_rate": 1.814583003796937e-05, "loss": 0.8996, "step": 8923 }, { "epoch": 0.6632478632478632, "grad_norm": 2.1214360953915388, "learning_rate": 1.814536460044589e-05, "loss": 1.0054, "step": 8924 }, { "epoch": 0.6633221850613155, "grad_norm": 1.599290032918927, "learning_rate": 1.814489911048298e-05, "loss": 0.8175, "step": 8925 }, { "epoch": 0.6633965068747677, "grad_norm": 1.7101535590152066, "learning_rate": 1.8144433568083628e-05, "loss": 0.695, "step": 8926 }, { "epoch": 0.6634708286882199, "grad_norm": 2.1111587185241856, "learning_rate": 1.814396797325084e-05, "loss": 0.8698, "step": 8927 }, { "epoch": 0.6635451505016723, "grad_norm": 2.358005694822643, "learning_rate": 1.8143502325987605e-05, "loss": 0.751, "step": 8928 }, { "epoch": 0.6636194723151245, "grad_norm": 2.127228579391557, "learning_rate": 1.8143036626296924e-05, "loss": 0.6654, "step": 8929 }, { "epoch": 0.6636937941285768, "grad_norm": 1.669481193133498, "learning_rate": 1.8142570874181796e-05, "loss": 0.7219, "step": 8930 }, { "epoch": 0.663768115942029, "grad_norm": 2.01283227978124, "learning_rate": 1.814210506964522e-05, "loss": 0.8009, "step": 8931 }, { "epoch": 0.6638424377554812, "grad_norm": 2.1180169737948473, "learning_rate": 1.8141639212690193e-05, "loss": 0.8307, "step": 8932 }, { "epoch": 0.6639167595689335, "grad_norm": 1.5992155893822224, "learning_rate": 1.8141173303319718e-05, "loss": 0.6836, "step": 8933 }, { "epoch": 0.6639910813823857, "grad_norm": 5.457509200192763, "learning_rate": 1.814070734153679e-05, "loss": 1.0122, "step": 8934 }, { "epoch": 0.664065403195838, "grad_norm": 1.9961193143482219, "learning_rate": 1.814024132734441e-05, "loss": 0.9732, "step": 8935 }, { "epoch": 0.6641397250092902, "grad_norm": 1.9557059183727445, "learning_rate": 1.813977526074558e-05, "loss": 0.8982, "step": 8936 }, { "epoch": 0.6642140468227424, "grad_norm": 1.9490779780629157, "learning_rate": 1.81393091417433e-05, "loss": 0.8875, "step": 8937 }, { "epoch": 0.6642883686361947, "grad_norm": 2.452576092799112, "learning_rate": 1.8138842970340566e-05, "loss": 0.838, "step": 8938 }, { "epoch": 0.664362690449647, "grad_norm": 2.5360502123135955, "learning_rate": 1.8138376746540386e-05, "loss": 1.0219, "step": 8939 }, { "epoch": 0.6644370122630993, "grad_norm": 2.06785190640168, "learning_rate": 1.813791047034576e-05, "loss": 0.9056, "step": 8940 }, { "epoch": 0.6645113340765515, "grad_norm": 1.9182861010993262, "learning_rate": 1.813744414175969e-05, "loss": 0.9076, "step": 8941 }, { "epoch": 0.6645856558900037, "grad_norm": 2.155225837149023, "learning_rate": 1.813697776078517e-05, "loss": 0.7058, "step": 8942 }, { "epoch": 0.664659977703456, "grad_norm": 2.429167988418885, "learning_rate": 1.8136511327425216e-05, "loss": 0.8665, "step": 8943 }, { "epoch": 0.6647342995169082, "grad_norm": 2.0106256230584125, "learning_rate": 1.813604484168282e-05, "loss": 1.0285, "step": 8944 }, { "epoch": 0.6648086213303604, "grad_norm": 1.6755387807327728, "learning_rate": 1.8135578303560992e-05, "loss": 0.8268, "step": 8945 }, { "epoch": 0.6648829431438127, "grad_norm": 1.680692563893997, "learning_rate": 1.8135111713062732e-05, "loss": 0.6929, "step": 8946 }, { "epoch": 0.6649572649572649, "grad_norm": 1.7861345549615661, "learning_rate": 1.8134645070191046e-05, "loss": 0.7267, "step": 8947 }, { "epoch": 0.6650315867707172, "grad_norm": 2.187415585668964, "learning_rate": 1.8134178374948936e-05, "loss": 0.9713, "step": 8948 }, { "epoch": 0.6651059085841694, "grad_norm": 1.8410932776784978, "learning_rate": 1.8133711627339407e-05, "loss": 0.7941, "step": 8949 }, { "epoch": 0.6651802303976218, "grad_norm": 2.0099813207809496, "learning_rate": 1.8133244827365467e-05, "loss": 0.7954, "step": 8950 }, { "epoch": 0.665254552211074, "grad_norm": 1.6345835071689474, "learning_rate": 1.8132777975030116e-05, "loss": 0.651, "step": 8951 }, { "epoch": 0.6653288740245262, "grad_norm": 1.6800893807084947, "learning_rate": 1.8132311070336365e-05, "loss": 0.7934, "step": 8952 }, { "epoch": 0.6654031958379785, "grad_norm": 1.8825856096584481, "learning_rate": 1.8131844113287216e-05, "loss": 0.7506, "step": 8953 }, { "epoch": 0.6654775176514307, "grad_norm": 2.1169166244163597, "learning_rate": 1.8131377103885675e-05, "loss": 0.8391, "step": 8954 }, { "epoch": 0.6655518394648829, "grad_norm": 1.7825945812255837, "learning_rate": 1.8130910042134753e-05, "loss": 0.6539, "step": 8955 }, { "epoch": 0.6656261612783352, "grad_norm": 2.0332843003683188, "learning_rate": 1.813044292803745e-05, "loss": 0.8766, "step": 8956 }, { "epoch": 0.6657004830917874, "grad_norm": 2.05856830916183, "learning_rate": 1.8129975761596782e-05, "loss": 0.9226, "step": 8957 }, { "epoch": 0.6657748049052397, "grad_norm": 1.882071201294498, "learning_rate": 1.812950854281575e-05, "loss": 0.839, "step": 8958 }, { "epoch": 0.6658491267186919, "grad_norm": 1.7449307848835218, "learning_rate": 1.8129041271697362e-05, "loss": 0.8362, "step": 8959 }, { "epoch": 0.6659234485321441, "grad_norm": 2.067035103499501, "learning_rate": 1.8128573948244634e-05, "loss": 0.9454, "step": 8960 }, { "epoch": 0.6659977703455965, "grad_norm": 2.114704450451989, "learning_rate": 1.8128106572460563e-05, "loss": 0.8615, "step": 8961 }, { "epoch": 0.6660720921590487, "grad_norm": 2.1948919233057675, "learning_rate": 1.8127639144348162e-05, "loss": 0.742, "step": 8962 }, { "epoch": 0.666146413972501, "grad_norm": 1.8284930901436014, "learning_rate": 1.8127171663910447e-05, "loss": 0.8506, "step": 8963 }, { "epoch": 0.6662207357859532, "grad_norm": 2.1717294993724194, "learning_rate": 1.8126704131150422e-05, "loss": 0.8839, "step": 8964 }, { "epoch": 0.6662950575994054, "grad_norm": 1.6552917040586734, "learning_rate": 1.8126236546071094e-05, "loss": 0.7236, "step": 8965 }, { "epoch": 0.6663693794128577, "grad_norm": 2.3224042121788298, "learning_rate": 1.812576890867548e-05, "loss": 0.8297, "step": 8966 }, { "epoch": 0.6664437012263099, "grad_norm": 3.5547229168522945, "learning_rate": 1.8125301218966586e-05, "loss": 1.0055, "step": 8967 }, { "epoch": 0.6665180230397622, "grad_norm": 1.901670854219179, "learning_rate": 1.8124833476947424e-05, "loss": 0.5836, "step": 8968 }, { "epoch": 0.6665923448532144, "grad_norm": 3.0843684700858875, "learning_rate": 1.8124365682621006e-05, "loss": 0.9709, "step": 8969 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2656000904414675, "learning_rate": 1.8123897835990344e-05, "loss": 0.5613, "step": 8970 }, { "epoch": 0.6667409884801189, "grad_norm": 1.985306769293737, "learning_rate": 1.8123429937058447e-05, "loss": 0.7996, "step": 8971 }, { "epoch": 0.6668153102935712, "grad_norm": 2.007414952843123, "learning_rate": 1.812296198582833e-05, "loss": 0.8174, "step": 8972 }, { "epoch": 0.6668896321070235, "grad_norm": 2.3032273908437357, "learning_rate": 1.812249398230301e-05, "loss": 0.7763, "step": 8973 }, { "epoch": 0.6669639539204757, "grad_norm": 1.8136014215371234, "learning_rate": 1.812202592648549e-05, "loss": 0.8831, "step": 8974 }, { "epoch": 0.6670382757339279, "grad_norm": 2.1540767142435158, "learning_rate": 1.812155781837879e-05, "loss": 0.7522, "step": 8975 }, { "epoch": 0.6671125975473802, "grad_norm": 2.0622009130389283, "learning_rate": 1.8121089657985922e-05, "loss": 0.9902, "step": 8976 }, { "epoch": 0.6671869193608324, "grad_norm": 1.9249646690196074, "learning_rate": 1.81206214453099e-05, "loss": 1.0678, "step": 8977 }, { "epoch": 0.6672612411742846, "grad_norm": 2.290985628212212, "learning_rate": 1.812015318035374e-05, "loss": 0.7881, "step": 8978 }, { "epoch": 0.6673355629877369, "grad_norm": 1.6911614877918015, "learning_rate": 1.8119684863120452e-05, "loss": 0.9042, "step": 8979 }, { "epoch": 0.6674098848011891, "grad_norm": 1.9346742306481697, "learning_rate": 1.8119216493613056e-05, "loss": 0.892, "step": 8980 }, { "epoch": 0.6674842066146414, "grad_norm": 2.4135856731069674, "learning_rate": 1.8118748071834565e-05, "loss": 0.8617, "step": 8981 }, { "epoch": 0.6675585284280936, "grad_norm": 2.0886385021061695, "learning_rate": 1.8118279597788e-05, "loss": 0.9915, "step": 8982 }, { "epoch": 0.6676328502415458, "grad_norm": 1.7924563403272578, "learning_rate": 1.8117811071476365e-05, "loss": 0.9277, "step": 8983 }, { "epoch": 0.6677071720549982, "grad_norm": 3.682814509011255, "learning_rate": 1.8117342492902687e-05, "loss": 0.8066, "step": 8984 }, { "epoch": 0.6677814938684504, "grad_norm": 3.7682960998957253, "learning_rate": 1.811687386206998e-05, "loss": 1.0158, "step": 8985 }, { "epoch": 0.6678558156819027, "grad_norm": 1.7539562636384918, "learning_rate": 1.811640517898126e-05, "loss": 0.7781, "step": 8986 }, { "epoch": 0.6679301374953549, "grad_norm": 1.6236245105958895, "learning_rate": 1.8115936443639543e-05, "loss": 0.7065, "step": 8987 }, { "epoch": 0.6680044593088071, "grad_norm": 1.9820239507721025, "learning_rate": 1.811546765604785e-05, "loss": 0.9345, "step": 8988 }, { "epoch": 0.6680787811222594, "grad_norm": 1.7211817225284454, "learning_rate": 1.8114998816209194e-05, "loss": 0.7412, "step": 8989 }, { "epoch": 0.6681531029357116, "grad_norm": 1.7538348228471548, "learning_rate": 1.81145299241266e-05, "loss": 0.8263, "step": 8990 }, { "epoch": 0.6682274247491639, "grad_norm": 2.279074991108, "learning_rate": 1.8114060979803082e-05, "loss": 0.9558, "step": 8991 }, { "epoch": 0.6683017465626161, "grad_norm": 1.748505300956364, "learning_rate": 1.811359198324166e-05, "loss": 0.775, "step": 8992 }, { "epoch": 0.6683760683760683, "grad_norm": 2.0339827870988727, "learning_rate": 1.8113122934445356e-05, "loss": 0.9696, "step": 8993 }, { "epoch": 0.6684503901895206, "grad_norm": 2.064437502326148, "learning_rate": 1.8112653833417186e-05, "loss": 0.6661, "step": 8994 }, { "epoch": 0.6685247120029729, "grad_norm": 2.1493615465518414, "learning_rate": 1.811218468016017e-05, "loss": 0.9249, "step": 8995 }, { "epoch": 0.6685990338164252, "grad_norm": 1.7929988547154747, "learning_rate": 1.811171547467733e-05, "loss": 0.8311, "step": 8996 }, { "epoch": 0.6686733556298774, "grad_norm": 1.8086759680511242, "learning_rate": 1.811124621697169e-05, "loss": 0.95, "step": 8997 }, { "epoch": 0.6687476774433296, "grad_norm": 1.9305187793494658, "learning_rate": 1.8110776907046265e-05, "loss": 0.8947, "step": 8998 }, { "epoch": 0.6688219992567819, "grad_norm": 2.0136823234373407, "learning_rate": 1.811030754490408e-05, "loss": 0.874, "step": 8999 }, { "epoch": 0.6688963210702341, "grad_norm": 1.4999761360098738, "learning_rate": 1.8109838130548155e-05, "loss": 0.5739, "step": 9000 }, { "epoch": 0.6689706428836864, "grad_norm": 1.9242480293206579, "learning_rate": 1.8109368663981513e-05, "loss": 0.9147, "step": 9001 }, { "epoch": 0.6690449646971386, "grad_norm": 3.008670797153106, "learning_rate": 1.810889914520718e-05, "loss": 0.9548, "step": 9002 }, { "epoch": 0.6691192865105908, "grad_norm": 1.5753944565965141, "learning_rate": 1.810842957422817e-05, "loss": 0.8084, "step": 9003 }, { "epoch": 0.6691936083240431, "grad_norm": 1.6591094761487342, "learning_rate": 1.8107959951047514e-05, "loss": 0.8374, "step": 9004 }, { "epoch": 0.6692679301374953, "grad_norm": 2.173140096023639, "learning_rate": 1.8107490275668233e-05, "loss": 0.8856, "step": 9005 }, { "epoch": 0.6693422519509477, "grad_norm": 2.054448608327424, "learning_rate": 1.8107020548093346e-05, "loss": 0.7383, "step": 9006 }, { "epoch": 0.6694165737643999, "grad_norm": 2.077585181853875, "learning_rate": 1.8106550768325887e-05, "loss": 0.8985, "step": 9007 }, { "epoch": 0.6694908955778521, "grad_norm": 1.905792248251905, "learning_rate": 1.810608093636887e-05, "loss": 0.6364, "step": 9008 }, { "epoch": 0.6695652173913044, "grad_norm": 2.2031070694070336, "learning_rate": 1.810561105222533e-05, "loss": 0.9654, "step": 9009 }, { "epoch": 0.6696395392047566, "grad_norm": 2.132624537030931, "learning_rate": 1.810514111589828e-05, "loss": 0.8227, "step": 9010 }, { "epoch": 0.6697138610182088, "grad_norm": 1.9252808929654768, "learning_rate": 1.8104671127390757e-05, "loss": 0.901, "step": 9011 }, { "epoch": 0.6697881828316611, "grad_norm": 1.8878966515926947, "learning_rate": 1.810420108670578e-05, "loss": 0.7549, "step": 9012 }, { "epoch": 0.6698625046451133, "grad_norm": 2.2488824714445164, "learning_rate": 1.810373099384638e-05, "loss": 1.0893, "step": 9013 }, { "epoch": 0.6699368264585656, "grad_norm": 1.784403650150086, "learning_rate": 1.8103260848815573e-05, "loss": 0.7818, "step": 9014 }, { "epoch": 0.6700111482720178, "grad_norm": 1.8278222912051423, "learning_rate": 1.8102790651616397e-05, "loss": 0.7733, "step": 9015 }, { "epoch": 0.67008547008547, "grad_norm": 2.085904592542174, "learning_rate": 1.810232040225188e-05, "loss": 0.9523, "step": 9016 }, { "epoch": 0.6701597918989224, "grad_norm": 1.8065162767751544, "learning_rate": 1.8101850100725035e-05, "loss": 0.8765, "step": 9017 }, { "epoch": 0.6702341137123746, "grad_norm": 1.8112557144391737, "learning_rate": 1.8101379747038907e-05, "loss": 0.8231, "step": 9018 }, { "epoch": 0.6703084355258269, "grad_norm": 2.076773994062228, "learning_rate": 1.8100909341196512e-05, "loss": 0.8199, "step": 9019 }, { "epoch": 0.6703827573392791, "grad_norm": 1.8705782509383606, "learning_rate": 1.8100438883200886e-05, "loss": 0.8442, "step": 9020 }, { "epoch": 0.6704570791527313, "grad_norm": 1.9532861173885308, "learning_rate": 1.8099968373055054e-05, "loss": 0.8534, "step": 9021 }, { "epoch": 0.6705314009661836, "grad_norm": 1.572237491367152, "learning_rate": 1.8099497810762048e-05, "loss": 0.751, "step": 9022 }, { "epoch": 0.6706057227796358, "grad_norm": 4.411478388939926, "learning_rate": 1.809902719632489e-05, "loss": 0.9149, "step": 9023 }, { "epoch": 0.670680044593088, "grad_norm": 1.4784522489712846, "learning_rate": 1.809855652974662e-05, "loss": 0.5451, "step": 9024 }, { "epoch": 0.6707543664065403, "grad_norm": 1.7185256238180162, "learning_rate": 1.809808581103026e-05, "loss": 0.7123, "step": 9025 }, { "epoch": 0.6708286882199925, "grad_norm": 1.9130175426805063, "learning_rate": 1.8097615040178847e-05, "loss": 0.8049, "step": 9026 }, { "epoch": 0.6709030100334448, "grad_norm": 2.129027259094579, "learning_rate": 1.809714421719541e-05, "loss": 0.8196, "step": 9027 }, { "epoch": 0.6709773318468971, "grad_norm": 1.394963562619005, "learning_rate": 1.8096673342082972e-05, "loss": 0.6073, "step": 9028 }, { "epoch": 0.6710516536603494, "grad_norm": 2.0607840574360092, "learning_rate": 1.8096202414844578e-05, "loss": 0.7845, "step": 9029 }, { "epoch": 0.6711259754738016, "grad_norm": 1.8229625573362782, "learning_rate": 1.809573143548325e-05, "loss": 0.7225, "step": 9030 }, { "epoch": 0.6712002972872538, "grad_norm": 1.6135615158564338, "learning_rate": 1.809526040400202e-05, "loss": 0.73, "step": 9031 }, { "epoch": 0.6712746191007061, "grad_norm": 1.7559470772379813, "learning_rate": 1.8094789320403926e-05, "loss": 0.8444, "step": 9032 }, { "epoch": 0.6713489409141583, "grad_norm": 1.8576209477875485, "learning_rate": 1.8094318184692e-05, "loss": 0.7955, "step": 9033 }, { "epoch": 0.6714232627276105, "grad_norm": 6.904523653676312, "learning_rate": 1.8093846996869273e-05, "loss": 0.5796, "step": 9034 }, { "epoch": 0.6714975845410628, "grad_norm": 1.8775647372865365, "learning_rate": 1.809337575693878e-05, "loss": 0.572, "step": 9035 }, { "epoch": 0.671571906354515, "grad_norm": 2.0784166282853622, "learning_rate": 1.809290446490355e-05, "loss": 1.077, "step": 9036 }, { "epoch": 0.6716462281679673, "grad_norm": 1.8241789980059466, "learning_rate": 1.809243312076662e-05, "loss": 0.9483, "step": 9037 }, { "epoch": 0.6717205499814195, "grad_norm": 1.7340754639994556, "learning_rate": 1.809196172453103e-05, "loss": 0.825, "step": 9038 }, { "epoch": 0.6717948717948717, "grad_norm": 1.8680443606514352, "learning_rate": 1.809149027619981e-05, "loss": 0.7165, "step": 9039 }, { "epoch": 0.6718691936083241, "grad_norm": 2.0869479382932954, "learning_rate": 1.809101877577599e-05, "loss": 0.9852, "step": 9040 }, { "epoch": 0.6719435154217763, "grad_norm": 1.694276072664692, "learning_rate": 1.809054722326262e-05, "loss": 0.8557, "step": 9041 }, { "epoch": 0.6720178372352286, "grad_norm": 2.088084393350037, "learning_rate": 1.809007561866272e-05, "loss": 1.0358, "step": 9042 }, { "epoch": 0.6720921590486808, "grad_norm": 2.411447416831863, "learning_rate": 1.8089603961979335e-05, "loss": 0.8276, "step": 9043 }, { "epoch": 0.672166480862133, "grad_norm": 1.603467330450911, "learning_rate": 1.80891322532155e-05, "loss": 0.5893, "step": 9044 }, { "epoch": 0.6722408026755853, "grad_norm": 1.9667101497716348, "learning_rate": 1.808866049237425e-05, "loss": 0.9424, "step": 9045 }, { "epoch": 0.6723151244890375, "grad_norm": 1.7459516102987984, "learning_rate": 1.8088188679458623e-05, "loss": 0.7881, "step": 9046 }, { "epoch": 0.6723894463024898, "grad_norm": 2.2299899381892994, "learning_rate": 1.808771681447166e-05, "loss": 0.9599, "step": 9047 }, { "epoch": 0.672463768115942, "grad_norm": 5.919782686011845, "learning_rate": 1.808724489741639e-05, "loss": 0.9525, "step": 9048 }, { "epoch": 0.6725380899293942, "grad_norm": 1.676415207161382, "learning_rate": 1.8086772928295863e-05, "loss": 0.822, "step": 9049 }, { "epoch": 0.6726124117428465, "grad_norm": 1.8440899969342917, "learning_rate": 1.808630090711311e-05, "loss": 1.0435, "step": 9050 }, { "epoch": 0.6726867335562988, "grad_norm": 2.0105745873103764, "learning_rate": 1.808582883387117e-05, "loss": 0.9297, "step": 9051 }, { "epoch": 0.6727610553697511, "grad_norm": 1.7844185962750163, "learning_rate": 1.8085356708573084e-05, "loss": 0.7513, "step": 9052 }, { "epoch": 0.6728353771832033, "grad_norm": 2.0794333150534623, "learning_rate": 1.808488453122189e-05, "loss": 0.7896, "step": 9053 }, { "epoch": 0.6729096989966555, "grad_norm": 2.2663606158718768, "learning_rate": 1.808441230182063e-05, "loss": 0.9048, "step": 9054 }, { "epoch": 0.6729840208101078, "grad_norm": 1.7961732450660648, "learning_rate": 1.8083940020372343e-05, "loss": 0.8274, "step": 9055 }, { "epoch": 0.67305834262356, "grad_norm": 2.0565066834665355, "learning_rate": 1.808346768688007e-05, "loss": 0.9281, "step": 9056 }, { "epoch": 0.6731326644370123, "grad_norm": 2.390157747921266, "learning_rate": 1.808299530134685e-05, "loss": 0.9476, "step": 9057 }, { "epoch": 0.6732069862504645, "grad_norm": 2.0422056049378514, "learning_rate": 1.8082522863775728e-05, "loss": 1.0231, "step": 9058 }, { "epoch": 0.6732813080639167, "grad_norm": 2.071909496437208, "learning_rate": 1.808205037416974e-05, "loss": 0.8545, "step": 9059 }, { "epoch": 0.673355629877369, "grad_norm": 1.6305477316203105, "learning_rate": 1.8081577832531933e-05, "loss": 1.0634, "step": 9060 }, { "epoch": 0.6734299516908212, "grad_norm": 1.8064428984393042, "learning_rate": 1.808110523886535e-05, "loss": 0.9766, "step": 9061 }, { "epoch": 0.6735042735042736, "grad_norm": 2.5009228497280978, "learning_rate": 1.8080632593173028e-05, "loss": 0.8926, "step": 9062 }, { "epoch": 0.6735785953177258, "grad_norm": 2.064162770016765, "learning_rate": 1.808015989545801e-05, "loss": 0.9301, "step": 9063 }, { "epoch": 0.673652917131178, "grad_norm": 1.8994010888549309, "learning_rate": 1.8079687145723345e-05, "loss": 0.94, "step": 9064 }, { "epoch": 0.6737272389446303, "grad_norm": 1.962521173422515, "learning_rate": 1.807921434397207e-05, "loss": 0.8581, "step": 9065 }, { "epoch": 0.6738015607580825, "grad_norm": 2.535224815995031, "learning_rate": 1.807874149020724e-05, "loss": 0.9451, "step": 9066 }, { "epoch": 0.6738758825715347, "grad_norm": 2.1772572909586994, "learning_rate": 1.8078268584431885e-05, "loss": 0.9515, "step": 9067 }, { "epoch": 0.673950204384987, "grad_norm": 1.7965946007739393, "learning_rate": 1.8077795626649057e-05, "loss": 0.6888, "step": 9068 }, { "epoch": 0.6740245261984392, "grad_norm": 1.7772635765602238, "learning_rate": 1.80773226168618e-05, "loss": 0.6418, "step": 9069 }, { "epoch": 0.6740988480118915, "grad_norm": 2.382523286191352, "learning_rate": 1.807684955507316e-05, "loss": 0.7697, "step": 9070 }, { "epoch": 0.6741731698253437, "grad_norm": 1.9176803998684742, "learning_rate": 1.8076376441286178e-05, "loss": 0.7312, "step": 9071 }, { "epoch": 0.6742474916387959, "grad_norm": 1.6953108216581156, "learning_rate": 1.8075903275503906e-05, "loss": 0.8248, "step": 9072 }, { "epoch": 0.6743218134522483, "grad_norm": 2.4998281787002754, "learning_rate": 1.807543005772939e-05, "loss": 0.9534, "step": 9073 }, { "epoch": 0.6743961352657005, "grad_norm": 2.053525432779572, "learning_rate": 1.8074956787965672e-05, "loss": 0.9503, "step": 9074 }, { "epoch": 0.6744704570791528, "grad_norm": 2.1511394407558946, "learning_rate": 1.8074483466215806e-05, "loss": 0.9179, "step": 9075 }, { "epoch": 0.674544778892605, "grad_norm": 2.1894288911447104, "learning_rate": 1.807401009248283e-05, "loss": 0.9192, "step": 9076 }, { "epoch": 0.6746191007060572, "grad_norm": 2.2412455657540007, "learning_rate": 1.8073536666769795e-05, "loss": 0.917, "step": 9077 }, { "epoch": 0.6746934225195095, "grad_norm": 1.6556090444228848, "learning_rate": 1.807306318907975e-05, "loss": 0.9295, "step": 9078 }, { "epoch": 0.6747677443329617, "grad_norm": 2.1890185052400906, "learning_rate": 1.8072589659415746e-05, "loss": 0.9356, "step": 9079 }, { "epoch": 0.674842066146414, "grad_norm": 2.9500489817210753, "learning_rate": 1.8072116077780823e-05, "loss": 0.9097, "step": 9080 }, { "epoch": 0.6749163879598662, "grad_norm": 1.968114574612396, "learning_rate": 1.807164244417804e-05, "loss": 0.9324, "step": 9081 }, { "epoch": 0.6749907097733184, "grad_norm": 1.9130944639324106, "learning_rate": 1.8071168758610443e-05, "loss": 0.686, "step": 9082 }, { "epoch": 0.6750650315867707, "grad_norm": 1.9851888633601158, "learning_rate": 1.8070695021081077e-05, "loss": 0.8309, "step": 9083 }, { "epoch": 0.675139353400223, "grad_norm": 2.794165029711552, "learning_rate": 1.8070221231592997e-05, "loss": 0.8865, "step": 9084 }, { "epoch": 0.6752136752136753, "grad_norm": 2.277613094095246, "learning_rate": 1.806974739014925e-05, "loss": 1.1472, "step": 9085 }, { "epoch": 0.6752879970271275, "grad_norm": 1.6810011284342607, "learning_rate": 1.806927349675289e-05, "loss": 0.9253, "step": 9086 }, { "epoch": 0.6753623188405797, "grad_norm": 2.0492148590217623, "learning_rate": 1.8068799551406962e-05, "loss": 0.6133, "step": 9087 }, { "epoch": 0.675436640654032, "grad_norm": 1.5602664245630646, "learning_rate": 1.8068325554114527e-05, "loss": 0.6207, "step": 9088 }, { "epoch": 0.6755109624674842, "grad_norm": 1.7288945986837525, "learning_rate": 1.806785150487863e-05, "loss": 0.7026, "step": 9089 }, { "epoch": 0.6755852842809364, "grad_norm": 1.7405427572336232, "learning_rate": 1.806737740370232e-05, "loss": 0.913, "step": 9090 }, { "epoch": 0.6756596060943887, "grad_norm": 1.6097937237834574, "learning_rate": 1.8066903250588656e-05, "loss": 0.7755, "step": 9091 }, { "epoch": 0.6757339279078409, "grad_norm": 1.7722771324162268, "learning_rate": 1.8066429045540682e-05, "loss": 0.7952, "step": 9092 }, { "epoch": 0.6758082497212932, "grad_norm": 2.8548138883761442, "learning_rate": 1.806595478856146e-05, "loss": 1.1672, "step": 9093 }, { "epoch": 0.6758825715347454, "grad_norm": 1.726289085141491, "learning_rate": 1.806548047965404e-05, "loss": 0.8065, "step": 9094 }, { "epoch": 0.6759568933481978, "grad_norm": 1.8726572659640757, "learning_rate": 1.8065006118821474e-05, "loss": 0.7055, "step": 9095 }, { "epoch": 0.67603121516165, "grad_norm": 2.0065217070785146, "learning_rate": 1.806453170606682e-05, "loss": 0.8743, "step": 9096 }, { "epoch": 0.6761055369751022, "grad_norm": 1.8267010396553196, "learning_rate": 1.8064057241393126e-05, "loss": 0.7227, "step": 9097 }, { "epoch": 0.6761798587885545, "grad_norm": 1.7176732123588, "learning_rate": 1.806358272480345e-05, "loss": 0.7645, "step": 9098 }, { "epoch": 0.6762541806020067, "grad_norm": 1.927106805406337, "learning_rate": 1.8063108156300847e-05, "loss": 0.7259, "step": 9099 }, { "epoch": 0.6763285024154589, "grad_norm": 2.0340279247363586, "learning_rate": 1.8062633535888373e-05, "loss": 0.8706, "step": 9100 }, { "epoch": 0.6764028242289112, "grad_norm": 2.152001992894587, "learning_rate": 1.8062158863569083e-05, "loss": 1.0665, "step": 9101 }, { "epoch": 0.6764771460423634, "grad_norm": 1.5625583848843745, "learning_rate": 1.8061684139346034e-05, "loss": 0.5701, "step": 9102 }, { "epoch": 0.6765514678558157, "grad_norm": 1.856663167759302, "learning_rate": 1.8061209363222277e-05, "loss": 0.8508, "step": 9103 }, { "epoch": 0.6766257896692679, "grad_norm": 6.005553359810355, "learning_rate": 1.8060734535200876e-05, "loss": 0.7836, "step": 9104 }, { "epoch": 0.6767001114827201, "grad_norm": 1.7959092229545186, "learning_rate": 1.8060259655284883e-05, "loss": 0.6844, "step": 9105 }, { "epoch": 0.6767744332961724, "grad_norm": 1.986300231360821, "learning_rate": 1.8059784723477355e-05, "loss": 0.8263, "step": 9106 }, { "epoch": 0.6768487551096247, "grad_norm": 1.8664582930048998, "learning_rate": 1.8059309739781354e-05, "loss": 0.7696, "step": 9107 }, { "epoch": 0.676923076923077, "grad_norm": 1.8578031632939622, "learning_rate": 1.8058834704199933e-05, "loss": 0.7356, "step": 9108 }, { "epoch": 0.6769973987365292, "grad_norm": 2.0011146640836706, "learning_rate": 1.8058359616736152e-05, "loss": 1.0232, "step": 9109 }, { "epoch": 0.6770717205499814, "grad_norm": 2.048068034311145, "learning_rate": 1.8057884477393073e-05, "loss": 1.0546, "step": 9110 }, { "epoch": 0.6771460423634337, "grad_norm": 3.0666356933124748, "learning_rate": 1.805740928617375e-05, "loss": 0.9222, "step": 9111 }, { "epoch": 0.6772203641768859, "grad_norm": 1.8394910415427275, "learning_rate": 1.8056934043081244e-05, "loss": 0.8295, "step": 9112 }, { "epoch": 0.6772946859903382, "grad_norm": 1.753952653570408, "learning_rate": 1.805645874811861e-05, "loss": 0.5384, "step": 9113 }, { "epoch": 0.6773690078037904, "grad_norm": 2.024156304847898, "learning_rate": 1.8055983401288918e-05, "loss": 0.7356, "step": 9114 }, { "epoch": 0.6774433296172426, "grad_norm": 1.9231451042634673, "learning_rate": 1.805550800259522e-05, "loss": 0.963, "step": 9115 }, { "epoch": 0.6775176514306949, "grad_norm": 2.1858820369033816, "learning_rate": 1.8055032552040584e-05, "loss": 1.0621, "step": 9116 }, { "epoch": 0.6775919732441471, "grad_norm": 1.805561130734197, "learning_rate": 1.8054557049628064e-05, "loss": 0.9168, "step": 9117 }, { "epoch": 0.6776662950575995, "grad_norm": 1.7705991664140737, "learning_rate": 1.8054081495360723e-05, "loss": 0.8701, "step": 9118 }, { "epoch": 0.6777406168710517, "grad_norm": 2.136607890750666, "learning_rate": 1.8053605889241622e-05, "loss": 0.7548, "step": 9119 }, { "epoch": 0.6778149386845039, "grad_norm": 1.75867186749947, "learning_rate": 1.8053130231273825e-05, "loss": 0.922, "step": 9120 }, { "epoch": 0.6778892604979562, "grad_norm": 2.1623212452236897, "learning_rate": 1.8052654521460396e-05, "loss": 0.8369, "step": 9121 }, { "epoch": 0.6779635823114084, "grad_norm": 2.28144989811957, "learning_rate": 1.8052178759804393e-05, "loss": 1.1132, "step": 9122 }, { "epoch": 0.6780379041248606, "grad_norm": 1.9738335866819057, "learning_rate": 1.805170294630888e-05, "loss": 0.7999, "step": 9123 }, { "epoch": 0.6781122259383129, "grad_norm": 1.7946472227235883, "learning_rate": 1.805122708097692e-05, "loss": 0.7897, "step": 9124 }, { "epoch": 0.6781865477517651, "grad_norm": 2.104505335652473, "learning_rate": 1.805075116381158e-05, "loss": 0.7723, "step": 9125 }, { "epoch": 0.6782608695652174, "grad_norm": 1.7431449435358655, "learning_rate": 1.805027519481592e-05, "loss": 0.8485, "step": 9126 }, { "epoch": 0.6783351913786696, "grad_norm": 2.1493933912221603, "learning_rate": 1.804979917399301e-05, "loss": 0.9391, "step": 9127 }, { "epoch": 0.6784095131921218, "grad_norm": 2.705816685700753, "learning_rate": 1.804932310134591e-05, "loss": 0.9287, "step": 9128 }, { "epoch": 0.6784838350055742, "grad_norm": 2.1215176717469117, "learning_rate": 1.8048846976877682e-05, "loss": 0.9247, "step": 9129 }, { "epoch": 0.6785581568190264, "grad_norm": 2.0283693097106084, "learning_rate": 1.8048370800591397e-05, "loss": 0.8126, "step": 9130 }, { "epoch": 0.6786324786324787, "grad_norm": 1.8128209448852561, "learning_rate": 1.8047894572490115e-05, "loss": 0.7858, "step": 9131 }, { "epoch": 0.6787068004459309, "grad_norm": 2.097936824320519, "learning_rate": 1.804741829257691e-05, "loss": 1.0754, "step": 9132 }, { "epoch": 0.6787811222593831, "grad_norm": 2.7783993450053144, "learning_rate": 1.8046941960854843e-05, "loss": 0.9589, "step": 9133 }, { "epoch": 0.6788554440728354, "grad_norm": 2.1435040910153083, "learning_rate": 1.804646557732698e-05, "loss": 0.8119, "step": 9134 }, { "epoch": 0.6789297658862876, "grad_norm": 1.9344077151154186, "learning_rate": 1.804598914199639e-05, "loss": 0.8216, "step": 9135 }, { "epoch": 0.6790040876997399, "grad_norm": 2.5621082522074783, "learning_rate": 1.8045512654866137e-05, "loss": 1.1765, "step": 9136 }, { "epoch": 0.6790784095131921, "grad_norm": 2.2255622855876918, "learning_rate": 1.8045036115939294e-05, "loss": 0.7558, "step": 9137 }, { "epoch": 0.6791527313266443, "grad_norm": 1.9263974211236778, "learning_rate": 1.8044559525218925e-05, "loss": 0.8885, "step": 9138 }, { "epoch": 0.6792270531400966, "grad_norm": 2.502424845212328, "learning_rate": 1.8044082882708098e-05, "loss": 0.8393, "step": 9139 }, { "epoch": 0.6793013749535489, "grad_norm": 2.112496632311593, "learning_rate": 1.8043606188409883e-05, "loss": 0.9791, "step": 9140 }, { "epoch": 0.6793756967670012, "grad_norm": 1.5919598437223994, "learning_rate": 1.804312944232735e-05, "loss": 0.7675, "step": 9141 }, { "epoch": 0.6794500185804534, "grad_norm": 2.314898976633291, "learning_rate": 1.8042652644463566e-05, "loss": 0.8035, "step": 9142 }, { "epoch": 0.6795243403939056, "grad_norm": 1.5001914612081002, "learning_rate": 1.8042175794821602e-05, "loss": 0.689, "step": 9143 }, { "epoch": 0.6795986622073579, "grad_norm": 2.7643007810409976, "learning_rate": 1.8041698893404523e-05, "loss": 0.9819, "step": 9144 }, { "epoch": 0.6796729840208101, "grad_norm": 1.5495975734632805, "learning_rate": 1.804122194021541e-05, "loss": 0.6474, "step": 9145 }, { "epoch": 0.6797473058342623, "grad_norm": 1.8808799139020196, "learning_rate": 1.8040744935257326e-05, "loss": 0.9202, "step": 9146 }, { "epoch": 0.6798216276477146, "grad_norm": 2.3483751102060904, "learning_rate": 1.804026787853334e-05, "loss": 0.8421, "step": 9147 }, { "epoch": 0.6798959494611668, "grad_norm": 1.8807890321334602, "learning_rate": 1.803979077004653e-05, "loss": 0.8535, "step": 9148 }, { "epoch": 0.6799702712746191, "grad_norm": 2.037613644498572, "learning_rate": 1.8039313609799965e-05, "loss": 0.7802, "step": 9149 }, { "epoch": 0.6800445930880713, "grad_norm": 2.0003270931379875, "learning_rate": 1.8038836397796714e-05, "loss": 0.941, "step": 9150 }, { "epoch": 0.6801189149015237, "grad_norm": 2.3541696267684857, "learning_rate": 1.8038359134039855e-05, "loss": 0.9541, "step": 9151 }, { "epoch": 0.6801932367149759, "grad_norm": 2.1702317885216282, "learning_rate": 1.8037881818532454e-05, "loss": 0.6999, "step": 9152 }, { "epoch": 0.6802675585284281, "grad_norm": 1.6074730318724415, "learning_rate": 1.8037404451277584e-05, "loss": 0.9123, "step": 9153 }, { "epoch": 0.6803418803418804, "grad_norm": 1.6623309955898509, "learning_rate": 1.8036927032278326e-05, "loss": 0.6316, "step": 9154 }, { "epoch": 0.6804162021553326, "grad_norm": 1.8030817535833363, "learning_rate": 1.8036449561537744e-05, "loss": 0.8762, "step": 9155 }, { "epoch": 0.6804905239687848, "grad_norm": 2.961315489647223, "learning_rate": 1.8035972039058924e-05, "loss": 0.8844, "step": 9156 }, { "epoch": 0.6805648457822371, "grad_norm": 1.829320093896401, "learning_rate": 1.8035494464844926e-05, "loss": 0.7657, "step": 9157 }, { "epoch": 0.6806391675956893, "grad_norm": 2.766912189180879, "learning_rate": 1.8035016838898832e-05, "loss": 0.7615, "step": 9158 }, { "epoch": 0.6807134894091416, "grad_norm": 2.1408350999424095, "learning_rate": 1.803453916122372e-05, "loss": 0.6623, "step": 9159 }, { "epoch": 0.6807878112225938, "grad_norm": 2.425079118497227, "learning_rate": 1.803406143182266e-05, "loss": 1.1331, "step": 9160 }, { "epoch": 0.680862133036046, "grad_norm": 1.8419845837136002, "learning_rate": 1.803358365069873e-05, "loss": 0.8133, "step": 9161 }, { "epoch": 0.6809364548494983, "grad_norm": 2.741086781372321, "learning_rate": 1.8033105817855006e-05, "loss": 1.0325, "step": 9162 }, { "epoch": 0.6810107766629506, "grad_norm": 1.921688956234246, "learning_rate": 1.803262793329456e-05, "loss": 0.8528, "step": 9163 }, { "epoch": 0.6810850984764029, "grad_norm": 1.3992408522055735, "learning_rate": 1.8032149997020477e-05, "loss": 0.6784, "step": 9164 }, { "epoch": 0.6811594202898551, "grad_norm": 1.859000449118987, "learning_rate": 1.8031672009035824e-05, "loss": 0.9044, "step": 9165 }, { "epoch": 0.6812337421033073, "grad_norm": 1.9478465518224846, "learning_rate": 1.8031193969343687e-05, "loss": 1.0813, "step": 9166 }, { "epoch": 0.6813080639167596, "grad_norm": 2.853736470100561, "learning_rate": 1.803071587794714e-05, "loss": 0.9083, "step": 9167 }, { "epoch": 0.6813823857302118, "grad_norm": 2.0699400191632598, "learning_rate": 1.8030237734849258e-05, "loss": 0.8958, "step": 9168 }, { "epoch": 0.681456707543664, "grad_norm": 2.119206098398965, "learning_rate": 1.8029759540053126e-05, "loss": 1.0989, "step": 9169 }, { "epoch": 0.6815310293571163, "grad_norm": 1.898680700310179, "learning_rate": 1.8029281293561814e-05, "loss": 0.7157, "step": 9170 }, { "epoch": 0.6816053511705685, "grad_norm": 2.4159435533027342, "learning_rate": 1.802880299537841e-05, "loss": 1.0806, "step": 9171 }, { "epoch": 0.6816796729840208, "grad_norm": 1.8269695221867992, "learning_rate": 1.8028324645505985e-05, "loss": 0.8893, "step": 9172 }, { "epoch": 0.681753994797473, "grad_norm": 1.6847897220633437, "learning_rate": 1.8027846243947626e-05, "loss": 0.8208, "step": 9173 }, { "epoch": 0.6818283166109254, "grad_norm": 2.9902079038965788, "learning_rate": 1.802736779070641e-05, "loss": 0.8354, "step": 9174 }, { "epoch": 0.6819026384243776, "grad_norm": 2.1475825693250115, "learning_rate": 1.8026889285785415e-05, "loss": 0.8985, "step": 9175 }, { "epoch": 0.6819769602378298, "grad_norm": 1.9863487686295087, "learning_rate": 1.802641072918772e-05, "loss": 0.8809, "step": 9176 }, { "epoch": 0.6820512820512821, "grad_norm": 2.0767515679897945, "learning_rate": 1.8025932120916414e-05, "loss": 0.6795, "step": 9177 }, { "epoch": 0.6821256038647343, "grad_norm": 1.7098938074760988, "learning_rate": 1.802545346097457e-05, "loss": 0.8743, "step": 9178 }, { "epoch": 0.6821999256781865, "grad_norm": 1.8550608028778035, "learning_rate": 1.8024974749365274e-05, "loss": 0.9594, "step": 9179 }, { "epoch": 0.6822742474916388, "grad_norm": 2.2768326182084198, "learning_rate": 1.802449598609161e-05, "loss": 0.9911, "step": 9180 }, { "epoch": 0.682348569305091, "grad_norm": 2.54176889235484, "learning_rate": 1.8024017171156653e-05, "loss": 0.969, "step": 9181 }, { "epoch": 0.6824228911185433, "grad_norm": 1.9836939534153024, "learning_rate": 1.802353830456349e-05, "loss": 0.7931, "step": 9182 }, { "epoch": 0.6824972129319955, "grad_norm": 1.985501417016542, "learning_rate": 1.8023059386315208e-05, "loss": 0.8439, "step": 9183 }, { "epoch": 0.6825715347454477, "grad_norm": 1.890551217281884, "learning_rate": 1.8022580416414883e-05, "loss": 1.0858, "step": 9184 }, { "epoch": 0.6826458565589001, "grad_norm": 1.4880036272453088, "learning_rate": 1.8022101394865602e-05, "loss": 0.698, "step": 9185 }, { "epoch": 0.6827201783723523, "grad_norm": 2.132449963648321, "learning_rate": 1.802162232167045e-05, "loss": 1.0668, "step": 9186 }, { "epoch": 0.6827945001858046, "grad_norm": 1.874572628632485, "learning_rate": 1.8021143196832507e-05, "loss": 0.8004, "step": 9187 }, { "epoch": 0.6828688219992568, "grad_norm": 2.49252857020245, "learning_rate": 1.8020664020354862e-05, "loss": 0.9284, "step": 9188 }, { "epoch": 0.682943143812709, "grad_norm": 2.247142852427575, "learning_rate": 1.8020184792240595e-05, "loss": 1.0967, "step": 9189 }, { "epoch": 0.6830174656261613, "grad_norm": 1.4630245476207882, "learning_rate": 1.80197055124928e-05, "loss": 0.6032, "step": 9190 }, { "epoch": 0.6830917874396135, "grad_norm": 9.697221202645755, "learning_rate": 1.8019226181114553e-05, "loss": 0.9266, "step": 9191 }, { "epoch": 0.6831661092530658, "grad_norm": 1.8219455273183158, "learning_rate": 1.8018746798108944e-05, "loss": 0.7711, "step": 9192 }, { "epoch": 0.683240431066518, "grad_norm": 2.748548067392995, "learning_rate": 1.801826736347906e-05, "loss": 0.5883, "step": 9193 }, { "epoch": 0.6833147528799702, "grad_norm": 1.9854970936748773, "learning_rate": 1.8017787877227988e-05, "loss": 0.8667, "step": 9194 }, { "epoch": 0.6833890746934225, "grad_norm": 2.078266134163177, "learning_rate": 1.8017308339358813e-05, "loss": 0.7447, "step": 9195 }, { "epoch": 0.6834633965068748, "grad_norm": 2.417113117233272, "learning_rate": 1.8016828749874623e-05, "loss": 0.8201, "step": 9196 }, { "epoch": 0.6835377183203271, "grad_norm": 1.958963282268587, "learning_rate": 1.8016349108778505e-05, "loss": 0.8792, "step": 9197 }, { "epoch": 0.6836120401337793, "grad_norm": 2.0856425516892187, "learning_rate": 1.8015869416073544e-05, "loss": 0.9981, "step": 9198 }, { "epoch": 0.6836863619472315, "grad_norm": 2.21934150102556, "learning_rate": 1.8015389671762833e-05, "loss": 0.698, "step": 9199 }, { "epoch": 0.6837606837606838, "grad_norm": 1.855634714165771, "learning_rate": 1.8014909875849466e-05, "loss": 0.5678, "step": 9200 }, { "epoch": 0.683835005574136, "grad_norm": 1.7731283992095181, "learning_rate": 1.801443002833652e-05, "loss": 0.8567, "step": 9201 }, { "epoch": 0.6839093273875883, "grad_norm": 1.9750347141623414, "learning_rate": 1.8013950129227087e-05, "loss": 1.2173, "step": 9202 }, { "epoch": 0.6839836492010405, "grad_norm": 2.0930026656518153, "learning_rate": 1.8013470178524263e-05, "loss": 0.9849, "step": 9203 }, { "epoch": 0.6840579710144927, "grad_norm": 1.9698552336660027, "learning_rate": 1.801299017623113e-05, "loss": 0.9603, "step": 9204 }, { "epoch": 0.684132292827945, "grad_norm": 2.038982095637839, "learning_rate": 1.8012510122350787e-05, "loss": 0.9291, "step": 9205 }, { "epoch": 0.6842066146413972, "grad_norm": 1.791415795690347, "learning_rate": 1.801203001688632e-05, "loss": 0.784, "step": 9206 }, { "epoch": 0.6842809364548496, "grad_norm": 2.1627540431362555, "learning_rate": 1.801154985984081e-05, "loss": 0.6718, "step": 9207 }, { "epoch": 0.6843552582683018, "grad_norm": 1.7805996198174794, "learning_rate": 1.8011069651217364e-05, "loss": 0.7347, "step": 9208 }, { "epoch": 0.684429580081754, "grad_norm": 2.0804368167597853, "learning_rate": 1.801058939101907e-05, "loss": 0.9161, "step": 9209 }, { "epoch": 0.6845039018952063, "grad_norm": 2.1465079614376834, "learning_rate": 1.8010109079249013e-05, "loss": 0.7823, "step": 9210 }, { "epoch": 0.6845782237086585, "grad_norm": 1.9380872399943172, "learning_rate": 1.800962871591029e-05, "loss": 1.0141, "step": 9211 }, { "epoch": 0.6846525455221107, "grad_norm": 1.9821596575582483, "learning_rate": 1.8009148301005993e-05, "loss": 0.8727, "step": 9212 }, { "epoch": 0.684726867335563, "grad_norm": 2.223010554050462, "learning_rate": 1.8008667834539214e-05, "loss": 0.9649, "step": 9213 }, { "epoch": 0.6848011891490152, "grad_norm": 1.6473565103408612, "learning_rate": 1.800818731651305e-05, "loss": 0.742, "step": 9214 }, { "epoch": 0.6848755109624675, "grad_norm": 2.038591438616099, "learning_rate": 1.800770674693059e-05, "loss": 0.9968, "step": 9215 }, { "epoch": 0.6849498327759197, "grad_norm": 1.5659818518420017, "learning_rate": 1.800722612579493e-05, "loss": 0.5974, "step": 9216 }, { "epoch": 0.6850241545893719, "grad_norm": 1.9324792192176488, "learning_rate": 1.8006745453109163e-05, "loss": 0.6723, "step": 9217 }, { "epoch": 0.6850984764028242, "grad_norm": 2.4083515999546683, "learning_rate": 1.8006264728876383e-05, "loss": 0.9563, "step": 9218 }, { "epoch": 0.6851727982162765, "grad_norm": 1.6876816119659173, "learning_rate": 1.800578395309969e-05, "loss": 0.8838, "step": 9219 }, { "epoch": 0.6852471200297288, "grad_norm": 1.85710483822103, "learning_rate": 1.8005303125782174e-05, "loss": 0.7393, "step": 9220 }, { "epoch": 0.685321441843181, "grad_norm": 2.111566866692304, "learning_rate": 1.8004822246926927e-05, "loss": 0.7936, "step": 9221 }, { "epoch": 0.6853957636566332, "grad_norm": 2.402006742025295, "learning_rate": 1.8004341316537055e-05, "loss": 0.8574, "step": 9222 }, { "epoch": 0.6854700854700855, "grad_norm": 2.088428294596265, "learning_rate": 1.8003860334615646e-05, "loss": 0.8225, "step": 9223 }, { "epoch": 0.6855444072835377, "grad_norm": 2.3367268002647257, "learning_rate": 1.80033793011658e-05, "loss": 0.8423, "step": 9224 }, { "epoch": 0.68561872909699, "grad_norm": 1.7580727624292463, "learning_rate": 1.8002898216190616e-05, "loss": 0.7855, "step": 9225 }, { "epoch": 0.6856930509104422, "grad_norm": 2.0511116624690837, "learning_rate": 1.8002417079693185e-05, "loss": 1.075, "step": 9226 }, { "epoch": 0.6857673727238944, "grad_norm": 1.995500606750928, "learning_rate": 1.800193589167661e-05, "loss": 1.0109, "step": 9227 }, { "epoch": 0.6858416945373467, "grad_norm": 1.6118891191315823, "learning_rate": 1.800145465214399e-05, "loss": 0.7152, "step": 9228 }, { "epoch": 0.6859160163507989, "grad_norm": 1.9381306724973344, "learning_rate": 1.800097336109841e-05, "loss": 0.8463, "step": 9229 }, { "epoch": 0.6859903381642513, "grad_norm": 1.7728195813767824, "learning_rate": 1.8000492018542987e-05, "loss": 0.7878, "step": 9230 }, { "epoch": 0.6860646599777035, "grad_norm": 3.741239638744001, "learning_rate": 1.800001062448081e-05, "loss": 0.6664, "step": 9231 }, { "epoch": 0.6861389817911557, "grad_norm": 1.6355748241400174, "learning_rate": 1.799952917891498e-05, "loss": 0.8025, "step": 9232 }, { "epoch": 0.686213303604608, "grad_norm": 2.2114870958810653, "learning_rate": 1.7999047681848597e-05, "loss": 0.7823, "step": 9233 }, { "epoch": 0.6862876254180602, "grad_norm": 2.02077092646599, "learning_rate": 1.799856613328476e-05, "loss": 0.8417, "step": 9234 }, { "epoch": 0.6863619472315124, "grad_norm": 1.970613933322407, "learning_rate": 1.7998084533226565e-05, "loss": 0.962, "step": 9235 }, { "epoch": 0.6864362690449647, "grad_norm": 2.6263477091289547, "learning_rate": 1.799760288167712e-05, "loss": 0.9883, "step": 9236 }, { "epoch": 0.6865105908584169, "grad_norm": 1.7433950117501582, "learning_rate": 1.799712117863952e-05, "loss": 0.9108, "step": 9237 }, { "epoch": 0.6865849126718692, "grad_norm": 1.7061337462664952, "learning_rate": 1.7996639424116874e-05, "loss": 0.7682, "step": 9238 }, { "epoch": 0.6866592344853214, "grad_norm": 1.7806479112441893, "learning_rate": 1.7996157618112276e-05, "loss": 0.8746, "step": 9239 }, { "epoch": 0.6867335562987736, "grad_norm": 1.7897104272030588, "learning_rate": 1.799567576062883e-05, "loss": 0.8161, "step": 9240 }, { "epoch": 0.686807878112226, "grad_norm": 2.089027447520013, "learning_rate": 1.7995193851669637e-05, "loss": 0.7781, "step": 9241 }, { "epoch": 0.6868821999256782, "grad_norm": 2.049506052100381, "learning_rate": 1.79947118912378e-05, "loss": 0.7564, "step": 9242 }, { "epoch": 0.6869565217391305, "grad_norm": 1.9742682596174659, "learning_rate": 1.7994229879336423e-05, "loss": 0.8725, "step": 9243 }, { "epoch": 0.6870308435525827, "grad_norm": 1.6495360214782016, "learning_rate": 1.7993747815968616e-05, "loss": 0.84, "step": 9244 }, { "epoch": 0.6871051653660349, "grad_norm": 1.6132617013046504, "learning_rate": 1.799326570113747e-05, "loss": 0.8197, "step": 9245 }, { "epoch": 0.6871794871794872, "grad_norm": 2.1513940377225316, "learning_rate": 1.799278353484609e-05, "loss": 0.8954, "step": 9246 }, { "epoch": 0.6872538089929394, "grad_norm": 1.5366428106281398, "learning_rate": 1.799230131709759e-05, "loss": 0.7191, "step": 9247 }, { "epoch": 0.6873281308063917, "grad_norm": 1.4691906297357027, "learning_rate": 1.7991819047895066e-05, "loss": 0.5172, "step": 9248 }, { "epoch": 0.6874024526198439, "grad_norm": 1.745285046582533, "learning_rate": 1.7991336727241625e-05, "loss": 0.6908, "step": 9249 }, { "epoch": 0.6874767744332961, "grad_norm": 2.1034935328700373, "learning_rate": 1.7990854355140374e-05, "loss": 0.7243, "step": 9250 }, { "epoch": 0.6875510962467484, "grad_norm": 3.2742125748625854, "learning_rate": 1.799037193159442e-05, "loss": 0.7399, "step": 9251 }, { "epoch": 0.6876254180602007, "grad_norm": 3.985115138565142, "learning_rate": 1.798988945660686e-05, "loss": 1.0508, "step": 9252 }, { "epoch": 0.687699739873653, "grad_norm": 2.165844876069561, "learning_rate": 1.7989406930180812e-05, "loss": 0.8323, "step": 9253 }, { "epoch": 0.6877740616871052, "grad_norm": 1.7195316842757882, "learning_rate": 1.7988924352319376e-05, "loss": 0.934, "step": 9254 }, { "epoch": 0.6878483835005574, "grad_norm": 1.6930887503346341, "learning_rate": 1.7988441723025654e-05, "loss": 0.7791, "step": 9255 }, { "epoch": 0.6879227053140097, "grad_norm": 1.55370537641431, "learning_rate": 1.7987959042302766e-05, "loss": 0.5459, "step": 9256 }, { "epoch": 0.6879970271274619, "grad_norm": 1.6092322483227046, "learning_rate": 1.798747631015381e-05, "loss": 0.7228, "step": 9257 }, { "epoch": 0.6880713489409142, "grad_norm": 1.7754910536675905, "learning_rate": 1.7986993526581894e-05, "loss": 0.7718, "step": 9258 }, { "epoch": 0.6881456707543664, "grad_norm": 1.7816587579252283, "learning_rate": 1.7986510691590128e-05, "loss": 0.8079, "step": 9259 }, { "epoch": 0.6882199925678186, "grad_norm": 1.7380671956031113, "learning_rate": 1.798602780518162e-05, "loss": 0.9025, "step": 9260 }, { "epoch": 0.6882943143812709, "grad_norm": 1.7214296051922864, "learning_rate": 1.7985544867359484e-05, "loss": 0.6973, "step": 9261 }, { "epoch": 0.6883686361947231, "grad_norm": 2.2012143014434473, "learning_rate": 1.798506187812682e-05, "loss": 0.9689, "step": 9262 }, { "epoch": 0.6884429580081755, "grad_norm": 2.3996151010370217, "learning_rate": 1.7984578837486744e-05, "loss": 0.9406, "step": 9263 }, { "epoch": 0.6885172798216277, "grad_norm": 1.939708072522746, "learning_rate": 1.7984095745442363e-05, "loss": 0.8428, "step": 9264 }, { "epoch": 0.6885916016350799, "grad_norm": 2.1285746130252416, "learning_rate": 1.7983612601996786e-05, "loss": 0.7191, "step": 9265 }, { "epoch": 0.6886659234485322, "grad_norm": 1.8924170899363952, "learning_rate": 1.7983129407153132e-05, "loss": 0.9191, "step": 9266 }, { "epoch": 0.6887402452619844, "grad_norm": 2.168085051693548, "learning_rate": 1.7982646160914497e-05, "loss": 0.9731, "step": 9267 }, { "epoch": 0.6888145670754366, "grad_norm": 3.410561187761416, "learning_rate": 1.7982162863284004e-05, "loss": 0.9164, "step": 9268 }, { "epoch": 0.6888888888888889, "grad_norm": 1.8424785451642847, "learning_rate": 1.798167951426476e-05, "loss": 0.8185, "step": 9269 }, { "epoch": 0.6889632107023411, "grad_norm": 1.9026889837606833, "learning_rate": 1.798119611385988e-05, "loss": 0.9741, "step": 9270 }, { "epoch": 0.6890375325157934, "grad_norm": 1.8706257725072402, "learning_rate": 1.798071266207247e-05, "loss": 0.9204, "step": 9271 }, { "epoch": 0.6891118543292456, "grad_norm": 2.5456873805831246, "learning_rate": 1.7980229158905645e-05, "loss": 0.7791, "step": 9272 }, { "epoch": 0.6891861761426978, "grad_norm": 1.516507561442419, "learning_rate": 1.797974560436252e-05, "loss": 0.6914, "step": 9273 }, { "epoch": 0.6892604979561501, "grad_norm": 1.6727708470412583, "learning_rate": 1.7979261998446208e-05, "loss": 0.6335, "step": 9274 }, { "epoch": 0.6893348197696024, "grad_norm": 1.980740735946267, "learning_rate": 1.7978778341159824e-05, "loss": 0.9089, "step": 9275 }, { "epoch": 0.6894091415830547, "grad_norm": 2.376889469387935, "learning_rate": 1.7978294632506475e-05, "loss": 0.9594, "step": 9276 }, { "epoch": 0.6894834633965069, "grad_norm": 1.9570963109362915, "learning_rate": 1.797781087248928e-05, "loss": 0.8889, "step": 9277 }, { "epoch": 0.6895577852099591, "grad_norm": 2.2069214263745853, "learning_rate": 1.7977327061111352e-05, "loss": 0.7703, "step": 9278 }, { "epoch": 0.6896321070234114, "grad_norm": 2.307001623110775, "learning_rate": 1.797684319837581e-05, "loss": 0.7461, "step": 9279 }, { "epoch": 0.6897064288368636, "grad_norm": 1.6310131124296996, "learning_rate": 1.797635928428576e-05, "loss": 0.8135, "step": 9280 }, { "epoch": 0.6897807506503159, "grad_norm": 1.7221698244344994, "learning_rate": 1.7975875318844323e-05, "loss": 0.6448, "step": 9281 }, { "epoch": 0.6898550724637681, "grad_norm": 1.979459201645161, "learning_rate": 1.7975391302054618e-05, "loss": 0.9416, "step": 9282 }, { "epoch": 0.6899293942772203, "grad_norm": 2.1620952216811555, "learning_rate": 1.7974907233919755e-05, "loss": 0.7694, "step": 9283 }, { "epoch": 0.6900037160906726, "grad_norm": 2.1473934577640477, "learning_rate": 1.7974423114442857e-05, "loss": 0.8729, "step": 9284 }, { "epoch": 0.6900780379041248, "grad_norm": 2.009380536583336, "learning_rate": 1.7973938943627034e-05, "loss": 0.5046, "step": 9285 }, { "epoch": 0.6901523597175772, "grad_norm": 2.127038685219265, "learning_rate": 1.7973454721475404e-05, "loss": 1.0476, "step": 9286 }, { "epoch": 0.6902266815310294, "grad_norm": 1.8739599865452536, "learning_rate": 1.797297044799109e-05, "loss": 0.8106, "step": 9287 }, { "epoch": 0.6903010033444816, "grad_norm": 1.952565244828909, "learning_rate": 1.7972486123177204e-05, "loss": 0.9152, "step": 9288 }, { "epoch": 0.6903753251579339, "grad_norm": 2.3834593440401766, "learning_rate": 1.7972001747036865e-05, "loss": 0.7719, "step": 9289 }, { "epoch": 0.6904496469713861, "grad_norm": 2.050307595939344, "learning_rate": 1.7971517319573194e-05, "loss": 0.9101, "step": 9290 }, { "epoch": 0.6905239687848383, "grad_norm": 2.490038993931107, "learning_rate": 1.7971032840789307e-05, "loss": 0.9473, "step": 9291 }, { "epoch": 0.6905982905982906, "grad_norm": 2.03648120162406, "learning_rate": 1.7970548310688325e-05, "loss": 1.0131, "step": 9292 }, { "epoch": 0.6906726124117428, "grad_norm": 2.6597298579791313, "learning_rate": 1.7970063729273366e-05, "loss": 0.7313, "step": 9293 }, { "epoch": 0.6907469342251951, "grad_norm": 2.128571659159896, "learning_rate": 1.796957909654755e-05, "loss": 1.0232, "step": 9294 }, { "epoch": 0.6908212560386473, "grad_norm": 2.5679824833401845, "learning_rate": 1.7969094412513997e-05, "loss": 0.8326, "step": 9295 }, { "epoch": 0.6908955778520995, "grad_norm": 2.457297922228839, "learning_rate": 1.7968609677175832e-05, "loss": 1.0219, "step": 9296 }, { "epoch": 0.6909698996655519, "grad_norm": 1.8280791381836863, "learning_rate": 1.7968124890536164e-05, "loss": 0.9041, "step": 9297 }, { "epoch": 0.6910442214790041, "grad_norm": 2.4797248267011787, "learning_rate": 1.7967640052598127e-05, "loss": 0.9878, "step": 9298 }, { "epoch": 0.6911185432924564, "grad_norm": 2.1657632927760995, "learning_rate": 1.7967155163364836e-05, "loss": 0.7442, "step": 9299 }, { "epoch": 0.6911928651059086, "grad_norm": 2.202790507767787, "learning_rate": 1.7966670222839413e-05, "loss": 0.758, "step": 9300 }, { "epoch": 0.6912671869193608, "grad_norm": 1.909150922717365, "learning_rate": 1.796618523102498e-05, "loss": 0.7533, "step": 9301 }, { "epoch": 0.6913415087328131, "grad_norm": 2.5682080833408194, "learning_rate": 1.796570018792466e-05, "loss": 0.7905, "step": 9302 }, { "epoch": 0.6914158305462653, "grad_norm": 1.5517664374025513, "learning_rate": 1.7965215093541575e-05, "loss": 0.6336, "step": 9303 }, { "epoch": 0.6914901523597176, "grad_norm": 2.6506606623529674, "learning_rate": 1.7964729947878847e-05, "loss": 0.6493, "step": 9304 }, { "epoch": 0.6915644741731698, "grad_norm": 1.7304750969538556, "learning_rate": 1.7964244750939604e-05, "loss": 0.7429, "step": 9305 }, { "epoch": 0.691638795986622, "grad_norm": 2.6789962575926722, "learning_rate": 1.7963759502726967e-05, "loss": 1.0383, "step": 9306 }, { "epoch": 0.6917131178000743, "grad_norm": 1.661940820493673, "learning_rate": 1.7963274203244055e-05, "loss": 0.9172, "step": 9307 }, { "epoch": 0.6917874396135266, "grad_norm": 1.9184495180411427, "learning_rate": 1.7962788852494e-05, "loss": 0.9777, "step": 9308 }, { "epoch": 0.6918617614269789, "grad_norm": 1.6225876507739836, "learning_rate": 1.796230345047992e-05, "loss": 0.6475, "step": 9309 }, { "epoch": 0.6919360832404311, "grad_norm": 1.974636704952916, "learning_rate": 1.796181799720495e-05, "loss": 0.9779, "step": 9310 }, { "epoch": 0.6920104050538833, "grad_norm": 2.5018127985620677, "learning_rate": 1.7961332492672205e-05, "loss": 0.8589, "step": 9311 }, { "epoch": 0.6920847268673356, "grad_norm": 1.4295592906410672, "learning_rate": 1.7960846936884815e-05, "loss": 0.7747, "step": 9312 }, { "epoch": 0.6921590486807878, "grad_norm": 1.917526696245705, "learning_rate": 1.7960361329845907e-05, "loss": 0.846, "step": 9313 }, { "epoch": 0.69223337049424, "grad_norm": 1.8144338131945172, "learning_rate": 1.7959875671558602e-05, "loss": 0.69, "step": 9314 }, { "epoch": 0.6923076923076923, "grad_norm": 1.8319163525689446, "learning_rate": 1.795938996202603e-05, "loss": 0.829, "step": 9315 }, { "epoch": 0.6923820141211445, "grad_norm": 1.7851296692924346, "learning_rate": 1.7958904201251324e-05, "loss": 0.926, "step": 9316 }, { "epoch": 0.6924563359345968, "grad_norm": 1.9639850278063957, "learning_rate": 1.7958418389237602e-05, "loss": 0.8803, "step": 9317 }, { "epoch": 0.692530657748049, "grad_norm": 2.8949729791033425, "learning_rate": 1.7957932525987996e-05, "loss": 1.0193, "step": 9318 }, { "epoch": 0.6926049795615014, "grad_norm": 1.8798458530774063, "learning_rate": 1.7957446611505632e-05, "loss": 0.9295, "step": 9319 }, { "epoch": 0.6926793013749536, "grad_norm": 1.6655750217876433, "learning_rate": 1.795696064579364e-05, "loss": 0.6226, "step": 9320 }, { "epoch": 0.6927536231884058, "grad_norm": 2.0491253687616204, "learning_rate": 1.795647462885515e-05, "loss": 0.7036, "step": 9321 }, { "epoch": 0.6928279450018581, "grad_norm": 1.436055791934844, "learning_rate": 1.7955988560693293e-05, "loss": 0.5477, "step": 9322 }, { "epoch": 0.6929022668153103, "grad_norm": 1.8677611925094, "learning_rate": 1.795550244131119e-05, "loss": 0.8864, "step": 9323 }, { "epoch": 0.6929765886287625, "grad_norm": 2.1795786868001645, "learning_rate": 1.795501627071197e-05, "loss": 0.863, "step": 9324 }, { "epoch": 0.6930509104422148, "grad_norm": 2.0705422412098935, "learning_rate": 1.7954530048898776e-05, "loss": 1.0528, "step": 9325 }, { "epoch": 0.693125232255667, "grad_norm": 1.8794718011629425, "learning_rate": 1.7954043775874725e-05, "loss": 1.0757, "step": 9326 }, { "epoch": 0.6931995540691193, "grad_norm": 2.414137630798326, "learning_rate": 1.7953557451642954e-05, "loss": 0.8203, "step": 9327 }, { "epoch": 0.6932738758825715, "grad_norm": 2.342825400901223, "learning_rate": 1.7953071076206595e-05, "loss": 0.9681, "step": 9328 }, { "epoch": 0.6933481976960237, "grad_norm": 2.6586570994524164, "learning_rate": 1.7952584649568775e-05, "loss": 0.7722, "step": 9329 }, { "epoch": 0.693422519509476, "grad_norm": 1.8218802751515315, "learning_rate": 1.795209817173263e-05, "loss": 0.7167, "step": 9330 }, { "epoch": 0.6934968413229283, "grad_norm": 2.082089890118167, "learning_rate": 1.795161164270129e-05, "loss": 0.8649, "step": 9331 }, { "epoch": 0.6935711631363806, "grad_norm": 2.2442916289778108, "learning_rate": 1.7951125062477882e-05, "loss": 1.0977, "step": 9332 }, { "epoch": 0.6936454849498328, "grad_norm": 1.8141081104488372, "learning_rate": 1.7950638431065548e-05, "loss": 0.938, "step": 9333 }, { "epoch": 0.693719806763285, "grad_norm": 1.7602369179895285, "learning_rate": 1.7950151748467415e-05, "loss": 0.8466, "step": 9334 }, { "epoch": 0.6937941285767373, "grad_norm": 1.9774943091661845, "learning_rate": 1.794966501468662e-05, "loss": 0.9238, "step": 9335 }, { "epoch": 0.6938684503901895, "grad_norm": 2.4474165035855813, "learning_rate": 1.794917822972629e-05, "loss": 0.8988, "step": 9336 }, { "epoch": 0.6939427722036418, "grad_norm": 1.8303138559017436, "learning_rate": 1.7948691393589565e-05, "loss": 0.7661, "step": 9337 }, { "epoch": 0.694017094017094, "grad_norm": 1.8558717389184312, "learning_rate": 1.7948204506279575e-05, "loss": 0.8761, "step": 9338 }, { "epoch": 0.6940914158305462, "grad_norm": 2.7655082510365165, "learning_rate": 1.794771756779946e-05, "loss": 1.035, "step": 9339 }, { "epoch": 0.6941657376439985, "grad_norm": 1.6884292219633426, "learning_rate": 1.794723057815235e-05, "loss": 0.8871, "step": 9340 }, { "epoch": 0.6942400594574507, "grad_norm": 2.1612509003527642, "learning_rate": 1.794674353734138e-05, "loss": 0.9198, "step": 9341 }, { "epoch": 0.6943143812709031, "grad_norm": 4.083761945785308, "learning_rate": 1.794625644536969e-05, "loss": 0.9836, "step": 9342 }, { "epoch": 0.6943887030843553, "grad_norm": 1.6501491230464376, "learning_rate": 1.7945769302240413e-05, "loss": 0.7347, "step": 9343 }, { "epoch": 0.6944630248978075, "grad_norm": 2.1220562710194373, "learning_rate": 1.7945282107956687e-05, "loss": 0.8062, "step": 9344 }, { "epoch": 0.6945373467112598, "grad_norm": 1.83614592583031, "learning_rate": 1.7944794862521644e-05, "loss": 0.9496, "step": 9345 }, { "epoch": 0.694611668524712, "grad_norm": 1.668417771296833, "learning_rate": 1.7944307565938426e-05, "loss": 0.7667, "step": 9346 }, { "epoch": 0.6946859903381642, "grad_norm": 3.836368080095267, "learning_rate": 1.7943820218210165e-05, "loss": 0.9907, "step": 9347 }, { "epoch": 0.6947603121516165, "grad_norm": 2.0903096553253695, "learning_rate": 1.7943332819340004e-05, "loss": 0.7517, "step": 9348 }, { "epoch": 0.6948346339650687, "grad_norm": 2.259568198314125, "learning_rate": 1.7942845369331078e-05, "loss": 0.9589, "step": 9349 }, { "epoch": 0.694908955778521, "grad_norm": 2.1887963868175055, "learning_rate": 1.7942357868186526e-05, "loss": 0.7214, "step": 9350 }, { "epoch": 0.6949832775919732, "grad_norm": 1.9947358834137017, "learning_rate": 1.7941870315909486e-05, "loss": 1.1321, "step": 9351 }, { "epoch": 0.6950575994054254, "grad_norm": 2.3179407361267, "learning_rate": 1.79413827125031e-05, "loss": 1.0311, "step": 9352 }, { "epoch": 0.6951319212188778, "grad_norm": 2.272778845120196, "learning_rate": 1.79408950579705e-05, "loss": 0.9714, "step": 9353 }, { "epoch": 0.69520624303233, "grad_norm": 5.287959840465378, "learning_rate": 1.794040735231483e-05, "loss": 0.6466, "step": 9354 }, { "epoch": 0.6952805648457823, "grad_norm": 1.8677047688275858, "learning_rate": 1.7939919595539232e-05, "loss": 0.9575, "step": 9355 }, { "epoch": 0.6953548866592345, "grad_norm": 2.2907340582253743, "learning_rate": 1.7939431787646842e-05, "loss": 0.8827, "step": 9356 }, { "epoch": 0.6954292084726867, "grad_norm": 3.986956473892086, "learning_rate": 1.79389439286408e-05, "loss": 1.099, "step": 9357 }, { "epoch": 0.695503530286139, "grad_norm": 2.1766465546481815, "learning_rate": 1.7938456018524252e-05, "loss": 1.0061, "step": 9358 }, { "epoch": 0.6955778520995912, "grad_norm": 1.730353302203758, "learning_rate": 1.7937968057300335e-05, "loss": 0.6477, "step": 9359 }, { "epoch": 0.6956521739130435, "grad_norm": 2.027457959509014, "learning_rate": 1.7937480044972192e-05, "loss": 0.7089, "step": 9360 }, { "epoch": 0.6957264957264957, "grad_norm": 1.8370063572091764, "learning_rate": 1.7936991981542965e-05, "loss": 0.837, "step": 9361 }, { "epoch": 0.6958008175399479, "grad_norm": 2.1198246060784736, "learning_rate": 1.7936503867015794e-05, "loss": 0.8406, "step": 9362 }, { "epoch": 0.6958751393534002, "grad_norm": 2.0964336443738927, "learning_rate": 1.7936015701393825e-05, "loss": 0.7753, "step": 9363 }, { "epoch": 0.6959494611668525, "grad_norm": 2.1284192896892193, "learning_rate": 1.7935527484680193e-05, "loss": 0.9, "step": 9364 }, { "epoch": 0.6960237829803048, "grad_norm": 1.987907407980404, "learning_rate": 1.7935039216878053e-05, "loss": 1.0425, "step": 9365 }, { "epoch": 0.696098104793757, "grad_norm": 1.7563091520669616, "learning_rate": 1.793455089799054e-05, "loss": 0.8263, "step": 9366 }, { "epoch": 0.6961724266072092, "grad_norm": 2.135996419097589, "learning_rate": 1.7934062528020798e-05, "loss": 1.0947, "step": 9367 }, { "epoch": 0.6962467484206615, "grad_norm": 2.073767448061632, "learning_rate": 1.7933574106971977e-05, "loss": 0.8059, "step": 9368 }, { "epoch": 0.6963210702341137, "grad_norm": 1.9693960219532347, "learning_rate": 1.7933085634847215e-05, "loss": 0.9999, "step": 9369 }, { "epoch": 0.696395392047566, "grad_norm": 2.171368937020157, "learning_rate": 1.7932597111649663e-05, "loss": 0.9845, "step": 9370 }, { "epoch": 0.6964697138610182, "grad_norm": 1.8309525050455078, "learning_rate": 1.7932108537382458e-05, "loss": 0.8873, "step": 9371 }, { "epoch": 0.6965440356744704, "grad_norm": 1.7059562822467644, "learning_rate": 1.793161991204875e-05, "loss": 0.8728, "step": 9372 }, { "epoch": 0.6966183574879227, "grad_norm": 1.9729879725447104, "learning_rate": 1.7931131235651684e-05, "loss": 1.0635, "step": 9373 }, { "epoch": 0.6966926793013749, "grad_norm": 1.6158884690569568, "learning_rate": 1.793064250819441e-05, "loss": 0.7889, "step": 9374 }, { "epoch": 0.6967670011148273, "grad_norm": 1.716073336667268, "learning_rate": 1.7930153729680068e-05, "loss": 0.7707, "step": 9375 }, { "epoch": 0.6968413229282795, "grad_norm": 1.902716338699724, "learning_rate": 1.792966490011181e-05, "loss": 0.7062, "step": 9376 }, { "epoch": 0.6969156447417317, "grad_norm": 2.0653623886754873, "learning_rate": 1.792917601949278e-05, "loss": 1.0053, "step": 9377 }, { "epoch": 0.696989966555184, "grad_norm": 1.5674553632791368, "learning_rate": 1.7928687087826127e-05, "loss": 0.6128, "step": 9378 }, { "epoch": 0.6970642883686362, "grad_norm": 1.8899894582155479, "learning_rate": 1.7928198105114997e-05, "loss": 0.9703, "step": 9379 }, { "epoch": 0.6971386101820884, "grad_norm": 2.4400249602143194, "learning_rate": 1.792770907136254e-05, "loss": 0.8966, "step": 9380 }, { "epoch": 0.6972129319955407, "grad_norm": 1.939343193777488, "learning_rate": 1.79272199865719e-05, "loss": 0.8393, "step": 9381 }, { "epoch": 0.6972872538089929, "grad_norm": 2.616683596695517, "learning_rate": 1.7926730850746232e-05, "loss": 1.015, "step": 9382 }, { "epoch": 0.6973615756224452, "grad_norm": 2.0650026929785525, "learning_rate": 1.7926241663888683e-05, "loss": 1.0626, "step": 9383 }, { "epoch": 0.6974358974358974, "grad_norm": 2.1015479241037096, "learning_rate": 1.79257524260024e-05, "loss": 0.787, "step": 9384 }, { "epoch": 0.6975102192493496, "grad_norm": 1.6842270781387927, "learning_rate": 1.7925263137090532e-05, "loss": 0.8063, "step": 9385 }, { "epoch": 0.6975845410628019, "grad_norm": 1.6470021024736967, "learning_rate": 1.7924773797156233e-05, "loss": 0.8333, "step": 9386 }, { "epoch": 0.6976588628762542, "grad_norm": 2.1808179549020648, "learning_rate": 1.792428440620265e-05, "loss": 0.9502, "step": 9387 }, { "epoch": 0.6977331846897065, "grad_norm": 2.152240396886566, "learning_rate": 1.792379496423294e-05, "loss": 1.0416, "step": 9388 }, { "epoch": 0.6978075065031587, "grad_norm": 1.918975041939151, "learning_rate": 1.7923305471250245e-05, "loss": 0.8822, "step": 9389 }, { "epoch": 0.6978818283166109, "grad_norm": 1.7338655426141438, "learning_rate": 1.7922815927257723e-05, "loss": 0.8559, "step": 9390 }, { "epoch": 0.6979561501300632, "grad_norm": 1.7726523856686494, "learning_rate": 1.792232633225852e-05, "loss": 0.8509, "step": 9391 }, { "epoch": 0.6980304719435154, "grad_norm": 3.03588636394492, "learning_rate": 1.792183668625579e-05, "loss": 0.7589, "step": 9392 }, { "epoch": 0.6981047937569677, "grad_norm": 1.8761071302557157, "learning_rate": 1.7921346989252693e-05, "loss": 1.0267, "step": 9393 }, { "epoch": 0.6981791155704199, "grad_norm": 2.062484372980472, "learning_rate": 1.792085724125237e-05, "loss": 0.8519, "step": 9394 }, { "epoch": 0.6982534373838721, "grad_norm": 2.244041697062554, "learning_rate": 1.792036744225798e-05, "loss": 0.7935, "step": 9395 }, { "epoch": 0.6983277591973244, "grad_norm": 1.8223667975787738, "learning_rate": 1.7919877592272675e-05, "loss": 0.8194, "step": 9396 }, { "epoch": 0.6984020810107766, "grad_norm": 1.9469954906176898, "learning_rate": 1.7919387691299612e-05, "loss": 0.9011, "step": 9397 }, { "epoch": 0.698476402824229, "grad_norm": 1.757297422699513, "learning_rate": 1.7918897739341938e-05, "loss": 0.7137, "step": 9398 }, { "epoch": 0.6985507246376812, "grad_norm": 2.587326202491389, "learning_rate": 1.7918407736402815e-05, "loss": 0.8975, "step": 9399 }, { "epoch": 0.6986250464511334, "grad_norm": 9.147375344079455, "learning_rate": 1.791791768248539e-05, "loss": 0.9266, "step": 9400 }, { "epoch": 0.6986993682645857, "grad_norm": 2.208101192451313, "learning_rate": 1.7917427577592823e-05, "loss": 0.9615, "step": 9401 }, { "epoch": 0.6987736900780379, "grad_norm": 1.8491130426968747, "learning_rate": 1.7916937421728268e-05, "loss": 0.8034, "step": 9402 }, { "epoch": 0.6988480118914902, "grad_norm": 2.067217765144726, "learning_rate": 1.791644721489488e-05, "loss": 0.8711, "step": 9403 }, { "epoch": 0.6989223337049424, "grad_norm": 1.6147798070781356, "learning_rate": 1.7915956957095818e-05, "loss": 0.7283, "step": 9404 }, { "epoch": 0.6989966555183946, "grad_norm": 2.1903178156215906, "learning_rate": 1.7915466648334236e-05, "loss": 0.897, "step": 9405 }, { "epoch": 0.6990709773318469, "grad_norm": 1.8570531243175155, "learning_rate": 1.7914976288613288e-05, "loss": 0.9893, "step": 9406 }, { "epoch": 0.6991452991452991, "grad_norm": 1.7914388581630785, "learning_rate": 1.7914485877936133e-05, "loss": 0.9494, "step": 9407 }, { "epoch": 0.6992196209587513, "grad_norm": 2.6197624745768824, "learning_rate": 1.791399541630593e-05, "loss": 1.0367, "step": 9408 }, { "epoch": 0.6992939427722037, "grad_norm": 2.021744765265747, "learning_rate": 1.7913504903725836e-05, "loss": 0.8729, "step": 9409 }, { "epoch": 0.6993682645856559, "grad_norm": 2.272995152043374, "learning_rate": 1.7913014340199007e-05, "loss": 0.8994, "step": 9410 }, { "epoch": 0.6994425863991082, "grad_norm": 1.679396524006527, "learning_rate": 1.7912523725728605e-05, "loss": 0.9932, "step": 9411 }, { "epoch": 0.6995169082125604, "grad_norm": 1.6270026182339787, "learning_rate": 1.7912033060317783e-05, "loss": 0.6614, "step": 9412 }, { "epoch": 0.6995912300260126, "grad_norm": 2.5982350581740605, "learning_rate": 1.79115423439697e-05, "loss": 0.7744, "step": 9413 }, { "epoch": 0.6996655518394649, "grad_norm": 2.091882504191192, "learning_rate": 1.7911051576687523e-05, "loss": 0.7416, "step": 9414 }, { "epoch": 0.6997398736529171, "grad_norm": 1.8181023526935005, "learning_rate": 1.7910560758474404e-05, "loss": 0.825, "step": 9415 }, { "epoch": 0.6998141954663694, "grad_norm": 1.7356159561601452, "learning_rate": 1.7910069889333507e-05, "loss": 0.7241, "step": 9416 }, { "epoch": 0.6998885172798216, "grad_norm": 2.2246569584251463, "learning_rate": 1.790957896926799e-05, "loss": 0.9711, "step": 9417 }, { "epoch": 0.6999628390932738, "grad_norm": 3.4606968180715683, "learning_rate": 1.7909087998281013e-05, "loss": 0.8171, "step": 9418 }, { "epoch": 0.7000371609067261, "grad_norm": 1.8470784395567807, "learning_rate": 1.7908596976375734e-05, "loss": 0.8119, "step": 9419 }, { "epoch": 0.7001114827201784, "grad_norm": 1.924682334914651, "learning_rate": 1.7908105903555323e-05, "loss": 0.8222, "step": 9420 }, { "epoch": 0.7001858045336307, "grad_norm": 1.7297775276305438, "learning_rate": 1.7907614779822936e-05, "loss": 0.8358, "step": 9421 }, { "epoch": 0.7002601263470829, "grad_norm": 2.3946742579597102, "learning_rate": 1.7907123605181733e-05, "loss": 1.0516, "step": 9422 }, { "epoch": 0.7003344481605351, "grad_norm": 4.797647725898389, "learning_rate": 1.790663237963488e-05, "loss": 0.8758, "step": 9423 }, { "epoch": 0.7004087699739874, "grad_norm": 1.9606289697296204, "learning_rate": 1.790614110318554e-05, "loss": 1.0219, "step": 9424 }, { "epoch": 0.7004830917874396, "grad_norm": 1.7469638327150305, "learning_rate": 1.7905649775836874e-05, "loss": 0.6721, "step": 9425 }, { "epoch": 0.7005574136008919, "grad_norm": 4.225376826973098, "learning_rate": 1.7905158397592042e-05, "loss": 0.8513, "step": 9426 }, { "epoch": 0.7006317354143441, "grad_norm": 2.490904038942961, "learning_rate": 1.790466696845421e-05, "loss": 0.8145, "step": 9427 }, { "epoch": 0.7007060572277963, "grad_norm": 1.7742926691471286, "learning_rate": 1.7904175488426542e-05, "loss": 0.7503, "step": 9428 }, { "epoch": 0.7007803790412486, "grad_norm": 1.9200344217439995, "learning_rate": 1.7903683957512206e-05, "loss": 0.9128, "step": 9429 }, { "epoch": 0.7008547008547008, "grad_norm": 2.1030729406537194, "learning_rate": 1.790319237571436e-05, "loss": 0.922, "step": 9430 }, { "epoch": 0.7009290226681532, "grad_norm": 2.0251141378524826, "learning_rate": 1.7902700743036174e-05, "loss": 0.9334, "step": 9431 }, { "epoch": 0.7010033444816054, "grad_norm": 2.249840571175666, "learning_rate": 1.790220905948081e-05, "loss": 0.8972, "step": 9432 }, { "epoch": 0.7010776662950576, "grad_norm": 1.890858768411817, "learning_rate": 1.790171732505143e-05, "loss": 0.7831, "step": 9433 }, { "epoch": 0.7011519881085099, "grad_norm": 2.3108769612101887, "learning_rate": 1.790122553975121e-05, "loss": 1.0668, "step": 9434 }, { "epoch": 0.7012263099219621, "grad_norm": 2.1080900860681813, "learning_rate": 1.7900733703583303e-05, "loss": 0.8834, "step": 9435 }, { "epoch": 0.7013006317354143, "grad_norm": 1.7630977370648486, "learning_rate": 1.790024181655089e-05, "loss": 0.8774, "step": 9436 }, { "epoch": 0.7013749535488666, "grad_norm": 2.0449402857334684, "learning_rate": 1.7899749878657124e-05, "loss": 0.9587, "step": 9437 }, { "epoch": 0.7014492753623188, "grad_norm": 2.195458300647007, "learning_rate": 1.789925788990518e-05, "loss": 0.7314, "step": 9438 }, { "epoch": 0.7015235971757711, "grad_norm": 1.993819654715254, "learning_rate": 1.7898765850298225e-05, "loss": 0.9219, "step": 9439 }, { "epoch": 0.7015979189892233, "grad_norm": 1.7366665120048075, "learning_rate": 1.7898273759839424e-05, "loss": 0.9774, "step": 9440 }, { "epoch": 0.7016722408026755, "grad_norm": 2.529494091967761, "learning_rate": 1.7897781618531947e-05, "loss": 0.9853, "step": 9441 }, { "epoch": 0.7017465626161278, "grad_norm": 2.033127962003939, "learning_rate": 1.789728942637896e-05, "loss": 1.0396, "step": 9442 }, { "epoch": 0.7018208844295801, "grad_norm": 2.0239706493300997, "learning_rate": 1.7896797183383636e-05, "loss": 1.003, "step": 9443 }, { "epoch": 0.7018952062430324, "grad_norm": 2.1127186607379573, "learning_rate": 1.789630488954914e-05, "loss": 0.8428, "step": 9444 }, { "epoch": 0.7019695280564846, "grad_norm": 2.5998384781356094, "learning_rate": 1.7895812544878645e-05, "loss": 0.8527, "step": 9445 }, { "epoch": 0.7020438498699368, "grad_norm": 1.8912693642191456, "learning_rate": 1.7895320149375313e-05, "loss": 0.8058, "step": 9446 }, { "epoch": 0.7021181716833891, "grad_norm": 1.9827508929384068, "learning_rate": 1.7894827703042324e-05, "loss": 0.6859, "step": 9447 }, { "epoch": 0.7021924934968413, "grad_norm": 2.415550483164123, "learning_rate": 1.7894335205882845e-05, "loss": 0.8219, "step": 9448 }, { "epoch": 0.7022668153102936, "grad_norm": 1.9221879040219383, "learning_rate": 1.789384265790004e-05, "loss": 0.9766, "step": 9449 }, { "epoch": 0.7023411371237458, "grad_norm": 1.700681416822755, "learning_rate": 1.7893350059097093e-05, "loss": 0.7815, "step": 9450 }, { "epoch": 0.702415458937198, "grad_norm": 1.9606786782442092, "learning_rate": 1.7892857409477162e-05, "loss": 0.9605, "step": 9451 }, { "epoch": 0.7024897807506503, "grad_norm": 1.7414713130501898, "learning_rate": 1.7892364709043426e-05, "loss": 0.6757, "step": 9452 }, { "epoch": 0.7025641025641025, "grad_norm": 1.993604098052209, "learning_rate": 1.7891871957799057e-05, "loss": 0.8977, "step": 9453 }, { "epoch": 0.7026384243775549, "grad_norm": 2.0923025844168395, "learning_rate": 1.7891379155747226e-05, "loss": 0.8688, "step": 9454 }, { "epoch": 0.7027127461910071, "grad_norm": 1.7137076578653683, "learning_rate": 1.7890886302891105e-05, "loss": 0.8375, "step": 9455 }, { "epoch": 0.7027870680044593, "grad_norm": 3.539496862861072, "learning_rate": 1.7890393399233863e-05, "loss": 0.5948, "step": 9456 }, { "epoch": 0.7028613898179116, "grad_norm": 2.0823040560660355, "learning_rate": 1.7889900444778684e-05, "loss": 0.9989, "step": 9457 }, { "epoch": 0.7029357116313638, "grad_norm": 1.9721585739068848, "learning_rate": 1.788940743952873e-05, "loss": 0.9991, "step": 9458 }, { "epoch": 0.703010033444816, "grad_norm": 2.510732861795147, "learning_rate": 1.7888914383487183e-05, "loss": 0.9438, "step": 9459 }, { "epoch": 0.7030843552582683, "grad_norm": 2.369923781837662, "learning_rate": 1.7888421276657214e-05, "loss": 0.7115, "step": 9460 }, { "epoch": 0.7031586770717205, "grad_norm": 2.3105993267826315, "learning_rate": 1.7887928119042e-05, "loss": 1.029, "step": 9461 }, { "epoch": 0.7032329988851728, "grad_norm": 1.8076011648587424, "learning_rate": 1.7887434910644716e-05, "loss": 0.9485, "step": 9462 }, { "epoch": 0.703307320698625, "grad_norm": 2.059169050513468, "learning_rate": 1.788694165146853e-05, "loss": 0.912, "step": 9463 }, { "epoch": 0.7033816425120772, "grad_norm": 1.547641080704586, "learning_rate": 1.7886448341516623e-05, "loss": 0.7307, "step": 9464 }, { "epoch": 0.7034559643255296, "grad_norm": 13.044246181746566, "learning_rate": 1.7885954980792174e-05, "loss": 0.9209, "step": 9465 }, { "epoch": 0.7035302861389818, "grad_norm": 1.9146684460027195, "learning_rate": 1.7885461569298355e-05, "loss": 0.8538, "step": 9466 }, { "epoch": 0.7036046079524341, "grad_norm": 1.8345959580365954, "learning_rate": 1.7884968107038342e-05, "loss": 0.7002, "step": 9467 }, { "epoch": 0.7036789297658863, "grad_norm": 2.211453993896866, "learning_rate": 1.7884474594015316e-05, "loss": 0.8403, "step": 9468 }, { "epoch": 0.7037532515793385, "grad_norm": 2.073818598888664, "learning_rate": 1.788398103023245e-05, "loss": 0.9447, "step": 9469 }, { "epoch": 0.7038275733927908, "grad_norm": 1.6995065546452421, "learning_rate": 1.7883487415692925e-05, "loss": 0.9041, "step": 9470 }, { "epoch": 0.703901895206243, "grad_norm": 2.0119537948412582, "learning_rate": 1.7882993750399914e-05, "loss": 0.8881, "step": 9471 }, { "epoch": 0.7039762170196953, "grad_norm": 2.3758642515294937, "learning_rate": 1.78825000343566e-05, "loss": 1.069, "step": 9472 }, { "epoch": 0.7040505388331475, "grad_norm": 1.960438797702321, "learning_rate": 1.788200626756616e-05, "loss": 0.9378, "step": 9473 }, { "epoch": 0.7041248606465997, "grad_norm": 2.045233113328428, "learning_rate": 1.788151245003177e-05, "loss": 0.9376, "step": 9474 }, { "epoch": 0.704199182460052, "grad_norm": 2.424862187629817, "learning_rate": 1.7881018581756614e-05, "loss": 0.9427, "step": 9475 }, { "epoch": 0.7042735042735043, "grad_norm": 1.9444067239486726, "learning_rate": 1.7880524662743872e-05, "loss": 0.9239, "step": 9476 }, { "epoch": 0.7043478260869566, "grad_norm": 2.4533286939629066, "learning_rate": 1.7880030692996716e-05, "loss": 1.0284, "step": 9477 }, { "epoch": 0.7044221479004088, "grad_norm": 1.8492183045355561, "learning_rate": 1.7879536672518335e-05, "loss": 0.7367, "step": 9478 }, { "epoch": 0.704496469713861, "grad_norm": 1.6930541916045838, "learning_rate": 1.7879042601311906e-05, "loss": 0.743, "step": 9479 }, { "epoch": 0.7045707915273133, "grad_norm": 2.0934863074120678, "learning_rate": 1.7878548479380607e-05, "loss": 1.0126, "step": 9480 }, { "epoch": 0.7046451133407655, "grad_norm": 1.8205721209773664, "learning_rate": 1.787805430672762e-05, "loss": 0.8505, "step": 9481 }, { "epoch": 0.7047194351542178, "grad_norm": 2.396802620009241, "learning_rate": 1.7877560083356133e-05, "loss": 1.1269, "step": 9482 }, { "epoch": 0.70479375696767, "grad_norm": 1.9467668101947093, "learning_rate": 1.787706580926932e-05, "loss": 0.9369, "step": 9483 }, { "epoch": 0.7048680787811222, "grad_norm": 2.052376703958006, "learning_rate": 1.787657148447037e-05, "loss": 0.9517, "step": 9484 }, { "epoch": 0.7049424005945745, "grad_norm": 1.7018892019655414, "learning_rate": 1.7876077108962454e-05, "loss": 0.8783, "step": 9485 }, { "epoch": 0.7050167224080267, "grad_norm": 2.1721444627093813, "learning_rate": 1.787558268274877e-05, "loss": 0.7943, "step": 9486 }, { "epoch": 0.7050910442214791, "grad_norm": 1.955321374578758, "learning_rate": 1.787508820583249e-05, "loss": 0.9965, "step": 9487 }, { "epoch": 0.7051653660349313, "grad_norm": 2.42252209555752, "learning_rate": 1.78745936782168e-05, "loss": 0.9729, "step": 9488 }, { "epoch": 0.7052396878483835, "grad_norm": 1.742919121401393, "learning_rate": 1.7874099099904883e-05, "loss": 0.7982, "step": 9489 }, { "epoch": 0.7053140096618358, "grad_norm": 1.8538458275177332, "learning_rate": 1.787360447089993e-05, "loss": 0.7804, "step": 9490 }, { "epoch": 0.705388331475288, "grad_norm": 2.350064582937277, "learning_rate": 1.7873109791205115e-05, "loss": 0.799, "step": 9491 }, { "epoch": 0.7054626532887402, "grad_norm": 1.9738793603681841, "learning_rate": 1.7872615060823627e-05, "loss": 0.9629, "step": 9492 }, { "epoch": 0.7055369751021925, "grad_norm": 2.3276653281198603, "learning_rate": 1.7872120279758654e-05, "loss": 1.0528, "step": 9493 }, { "epoch": 0.7056112969156447, "grad_norm": 1.8321074801676709, "learning_rate": 1.787162544801338e-05, "loss": 0.8707, "step": 9494 }, { "epoch": 0.705685618729097, "grad_norm": 2.696098876386033, "learning_rate": 1.7871130565590988e-05, "loss": 0.871, "step": 9495 }, { "epoch": 0.7057599405425492, "grad_norm": 1.8410353220870352, "learning_rate": 1.7870635632494667e-05, "loss": 0.8515, "step": 9496 }, { "epoch": 0.7058342623560014, "grad_norm": 1.9966629905080615, "learning_rate": 1.7870140648727602e-05, "loss": 0.9594, "step": 9497 }, { "epoch": 0.7059085841694538, "grad_norm": 2.3146786063311935, "learning_rate": 1.786964561429298e-05, "loss": 0.884, "step": 9498 }, { "epoch": 0.705982905982906, "grad_norm": 2.1765005706085403, "learning_rate": 1.786915052919399e-05, "loss": 0.8776, "step": 9499 }, { "epoch": 0.7060572277963583, "grad_norm": 2.0435571814809323, "learning_rate": 1.7868655393433815e-05, "loss": 1.0406, "step": 9500 }, { "epoch": 0.7061315496098105, "grad_norm": 1.8004200334455323, "learning_rate": 1.7868160207015642e-05, "loss": 0.8598, "step": 9501 }, { "epoch": 0.7062058714232627, "grad_norm": 2.1599006884741843, "learning_rate": 1.7867664969942662e-05, "loss": 0.6997, "step": 9502 }, { "epoch": 0.706280193236715, "grad_norm": 1.9725612863929014, "learning_rate": 1.7867169682218065e-05, "loss": 0.8763, "step": 9503 }, { "epoch": 0.7063545150501672, "grad_norm": 2.44400121868858, "learning_rate": 1.786667434384504e-05, "loss": 1.0463, "step": 9504 }, { "epoch": 0.7064288368636195, "grad_norm": 1.864439220250415, "learning_rate": 1.7866178954826775e-05, "loss": 0.6389, "step": 9505 }, { "epoch": 0.7065031586770717, "grad_norm": 1.9092731011206299, "learning_rate": 1.7865683515166452e-05, "loss": 0.6938, "step": 9506 }, { "epoch": 0.7065774804905239, "grad_norm": 2.1286292685428294, "learning_rate": 1.786518802486727e-05, "loss": 0.713, "step": 9507 }, { "epoch": 0.7066518023039762, "grad_norm": 2.14748040956995, "learning_rate": 1.7864692483932417e-05, "loss": 0.849, "step": 9508 }, { "epoch": 0.7067261241174284, "grad_norm": 2.1693935818065926, "learning_rate": 1.786419689236508e-05, "loss": 0.8964, "step": 9509 }, { "epoch": 0.7068004459308808, "grad_norm": 2.8043775276770435, "learning_rate": 1.7863701250168453e-05, "loss": 1.0398, "step": 9510 }, { "epoch": 0.706874767744333, "grad_norm": 1.565885876345158, "learning_rate": 1.7863205557345723e-05, "loss": 0.7173, "step": 9511 }, { "epoch": 0.7069490895577852, "grad_norm": 2.0343813194787232, "learning_rate": 1.7862709813900087e-05, "loss": 1.0005, "step": 9512 }, { "epoch": 0.7070234113712375, "grad_norm": 1.9469836315724813, "learning_rate": 1.7862214019834728e-05, "loss": 0.8686, "step": 9513 }, { "epoch": 0.7070977331846897, "grad_norm": 1.6656488375446048, "learning_rate": 1.7861718175152845e-05, "loss": 0.8163, "step": 9514 }, { "epoch": 0.707172054998142, "grad_norm": 3.6056311859385697, "learning_rate": 1.786122227985763e-05, "loss": 0.6027, "step": 9515 }, { "epoch": 0.7072463768115942, "grad_norm": 1.341566237570028, "learning_rate": 1.7860726333952275e-05, "loss": 0.5718, "step": 9516 }, { "epoch": 0.7073206986250464, "grad_norm": 3.960572159233276, "learning_rate": 1.7860230337439967e-05, "loss": 0.8849, "step": 9517 }, { "epoch": 0.7073950204384987, "grad_norm": 3.4236389818874007, "learning_rate": 1.7859734290323908e-05, "loss": 0.829, "step": 9518 }, { "epoch": 0.7074693422519509, "grad_norm": 1.7604659827482734, "learning_rate": 1.7859238192607287e-05, "loss": 0.801, "step": 9519 }, { "epoch": 0.7075436640654031, "grad_norm": 6.824997093017912, "learning_rate": 1.7858742044293298e-05, "loss": 0.8546, "step": 9520 }, { "epoch": 0.7076179858788555, "grad_norm": 2.367017048287254, "learning_rate": 1.7858245845385134e-05, "loss": 0.7665, "step": 9521 }, { "epoch": 0.7076923076923077, "grad_norm": 1.8536707786389888, "learning_rate": 1.7857749595885992e-05, "loss": 1.0194, "step": 9522 }, { "epoch": 0.70776662950576, "grad_norm": 2.2550923475542626, "learning_rate": 1.7857253295799064e-05, "loss": 1.0456, "step": 9523 }, { "epoch": 0.7078409513192122, "grad_norm": 1.7701381332343202, "learning_rate": 1.7856756945127553e-05, "loss": 0.7395, "step": 9524 }, { "epoch": 0.7079152731326644, "grad_norm": 1.7187790746238512, "learning_rate": 1.7856260543874642e-05, "loss": 0.8854, "step": 9525 }, { "epoch": 0.7079895949461167, "grad_norm": 1.8055889388009556, "learning_rate": 1.7855764092043534e-05, "loss": 0.9025, "step": 9526 }, { "epoch": 0.7080639167595689, "grad_norm": 1.5421332587676775, "learning_rate": 1.7855267589637427e-05, "loss": 0.8302, "step": 9527 }, { "epoch": 0.7081382385730212, "grad_norm": 1.9296425860897422, "learning_rate": 1.7854771036659513e-05, "loss": 0.9989, "step": 9528 }, { "epoch": 0.7082125603864734, "grad_norm": 2.448291982073394, "learning_rate": 1.7854274433112993e-05, "loss": 1.115, "step": 9529 }, { "epoch": 0.7082868821999256, "grad_norm": 2.862271981378215, "learning_rate": 1.785377777900106e-05, "loss": 0.921, "step": 9530 }, { "epoch": 0.7083612040133779, "grad_norm": 2.1862910588350366, "learning_rate": 1.7853281074326913e-05, "loss": 1.0198, "step": 9531 }, { "epoch": 0.7084355258268302, "grad_norm": 3.442590609126464, "learning_rate": 1.785278431909375e-05, "loss": 0.6655, "step": 9532 }, { "epoch": 0.7085098476402825, "grad_norm": 2.2479872878390337, "learning_rate": 1.7852287513304768e-05, "loss": 1.0173, "step": 9533 }, { "epoch": 0.7085841694537347, "grad_norm": 1.87904643700555, "learning_rate": 1.785179065696317e-05, "loss": 0.9212, "step": 9534 }, { "epoch": 0.7086584912671869, "grad_norm": 1.852355478776036, "learning_rate": 1.7851293750072148e-05, "loss": 0.6656, "step": 9535 }, { "epoch": 0.7087328130806392, "grad_norm": 2.196584323468116, "learning_rate": 1.7850796792634903e-05, "loss": 0.9836, "step": 9536 }, { "epoch": 0.7088071348940914, "grad_norm": 1.7479639920339638, "learning_rate": 1.785029978465464e-05, "loss": 0.6515, "step": 9537 }, { "epoch": 0.7088814567075437, "grad_norm": 1.8048333975287831, "learning_rate": 1.784980272613455e-05, "loss": 0.8967, "step": 9538 }, { "epoch": 0.7089557785209959, "grad_norm": 1.5934400176502512, "learning_rate": 1.784930561707784e-05, "loss": 0.614, "step": 9539 }, { "epoch": 0.7090301003344481, "grad_norm": 2.2831679501517614, "learning_rate": 1.7848808457487705e-05, "loss": 1.1745, "step": 9540 }, { "epoch": 0.7091044221479004, "grad_norm": 2.0832043890604153, "learning_rate": 1.7848311247367353e-05, "loss": 0.7623, "step": 9541 }, { "epoch": 0.7091787439613526, "grad_norm": 1.9672169669082515, "learning_rate": 1.7847813986719974e-05, "loss": 0.8853, "step": 9542 }, { "epoch": 0.709253065774805, "grad_norm": 1.966795729085372, "learning_rate": 1.784731667554878e-05, "loss": 0.9868, "step": 9543 }, { "epoch": 0.7093273875882572, "grad_norm": 1.694424988832859, "learning_rate": 1.7846819313856968e-05, "loss": 0.689, "step": 9544 }, { "epoch": 0.7094017094017094, "grad_norm": 2.0461699576664647, "learning_rate": 1.784632190164774e-05, "loss": 0.9448, "step": 9545 }, { "epoch": 0.7094760312151617, "grad_norm": 2.125952089375701, "learning_rate": 1.78458244389243e-05, "loss": 0.9751, "step": 9546 }, { "epoch": 0.7095503530286139, "grad_norm": 1.580306784206942, "learning_rate": 1.784532692568985e-05, "loss": 0.9444, "step": 9547 }, { "epoch": 0.7096246748420661, "grad_norm": 1.8413276264704792, "learning_rate": 1.784482936194759e-05, "loss": 0.7181, "step": 9548 }, { "epoch": 0.7096989966555184, "grad_norm": 1.871233359667583, "learning_rate": 1.7844331747700725e-05, "loss": 0.9589, "step": 9549 }, { "epoch": 0.7097733184689706, "grad_norm": 1.8530948769006486, "learning_rate": 1.7843834082952464e-05, "loss": 0.8666, "step": 9550 }, { "epoch": 0.7098476402824229, "grad_norm": 2.1108236450606026, "learning_rate": 1.7843336367706002e-05, "loss": 0.948, "step": 9551 }, { "epoch": 0.7099219620958751, "grad_norm": 1.6961947801474704, "learning_rate": 1.7842838601964545e-05, "loss": 0.7882, "step": 9552 }, { "epoch": 0.7099962839093273, "grad_norm": 1.6166629175605949, "learning_rate": 1.7842340785731306e-05, "loss": 0.7795, "step": 9553 }, { "epoch": 0.7100706057227797, "grad_norm": 1.8084946601156606, "learning_rate": 1.7841842919009478e-05, "loss": 0.7964, "step": 9554 }, { "epoch": 0.7101449275362319, "grad_norm": 1.6546489940503524, "learning_rate": 1.7841345001802274e-05, "loss": 0.825, "step": 9555 }, { "epoch": 0.7102192493496842, "grad_norm": 1.966656595387063, "learning_rate": 1.7840847034112902e-05, "loss": 1.0231, "step": 9556 }, { "epoch": 0.7102935711631364, "grad_norm": 1.9319457677540925, "learning_rate": 1.784034901594456e-05, "loss": 0.8021, "step": 9557 }, { "epoch": 0.7103678929765886, "grad_norm": 2.419710303809073, "learning_rate": 1.7839850947300457e-05, "loss": 0.8122, "step": 9558 }, { "epoch": 0.7104422147900409, "grad_norm": 2.2766082977069204, "learning_rate": 1.7839352828183804e-05, "loss": 1.0111, "step": 9559 }, { "epoch": 0.7105165366034931, "grad_norm": 2.2448720446661006, "learning_rate": 1.78388546585978e-05, "loss": 1.0171, "step": 9560 }, { "epoch": 0.7105908584169454, "grad_norm": 1.7570716847284527, "learning_rate": 1.7838356438545662e-05, "loss": 0.6707, "step": 9561 }, { "epoch": 0.7106651802303976, "grad_norm": 1.7994030848307805, "learning_rate": 1.7837858168030587e-05, "loss": 1.0172, "step": 9562 }, { "epoch": 0.7107395020438498, "grad_norm": 2.081861576252539, "learning_rate": 1.7837359847055788e-05, "loss": 0.8995, "step": 9563 }, { "epoch": 0.7108138238573021, "grad_norm": 2.0006458173503296, "learning_rate": 1.7836861475624475e-05, "loss": 0.8568, "step": 9564 }, { "epoch": 0.7108881456707543, "grad_norm": 1.6084408393247256, "learning_rate": 1.7836363053739853e-05, "loss": 0.7091, "step": 9565 }, { "epoch": 0.7109624674842067, "grad_norm": 1.696652887185781, "learning_rate": 1.7835864581405134e-05, "loss": 0.6589, "step": 9566 }, { "epoch": 0.7110367892976589, "grad_norm": 1.7067884316746857, "learning_rate": 1.7835366058623522e-05, "loss": 0.8161, "step": 9567 }, { "epoch": 0.7111111111111111, "grad_norm": 2.2062780467154024, "learning_rate": 1.783486748539823e-05, "loss": 0.9197, "step": 9568 }, { "epoch": 0.7111854329245634, "grad_norm": 1.8115865230026056, "learning_rate": 1.7834368861732475e-05, "loss": 0.7935, "step": 9569 }, { "epoch": 0.7112597547380156, "grad_norm": 1.8235685680289704, "learning_rate": 1.7833870187629453e-05, "loss": 0.8954, "step": 9570 }, { "epoch": 0.7113340765514679, "grad_norm": 2.3510349806100352, "learning_rate": 1.7833371463092382e-05, "loss": 0.7273, "step": 9571 }, { "epoch": 0.7114083983649201, "grad_norm": 3.521507936353365, "learning_rate": 1.783287268812447e-05, "loss": 0.7415, "step": 9572 }, { "epoch": 0.7114827201783723, "grad_norm": 2.0170518394384995, "learning_rate": 1.7832373862728937e-05, "loss": 0.8392, "step": 9573 }, { "epoch": 0.7115570419918246, "grad_norm": 2.1650488808087793, "learning_rate": 1.783187498690898e-05, "loss": 0.8397, "step": 9574 }, { "epoch": 0.7116313638052768, "grad_norm": 1.801110264739639, "learning_rate": 1.783137606066782e-05, "loss": 0.917, "step": 9575 }, { "epoch": 0.711705685618729, "grad_norm": 1.430268908130927, "learning_rate": 1.783087708400867e-05, "loss": 0.7932, "step": 9576 }, { "epoch": 0.7117800074321814, "grad_norm": 1.6081729530559299, "learning_rate": 1.783037805693474e-05, "loss": 0.7696, "step": 9577 }, { "epoch": 0.7118543292456336, "grad_norm": 1.42900873279176, "learning_rate": 1.7829878979449242e-05, "loss": 0.7515, "step": 9578 }, { "epoch": 0.7119286510590859, "grad_norm": 2.036515839818769, "learning_rate": 1.7829379851555387e-05, "loss": 0.8226, "step": 9579 }, { "epoch": 0.7120029728725381, "grad_norm": 1.7779213017764126, "learning_rate": 1.7828880673256393e-05, "loss": 0.8431, "step": 9580 }, { "epoch": 0.7120772946859903, "grad_norm": 2.2765463232281036, "learning_rate": 1.7828381444555473e-05, "loss": 0.7836, "step": 9581 }, { "epoch": 0.7121516164994426, "grad_norm": 2.4759861472964566, "learning_rate": 1.7827882165455835e-05, "loss": 1.0588, "step": 9582 }, { "epoch": 0.7122259383128948, "grad_norm": 1.9043663512963793, "learning_rate": 1.7827382835960697e-05, "loss": 0.8399, "step": 9583 }, { "epoch": 0.7123002601263471, "grad_norm": 1.8762660550731578, "learning_rate": 1.782688345607328e-05, "loss": 0.6734, "step": 9584 }, { "epoch": 0.7123745819397993, "grad_norm": 1.5897042778693409, "learning_rate": 1.7826384025796787e-05, "loss": 0.7926, "step": 9585 }, { "epoch": 0.7124489037532515, "grad_norm": 1.9132399298028229, "learning_rate": 1.7825884545134444e-05, "loss": 0.9943, "step": 9586 }, { "epoch": 0.7125232255667038, "grad_norm": 2.1312795817124357, "learning_rate": 1.782538501408946e-05, "loss": 0.9244, "step": 9587 }, { "epoch": 0.7125975473801561, "grad_norm": 2.216771408320112, "learning_rate": 1.7824885432665055e-05, "loss": 0.8745, "step": 9588 }, { "epoch": 0.7126718691936084, "grad_norm": 2.4692005681650078, "learning_rate": 1.7824385800864445e-05, "loss": 1.0605, "step": 9589 }, { "epoch": 0.7127461910070606, "grad_norm": 2.2172574424976594, "learning_rate": 1.7823886118690846e-05, "loss": 0.9358, "step": 9590 }, { "epoch": 0.7128205128205128, "grad_norm": 1.6533528308653982, "learning_rate": 1.7823386386147468e-05, "loss": 0.8049, "step": 9591 }, { "epoch": 0.7128948346339651, "grad_norm": 2.1329492478259184, "learning_rate": 1.782288660323754e-05, "loss": 0.9006, "step": 9592 }, { "epoch": 0.7129691564474173, "grad_norm": 2.0007624823166177, "learning_rate": 1.782238676996427e-05, "loss": 0.9898, "step": 9593 }, { "epoch": 0.7130434782608696, "grad_norm": 2.096538602627744, "learning_rate": 1.7821886886330883e-05, "loss": 0.9052, "step": 9594 }, { "epoch": 0.7131178000743218, "grad_norm": 1.8557374332217438, "learning_rate": 1.7821386952340593e-05, "loss": 0.8361, "step": 9595 }, { "epoch": 0.713192121887774, "grad_norm": 1.8605274467775503, "learning_rate": 1.782088696799662e-05, "loss": 0.8006, "step": 9596 }, { "epoch": 0.7132664437012263, "grad_norm": 2.023433611353947, "learning_rate": 1.7820386933302184e-05, "loss": 0.8785, "step": 9597 }, { "epoch": 0.7133407655146785, "grad_norm": 1.7186593782630304, "learning_rate": 1.7819886848260498e-05, "loss": 0.7449, "step": 9598 }, { "epoch": 0.7134150873281309, "grad_norm": 1.5482790900699401, "learning_rate": 1.781938671287479e-05, "loss": 0.7955, "step": 9599 }, { "epoch": 0.7134894091415831, "grad_norm": 1.9760880407375556, "learning_rate": 1.7818886527148276e-05, "loss": 1.0178, "step": 9600 }, { "epoch": 0.7135637309550353, "grad_norm": 1.9435976574494251, "learning_rate": 1.7818386291084174e-05, "loss": 0.7038, "step": 9601 }, { "epoch": 0.7136380527684876, "grad_norm": 1.673527910015164, "learning_rate": 1.7817886004685708e-05, "loss": 0.8244, "step": 9602 }, { "epoch": 0.7137123745819398, "grad_norm": 1.5645515693556231, "learning_rate": 1.78173856679561e-05, "loss": 0.76, "step": 9603 }, { "epoch": 0.713786696395392, "grad_norm": 2.1497121231677636, "learning_rate": 1.7816885280898563e-05, "loss": 0.9881, "step": 9604 }, { "epoch": 0.7138610182088443, "grad_norm": 1.782552448910012, "learning_rate": 1.7816384843516328e-05, "loss": 0.9305, "step": 9605 }, { "epoch": 0.7139353400222965, "grad_norm": 1.7124228928033405, "learning_rate": 1.781588435581261e-05, "loss": 0.7326, "step": 9606 }, { "epoch": 0.7140096618357488, "grad_norm": 1.7954430907691747, "learning_rate": 1.7815383817790638e-05, "loss": 0.7809, "step": 9607 }, { "epoch": 0.714083983649201, "grad_norm": 1.69556372477261, "learning_rate": 1.781488322945363e-05, "loss": 0.634, "step": 9608 }, { "epoch": 0.7141583054626532, "grad_norm": 1.8774325891331056, "learning_rate": 1.7814382590804807e-05, "loss": 1.0542, "step": 9609 }, { "epoch": 0.7142326272761056, "grad_norm": 1.8018529987263194, "learning_rate": 1.7813881901847397e-05, "loss": 0.8745, "step": 9610 }, { "epoch": 0.7143069490895578, "grad_norm": 1.7765786173981017, "learning_rate": 1.781338116258462e-05, "loss": 0.9459, "step": 9611 }, { "epoch": 0.7143812709030101, "grad_norm": 1.9703851641026544, "learning_rate": 1.78128803730197e-05, "loss": 0.8295, "step": 9612 }, { "epoch": 0.7144555927164623, "grad_norm": 2.080314204813444, "learning_rate": 1.781237953315586e-05, "loss": 0.8511, "step": 9613 }, { "epoch": 0.7145299145299145, "grad_norm": 1.889936899873309, "learning_rate": 1.7811878642996325e-05, "loss": 0.8639, "step": 9614 }, { "epoch": 0.7146042363433668, "grad_norm": 2.1729845009563715, "learning_rate": 1.7811377702544326e-05, "loss": 1.0104, "step": 9615 }, { "epoch": 0.714678558156819, "grad_norm": 2.0883195118155893, "learning_rate": 1.7810876711803077e-05, "loss": 0.9245, "step": 9616 }, { "epoch": 0.7147528799702713, "grad_norm": 2.201935172081006, "learning_rate": 1.7810375670775813e-05, "loss": 0.9311, "step": 9617 }, { "epoch": 0.7148272017837235, "grad_norm": 1.5301653423600579, "learning_rate": 1.7809874579465752e-05, "loss": 0.7979, "step": 9618 }, { "epoch": 0.7149015235971757, "grad_norm": 2.3255811847777705, "learning_rate": 1.7809373437876127e-05, "loss": 0.7308, "step": 9619 }, { "epoch": 0.714975845410628, "grad_norm": 1.9724548979497523, "learning_rate": 1.780887224601016e-05, "loss": 0.8556, "step": 9620 }, { "epoch": 0.7150501672240802, "grad_norm": 1.8223770336599785, "learning_rate": 1.7808371003871077e-05, "loss": 0.8467, "step": 9621 }, { "epoch": 0.7151244890375326, "grad_norm": 1.8067129117643834, "learning_rate": 1.780786971146211e-05, "loss": 0.7075, "step": 9622 }, { "epoch": 0.7151988108509848, "grad_norm": 1.8988868647150587, "learning_rate": 1.780736836878648e-05, "loss": 0.8385, "step": 9623 }, { "epoch": 0.715273132664437, "grad_norm": 1.8208608833164892, "learning_rate": 1.7806866975847418e-05, "loss": 0.9134, "step": 9624 }, { "epoch": 0.7153474544778893, "grad_norm": 1.8135852787749582, "learning_rate": 1.7806365532648155e-05, "loss": 1.0098, "step": 9625 }, { "epoch": 0.7154217762913415, "grad_norm": 2.1442703541303905, "learning_rate": 1.7805864039191913e-05, "loss": 1.0573, "step": 9626 }, { "epoch": 0.7154960981047938, "grad_norm": 2.0813078631218365, "learning_rate": 1.780536249548192e-05, "loss": 0.8402, "step": 9627 }, { "epoch": 0.715570419918246, "grad_norm": 1.5855384261439995, "learning_rate": 1.780486090152141e-05, "loss": 0.6177, "step": 9628 }, { "epoch": 0.7156447417316982, "grad_norm": 1.6690620309388902, "learning_rate": 1.7804359257313613e-05, "loss": 0.8369, "step": 9629 }, { "epoch": 0.7157190635451505, "grad_norm": 1.7892933024773943, "learning_rate": 1.780385756286176e-05, "loss": 0.7759, "step": 9630 }, { "epoch": 0.7157933853586027, "grad_norm": 2.70546325874116, "learning_rate": 1.780335581816907e-05, "loss": 0.9346, "step": 9631 }, { "epoch": 0.715867707172055, "grad_norm": 1.8190776168249465, "learning_rate": 1.7802854023238782e-05, "loss": 0.9294, "step": 9632 }, { "epoch": 0.7159420289855073, "grad_norm": 2.0210717864129757, "learning_rate": 1.7802352178074125e-05, "loss": 0.9028, "step": 9633 }, { "epoch": 0.7160163507989595, "grad_norm": 1.8978600686639304, "learning_rate": 1.7801850282678326e-05, "loss": 0.9347, "step": 9634 }, { "epoch": 0.7160906726124118, "grad_norm": 2.6684147264019202, "learning_rate": 1.7801348337054623e-05, "loss": 0.9736, "step": 9635 }, { "epoch": 0.716164994425864, "grad_norm": 2.006220915819779, "learning_rate": 1.7800846341206247e-05, "loss": 0.8606, "step": 9636 }, { "epoch": 0.7162393162393162, "grad_norm": 2.3703641035473777, "learning_rate": 1.7800344295136423e-05, "loss": 0.8845, "step": 9637 }, { "epoch": 0.7163136380527685, "grad_norm": 1.9843495203593609, "learning_rate": 1.7799842198848388e-05, "loss": 0.8584, "step": 9638 }, { "epoch": 0.7163879598662207, "grad_norm": 1.7672888901719315, "learning_rate": 1.7799340052345373e-05, "loss": 0.7652, "step": 9639 }, { "epoch": 0.716462281679673, "grad_norm": 2.0139138516781245, "learning_rate": 1.7798837855630614e-05, "loss": 0.9834, "step": 9640 }, { "epoch": 0.7165366034931252, "grad_norm": 2.148646959763047, "learning_rate": 1.779833560870734e-05, "loss": 0.934, "step": 9641 }, { "epoch": 0.7166109253065774, "grad_norm": 1.7498753090211174, "learning_rate": 1.7797833311578784e-05, "loss": 0.8593, "step": 9642 }, { "epoch": 0.7166852471200297, "grad_norm": 2.326004050239029, "learning_rate": 1.7797330964248184e-05, "loss": 0.7737, "step": 9643 }, { "epoch": 0.716759568933482, "grad_norm": 1.7430415095371603, "learning_rate": 1.7796828566718773e-05, "loss": 0.8475, "step": 9644 }, { "epoch": 0.7168338907469343, "grad_norm": 1.8750311262356678, "learning_rate": 1.779632611899378e-05, "loss": 0.9257, "step": 9645 }, { "epoch": 0.7169082125603865, "grad_norm": 1.957899212790721, "learning_rate": 1.779582362107645e-05, "loss": 0.7216, "step": 9646 }, { "epoch": 0.7169825343738387, "grad_norm": 2.212215577877041, "learning_rate": 1.7795321072970004e-05, "loss": 1.0306, "step": 9647 }, { "epoch": 0.717056856187291, "grad_norm": 1.9521894270430198, "learning_rate": 1.779481847467769e-05, "loss": 0.7649, "step": 9648 }, { "epoch": 0.7171311780007432, "grad_norm": 1.8343327855422145, "learning_rate": 1.779431582620274e-05, "loss": 0.8989, "step": 9649 }, { "epoch": 0.7172054998141955, "grad_norm": 2.3713829944126394, "learning_rate": 1.779381312754839e-05, "loss": 0.9514, "step": 9650 }, { "epoch": 0.7172798216276477, "grad_norm": 2.32013467703155, "learning_rate": 1.7793310378717872e-05, "loss": 1.0203, "step": 9651 }, { "epoch": 0.7173541434410999, "grad_norm": 2.0619508626340353, "learning_rate": 1.779280757971443e-05, "loss": 0.9889, "step": 9652 }, { "epoch": 0.7174284652545522, "grad_norm": 5.458939282332853, "learning_rate": 1.7792304730541295e-05, "loss": 0.7844, "step": 9653 }, { "epoch": 0.7175027870680044, "grad_norm": 2.0910635768816217, "learning_rate": 1.7791801831201706e-05, "loss": 0.9468, "step": 9654 }, { "epoch": 0.7175771088814568, "grad_norm": 2.173492365629178, "learning_rate": 1.77912988816989e-05, "loss": 0.7273, "step": 9655 }, { "epoch": 0.717651430694909, "grad_norm": 2.0269759570665062, "learning_rate": 1.779079588203612e-05, "loss": 0.6698, "step": 9656 }, { "epoch": 0.7177257525083612, "grad_norm": 2.0865596654200007, "learning_rate": 1.7790292832216596e-05, "loss": 0.746, "step": 9657 }, { "epoch": 0.7178000743218135, "grad_norm": 2.7262055357629693, "learning_rate": 1.778978973224357e-05, "loss": 0.8947, "step": 9658 }, { "epoch": 0.7178743961352657, "grad_norm": 1.7451036060470382, "learning_rate": 1.7789286582120287e-05, "loss": 0.8715, "step": 9659 }, { "epoch": 0.717948717948718, "grad_norm": 1.7362216367563832, "learning_rate": 1.7788783381849983e-05, "loss": 0.7445, "step": 9660 }, { "epoch": 0.7180230397621702, "grad_norm": 2.140805196267809, "learning_rate": 1.778828013143589e-05, "loss": 1.0782, "step": 9661 }, { "epoch": 0.7180973615756224, "grad_norm": 2.039907717646674, "learning_rate": 1.7787776830881252e-05, "loss": 0.8316, "step": 9662 }, { "epoch": 0.7181716833890747, "grad_norm": 1.5718692841748294, "learning_rate": 1.7787273480189315e-05, "loss": 0.8163, "step": 9663 }, { "epoch": 0.7182460052025269, "grad_norm": 3.354515448212897, "learning_rate": 1.7786770079363316e-05, "loss": 0.8004, "step": 9664 }, { "epoch": 0.7183203270159791, "grad_norm": 1.5957777076423447, "learning_rate": 1.7786266628406493e-05, "loss": 0.7699, "step": 9665 }, { "epoch": 0.7183946488294315, "grad_norm": 1.7619266611919526, "learning_rate": 1.778576312732209e-05, "loss": 0.7472, "step": 9666 }, { "epoch": 0.7184689706428837, "grad_norm": 1.9767467377744747, "learning_rate": 1.778525957611335e-05, "loss": 0.7709, "step": 9667 }, { "epoch": 0.718543292456336, "grad_norm": 8.226770255161345, "learning_rate": 1.7784755974783513e-05, "loss": 1.0114, "step": 9668 }, { "epoch": 0.7186176142697882, "grad_norm": 1.8975938357662723, "learning_rate": 1.7784252323335814e-05, "loss": 0.7965, "step": 9669 }, { "epoch": 0.7186919360832404, "grad_norm": 1.6038156661390364, "learning_rate": 1.778374862177351e-05, "loss": 0.7803, "step": 9670 }, { "epoch": 0.7187662578966927, "grad_norm": 1.669730663154134, "learning_rate": 1.7783244870099833e-05, "loss": 0.8312, "step": 9671 }, { "epoch": 0.7188405797101449, "grad_norm": 1.6278408464852496, "learning_rate": 1.778274106831803e-05, "loss": 0.6616, "step": 9672 }, { "epoch": 0.7189149015235972, "grad_norm": 2.433251420667071, "learning_rate": 1.7782237216431343e-05, "loss": 1.0236, "step": 9673 }, { "epoch": 0.7189892233370494, "grad_norm": 1.8712657214389732, "learning_rate": 1.7781733314443018e-05, "loss": 0.9244, "step": 9674 }, { "epoch": 0.7190635451505016, "grad_norm": 2.0355942046516398, "learning_rate": 1.7781229362356298e-05, "loss": 0.7529, "step": 9675 }, { "epoch": 0.7191378669639539, "grad_norm": 1.7140161528058036, "learning_rate": 1.7780725360174424e-05, "loss": 1.0507, "step": 9676 }, { "epoch": 0.7192121887774061, "grad_norm": 1.893679105102181, "learning_rate": 1.7780221307900648e-05, "loss": 0.6577, "step": 9677 }, { "epoch": 0.7192865105908585, "grad_norm": 1.7754689907495333, "learning_rate": 1.7779717205538208e-05, "loss": 0.9138, "step": 9678 }, { "epoch": 0.7193608324043107, "grad_norm": 1.6816793627227262, "learning_rate": 1.7779213053090354e-05, "loss": 0.896, "step": 9679 }, { "epoch": 0.7194351542177629, "grad_norm": 2.1587527684313566, "learning_rate": 1.777870885056033e-05, "loss": 0.9482, "step": 9680 }, { "epoch": 0.7195094760312152, "grad_norm": 1.7563770623943376, "learning_rate": 1.7778204597951382e-05, "loss": 0.8597, "step": 9681 }, { "epoch": 0.7195837978446674, "grad_norm": 1.7485336651675647, "learning_rate": 1.7777700295266752e-05, "loss": 0.7554, "step": 9682 }, { "epoch": 0.7196581196581197, "grad_norm": 3.9875263119513042, "learning_rate": 1.7777195942509698e-05, "loss": 1.0181, "step": 9683 }, { "epoch": 0.7197324414715719, "grad_norm": 2.1130415346095006, "learning_rate": 1.777669153968346e-05, "loss": 0.8903, "step": 9684 }, { "epoch": 0.7198067632850241, "grad_norm": 2.104846812303607, "learning_rate": 1.777618708679128e-05, "loss": 0.9392, "step": 9685 }, { "epoch": 0.7198810850984764, "grad_norm": 2.07379780147626, "learning_rate": 1.7775682583836413e-05, "loss": 0.6345, "step": 9686 }, { "epoch": 0.7199554069119286, "grad_norm": 1.7140193980995613, "learning_rate": 1.77751780308221e-05, "loss": 0.9422, "step": 9687 }, { "epoch": 0.7200297287253808, "grad_norm": 2.1082812129576873, "learning_rate": 1.77746734277516e-05, "loss": 0.8262, "step": 9688 }, { "epoch": 0.7201040505388332, "grad_norm": 1.9598884786710085, "learning_rate": 1.7774168774628157e-05, "loss": 0.8882, "step": 9689 }, { "epoch": 0.7201783723522854, "grad_norm": 2.228660003731349, "learning_rate": 1.7773664071455015e-05, "loss": 0.9363, "step": 9690 }, { "epoch": 0.7202526941657377, "grad_norm": 2.0651719849171353, "learning_rate": 1.777315931823543e-05, "loss": 0.9507, "step": 9691 }, { "epoch": 0.7203270159791899, "grad_norm": 2.2682771916018787, "learning_rate": 1.7772654514972648e-05, "loss": 1.0502, "step": 9692 }, { "epoch": 0.7204013377926421, "grad_norm": 1.692942809975603, "learning_rate": 1.777214966166992e-05, "loss": 0.6657, "step": 9693 }, { "epoch": 0.7204756596060944, "grad_norm": 4.234045925601224, "learning_rate": 1.7771644758330496e-05, "loss": 1.0157, "step": 9694 }, { "epoch": 0.7205499814195466, "grad_norm": 2.0625179696964695, "learning_rate": 1.7771139804957626e-05, "loss": 0.8923, "step": 9695 }, { "epoch": 0.7206243032329989, "grad_norm": 2.028156233651799, "learning_rate": 1.777063480155456e-05, "loss": 0.7038, "step": 9696 }, { "epoch": 0.7206986250464511, "grad_norm": 1.9785030703022954, "learning_rate": 1.7770129748124548e-05, "loss": 0.9691, "step": 9697 }, { "epoch": 0.7207729468599033, "grad_norm": 1.9631320278701054, "learning_rate": 1.776962464467085e-05, "loss": 0.9049, "step": 9698 }, { "epoch": 0.7208472686733556, "grad_norm": 2.080346095173788, "learning_rate": 1.7769119491196708e-05, "loss": 1.0431, "step": 9699 }, { "epoch": 0.7209215904868079, "grad_norm": 2.7510675924606653, "learning_rate": 1.7768614287705375e-05, "loss": 0.7913, "step": 9700 }, { "epoch": 0.7209959123002602, "grad_norm": 2.5442140470990515, "learning_rate": 1.7768109034200112e-05, "loss": 0.8096, "step": 9701 }, { "epoch": 0.7210702341137124, "grad_norm": 1.8625271446866785, "learning_rate": 1.7767603730684163e-05, "loss": 0.839, "step": 9702 }, { "epoch": 0.7211445559271646, "grad_norm": 1.5639261107746116, "learning_rate": 1.776709837716079e-05, "loss": 0.8765, "step": 9703 }, { "epoch": 0.7212188777406169, "grad_norm": 2.050150972879982, "learning_rate": 1.776659297363323e-05, "loss": 0.7835, "step": 9704 }, { "epoch": 0.7212931995540691, "grad_norm": 1.628749723760094, "learning_rate": 1.7766087520104752e-05, "loss": 0.7297, "step": 9705 }, { "epoch": 0.7213675213675214, "grad_norm": 1.9518566731874891, "learning_rate": 1.776558201657861e-05, "loss": 0.8637, "step": 9706 }, { "epoch": 0.7214418431809736, "grad_norm": 2.2312405263972934, "learning_rate": 1.776507646305805e-05, "loss": 0.8118, "step": 9707 }, { "epoch": 0.7215161649944258, "grad_norm": 1.7987790926509895, "learning_rate": 1.776457085954633e-05, "loss": 0.754, "step": 9708 }, { "epoch": 0.7215904868078781, "grad_norm": 1.9047449987162393, "learning_rate": 1.7764065206046706e-05, "loss": 0.9307, "step": 9709 }, { "epoch": 0.7216648086213303, "grad_norm": 1.9601983556350047, "learning_rate": 1.7763559502562434e-05, "loss": 0.9613, "step": 9710 }, { "epoch": 0.7217391304347827, "grad_norm": 2.0255676315605085, "learning_rate": 1.776305374909677e-05, "loss": 0.9247, "step": 9711 }, { "epoch": 0.7218134522482349, "grad_norm": 1.8481290880255687, "learning_rate": 1.7762547945652966e-05, "loss": 0.6051, "step": 9712 }, { "epoch": 0.7218877740616871, "grad_norm": 1.9123840244105288, "learning_rate": 1.776204209223428e-05, "loss": 0.8497, "step": 9713 }, { "epoch": 0.7219620958751394, "grad_norm": 1.8034321363259174, "learning_rate": 1.7761536188843974e-05, "loss": 0.9292, "step": 9714 }, { "epoch": 0.7220364176885916, "grad_norm": 2.0917528832239674, "learning_rate": 1.77610302354853e-05, "loss": 0.8611, "step": 9715 }, { "epoch": 0.7221107395020439, "grad_norm": 5.9336613295488085, "learning_rate": 1.7760524232161515e-05, "loss": 1.0443, "step": 9716 }, { "epoch": 0.7221850613154961, "grad_norm": 2.133033790220356, "learning_rate": 1.7760018178875874e-05, "loss": 1.028, "step": 9717 }, { "epoch": 0.7222593831289483, "grad_norm": 2.057447284331366, "learning_rate": 1.775951207563164e-05, "loss": 0.879, "step": 9718 }, { "epoch": 0.7223337049424006, "grad_norm": 2.311661030289971, "learning_rate": 1.7759005922432074e-05, "loss": 0.8123, "step": 9719 }, { "epoch": 0.7224080267558528, "grad_norm": 2.2082348147123163, "learning_rate": 1.775849971928043e-05, "loss": 0.6976, "step": 9720 }, { "epoch": 0.722482348569305, "grad_norm": 1.9645401003045002, "learning_rate": 1.7757993466179964e-05, "loss": 0.75, "step": 9721 }, { "epoch": 0.7225566703827574, "grad_norm": 2.5501603889001583, "learning_rate": 1.7757487163133943e-05, "loss": 0.8535, "step": 9722 }, { "epoch": 0.7226309921962096, "grad_norm": 2.4304670922102254, "learning_rate": 1.775698081014562e-05, "loss": 0.7994, "step": 9723 }, { "epoch": 0.7227053140096619, "grad_norm": 1.3222456888277747, "learning_rate": 1.7756474407218256e-05, "loss": 0.593, "step": 9724 }, { "epoch": 0.7227796358231141, "grad_norm": 2.186420732168468, "learning_rate": 1.7755967954355112e-05, "loss": 1.0026, "step": 9725 }, { "epoch": 0.7228539576365663, "grad_norm": 1.9659428172669273, "learning_rate": 1.775546145155945e-05, "loss": 0.8784, "step": 9726 }, { "epoch": 0.7229282794500186, "grad_norm": 2.4064394345261517, "learning_rate": 1.7754954898834527e-05, "loss": 1.0377, "step": 9727 }, { "epoch": 0.7230026012634708, "grad_norm": 2.5095083432947707, "learning_rate": 1.7754448296183612e-05, "loss": 0.5424, "step": 9728 }, { "epoch": 0.7230769230769231, "grad_norm": 2.190468669920164, "learning_rate": 1.7753941643609957e-05, "loss": 0.7123, "step": 9729 }, { "epoch": 0.7231512448903753, "grad_norm": 1.837268384450693, "learning_rate": 1.775343494111683e-05, "loss": 0.8903, "step": 9730 }, { "epoch": 0.7232255667038275, "grad_norm": 1.4918363815130944, "learning_rate": 1.775292818870749e-05, "loss": 0.7429, "step": 9731 }, { "epoch": 0.7232998885172798, "grad_norm": 1.7514000262774905, "learning_rate": 1.77524213863852e-05, "loss": 0.8956, "step": 9732 }, { "epoch": 0.723374210330732, "grad_norm": 1.861361116544114, "learning_rate": 1.7751914534153224e-05, "loss": 0.7731, "step": 9733 }, { "epoch": 0.7234485321441844, "grad_norm": 1.9913457863121877, "learning_rate": 1.7751407632014823e-05, "loss": 1.0305, "step": 9734 }, { "epoch": 0.7235228539576366, "grad_norm": 2.000011809803735, "learning_rate": 1.7750900679973262e-05, "loss": 0.8227, "step": 9735 }, { "epoch": 0.7235971757710888, "grad_norm": 2.0871793083943246, "learning_rate": 1.7750393678031808e-05, "loss": 0.9362, "step": 9736 }, { "epoch": 0.7236714975845411, "grad_norm": 1.8290307684982754, "learning_rate": 1.774988662619372e-05, "loss": 0.5617, "step": 9737 }, { "epoch": 0.7237458193979933, "grad_norm": 1.6711088283946753, "learning_rate": 1.7749379524462265e-05, "loss": 0.7939, "step": 9738 }, { "epoch": 0.7238201412114456, "grad_norm": 1.7127034173386912, "learning_rate": 1.7748872372840704e-05, "loss": 0.7699, "step": 9739 }, { "epoch": 0.7238944630248978, "grad_norm": 1.9581443107762455, "learning_rate": 1.7748365171332307e-05, "loss": 0.9758, "step": 9740 }, { "epoch": 0.72396878483835, "grad_norm": 1.8427848662358688, "learning_rate": 1.7747857919940338e-05, "loss": 0.9732, "step": 9741 }, { "epoch": 0.7240431066518023, "grad_norm": 1.8191563723670945, "learning_rate": 1.774735061866806e-05, "loss": 0.9619, "step": 9742 }, { "epoch": 0.7241174284652545, "grad_norm": 2.2303451688136877, "learning_rate": 1.774684326751874e-05, "loss": 0.8041, "step": 9743 }, { "epoch": 0.7241917502787067, "grad_norm": 1.9778995532150907, "learning_rate": 1.7746335866495642e-05, "loss": 0.637, "step": 9744 }, { "epoch": 0.7242660720921591, "grad_norm": 2.4808338196562563, "learning_rate": 1.774582841560204e-05, "loss": 0.8367, "step": 9745 }, { "epoch": 0.7243403939056113, "grad_norm": 2.4019952771045947, "learning_rate": 1.7745320914841196e-05, "loss": 0.861, "step": 9746 }, { "epoch": 0.7244147157190636, "grad_norm": 1.9115567727500324, "learning_rate": 1.774481336421638e-05, "loss": 0.8956, "step": 9747 }, { "epoch": 0.7244890375325158, "grad_norm": 1.812752293213258, "learning_rate": 1.774430576373085e-05, "loss": 0.9009, "step": 9748 }, { "epoch": 0.724563359345968, "grad_norm": 1.9627188474882253, "learning_rate": 1.7743798113387888e-05, "loss": 0.9865, "step": 9749 }, { "epoch": 0.7246376811594203, "grad_norm": 2.172061142537938, "learning_rate": 1.774329041319076e-05, "loss": 0.7436, "step": 9750 }, { "epoch": 0.7247120029728725, "grad_norm": 2.5235973842545367, "learning_rate": 1.7742782663142718e-05, "loss": 0.8791, "step": 9751 }, { "epoch": 0.7247863247863248, "grad_norm": 2.608456465712931, "learning_rate": 1.774227486324705e-05, "loss": 0.955, "step": 9752 }, { "epoch": 0.724860646599777, "grad_norm": 2.16950750445338, "learning_rate": 1.7741767013507018e-05, "loss": 0.8781, "step": 9753 }, { "epoch": 0.7249349684132292, "grad_norm": 2.062069912206034, "learning_rate": 1.7741259113925888e-05, "loss": 0.8947, "step": 9754 }, { "epoch": 0.7250092902266815, "grad_norm": 1.878852631074694, "learning_rate": 1.7740751164506938e-05, "loss": 1.0093, "step": 9755 }, { "epoch": 0.7250836120401338, "grad_norm": 2.0094485033388114, "learning_rate": 1.7740243165253432e-05, "loss": 0.9551, "step": 9756 }, { "epoch": 0.7251579338535861, "grad_norm": 2.80364424461266, "learning_rate": 1.7739735116168642e-05, "loss": 0.9856, "step": 9757 }, { "epoch": 0.7252322556670383, "grad_norm": 1.8953716633163566, "learning_rate": 1.7739227017255838e-05, "loss": 0.7798, "step": 9758 }, { "epoch": 0.7253065774804905, "grad_norm": 1.607873212546243, "learning_rate": 1.7738718868518293e-05, "loss": 0.8776, "step": 9759 }, { "epoch": 0.7253808992939428, "grad_norm": 1.9607274211804708, "learning_rate": 1.773821066995928e-05, "loss": 0.9582, "step": 9760 }, { "epoch": 0.725455221107395, "grad_norm": 1.7715435525419212, "learning_rate": 1.7737702421582064e-05, "loss": 0.5084, "step": 9761 }, { "epoch": 0.7255295429208473, "grad_norm": 1.6505388609436256, "learning_rate": 1.7737194123389922e-05, "loss": 0.6584, "step": 9762 }, { "epoch": 0.7256038647342995, "grad_norm": 1.9151074479745998, "learning_rate": 1.7736685775386128e-05, "loss": 0.6467, "step": 9763 }, { "epoch": 0.7256781865477517, "grad_norm": 1.7600625364412685, "learning_rate": 1.773617737757395e-05, "loss": 0.7644, "step": 9764 }, { "epoch": 0.725752508361204, "grad_norm": 2.7603950293060477, "learning_rate": 1.7735668929956666e-05, "loss": 0.7241, "step": 9765 }, { "epoch": 0.7258268301746562, "grad_norm": 5.694429155588093, "learning_rate": 1.7735160432537544e-05, "loss": 0.8831, "step": 9766 }, { "epoch": 0.7259011519881086, "grad_norm": 2.0591558402643715, "learning_rate": 1.7734651885319864e-05, "loss": 0.9428, "step": 9767 }, { "epoch": 0.7259754738015608, "grad_norm": 1.9324257733868961, "learning_rate": 1.7734143288306892e-05, "loss": 0.8293, "step": 9768 }, { "epoch": 0.726049795615013, "grad_norm": 1.612500293866963, "learning_rate": 1.773363464150191e-05, "loss": 0.9534, "step": 9769 }, { "epoch": 0.7261241174284653, "grad_norm": 1.9155766282590325, "learning_rate": 1.7733125944908187e-05, "loss": 0.9813, "step": 9770 }, { "epoch": 0.7261984392419175, "grad_norm": 2.0144110685315817, "learning_rate": 1.7732617198529002e-05, "loss": 0.7934, "step": 9771 }, { "epoch": 0.7262727610553698, "grad_norm": 6.778783826954602, "learning_rate": 1.773210840236763e-05, "loss": 1.0318, "step": 9772 }, { "epoch": 0.726347082868822, "grad_norm": 2.4655097828456394, "learning_rate": 1.7731599556427343e-05, "loss": 0.8391, "step": 9773 }, { "epoch": 0.7264214046822742, "grad_norm": 2.110992983349509, "learning_rate": 1.7731090660711418e-05, "loss": 0.8672, "step": 9774 }, { "epoch": 0.7264957264957265, "grad_norm": 1.9551371506586213, "learning_rate": 1.7730581715223136e-05, "loss": 0.9563, "step": 9775 }, { "epoch": 0.7265700483091787, "grad_norm": 2.020305150761062, "learning_rate": 1.773007271996577e-05, "loss": 0.926, "step": 9776 }, { "epoch": 0.726644370122631, "grad_norm": 1.8276052558202627, "learning_rate": 1.7729563674942594e-05, "loss": 0.7987, "step": 9777 }, { "epoch": 0.7267186919360833, "grad_norm": 1.7566733746173004, "learning_rate": 1.772905458015689e-05, "loss": 0.7437, "step": 9778 }, { "epoch": 0.7267930137495355, "grad_norm": 2.3059824522860515, "learning_rate": 1.7728545435611933e-05, "loss": 0.665, "step": 9779 }, { "epoch": 0.7268673355629878, "grad_norm": 2.407086458505443, "learning_rate": 1.7728036241311e-05, "loss": 0.9721, "step": 9780 }, { "epoch": 0.72694165737644, "grad_norm": 1.7177695792002718, "learning_rate": 1.7727526997257375e-05, "loss": 0.8856, "step": 9781 }, { "epoch": 0.7270159791898922, "grad_norm": 2.3819184733930343, "learning_rate": 1.772701770345433e-05, "loss": 0.7197, "step": 9782 }, { "epoch": 0.7270903010033445, "grad_norm": 1.7960877663104047, "learning_rate": 1.7726508359905142e-05, "loss": 0.9056, "step": 9783 }, { "epoch": 0.7271646228167967, "grad_norm": 1.770359919360106, "learning_rate": 1.7725998966613102e-05, "loss": 0.7521, "step": 9784 }, { "epoch": 0.727238944630249, "grad_norm": 1.5226598731789136, "learning_rate": 1.7725489523581475e-05, "loss": 0.8928, "step": 9785 }, { "epoch": 0.7273132664437012, "grad_norm": 1.8834949573018696, "learning_rate": 1.772498003081355e-05, "loss": 0.9438, "step": 9786 }, { "epoch": 0.7273875882571534, "grad_norm": 2.252699987957377, "learning_rate": 1.7724470488312604e-05, "loss": 0.9184, "step": 9787 }, { "epoch": 0.7274619100706057, "grad_norm": 1.7871079753830887, "learning_rate": 1.7723960896081917e-05, "loss": 0.8983, "step": 9788 }, { "epoch": 0.7275362318840579, "grad_norm": 1.6490427331026614, "learning_rate": 1.772345125412477e-05, "loss": 0.8988, "step": 9789 }, { "epoch": 0.7276105536975103, "grad_norm": 2.7404076981247463, "learning_rate": 1.7722941562444447e-05, "loss": 1.078, "step": 9790 }, { "epoch": 0.7276848755109625, "grad_norm": 2.1881793839008044, "learning_rate": 1.7722431821044224e-05, "loss": 0.9352, "step": 9791 }, { "epoch": 0.7277591973244147, "grad_norm": 1.9967309324653169, "learning_rate": 1.772192202992739e-05, "loss": 0.9284, "step": 9792 }, { "epoch": 0.727833519137867, "grad_norm": 1.8607849366782656, "learning_rate": 1.772141218909722e-05, "loss": 0.8469, "step": 9793 }, { "epoch": 0.7279078409513192, "grad_norm": 1.667808214710308, "learning_rate": 1.7720902298557e-05, "loss": 0.849, "step": 9794 }, { "epoch": 0.7279821627647715, "grad_norm": 1.8738471849526497, "learning_rate": 1.772039235831001e-05, "loss": 0.7995, "step": 9795 }, { "epoch": 0.7280564845782237, "grad_norm": 2.3470699324773663, "learning_rate": 1.7719882368359535e-05, "loss": 0.9547, "step": 9796 }, { "epoch": 0.7281308063916759, "grad_norm": 1.7441938842103564, "learning_rate": 1.7719372328708857e-05, "loss": 0.9361, "step": 9797 }, { "epoch": 0.7282051282051282, "grad_norm": 2.1797838206244258, "learning_rate": 1.7718862239361262e-05, "loss": 1.0697, "step": 9798 }, { "epoch": 0.7282794500185804, "grad_norm": 2.1011334157249633, "learning_rate": 1.771835210032003e-05, "loss": 0.9763, "step": 9799 }, { "epoch": 0.7283537718320326, "grad_norm": 1.7515787429971275, "learning_rate": 1.771784191158845e-05, "loss": 0.8694, "step": 9800 }, { "epoch": 0.728428093645485, "grad_norm": 2.1136176190800198, "learning_rate": 1.7717331673169804e-05, "loss": 0.9794, "step": 9801 }, { "epoch": 0.7285024154589372, "grad_norm": 2.339014524172884, "learning_rate": 1.7716821385067375e-05, "loss": 0.9506, "step": 9802 }, { "epoch": 0.7285767372723895, "grad_norm": 2.068160565850925, "learning_rate": 1.7716311047284454e-05, "loss": 0.8512, "step": 9803 }, { "epoch": 0.7286510590858417, "grad_norm": 2.322340595866975, "learning_rate": 1.771580065982432e-05, "loss": 1.1237, "step": 9804 }, { "epoch": 0.728725380899294, "grad_norm": 1.981484457199182, "learning_rate": 1.7715290222690263e-05, "loss": 0.958, "step": 9805 }, { "epoch": 0.7287997027127462, "grad_norm": 2.155090962394712, "learning_rate": 1.7714779735885566e-05, "loss": 0.9883, "step": 9806 }, { "epoch": 0.7288740245261984, "grad_norm": 1.6157950086901964, "learning_rate": 1.771426919941352e-05, "loss": 0.8406, "step": 9807 }, { "epoch": 0.7289483463396507, "grad_norm": 1.5774000946117945, "learning_rate": 1.771375861327741e-05, "loss": 0.9097, "step": 9808 }, { "epoch": 0.7290226681531029, "grad_norm": 1.7012712307635889, "learning_rate": 1.7713247977480518e-05, "loss": 0.8539, "step": 9809 }, { "epoch": 0.7290969899665551, "grad_norm": 2.010530675480646, "learning_rate": 1.771273729202614e-05, "loss": 0.9552, "step": 9810 }, { "epoch": 0.7291713117800074, "grad_norm": 2.3857164906768236, "learning_rate": 1.7712226556917557e-05, "loss": 0.8954, "step": 9811 }, { "epoch": 0.7292456335934597, "grad_norm": 2.577410685276226, "learning_rate": 1.771171577215806e-05, "loss": 0.9725, "step": 9812 }, { "epoch": 0.729319955406912, "grad_norm": 1.6978103850992285, "learning_rate": 1.7711204937750937e-05, "loss": 0.8834, "step": 9813 }, { "epoch": 0.7293942772203642, "grad_norm": 1.7422141290457478, "learning_rate": 1.7710694053699476e-05, "loss": 0.8225, "step": 9814 }, { "epoch": 0.7294685990338164, "grad_norm": 7.8409592987316, "learning_rate": 1.771018312000697e-05, "loss": 0.8073, "step": 9815 }, { "epoch": 0.7295429208472687, "grad_norm": 1.8951243490635359, "learning_rate": 1.7709672136676702e-05, "loss": 0.8386, "step": 9816 }, { "epoch": 0.7296172426607209, "grad_norm": 2.630545223541378, "learning_rate": 1.7709161103711964e-05, "loss": 1.0144, "step": 9817 }, { "epoch": 0.7296915644741732, "grad_norm": 1.850755883300712, "learning_rate": 1.770865002111605e-05, "loss": 0.8996, "step": 9818 }, { "epoch": 0.7297658862876254, "grad_norm": 1.8776101300328758, "learning_rate": 1.7708138888892246e-05, "loss": 0.9602, "step": 9819 }, { "epoch": 0.7298402081010776, "grad_norm": 1.7701848697101372, "learning_rate": 1.7707627707043843e-05, "loss": 0.8693, "step": 9820 }, { "epoch": 0.7299145299145299, "grad_norm": 2.18852825957021, "learning_rate": 1.770711647557413e-05, "loss": 0.9857, "step": 9821 }, { "epoch": 0.7299888517279821, "grad_norm": 2.1178753406969237, "learning_rate": 1.7706605194486406e-05, "loss": 0.8419, "step": 9822 }, { "epoch": 0.7300631735414345, "grad_norm": 1.7155139342591215, "learning_rate": 1.7706093863783956e-05, "loss": 0.8076, "step": 9823 }, { "epoch": 0.7301374953548867, "grad_norm": 2.0349411943405564, "learning_rate": 1.7705582483470075e-05, "loss": 0.761, "step": 9824 }, { "epoch": 0.7302118171683389, "grad_norm": 2.1321954663376785, "learning_rate": 1.770507105354805e-05, "loss": 0.8873, "step": 9825 }, { "epoch": 0.7302861389817912, "grad_norm": 5.405538095379027, "learning_rate": 1.770455957402118e-05, "loss": 0.7101, "step": 9826 }, { "epoch": 0.7303604607952434, "grad_norm": 2.2282483754886644, "learning_rate": 1.7704048044892753e-05, "loss": 1.0463, "step": 9827 }, { "epoch": 0.7304347826086957, "grad_norm": 1.9038296829633465, "learning_rate": 1.7703536466166066e-05, "loss": 0.9311, "step": 9828 }, { "epoch": 0.7305091044221479, "grad_norm": 1.9591108677590636, "learning_rate": 1.770302483784441e-05, "loss": 0.9337, "step": 9829 }, { "epoch": 0.7305834262356001, "grad_norm": 1.9096648018335232, "learning_rate": 1.7702513159931084e-05, "loss": 0.74, "step": 9830 }, { "epoch": 0.7306577480490524, "grad_norm": 1.6841575012228025, "learning_rate": 1.7702001432429375e-05, "loss": 0.9225, "step": 9831 }, { "epoch": 0.7307320698625046, "grad_norm": 1.7158992049222566, "learning_rate": 1.7701489655342578e-05, "loss": 0.8183, "step": 9832 }, { "epoch": 0.7308063916759568, "grad_norm": 2.0939223898414205, "learning_rate": 1.7700977828673992e-05, "loss": 0.8959, "step": 9833 }, { "epoch": 0.7308807134894092, "grad_norm": 1.74397226061491, "learning_rate": 1.770046595242691e-05, "loss": 0.9001, "step": 9834 }, { "epoch": 0.7309550353028614, "grad_norm": 2.8352086904196, "learning_rate": 1.769995402660463e-05, "loss": 1.0128, "step": 9835 }, { "epoch": 0.7310293571163137, "grad_norm": 1.9583268689821214, "learning_rate": 1.7699442051210443e-05, "loss": 0.7824, "step": 9836 }, { "epoch": 0.7311036789297659, "grad_norm": 1.8285251033464243, "learning_rate": 1.769893002624765e-05, "loss": 0.7876, "step": 9837 }, { "epoch": 0.7311780007432181, "grad_norm": 2.2902536756359457, "learning_rate": 1.769841795171954e-05, "loss": 0.9402, "step": 9838 }, { "epoch": 0.7312523225566704, "grad_norm": 3.0363886391855734, "learning_rate": 1.769790582762942e-05, "loss": 1.0936, "step": 9839 }, { "epoch": 0.7313266443701226, "grad_norm": 1.8230218871322956, "learning_rate": 1.769739365398058e-05, "loss": 0.6831, "step": 9840 }, { "epoch": 0.7314009661835749, "grad_norm": 1.8859948757315712, "learning_rate": 1.769688143077632e-05, "loss": 1.1162, "step": 9841 }, { "epoch": 0.7314752879970271, "grad_norm": 2.2111136794468083, "learning_rate": 1.7696369158019936e-05, "loss": 0.8345, "step": 9842 }, { "epoch": 0.7315496098104793, "grad_norm": 1.9870608575543414, "learning_rate": 1.7695856835714724e-05, "loss": 0.7636, "step": 9843 }, { "epoch": 0.7316239316239316, "grad_norm": 1.6880223135098784, "learning_rate": 1.7695344463863987e-05, "loss": 0.6798, "step": 9844 }, { "epoch": 0.7316982534373839, "grad_norm": 1.773744549364305, "learning_rate": 1.769483204247102e-05, "loss": 0.476, "step": 9845 }, { "epoch": 0.7317725752508362, "grad_norm": 2.305794182665812, "learning_rate": 1.769431957153913e-05, "loss": 0.8321, "step": 9846 }, { "epoch": 0.7318468970642884, "grad_norm": 2.207773862013472, "learning_rate": 1.7693807051071603e-05, "loss": 0.9018, "step": 9847 }, { "epoch": 0.7319212188777406, "grad_norm": 1.7229475199311486, "learning_rate": 1.769329448107175e-05, "loss": 0.9315, "step": 9848 }, { "epoch": 0.7319955406911929, "grad_norm": 2.6432043135492695, "learning_rate": 1.769278186154286e-05, "loss": 0.8138, "step": 9849 }, { "epoch": 0.7320698625046451, "grad_norm": 2.018510521625307, "learning_rate": 1.7692269192488246e-05, "loss": 0.9895, "step": 9850 }, { "epoch": 0.7321441843180974, "grad_norm": 2.382177703892342, "learning_rate": 1.76917564739112e-05, "loss": 0.7711, "step": 9851 }, { "epoch": 0.7322185061315496, "grad_norm": 1.7588392666984252, "learning_rate": 1.7691243705815023e-05, "loss": 0.9513, "step": 9852 }, { "epoch": 0.7322928279450018, "grad_norm": 1.953946803360537, "learning_rate": 1.769073088820302e-05, "loss": 0.6305, "step": 9853 }, { "epoch": 0.7323671497584541, "grad_norm": 2.010742107416114, "learning_rate": 1.769021802107849e-05, "loss": 0.7819, "step": 9854 }, { "epoch": 0.7324414715719063, "grad_norm": 2.0138729734428997, "learning_rate": 1.7689705104444737e-05, "loss": 1.0203, "step": 9855 }, { "epoch": 0.7325157933853585, "grad_norm": 2.1397149043130845, "learning_rate": 1.768919213830506e-05, "loss": 0.8751, "step": 9856 }, { "epoch": 0.7325901151988109, "grad_norm": 2.4716635910013323, "learning_rate": 1.7688679122662763e-05, "loss": 0.8502, "step": 9857 }, { "epoch": 0.7326644370122631, "grad_norm": 2.1905089610490545, "learning_rate": 1.768816605752115e-05, "loss": 0.9457, "step": 9858 }, { "epoch": 0.7327387588257154, "grad_norm": 1.681188745217447, "learning_rate": 1.768765294288352e-05, "loss": 0.9027, "step": 9859 }, { "epoch": 0.7328130806391676, "grad_norm": 2.083315665690602, "learning_rate": 1.7687139778753183e-05, "loss": 0.9891, "step": 9860 }, { "epoch": 0.7328874024526199, "grad_norm": 1.6686901559009684, "learning_rate": 1.7686626565133432e-05, "loss": 0.7924, "step": 9861 }, { "epoch": 0.7329617242660721, "grad_norm": 1.7477034701249616, "learning_rate": 1.7686113302027587e-05, "loss": 0.881, "step": 9862 }, { "epoch": 0.7330360460795243, "grad_norm": 1.6249943751219167, "learning_rate": 1.7685599989438938e-05, "loss": 0.881, "step": 9863 }, { "epoch": 0.7331103678929766, "grad_norm": 1.796920109995757, "learning_rate": 1.7685086627370795e-05, "loss": 0.9057, "step": 9864 }, { "epoch": 0.7331846897064288, "grad_norm": 2.3059604360717914, "learning_rate": 1.7684573215826464e-05, "loss": 0.8822, "step": 9865 }, { "epoch": 0.733259011519881, "grad_norm": 2.1707117051285594, "learning_rate": 1.768405975480925e-05, "loss": 0.8494, "step": 9866 }, { "epoch": 0.7333333333333333, "grad_norm": 1.9303411133284323, "learning_rate": 1.768354624432246e-05, "loss": 0.9973, "step": 9867 }, { "epoch": 0.7334076551467856, "grad_norm": 1.581671170429666, "learning_rate": 1.7683032684369395e-05, "loss": 0.8362, "step": 9868 }, { "epoch": 0.7334819769602379, "grad_norm": 2.3825406725044442, "learning_rate": 1.7682519074953367e-05, "loss": 1.1106, "step": 9869 }, { "epoch": 0.7335562987736901, "grad_norm": 1.7207618512702085, "learning_rate": 1.7682005416077676e-05, "loss": 0.8337, "step": 9870 }, { "epoch": 0.7336306205871423, "grad_norm": 2.963824363566832, "learning_rate": 1.7681491707745632e-05, "loss": 1.0351, "step": 9871 }, { "epoch": 0.7337049424005946, "grad_norm": 2.446265196663036, "learning_rate": 1.7680977949960545e-05, "loss": 1.043, "step": 9872 }, { "epoch": 0.7337792642140468, "grad_norm": 2.226312807529894, "learning_rate": 1.7680464142725722e-05, "loss": 1.0246, "step": 9873 }, { "epoch": 0.7338535860274991, "grad_norm": 2.1616043729988097, "learning_rate": 1.767995028604447e-05, "loss": 0.791, "step": 9874 }, { "epoch": 0.7339279078409513, "grad_norm": 1.9729587332214766, "learning_rate": 1.7679436379920095e-05, "loss": 1.094, "step": 9875 }, { "epoch": 0.7340022296544035, "grad_norm": 1.8227915790087308, "learning_rate": 1.7678922424355905e-05, "loss": 0.7458, "step": 9876 }, { "epoch": 0.7340765514678558, "grad_norm": 2.2047432303547008, "learning_rate": 1.767840841935521e-05, "loss": 0.7744, "step": 9877 }, { "epoch": 0.734150873281308, "grad_norm": 1.7211996625959052, "learning_rate": 1.7677894364921325e-05, "loss": 0.8695, "step": 9878 }, { "epoch": 0.7342251950947604, "grad_norm": 2.2195174926699055, "learning_rate": 1.767738026105755e-05, "loss": 0.921, "step": 9879 }, { "epoch": 0.7342995169082126, "grad_norm": 1.8054787696764327, "learning_rate": 1.7676866107767203e-05, "loss": 0.8755, "step": 9880 }, { "epoch": 0.7343738387216648, "grad_norm": 2.543241334512008, "learning_rate": 1.767635190505359e-05, "loss": 0.9882, "step": 9881 }, { "epoch": 0.7344481605351171, "grad_norm": 2.1166630877589183, "learning_rate": 1.7675837652920022e-05, "loss": 0.9552, "step": 9882 }, { "epoch": 0.7345224823485693, "grad_norm": 1.7111859951357955, "learning_rate": 1.7675323351369805e-05, "loss": 0.8776, "step": 9883 }, { "epoch": 0.7345968041620216, "grad_norm": 3.0476133737240834, "learning_rate": 1.7674809000406258e-05, "loss": 1.1093, "step": 9884 }, { "epoch": 0.7346711259754738, "grad_norm": 1.9881146048776661, "learning_rate": 1.767429460003269e-05, "loss": 0.6316, "step": 9885 }, { "epoch": 0.734745447788926, "grad_norm": 2.001353292075746, "learning_rate": 1.7673780150252405e-05, "loss": 0.7201, "step": 9886 }, { "epoch": 0.7348197696023783, "grad_norm": 2.154696840268382, "learning_rate": 1.7673265651068727e-05, "loss": 0.8169, "step": 9887 }, { "epoch": 0.7348940914158305, "grad_norm": 1.5438028822499632, "learning_rate": 1.7672751102484963e-05, "loss": 0.8284, "step": 9888 }, { "epoch": 0.7349684132292827, "grad_norm": 1.970903269268909, "learning_rate": 1.7672236504504426e-05, "loss": 0.9985, "step": 9889 }, { "epoch": 0.7350427350427351, "grad_norm": 1.7318500633876932, "learning_rate": 1.7671721857130427e-05, "loss": 0.6434, "step": 9890 }, { "epoch": 0.7351170568561873, "grad_norm": 1.708154655413741, "learning_rate": 1.767120716036628e-05, "loss": 0.7683, "step": 9891 }, { "epoch": 0.7351913786696396, "grad_norm": 1.797972835770659, "learning_rate": 1.76706924142153e-05, "loss": 1.0191, "step": 9892 }, { "epoch": 0.7352657004830918, "grad_norm": 1.8477250617317342, "learning_rate": 1.7670177618680802e-05, "loss": 0.9374, "step": 9893 }, { "epoch": 0.735340022296544, "grad_norm": 5.2228012894471, "learning_rate": 1.76696627737661e-05, "loss": 0.8723, "step": 9894 }, { "epoch": 0.7354143441099963, "grad_norm": 2.3054002037866534, "learning_rate": 1.76691478794745e-05, "loss": 0.5757, "step": 9895 }, { "epoch": 0.7354886659234485, "grad_norm": 2.129454172806538, "learning_rate": 1.766863293580933e-05, "loss": 0.6566, "step": 9896 }, { "epoch": 0.7355629877369008, "grad_norm": 2.2547426089958815, "learning_rate": 1.7668117942773895e-05, "loss": 0.9422, "step": 9897 }, { "epoch": 0.735637309550353, "grad_norm": 3.386998111530587, "learning_rate": 1.7667602900371515e-05, "loss": 0.8037, "step": 9898 }, { "epoch": 0.7357116313638052, "grad_norm": 1.6096955902595824, "learning_rate": 1.766708780860551e-05, "loss": 0.7146, "step": 9899 }, { "epoch": 0.7357859531772575, "grad_norm": 1.764613968390843, "learning_rate": 1.766657266747919e-05, "loss": 0.7161, "step": 9900 }, { "epoch": 0.7358602749907098, "grad_norm": 1.9386042476021228, "learning_rate": 1.766605747699587e-05, "loss": 1.0793, "step": 9901 }, { "epoch": 0.7359345968041621, "grad_norm": 1.8128097439356923, "learning_rate": 1.766554223715887e-05, "loss": 0.9365, "step": 9902 }, { "epoch": 0.7360089186176143, "grad_norm": 1.8459530762415075, "learning_rate": 1.7665026947971508e-05, "loss": 0.8444, "step": 9903 }, { "epoch": 0.7360832404310665, "grad_norm": 2.4061440506048237, "learning_rate": 1.7664511609437102e-05, "loss": 0.9417, "step": 9904 }, { "epoch": 0.7361575622445188, "grad_norm": 1.9605216708453328, "learning_rate": 1.766399622155896e-05, "loss": 0.5397, "step": 9905 }, { "epoch": 0.736231884057971, "grad_norm": 1.8744429389293251, "learning_rate": 1.766348078434042e-05, "loss": 1.0076, "step": 9906 }, { "epoch": 0.7363062058714233, "grad_norm": 1.8209446308584845, "learning_rate": 1.766296529778478e-05, "loss": 0.8166, "step": 9907 }, { "epoch": 0.7363805276848755, "grad_norm": 1.8946875894530957, "learning_rate": 1.766244976189537e-05, "loss": 0.9111, "step": 9908 }, { "epoch": 0.7364548494983277, "grad_norm": 2.136475638601312, "learning_rate": 1.7661934176675505e-05, "loss": 0.8831, "step": 9909 }, { "epoch": 0.73652917131178, "grad_norm": 2.0540957762064096, "learning_rate": 1.7661418542128506e-05, "loss": 0.9726, "step": 9910 }, { "epoch": 0.7366034931252322, "grad_norm": 1.930043540279495, "learning_rate": 1.766090285825769e-05, "loss": 0.8563, "step": 9911 }, { "epoch": 0.7366778149386844, "grad_norm": 2.0314970974112474, "learning_rate": 1.7660387125066382e-05, "loss": 1.0203, "step": 9912 }, { "epoch": 0.7367521367521368, "grad_norm": 1.9319714631552634, "learning_rate": 1.76598713425579e-05, "loss": 0.9355, "step": 9913 }, { "epoch": 0.736826458565589, "grad_norm": 1.5947444301919866, "learning_rate": 1.765935551073556e-05, "loss": 0.7341, "step": 9914 }, { "epoch": 0.7369007803790413, "grad_norm": 2.298418854990608, "learning_rate": 1.7658839629602688e-05, "loss": 1.0475, "step": 9915 }, { "epoch": 0.7369751021924935, "grad_norm": 2.0161687382832953, "learning_rate": 1.7658323699162604e-05, "loss": 0.8435, "step": 9916 }, { "epoch": 0.7370494240059458, "grad_norm": 2.3327491832514182, "learning_rate": 1.765780771941863e-05, "loss": 0.9148, "step": 9917 }, { "epoch": 0.737123745819398, "grad_norm": 2.4920654501856463, "learning_rate": 1.7657291690374085e-05, "loss": 0.8435, "step": 9918 }, { "epoch": 0.7371980676328502, "grad_norm": 2.080358625920189, "learning_rate": 1.7656775612032294e-05, "loss": 0.7178, "step": 9919 }, { "epoch": 0.7372723894463025, "grad_norm": 1.6331200116905995, "learning_rate": 1.765625948439658e-05, "loss": 0.5909, "step": 9920 }, { "epoch": 0.7373467112597547, "grad_norm": 2.0407000423526886, "learning_rate": 1.7655743307470265e-05, "loss": 0.7128, "step": 9921 }, { "epoch": 0.7374210330732069, "grad_norm": 1.9588010246787813, "learning_rate": 1.765522708125667e-05, "loss": 0.8062, "step": 9922 }, { "epoch": 0.7374953548866592, "grad_norm": 1.8755729154124632, "learning_rate": 1.7654710805759122e-05, "loss": 0.9827, "step": 9923 }, { "epoch": 0.7375696767001115, "grad_norm": 1.8518992015645457, "learning_rate": 1.7654194480980945e-05, "loss": 0.9119, "step": 9924 }, { "epoch": 0.7376439985135638, "grad_norm": 2.365248019885382, "learning_rate": 1.7653678106925458e-05, "loss": 0.9984, "step": 9925 }, { "epoch": 0.737718320327016, "grad_norm": 1.6497242747109275, "learning_rate": 1.7653161683595987e-05, "loss": 0.7358, "step": 9926 }, { "epoch": 0.7377926421404682, "grad_norm": 1.7252196723513817, "learning_rate": 1.7652645210995864e-05, "loss": 1.0113, "step": 9927 }, { "epoch": 0.7378669639539205, "grad_norm": 5.382735200378908, "learning_rate": 1.7652128689128404e-05, "loss": 0.978, "step": 9928 }, { "epoch": 0.7379412857673727, "grad_norm": 1.6818058573172265, "learning_rate": 1.7651612117996936e-05, "loss": 0.6319, "step": 9929 }, { "epoch": 0.738015607580825, "grad_norm": 1.4526100683618628, "learning_rate": 1.765109549760479e-05, "loss": 0.6707, "step": 9930 }, { "epoch": 0.7380899293942772, "grad_norm": 2.049106514449695, "learning_rate": 1.765057882795528e-05, "loss": 0.9194, "step": 9931 }, { "epoch": 0.7381642512077294, "grad_norm": 1.9225949964236702, "learning_rate": 1.765006210905175e-05, "loss": 0.8936, "step": 9932 }, { "epoch": 0.7382385730211817, "grad_norm": 2.010964743874583, "learning_rate": 1.764954534089751e-05, "loss": 0.7577, "step": 9933 }, { "epoch": 0.7383128948346339, "grad_norm": 2.185157800215138, "learning_rate": 1.76490285234959e-05, "loss": 0.9772, "step": 9934 }, { "epoch": 0.7383872166480863, "grad_norm": 2.620861353372745, "learning_rate": 1.7648511656850238e-05, "loss": 0.8859, "step": 9935 }, { "epoch": 0.7384615384615385, "grad_norm": 1.9870521461461907, "learning_rate": 1.7647994740963854e-05, "loss": 0.795, "step": 9936 }, { "epoch": 0.7385358602749907, "grad_norm": 2.165210716839272, "learning_rate": 1.764747777584008e-05, "loss": 0.8126, "step": 9937 }, { "epoch": 0.738610182088443, "grad_norm": 1.9831067409839116, "learning_rate": 1.7646960761482236e-05, "loss": 0.8709, "step": 9938 }, { "epoch": 0.7386845039018952, "grad_norm": 1.7886047381921395, "learning_rate": 1.764644369789366e-05, "loss": 0.8148, "step": 9939 }, { "epoch": 0.7387588257153475, "grad_norm": 1.9643532614724144, "learning_rate": 1.7645926585077676e-05, "loss": 0.8439, "step": 9940 }, { "epoch": 0.7388331475287997, "grad_norm": 1.96296740543577, "learning_rate": 1.764540942303761e-05, "loss": 1.0372, "step": 9941 }, { "epoch": 0.7389074693422519, "grad_norm": 2.1118586601360394, "learning_rate": 1.7644892211776796e-05, "loss": 0.8501, "step": 9942 }, { "epoch": 0.7389817911557042, "grad_norm": 1.8486740274054538, "learning_rate": 1.7644374951298565e-05, "loss": 0.7546, "step": 9943 }, { "epoch": 0.7390561129691564, "grad_norm": 3.5645404577344078, "learning_rate": 1.7643857641606242e-05, "loss": 0.6646, "step": 9944 }, { "epoch": 0.7391304347826086, "grad_norm": 2.0720946873023145, "learning_rate": 1.764334028270316e-05, "loss": 0.787, "step": 9945 }, { "epoch": 0.739204756596061, "grad_norm": 1.8560873880081636, "learning_rate": 1.764282287459265e-05, "loss": 0.9305, "step": 9946 }, { "epoch": 0.7392790784095132, "grad_norm": 1.9951148666600707, "learning_rate": 1.764230541727804e-05, "loss": 0.9457, "step": 9947 }, { "epoch": 0.7393534002229655, "grad_norm": 1.7470261978468133, "learning_rate": 1.764178791076267e-05, "loss": 0.7899, "step": 9948 }, { "epoch": 0.7394277220364177, "grad_norm": 2.48441498218666, "learning_rate": 1.7641270355049863e-05, "loss": 1.0152, "step": 9949 }, { "epoch": 0.73950204384987, "grad_norm": 2.5111245199588286, "learning_rate": 1.7640752750142957e-05, "loss": 0.9881, "step": 9950 }, { "epoch": 0.7395763656633222, "grad_norm": 2.206507055351625, "learning_rate": 1.764023509604528e-05, "loss": 0.7876, "step": 9951 }, { "epoch": 0.7396506874767744, "grad_norm": 1.4732472535049905, "learning_rate": 1.763971739276016e-05, "loss": 0.5629, "step": 9952 }, { "epoch": 0.7397250092902267, "grad_norm": 1.8302825352323593, "learning_rate": 1.7639199640290944e-05, "loss": 0.8403, "step": 9953 }, { "epoch": 0.7397993311036789, "grad_norm": 2.409898029441687, "learning_rate": 1.7638681838640954e-05, "loss": 0.7094, "step": 9954 }, { "epoch": 0.7398736529171311, "grad_norm": 2.0049971567001394, "learning_rate": 1.763816398781353e-05, "loss": 0.8826, "step": 9955 }, { "epoch": 0.7399479747305834, "grad_norm": 2.0619019230005393, "learning_rate": 1.7637646087811995e-05, "loss": 0.9489, "step": 9956 }, { "epoch": 0.7400222965440357, "grad_norm": 2.006831023173739, "learning_rate": 1.7637128138639697e-05, "loss": 0.8438, "step": 9957 }, { "epoch": 0.740096618357488, "grad_norm": 2.039625702327251, "learning_rate": 1.763661014029996e-05, "loss": 0.8767, "step": 9958 }, { "epoch": 0.7401709401709402, "grad_norm": 2.3362721328924683, "learning_rate": 1.7636092092796126e-05, "loss": 0.9749, "step": 9959 }, { "epoch": 0.7402452619843924, "grad_norm": 2.514284535526493, "learning_rate": 1.7635573996131526e-05, "loss": 0.7374, "step": 9960 }, { "epoch": 0.7403195837978447, "grad_norm": 1.8519930629168742, "learning_rate": 1.7635055850309498e-05, "loss": 0.7365, "step": 9961 }, { "epoch": 0.7403939056112969, "grad_norm": 2.200083422171519, "learning_rate": 1.7634537655333375e-05, "loss": 1.0734, "step": 9962 }, { "epoch": 0.7404682274247492, "grad_norm": 2.5625504105052905, "learning_rate": 1.7634019411206496e-05, "loss": 1.0266, "step": 9963 }, { "epoch": 0.7405425492382014, "grad_norm": 2.1206431351294195, "learning_rate": 1.7633501117932197e-05, "loss": 1.0711, "step": 9964 }, { "epoch": 0.7406168710516536, "grad_norm": 1.6423049444919429, "learning_rate": 1.7632982775513815e-05, "loss": 0.6657, "step": 9965 }, { "epoch": 0.7406911928651059, "grad_norm": 1.946806063190671, "learning_rate": 1.763246438395468e-05, "loss": 0.7458, "step": 9966 }, { "epoch": 0.7407655146785581, "grad_norm": 2.041413954408025, "learning_rate": 1.763194594325814e-05, "loss": 0.878, "step": 9967 }, { "epoch": 0.7408398364920104, "grad_norm": 1.7973276607075475, "learning_rate": 1.7631427453427527e-05, "loss": 0.897, "step": 9968 }, { "epoch": 0.7409141583054627, "grad_norm": 2.6422644887088165, "learning_rate": 1.763090891446618e-05, "loss": 0.8779, "step": 9969 }, { "epoch": 0.7409884801189149, "grad_norm": 1.9172083877329353, "learning_rate": 1.7630390326377437e-05, "loss": 0.8453, "step": 9970 }, { "epoch": 0.7410628019323672, "grad_norm": 2.33360208905283, "learning_rate": 1.7629871689164637e-05, "loss": 1.0438, "step": 9971 }, { "epoch": 0.7411371237458194, "grad_norm": 2.0315521483461936, "learning_rate": 1.762935300283112e-05, "loss": 1.0101, "step": 9972 }, { "epoch": 0.7412114455592717, "grad_norm": 2.326657975932068, "learning_rate": 1.7628834267380223e-05, "loss": 0.9906, "step": 9973 }, { "epoch": 0.7412857673727239, "grad_norm": 2.1458231349279915, "learning_rate": 1.7628315482815285e-05, "loss": 0.8532, "step": 9974 }, { "epoch": 0.7413600891861761, "grad_norm": 1.9742722026054753, "learning_rate": 1.7627796649139654e-05, "loss": 0.9133, "step": 9975 }, { "epoch": 0.7414344109996284, "grad_norm": 1.9689637568561624, "learning_rate": 1.7627277766356657e-05, "loss": 0.8609, "step": 9976 }, { "epoch": 0.7415087328130806, "grad_norm": 2.2966027544417775, "learning_rate": 1.7626758834469647e-05, "loss": 0.8351, "step": 9977 }, { "epoch": 0.7415830546265328, "grad_norm": 1.8559021538035894, "learning_rate": 1.7626239853481955e-05, "loss": 0.8077, "step": 9978 }, { "epoch": 0.7416573764399851, "grad_norm": 1.7055966523125854, "learning_rate": 1.762572082339693e-05, "loss": 0.8752, "step": 9979 }, { "epoch": 0.7417316982534374, "grad_norm": 1.8472133465907656, "learning_rate": 1.7625201744217907e-05, "loss": 0.9169, "step": 9980 }, { "epoch": 0.7418060200668897, "grad_norm": 2.3597337081390055, "learning_rate": 1.7624682615948232e-05, "loss": 0.7039, "step": 9981 }, { "epoch": 0.7418803418803419, "grad_norm": 1.5805326208067947, "learning_rate": 1.7624163438591246e-05, "loss": 0.7599, "step": 9982 }, { "epoch": 0.7419546636937941, "grad_norm": 1.850894873497611, "learning_rate": 1.762364421215029e-05, "loss": 0.8884, "step": 9983 }, { "epoch": 0.7420289855072464, "grad_norm": 2.6934460788816796, "learning_rate": 1.7623124936628714e-05, "loss": 0.7918, "step": 9984 }, { "epoch": 0.7421033073206986, "grad_norm": 1.9678665193367528, "learning_rate": 1.762260561202985e-05, "loss": 0.9317, "step": 9985 }, { "epoch": 0.7421776291341509, "grad_norm": 2.1097035561495114, "learning_rate": 1.7622086238357047e-05, "loss": 0.7633, "step": 9986 }, { "epoch": 0.7422519509476031, "grad_norm": 1.7077731872213904, "learning_rate": 1.762156681561365e-05, "loss": 0.8882, "step": 9987 }, { "epoch": 0.7423262727610553, "grad_norm": 1.9767014211536373, "learning_rate": 1.7621047343802997e-05, "loss": 0.9111, "step": 9988 }, { "epoch": 0.7424005945745076, "grad_norm": 1.7485964995395746, "learning_rate": 1.7620527822928442e-05, "loss": 0.728, "step": 9989 }, { "epoch": 0.7424749163879598, "grad_norm": 2.087745365232932, "learning_rate": 1.762000825299332e-05, "loss": 0.5882, "step": 9990 }, { "epoch": 0.7425492382014122, "grad_norm": 1.9403675140526595, "learning_rate": 1.761948863400098e-05, "loss": 0.9836, "step": 9991 }, { "epoch": 0.7426235600148644, "grad_norm": 1.9689345089607606, "learning_rate": 1.761896896595477e-05, "loss": 0.9143, "step": 9992 }, { "epoch": 0.7426978818283166, "grad_norm": 2.0023831471171496, "learning_rate": 1.7618449248858037e-05, "loss": 1.0329, "step": 9993 }, { "epoch": 0.7427722036417689, "grad_norm": 2.3143397858336874, "learning_rate": 1.7617929482714114e-05, "loss": 0.8855, "step": 9994 }, { "epoch": 0.7428465254552211, "grad_norm": 1.6759215221917205, "learning_rate": 1.7617409667526364e-05, "loss": 0.7629, "step": 9995 }, { "epoch": 0.7429208472686734, "grad_norm": 1.956144376034131, "learning_rate": 1.7616889803298123e-05, "loss": 0.8104, "step": 9996 }, { "epoch": 0.7429951690821256, "grad_norm": 1.7881506738893307, "learning_rate": 1.761636989003274e-05, "loss": 0.9105, "step": 9997 }, { "epoch": 0.7430694908955778, "grad_norm": 2.106179621782463, "learning_rate": 1.7615849927733563e-05, "loss": 0.8155, "step": 9998 }, { "epoch": 0.7431438127090301, "grad_norm": 1.8623670554601617, "learning_rate": 1.761532991640394e-05, "loss": 0.8232, "step": 9999 }, { "epoch": 0.7432181345224823, "grad_norm": 1.8612510734105445, "learning_rate": 1.761480985604722e-05, "loss": 0.739, "step": 10000 }, { "epoch": 0.7432924563359345, "grad_norm": 2.0149985597288054, "learning_rate": 1.7614289746666744e-05, "loss": 0.8849, "step": 10001 }, { "epoch": 0.7433667781493869, "grad_norm": 1.6046854280140246, "learning_rate": 1.761376958826587e-05, "loss": 0.7821, "step": 10002 }, { "epoch": 0.7434410999628391, "grad_norm": 2.9529964666995463, "learning_rate": 1.761324938084794e-05, "loss": 1.1344, "step": 10003 }, { "epoch": 0.7435154217762914, "grad_norm": 1.6657284289288883, "learning_rate": 1.7612729124416307e-05, "loss": 0.7187, "step": 10004 }, { "epoch": 0.7435897435897436, "grad_norm": 1.880329217873184, "learning_rate": 1.761220881897432e-05, "loss": 0.747, "step": 10005 }, { "epoch": 0.7436640654031959, "grad_norm": 2.0822669417256114, "learning_rate": 1.7611688464525326e-05, "loss": 0.851, "step": 10006 }, { "epoch": 0.7437383872166481, "grad_norm": 1.9568291388336592, "learning_rate": 1.7611168061072676e-05, "loss": 0.7305, "step": 10007 }, { "epoch": 0.7438127090301003, "grad_norm": 2.0880357560689413, "learning_rate": 1.761064760861972e-05, "loss": 0.772, "step": 10008 }, { "epoch": 0.7438870308435526, "grad_norm": 1.750515691390094, "learning_rate": 1.7610127107169814e-05, "loss": 0.9297, "step": 10009 }, { "epoch": 0.7439613526570048, "grad_norm": 2.206280874880406, "learning_rate": 1.76096065567263e-05, "loss": 1.1697, "step": 10010 }, { "epoch": 0.744035674470457, "grad_norm": 2.2061590174990173, "learning_rate": 1.7609085957292537e-05, "loss": 0.938, "step": 10011 }, { "epoch": 0.7441099962839093, "grad_norm": 2.372410489296367, "learning_rate": 1.7608565308871872e-05, "loss": 0.9113, "step": 10012 }, { "epoch": 0.7441843180973616, "grad_norm": 1.5247768376053197, "learning_rate": 1.7608044611467655e-05, "loss": 0.8207, "step": 10013 }, { "epoch": 0.7442586399108139, "grad_norm": 1.6890524812196932, "learning_rate": 1.7607523865083246e-05, "loss": 0.8062, "step": 10014 }, { "epoch": 0.7443329617242661, "grad_norm": 2.043916939700799, "learning_rate": 1.7607003069721993e-05, "loss": 0.9246, "step": 10015 }, { "epoch": 0.7444072835377183, "grad_norm": 2.0255561300422973, "learning_rate": 1.760648222538725e-05, "loss": 0.8358, "step": 10016 }, { "epoch": 0.7444816053511706, "grad_norm": 1.7258617175148583, "learning_rate": 1.7605961332082368e-05, "loss": 0.7628, "step": 10017 }, { "epoch": 0.7445559271646228, "grad_norm": 2.657297150823686, "learning_rate": 1.76054403898107e-05, "loss": 1.0493, "step": 10018 }, { "epoch": 0.7446302489780751, "grad_norm": 1.4991066999827278, "learning_rate": 1.7604919398575602e-05, "loss": 0.7513, "step": 10019 }, { "epoch": 0.7447045707915273, "grad_norm": 1.9120402036051656, "learning_rate": 1.7604398358380428e-05, "loss": 0.7488, "step": 10020 }, { "epoch": 0.7447788926049795, "grad_norm": 1.5648110215549906, "learning_rate": 1.7603877269228528e-05, "loss": 0.6281, "step": 10021 }, { "epoch": 0.7448532144184318, "grad_norm": 1.9238060394459582, "learning_rate": 1.7603356131123266e-05, "loss": 0.9708, "step": 10022 }, { "epoch": 0.744927536231884, "grad_norm": 1.8063343858447571, "learning_rate": 1.760283494406799e-05, "loss": 0.8174, "step": 10023 }, { "epoch": 0.7450018580453363, "grad_norm": 1.8184883469204907, "learning_rate": 1.7602313708066057e-05, "loss": 0.9049, "step": 10024 }, { "epoch": 0.7450761798587886, "grad_norm": 1.587219896357851, "learning_rate": 1.7601792423120824e-05, "loss": 0.884, "step": 10025 }, { "epoch": 0.7451505016722408, "grad_norm": 1.9281015311472676, "learning_rate": 1.7601271089235644e-05, "loss": 0.8862, "step": 10026 }, { "epoch": 0.7452248234856931, "grad_norm": 1.7745676770429213, "learning_rate": 1.7600749706413877e-05, "loss": 0.8365, "step": 10027 }, { "epoch": 0.7452991452991453, "grad_norm": 1.6967643831041572, "learning_rate": 1.7600228274658874e-05, "loss": 0.6661, "step": 10028 }, { "epoch": 0.7453734671125976, "grad_norm": 2.3295628093903047, "learning_rate": 1.7599706793973997e-05, "loss": 0.9721, "step": 10029 }, { "epoch": 0.7454477889260498, "grad_norm": 1.7461972989027619, "learning_rate": 1.7599185264362606e-05, "loss": 0.9577, "step": 10030 }, { "epoch": 0.745522110739502, "grad_norm": 2.1285599605110033, "learning_rate": 1.759866368582805e-05, "loss": 1.06, "step": 10031 }, { "epoch": 0.7455964325529543, "grad_norm": 1.840308664302734, "learning_rate": 1.7598142058373694e-05, "loss": 0.8557, "step": 10032 }, { "epoch": 0.7456707543664065, "grad_norm": 2.296157127987849, "learning_rate": 1.7597620382002895e-05, "loss": 0.9033, "step": 10033 }, { "epoch": 0.7457450761798587, "grad_norm": 2.071495610120133, "learning_rate": 1.7597098656719005e-05, "loss": 0.968, "step": 10034 }, { "epoch": 0.745819397993311, "grad_norm": 2.0912827081786176, "learning_rate": 1.7596576882525394e-05, "loss": 0.7243, "step": 10035 }, { "epoch": 0.7458937198067633, "grad_norm": 2.56995866459585, "learning_rate": 1.7596055059425412e-05, "loss": 0.898, "step": 10036 }, { "epoch": 0.7459680416202156, "grad_norm": 1.7660637879721495, "learning_rate": 1.759553318742242e-05, "loss": 0.8025, "step": 10037 }, { "epoch": 0.7460423634336678, "grad_norm": 2.11133610049705, "learning_rate": 1.7595011266519784e-05, "loss": 0.9065, "step": 10038 }, { "epoch": 0.74611668524712, "grad_norm": 2.537148266787906, "learning_rate": 1.7594489296720855e-05, "loss": 0.9198, "step": 10039 }, { "epoch": 0.7461910070605723, "grad_norm": 1.6331675468520293, "learning_rate": 1.7593967278029e-05, "loss": 0.8184, "step": 10040 }, { "epoch": 0.7462653288740245, "grad_norm": 1.7652903048363817, "learning_rate": 1.759344521044758e-05, "loss": 0.8891, "step": 10041 }, { "epoch": 0.7463396506874768, "grad_norm": 2.1780371460500363, "learning_rate": 1.759292309397995e-05, "loss": 0.6407, "step": 10042 }, { "epoch": 0.746413972500929, "grad_norm": 1.981944390958849, "learning_rate": 1.7592400928629475e-05, "loss": 0.9286, "step": 10043 }, { "epoch": 0.7464882943143812, "grad_norm": 2.315533206860754, "learning_rate": 1.759187871439952e-05, "loss": 0.9568, "step": 10044 }, { "epoch": 0.7465626161278335, "grad_norm": 2.1845384839288093, "learning_rate": 1.759135645129344e-05, "loss": 0.9825, "step": 10045 }, { "epoch": 0.7466369379412857, "grad_norm": 1.840901762960581, "learning_rate": 1.7590834139314606e-05, "loss": 1.0236, "step": 10046 }, { "epoch": 0.7467112597547381, "grad_norm": 1.948350668678978, "learning_rate": 1.7590311778466372e-05, "loss": 0.9237, "step": 10047 }, { "epoch": 0.7467855815681903, "grad_norm": 2.060844558026516, "learning_rate": 1.7589789368752103e-05, "loss": 1.0252, "step": 10048 }, { "epoch": 0.7468599033816425, "grad_norm": 2.258126745938965, "learning_rate": 1.758926691017517e-05, "loss": 0.8019, "step": 10049 }, { "epoch": 0.7469342251950948, "grad_norm": 2.352998724489897, "learning_rate": 1.7588744402738925e-05, "loss": 0.9108, "step": 10050 }, { "epoch": 0.747008547008547, "grad_norm": 1.9280746584552917, "learning_rate": 1.758822184644674e-05, "loss": 0.8591, "step": 10051 }, { "epoch": 0.7470828688219993, "grad_norm": 1.994095618029376, "learning_rate": 1.7587699241301975e-05, "loss": 0.8295, "step": 10052 }, { "epoch": 0.7471571906354515, "grad_norm": 1.9597281958925796, "learning_rate": 1.7587176587307998e-05, "loss": 0.8713, "step": 10053 }, { "epoch": 0.7472315124489037, "grad_norm": 1.5273549936337183, "learning_rate": 1.758665388446817e-05, "loss": 0.6636, "step": 10054 }, { "epoch": 0.747305834262356, "grad_norm": 1.4861205833473026, "learning_rate": 1.7586131132785858e-05, "loss": 0.8016, "step": 10055 }, { "epoch": 0.7473801560758082, "grad_norm": 1.744438615300089, "learning_rate": 1.7585608332264423e-05, "loss": 0.8108, "step": 10056 }, { "epoch": 0.7474544778892604, "grad_norm": 1.6680534676695038, "learning_rate": 1.758508548290724e-05, "loss": 0.9155, "step": 10057 }, { "epoch": 0.7475287997027128, "grad_norm": 1.7595835157608246, "learning_rate": 1.758456258471767e-05, "loss": 0.8046, "step": 10058 }, { "epoch": 0.747603121516165, "grad_norm": 1.7168019822717995, "learning_rate": 1.758403963769908e-05, "loss": 0.8566, "step": 10059 }, { "epoch": 0.7476774433296173, "grad_norm": 2.3340189789435257, "learning_rate": 1.7583516641854837e-05, "loss": 0.9109, "step": 10060 }, { "epoch": 0.7477517651430695, "grad_norm": 2.2291148726035552, "learning_rate": 1.7582993597188306e-05, "loss": 1.041, "step": 10061 }, { "epoch": 0.7478260869565218, "grad_norm": 2.161670966951902, "learning_rate": 1.7582470503702858e-05, "loss": 0.9558, "step": 10062 }, { "epoch": 0.747900408769974, "grad_norm": 1.5518322628542711, "learning_rate": 1.7581947361401854e-05, "loss": 0.7288, "step": 10063 }, { "epoch": 0.7479747305834262, "grad_norm": 1.8238892917900524, "learning_rate": 1.758142417028867e-05, "loss": 0.6688, "step": 10064 }, { "epoch": 0.7480490523968785, "grad_norm": 1.7818312387126471, "learning_rate": 1.7580900930366667e-05, "loss": 0.8248, "step": 10065 }, { "epoch": 0.7481233742103307, "grad_norm": 1.5271412018087618, "learning_rate": 1.758037764163922e-05, "loss": 0.5188, "step": 10066 }, { "epoch": 0.7481976960237829, "grad_norm": 1.8096841260949355, "learning_rate": 1.7579854304109695e-05, "loss": 0.7698, "step": 10067 }, { "epoch": 0.7482720178372352, "grad_norm": 1.8550497295628474, "learning_rate": 1.757933091778146e-05, "loss": 0.7988, "step": 10068 }, { "epoch": 0.7483463396506875, "grad_norm": 1.8442697237890182, "learning_rate": 1.7578807482657887e-05, "loss": 0.8245, "step": 10069 }, { "epoch": 0.7484206614641398, "grad_norm": 2.092499976200115, "learning_rate": 1.7578283998742343e-05, "loss": 1.0678, "step": 10070 }, { "epoch": 0.748494983277592, "grad_norm": 1.670160020688108, "learning_rate": 1.7577760466038202e-05, "loss": 0.8358, "step": 10071 }, { "epoch": 0.7485693050910442, "grad_norm": 1.8037159372699538, "learning_rate": 1.757723688454883e-05, "loss": 0.7377, "step": 10072 }, { "epoch": 0.7486436269044965, "grad_norm": 1.748739274906147, "learning_rate": 1.7576713254277603e-05, "loss": 0.8232, "step": 10073 }, { "epoch": 0.7487179487179487, "grad_norm": 2.0474142198356686, "learning_rate": 1.7576189575227884e-05, "loss": 0.6026, "step": 10074 }, { "epoch": 0.748792270531401, "grad_norm": 2.0194645677489955, "learning_rate": 1.7575665847403053e-05, "loss": 0.6735, "step": 10075 }, { "epoch": 0.7488665923448532, "grad_norm": 1.954720459850864, "learning_rate": 1.7575142070806478e-05, "loss": 1.0066, "step": 10076 }, { "epoch": 0.7489409141583054, "grad_norm": 1.3354339686578578, "learning_rate": 1.7574618245441534e-05, "loss": 0.7092, "step": 10077 }, { "epoch": 0.7490152359717577, "grad_norm": 2.271070062377137, "learning_rate": 1.757409437131159e-05, "loss": 0.9172, "step": 10078 }, { "epoch": 0.7490895577852099, "grad_norm": 1.9430326745721473, "learning_rate": 1.7573570448420017e-05, "loss": 0.9112, "step": 10079 }, { "epoch": 0.7491638795986622, "grad_norm": 1.9341269178082634, "learning_rate": 1.757304647677019e-05, "loss": 0.9486, "step": 10080 }, { "epoch": 0.7492382014121145, "grad_norm": 2.1155889942092863, "learning_rate": 1.7572522456365484e-05, "loss": 0.8931, "step": 10081 }, { "epoch": 0.7493125232255667, "grad_norm": 1.4440786147920204, "learning_rate": 1.757199838720927e-05, "loss": 0.7613, "step": 10082 }, { "epoch": 0.749386845039019, "grad_norm": 2.035781825505018, "learning_rate": 1.7571474269304925e-05, "loss": 0.7856, "step": 10083 }, { "epoch": 0.7494611668524712, "grad_norm": 1.7651485611617486, "learning_rate": 1.7570950102655824e-05, "loss": 0.7786, "step": 10084 }, { "epoch": 0.7495354886659235, "grad_norm": 2.466229984076174, "learning_rate": 1.7570425887265334e-05, "loss": 0.8732, "step": 10085 }, { "epoch": 0.7496098104793757, "grad_norm": 1.720338220984509, "learning_rate": 1.756990162313684e-05, "loss": 0.9475, "step": 10086 }, { "epoch": 0.7496841322928279, "grad_norm": 2.0073658008265394, "learning_rate": 1.7569377310273707e-05, "loss": 0.8788, "step": 10087 }, { "epoch": 0.7497584541062802, "grad_norm": 2.14931005091133, "learning_rate": 1.756885294867932e-05, "loss": 0.6254, "step": 10088 }, { "epoch": 0.7498327759197324, "grad_norm": 1.7355520636010004, "learning_rate": 1.7568328538357047e-05, "loss": 0.6467, "step": 10089 }, { "epoch": 0.7499070977331846, "grad_norm": 1.9519980190974793, "learning_rate": 1.756780407931027e-05, "loss": 0.9082, "step": 10090 }, { "epoch": 0.7499814195466369, "grad_norm": 2.3901788059913467, "learning_rate": 1.756727957154236e-05, "loss": 0.8655, "step": 10091 }, { "epoch": 0.7500557413600892, "grad_norm": 1.6447666490818176, "learning_rate": 1.75667550150567e-05, "loss": 0.6827, "step": 10092 }, { "epoch": 0.7501300631735415, "grad_norm": 2.616221907504682, "learning_rate": 1.756623040985666e-05, "loss": 0.7633, "step": 10093 }, { "epoch": 0.7502043849869937, "grad_norm": 1.8017477812108214, "learning_rate": 1.7565705755945623e-05, "loss": 0.6983, "step": 10094 }, { "epoch": 0.750278706800446, "grad_norm": 1.9030812037430285, "learning_rate": 1.756518105332697e-05, "loss": 1.0316, "step": 10095 }, { "epoch": 0.7503530286138982, "grad_norm": 1.7882221636178095, "learning_rate": 1.7564656302004066e-05, "loss": 0.8127, "step": 10096 }, { "epoch": 0.7504273504273504, "grad_norm": 1.8218464351519768, "learning_rate": 1.75641315019803e-05, "loss": 0.7725, "step": 10097 }, { "epoch": 0.7505016722408027, "grad_norm": 1.8916095629735332, "learning_rate": 1.756360665325905e-05, "loss": 0.7352, "step": 10098 }, { "epoch": 0.7505759940542549, "grad_norm": 1.6997584587777934, "learning_rate": 1.756308175584369e-05, "loss": 0.7537, "step": 10099 }, { "epoch": 0.7506503158677071, "grad_norm": 1.6864933832557423, "learning_rate": 1.7562556809737605e-05, "loss": 0.8341, "step": 10100 }, { "epoch": 0.7507246376811594, "grad_norm": 1.987966442123008, "learning_rate": 1.756203181494417e-05, "loss": 0.8771, "step": 10101 }, { "epoch": 0.7507989594946116, "grad_norm": 1.7961955735187418, "learning_rate": 1.7561506771466767e-05, "loss": 0.8276, "step": 10102 }, { "epoch": 0.750873281308064, "grad_norm": 1.8506793884740071, "learning_rate": 1.7560981679308773e-05, "loss": 0.9266, "step": 10103 }, { "epoch": 0.7509476031215162, "grad_norm": 1.8874163201632914, "learning_rate": 1.7560456538473575e-05, "loss": 0.8582, "step": 10104 }, { "epoch": 0.7510219249349684, "grad_norm": 1.865926086883856, "learning_rate": 1.7559931348964553e-05, "loss": 0.8205, "step": 10105 }, { "epoch": 0.7510962467484207, "grad_norm": 1.6771730327790233, "learning_rate": 1.755940611078508e-05, "loss": 0.7775, "step": 10106 }, { "epoch": 0.7511705685618729, "grad_norm": 2.1119519704272007, "learning_rate": 1.755888082393854e-05, "loss": 1.0425, "step": 10107 }, { "epoch": 0.7512448903753252, "grad_norm": 2.1239373282336524, "learning_rate": 1.7558355488428323e-05, "loss": 0.89, "step": 10108 }, { "epoch": 0.7513192121887774, "grad_norm": 2.0513375521801542, "learning_rate": 1.7557830104257804e-05, "loss": 0.9468, "step": 10109 }, { "epoch": 0.7513935340022296, "grad_norm": 2.3598317453362214, "learning_rate": 1.7557304671430366e-05, "loss": 0.9063, "step": 10110 }, { "epoch": 0.7514678558156819, "grad_norm": 2.106788900311191, "learning_rate": 1.7556779189949392e-05, "loss": 0.9358, "step": 10111 }, { "epoch": 0.7515421776291341, "grad_norm": 2.0653549264254627, "learning_rate": 1.755625365981827e-05, "loss": 0.9699, "step": 10112 }, { "epoch": 0.7516164994425863, "grad_norm": 2.2832992704015633, "learning_rate": 1.7555728081040374e-05, "loss": 0.744, "step": 10113 }, { "epoch": 0.7516908212560387, "grad_norm": 1.4830098397849458, "learning_rate": 1.75552024536191e-05, "loss": 0.7245, "step": 10114 }, { "epoch": 0.7517651430694909, "grad_norm": 1.9247548889286061, "learning_rate": 1.755467677755782e-05, "loss": 0.7652, "step": 10115 }, { "epoch": 0.7518394648829432, "grad_norm": 1.8740946402820684, "learning_rate": 1.755415105285992e-05, "loss": 0.9489, "step": 10116 }, { "epoch": 0.7519137866963954, "grad_norm": 1.9707655304690663, "learning_rate": 1.7553625279528786e-05, "loss": 0.7271, "step": 10117 }, { "epoch": 0.7519881085098477, "grad_norm": 2.4045363093274994, "learning_rate": 1.755309945756781e-05, "loss": 0.838, "step": 10118 }, { "epoch": 0.7520624303232999, "grad_norm": 1.8668507944907227, "learning_rate": 1.755257358698037e-05, "loss": 0.6733, "step": 10119 }, { "epoch": 0.7521367521367521, "grad_norm": 1.3721050690674192, "learning_rate": 1.755204766776985e-05, "loss": 0.724, "step": 10120 }, { "epoch": 0.7522110739502044, "grad_norm": 2.1173429774328163, "learning_rate": 1.7551521699939645e-05, "loss": 0.9402, "step": 10121 }, { "epoch": 0.7522853957636566, "grad_norm": 1.8813745651379084, "learning_rate": 1.755099568349313e-05, "loss": 0.938, "step": 10122 }, { "epoch": 0.7523597175771088, "grad_norm": 1.736112553885041, "learning_rate": 1.7550469618433703e-05, "loss": 0.8788, "step": 10123 }, { "epoch": 0.7524340393905611, "grad_norm": 1.8769538400010477, "learning_rate": 1.754994350476474e-05, "loss": 0.8257, "step": 10124 }, { "epoch": 0.7525083612040134, "grad_norm": 1.5816144579242464, "learning_rate": 1.754941734248963e-05, "loss": 0.7722, "step": 10125 }, { "epoch": 0.7525826830174657, "grad_norm": 2.0516205131537935, "learning_rate": 1.7548891131611765e-05, "loss": 0.8399, "step": 10126 }, { "epoch": 0.7526570048309179, "grad_norm": 1.844811601464558, "learning_rate": 1.754836487213453e-05, "loss": 0.8425, "step": 10127 }, { "epoch": 0.7527313266443701, "grad_norm": 2.936953178894717, "learning_rate": 1.7547838564061312e-05, "loss": 0.7181, "step": 10128 }, { "epoch": 0.7528056484578224, "grad_norm": 1.9359686558523668, "learning_rate": 1.7547312207395502e-05, "loss": 0.892, "step": 10129 }, { "epoch": 0.7528799702712746, "grad_norm": 2.192037036203244, "learning_rate": 1.754678580214049e-05, "loss": 0.8059, "step": 10130 }, { "epoch": 0.7529542920847269, "grad_norm": 1.8648409796926892, "learning_rate": 1.7546259348299663e-05, "loss": 0.7866, "step": 10131 }, { "epoch": 0.7530286138981791, "grad_norm": 2.3546511785791506, "learning_rate": 1.754573284587641e-05, "loss": 0.9782, "step": 10132 }, { "epoch": 0.7531029357116313, "grad_norm": 1.7699513799978113, "learning_rate": 1.7545206294874116e-05, "loss": 0.7578, "step": 10133 }, { "epoch": 0.7531772575250836, "grad_norm": 1.99278437768767, "learning_rate": 1.754467969529618e-05, "loss": 0.7281, "step": 10134 }, { "epoch": 0.7532515793385358, "grad_norm": 1.95775759966574, "learning_rate": 1.7544153047145985e-05, "loss": 0.9158, "step": 10135 }, { "epoch": 0.753325901151988, "grad_norm": 2.035329223374514, "learning_rate": 1.7543626350426923e-05, "loss": 0.8246, "step": 10136 }, { "epoch": 0.7534002229654404, "grad_norm": 1.7423455123800982, "learning_rate": 1.754309960514239e-05, "loss": 0.8253, "step": 10137 }, { "epoch": 0.7534745447788926, "grad_norm": 1.8444465803148635, "learning_rate": 1.754257281129577e-05, "loss": 0.9639, "step": 10138 }, { "epoch": 0.7535488665923449, "grad_norm": 3.144992599817181, "learning_rate": 1.754204596889046e-05, "loss": 0.7563, "step": 10139 }, { "epoch": 0.7536231884057971, "grad_norm": 2.009011575327501, "learning_rate": 1.7541519077929847e-05, "loss": 0.7691, "step": 10140 }, { "epoch": 0.7536975102192494, "grad_norm": 1.9825338212324422, "learning_rate": 1.7540992138417325e-05, "loss": 0.8667, "step": 10141 }, { "epoch": 0.7537718320327016, "grad_norm": 1.8314397912682303, "learning_rate": 1.754046515035629e-05, "loss": 0.7478, "step": 10142 }, { "epoch": 0.7538461538461538, "grad_norm": 1.941593609644769, "learning_rate": 1.753993811375013e-05, "loss": 0.8378, "step": 10143 }, { "epoch": 0.7539204756596061, "grad_norm": 1.7200349159484767, "learning_rate": 1.753941102860224e-05, "loss": 0.7686, "step": 10144 }, { "epoch": 0.7539947974730583, "grad_norm": 2.0028377879354444, "learning_rate": 1.7538883894916014e-05, "loss": 0.8251, "step": 10145 }, { "epoch": 0.7540691192865105, "grad_norm": 2.078300583454377, "learning_rate": 1.753835671269484e-05, "loss": 0.7559, "step": 10146 }, { "epoch": 0.7541434410999628, "grad_norm": 2.0724929272776302, "learning_rate": 1.7537829481942122e-05, "loss": 0.9194, "step": 10147 }, { "epoch": 0.7542177629134151, "grad_norm": 1.7100454114587205, "learning_rate": 1.7537302202661246e-05, "loss": 0.8218, "step": 10148 }, { "epoch": 0.7542920847268674, "grad_norm": 2.0602074111980517, "learning_rate": 1.753677487485561e-05, "loss": 0.8711, "step": 10149 }, { "epoch": 0.7543664065403196, "grad_norm": 1.4238628025832851, "learning_rate": 1.7536247498528605e-05, "loss": 0.656, "step": 10150 }, { "epoch": 0.7544407283537719, "grad_norm": 1.9842201802085841, "learning_rate": 1.7535720073683637e-05, "loss": 0.686, "step": 10151 }, { "epoch": 0.7545150501672241, "grad_norm": 2.030568027701416, "learning_rate": 1.753519260032409e-05, "loss": 0.9423, "step": 10152 }, { "epoch": 0.7545893719806763, "grad_norm": 1.5252672674072876, "learning_rate": 1.7534665078453365e-05, "loss": 0.6234, "step": 10153 }, { "epoch": 0.7546636937941286, "grad_norm": 1.745626404739797, "learning_rate": 1.7534137508074858e-05, "loss": 0.7331, "step": 10154 }, { "epoch": 0.7547380156075808, "grad_norm": 2.006736852441768, "learning_rate": 1.753360988919196e-05, "loss": 0.9593, "step": 10155 }, { "epoch": 0.754812337421033, "grad_norm": 2.298879200860602, "learning_rate": 1.7533082221808077e-05, "loss": 0.9903, "step": 10156 }, { "epoch": 0.7548866592344853, "grad_norm": 2.1743795294717447, "learning_rate": 1.75325545059266e-05, "loss": 0.8291, "step": 10157 }, { "epoch": 0.7549609810479375, "grad_norm": 1.9103370410712053, "learning_rate": 1.7532026741550927e-05, "loss": 0.7287, "step": 10158 }, { "epoch": 0.7550353028613899, "grad_norm": 2.949074084851174, "learning_rate": 1.7531498928684458e-05, "loss": 0.9742, "step": 10159 }, { "epoch": 0.7551096246748421, "grad_norm": 2.0807733618350674, "learning_rate": 1.753097106733059e-05, "loss": 0.8696, "step": 10160 }, { "epoch": 0.7551839464882943, "grad_norm": 2.148543072579154, "learning_rate": 1.7530443157492724e-05, "loss": 0.7109, "step": 10161 }, { "epoch": 0.7552582683017466, "grad_norm": 1.7371755384481657, "learning_rate": 1.752991519917425e-05, "loss": 0.748, "step": 10162 }, { "epoch": 0.7553325901151988, "grad_norm": 1.7667756789105646, "learning_rate": 1.7529387192378573e-05, "loss": 0.8686, "step": 10163 }, { "epoch": 0.7554069119286511, "grad_norm": 1.8034690562208076, "learning_rate": 1.7528859137109093e-05, "loss": 0.7706, "step": 10164 }, { "epoch": 0.7554812337421033, "grad_norm": 2.285410057407718, "learning_rate": 1.7528331033369213e-05, "loss": 0.826, "step": 10165 }, { "epoch": 0.7555555555555555, "grad_norm": 1.927383395948396, "learning_rate": 1.7527802881162324e-05, "loss": 0.7972, "step": 10166 }, { "epoch": 0.7556298773690078, "grad_norm": 2.3341313722620347, "learning_rate": 1.752727468049183e-05, "loss": 0.7143, "step": 10167 }, { "epoch": 0.75570419918246, "grad_norm": 1.6399613699409439, "learning_rate": 1.7526746431361132e-05, "loss": 0.8097, "step": 10168 }, { "epoch": 0.7557785209959123, "grad_norm": 2.5034022085334304, "learning_rate": 1.7526218133773632e-05, "loss": 1.084, "step": 10169 }, { "epoch": 0.7558528428093646, "grad_norm": 1.8493179794929009, "learning_rate": 1.7525689787732734e-05, "loss": 0.9848, "step": 10170 }, { "epoch": 0.7559271646228168, "grad_norm": 2.033995488115861, "learning_rate": 1.7525161393241828e-05, "loss": 0.94, "step": 10171 }, { "epoch": 0.7560014864362691, "grad_norm": 2.226016145917044, "learning_rate": 1.752463295030433e-05, "loss": 0.8953, "step": 10172 }, { "epoch": 0.7560758082497213, "grad_norm": 2.0141499522193, "learning_rate": 1.7524104458923635e-05, "loss": 0.8981, "step": 10173 }, { "epoch": 0.7561501300631736, "grad_norm": 2.073188164281174, "learning_rate": 1.7523575919103143e-05, "loss": 1.1059, "step": 10174 }, { "epoch": 0.7562244518766258, "grad_norm": 1.7287592268191874, "learning_rate": 1.752304733084626e-05, "loss": 0.7841, "step": 10175 }, { "epoch": 0.756298773690078, "grad_norm": 1.9795348148928955, "learning_rate": 1.752251869415639e-05, "loss": 0.9163, "step": 10176 }, { "epoch": 0.7563730955035303, "grad_norm": 1.8139717453477233, "learning_rate": 1.7521990009036935e-05, "loss": 0.7348, "step": 10177 }, { "epoch": 0.7564474173169825, "grad_norm": 2.316506822983757, "learning_rate": 1.7521461275491298e-05, "loss": 0.8755, "step": 10178 }, { "epoch": 0.7565217391304347, "grad_norm": 1.772547403147987, "learning_rate": 1.752093249352288e-05, "loss": 0.9714, "step": 10179 }, { "epoch": 0.756596060943887, "grad_norm": 2.075714451767495, "learning_rate": 1.7520403663135095e-05, "loss": 0.9553, "step": 10180 }, { "epoch": 0.7566703827573393, "grad_norm": 2.0970077740461814, "learning_rate": 1.751987478433134e-05, "loss": 0.9164, "step": 10181 }, { "epoch": 0.7567447045707916, "grad_norm": 1.7415678277007114, "learning_rate": 1.751934585711502e-05, "loss": 0.7372, "step": 10182 }, { "epoch": 0.7568190263842438, "grad_norm": 2.0225932024169895, "learning_rate": 1.751881688148954e-05, "loss": 1.0335, "step": 10183 }, { "epoch": 0.756893348197696, "grad_norm": 1.5831013031598677, "learning_rate": 1.751828785745831e-05, "loss": 0.7425, "step": 10184 }, { "epoch": 0.7569676700111483, "grad_norm": 1.8807324807648145, "learning_rate": 1.7517758785024733e-05, "loss": 0.9488, "step": 10185 }, { "epoch": 0.7570419918246005, "grad_norm": 1.7390935145314543, "learning_rate": 1.7517229664192214e-05, "loss": 0.9031, "step": 10186 }, { "epoch": 0.7571163136380528, "grad_norm": 1.7267962703551702, "learning_rate": 1.7516700494964158e-05, "loss": 0.6829, "step": 10187 }, { "epoch": 0.757190635451505, "grad_norm": 1.6525173112191336, "learning_rate": 1.7516171277343975e-05, "loss": 0.8086, "step": 10188 }, { "epoch": 0.7572649572649572, "grad_norm": 1.864687330087559, "learning_rate": 1.7515642011335073e-05, "loss": 0.7677, "step": 10189 }, { "epoch": 0.7573392790784095, "grad_norm": 1.9827935882363328, "learning_rate": 1.7515112696940858e-05, "loss": 1.0251, "step": 10190 }, { "epoch": 0.7574136008918617, "grad_norm": 2.0195553429755524, "learning_rate": 1.7514583334164737e-05, "loss": 1.0255, "step": 10191 }, { "epoch": 0.7574879227053141, "grad_norm": 2.0768308089700724, "learning_rate": 1.7514053923010116e-05, "loss": 0.6882, "step": 10192 }, { "epoch": 0.7575622445187663, "grad_norm": 1.5298222648346051, "learning_rate": 1.751352446348041e-05, "loss": 0.7541, "step": 10193 }, { "epoch": 0.7576365663322185, "grad_norm": 1.7243868691129172, "learning_rate": 1.751299495557902e-05, "loss": 0.7736, "step": 10194 }, { "epoch": 0.7577108881456708, "grad_norm": 1.5986006602379597, "learning_rate": 1.7512465399309356e-05, "loss": 0.7279, "step": 10195 }, { "epoch": 0.757785209959123, "grad_norm": 1.7210498465914974, "learning_rate": 1.7511935794674835e-05, "loss": 0.7857, "step": 10196 }, { "epoch": 0.7578595317725753, "grad_norm": 1.9334661280337615, "learning_rate": 1.751140614167886e-05, "loss": 0.8759, "step": 10197 }, { "epoch": 0.7579338535860275, "grad_norm": 1.6086974926000526, "learning_rate": 1.7510876440324843e-05, "loss": 0.6772, "step": 10198 }, { "epoch": 0.7580081753994797, "grad_norm": 2.3074196750062343, "learning_rate": 1.7510346690616187e-05, "loss": 0.9188, "step": 10199 }, { "epoch": 0.758082497212932, "grad_norm": 2.0665008071458053, "learning_rate": 1.7509816892556316e-05, "loss": 0.8496, "step": 10200 }, { "epoch": 0.7581568190263842, "grad_norm": 2.077987248281101, "learning_rate": 1.750928704614863e-05, "loss": 0.7857, "step": 10201 }, { "epoch": 0.7582311408398364, "grad_norm": 2.160269483151777, "learning_rate": 1.7508757151396544e-05, "loss": 0.9651, "step": 10202 }, { "epoch": 0.7583054626532887, "grad_norm": 1.8145171479845905, "learning_rate": 1.750822720830347e-05, "loss": 0.8496, "step": 10203 }, { "epoch": 0.758379784466741, "grad_norm": 2.0363876533484175, "learning_rate": 1.7507697216872815e-05, "loss": 0.7384, "step": 10204 }, { "epoch": 0.7584541062801933, "grad_norm": 1.9528556188694917, "learning_rate": 1.7507167177108e-05, "loss": 0.9377, "step": 10205 }, { "epoch": 0.7585284280936455, "grad_norm": 2.1539686882106763, "learning_rate": 1.750663708901243e-05, "loss": 0.7135, "step": 10206 }, { "epoch": 0.7586027499070978, "grad_norm": 1.707286962451131, "learning_rate": 1.750610695258952e-05, "loss": 0.8022, "step": 10207 }, { "epoch": 0.75867707172055, "grad_norm": 2.1986236068786367, "learning_rate": 1.7505576767842683e-05, "loss": 0.9314, "step": 10208 }, { "epoch": 0.7587513935340022, "grad_norm": 2.023148823645078, "learning_rate": 1.750504653477533e-05, "loss": 0.8144, "step": 10209 }, { "epoch": 0.7588257153474545, "grad_norm": 1.8850758536854773, "learning_rate": 1.750451625339088e-05, "loss": 0.9391, "step": 10210 }, { "epoch": 0.7589000371609067, "grad_norm": 2.085387873667278, "learning_rate": 1.7503985923692742e-05, "loss": 0.8781, "step": 10211 }, { "epoch": 0.7589743589743589, "grad_norm": 2.940852192739095, "learning_rate": 1.7503455545684333e-05, "loss": 0.9099, "step": 10212 }, { "epoch": 0.7590486807878112, "grad_norm": 2.009288347585601, "learning_rate": 1.7502925119369067e-05, "loss": 0.9386, "step": 10213 }, { "epoch": 0.7591230026012634, "grad_norm": 2.106726761143542, "learning_rate": 1.7502394644750357e-05, "loss": 1.0158, "step": 10214 }, { "epoch": 0.7591973244147158, "grad_norm": 1.8053580102444764, "learning_rate": 1.7501864121831623e-05, "loss": 0.9873, "step": 10215 }, { "epoch": 0.759271646228168, "grad_norm": 1.9360933193731984, "learning_rate": 1.7501333550616273e-05, "loss": 0.9141, "step": 10216 }, { "epoch": 0.7593459680416202, "grad_norm": 2.0159973315936095, "learning_rate": 1.750080293110773e-05, "loss": 0.7328, "step": 10217 }, { "epoch": 0.7594202898550725, "grad_norm": 1.7625431795083983, "learning_rate": 1.75002722633094e-05, "loss": 0.8912, "step": 10218 }, { "epoch": 0.7594946116685247, "grad_norm": 2.1560387138217987, "learning_rate": 1.7499741547224714e-05, "loss": 0.8178, "step": 10219 }, { "epoch": 0.759568933481977, "grad_norm": 1.626419946185059, "learning_rate": 1.7499210782857077e-05, "loss": 0.7379, "step": 10220 }, { "epoch": 0.7596432552954292, "grad_norm": 1.8432958880880987, "learning_rate": 1.749867997020991e-05, "loss": 0.8931, "step": 10221 }, { "epoch": 0.7597175771088814, "grad_norm": 2.202786256445796, "learning_rate": 1.7498149109286632e-05, "loss": 1.0136, "step": 10222 }, { "epoch": 0.7597918989223337, "grad_norm": 2.0396159798349895, "learning_rate": 1.749761820009066e-05, "loss": 0.6417, "step": 10223 }, { "epoch": 0.7598662207357859, "grad_norm": 1.7952530467985905, "learning_rate": 1.7497087242625408e-05, "loss": 0.7898, "step": 10224 }, { "epoch": 0.7599405425492382, "grad_norm": 2.2426071083580403, "learning_rate": 1.7496556236894303e-05, "loss": 0.8827, "step": 10225 }, { "epoch": 0.7600148643626905, "grad_norm": 1.584908949269298, "learning_rate": 1.7496025182900754e-05, "loss": 0.7261, "step": 10226 }, { "epoch": 0.7600891861761427, "grad_norm": 2.1134989784932814, "learning_rate": 1.749549408064818e-05, "loss": 0.943, "step": 10227 }, { "epoch": 0.760163507989595, "grad_norm": 2.1409540426245552, "learning_rate": 1.749496293014001e-05, "loss": 1.0467, "step": 10228 }, { "epoch": 0.7602378298030472, "grad_norm": 1.933265814753145, "learning_rate": 1.7494431731379656e-05, "loss": 0.9902, "step": 10229 }, { "epoch": 0.7603121516164995, "grad_norm": 2.051665181900566, "learning_rate": 1.7493900484370538e-05, "loss": 0.9145, "step": 10230 }, { "epoch": 0.7603864734299517, "grad_norm": 1.8267212906370958, "learning_rate": 1.749336918911608e-05, "loss": 0.7387, "step": 10231 }, { "epoch": 0.7604607952434039, "grad_norm": 2.0221340597318966, "learning_rate": 1.7492837845619697e-05, "loss": 0.8487, "step": 10232 }, { "epoch": 0.7605351170568562, "grad_norm": 1.9522602679023864, "learning_rate": 1.7492306453884812e-05, "loss": 0.9623, "step": 10233 }, { "epoch": 0.7606094388703084, "grad_norm": 1.9287228306700321, "learning_rate": 1.749177501391485e-05, "loss": 0.9212, "step": 10234 }, { "epoch": 0.7606837606837606, "grad_norm": 2.5116388998152033, "learning_rate": 1.749124352571323e-05, "loss": 0.7475, "step": 10235 }, { "epoch": 0.7607580824972129, "grad_norm": 1.6648662307915152, "learning_rate": 1.7490711989283368e-05, "loss": 0.8882, "step": 10236 }, { "epoch": 0.7608324043106652, "grad_norm": 1.7704354814366157, "learning_rate": 1.7490180404628695e-05, "loss": 0.9195, "step": 10237 }, { "epoch": 0.7609067261241175, "grad_norm": 2.399107329714388, "learning_rate": 1.7489648771752627e-05, "loss": 0.7524, "step": 10238 }, { "epoch": 0.7609810479375697, "grad_norm": 2.610214139220603, "learning_rate": 1.7489117090658592e-05, "loss": 0.9612, "step": 10239 }, { "epoch": 0.761055369751022, "grad_norm": 2.523072065327914, "learning_rate": 1.7488585361350008e-05, "loss": 0.7064, "step": 10240 }, { "epoch": 0.7611296915644742, "grad_norm": 1.6503432180976054, "learning_rate": 1.7488053583830297e-05, "loss": 0.8552, "step": 10241 }, { "epoch": 0.7612040133779264, "grad_norm": 3.9351509875115678, "learning_rate": 1.748752175810289e-05, "loss": 0.9886, "step": 10242 }, { "epoch": 0.7612783351913787, "grad_norm": 1.6914035832776526, "learning_rate": 1.7486989884171205e-05, "loss": 0.7505, "step": 10243 }, { "epoch": 0.7613526570048309, "grad_norm": 1.6959055229865951, "learning_rate": 1.7486457962038664e-05, "loss": 0.9118, "step": 10244 }, { "epoch": 0.7614269788182831, "grad_norm": 5.861812948447666, "learning_rate": 1.7485925991708702e-05, "loss": 0.8343, "step": 10245 }, { "epoch": 0.7615013006317354, "grad_norm": 1.9759392608398376, "learning_rate": 1.748539397318473e-05, "loss": 0.8223, "step": 10246 }, { "epoch": 0.7615756224451876, "grad_norm": 1.8849550676704645, "learning_rate": 1.7484861906470187e-05, "loss": 0.819, "step": 10247 }, { "epoch": 0.76164994425864, "grad_norm": 1.7922247395712627, "learning_rate": 1.7484329791568485e-05, "loss": 1.0453, "step": 10248 }, { "epoch": 0.7617242660720922, "grad_norm": 2.4874677336016466, "learning_rate": 1.748379762848306e-05, "loss": 0.9441, "step": 10249 }, { "epoch": 0.7617985878855444, "grad_norm": 1.5909924989087023, "learning_rate": 1.7483265417217334e-05, "loss": 0.7414, "step": 10250 }, { "epoch": 0.7618729096989967, "grad_norm": 2.3711238239321255, "learning_rate": 1.7482733157774728e-05, "loss": 0.7193, "step": 10251 }, { "epoch": 0.7619472315124489, "grad_norm": 1.830337808059494, "learning_rate": 1.7482200850158682e-05, "loss": 0.8384, "step": 10252 }, { "epoch": 0.7620215533259012, "grad_norm": 1.9732137194169623, "learning_rate": 1.748166849437261e-05, "loss": 0.8804, "step": 10253 }, { "epoch": 0.7620958751393534, "grad_norm": 1.9018809896890339, "learning_rate": 1.7481136090419947e-05, "loss": 0.9326, "step": 10254 }, { "epoch": 0.7621701969528056, "grad_norm": 1.8866600278759178, "learning_rate": 1.7480603638304115e-05, "loss": 0.8877, "step": 10255 }, { "epoch": 0.7622445187662579, "grad_norm": 2.6709072316265434, "learning_rate": 1.7480071138028548e-05, "loss": 0.896, "step": 10256 }, { "epoch": 0.7623188405797101, "grad_norm": 2.112543919904374, "learning_rate": 1.7479538589596668e-05, "loss": 0.98, "step": 10257 }, { "epoch": 0.7623931623931623, "grad_norm": 2.1208913368315714, "learning_rate": 1.747900599301191e-05, "loss": 0.8721, "step": 10258 }, { "epoch": 0.7624674842066146, "grad_norm": 2.026160547702232, "learning_rate": 1.74784733482777e-05, "loss": 0.8559, "step": 10259 }, { "epoch": 0.7625418060200669, "grad_norm": 2.518873188975318, "learning_rate": 1.7477940655397466e-05, "loss": 0.8409, "step": 10260 }, { "epoch": 0.7626161278335192, "grad_norm": 1.6170766958737768, "learning_rate": 1.7477407914374637e-05, "loss": 0.8146, "step": 10261 }, { "epoch": 0.7626904496469714, "grad_norm": 2.147947637707468, "learning_rate": 1.7476875125212642e-05, "loss": 1.0169, "step": 10262 }, { "epoch": 0.7627647714604237, "grad_norm": 1.796985424794715, "learning_rate": 1.7476342287914914e-05, "loss": 0.9276, "step": 10263 }, { "epoch": 0.7628390932738759, "grad_norm": 1.6621648370053133, "learning_rate": 1.7475809402484884e-05, "loss": 0.7981, "step": 10264 }, { "epoch": 0.7629134150873281, "grad_norm": 1.9487314740181876, "learning_rate": 1.747527646892598e-05, "loss": 0.9509, "step": 10265 }, { "epoch": 0.7629877369007804, "grad_norm": 2.2999738152788254, "learning_rate": 1.7474743487241634e-05, "loss": 0.9478, "step": 10266 }, { "epoch": 0.7630620587142326, "grad_norm": 2.3220571984478973, "learning_rate": 1.7474210457435278e-05, "loss": 0.9577, "step": 10267 }, { "epoch": 0.7631363805276848, "grad_norm": 2.152554526282024, "learning_rate": 1.747367737951034e-05, "loss": 1.0246, "step": 10268 }, { "epoch": 0.7632107023411371, "grad_norm": 1.7097092841965162, "learning_rate": 1.747314425347026e-05, "loss": 0.8434, "step": 10269 }, { "epoch": 0.7632850241545893, "grad_norm": 2.5431794200563735, "learning_rate": 1.7472611079318462e-05, "loss": 0.8677, "step": 10270 }, { "epoch": 0.7633593459680417, "grad_norm": 2.486184995866871, "learning_rate": 1.7472077857058377e-05, "loss": 1.0142, "step": 10271 }, { "epoch": 0.7634336677814939, "grad_norm": 1.9300162147513933, "learning_rate": 1.7471544586693448e-05, "loss": 0.7978, "step": 10272 }, { "epoch": 0.7635079895949461, "grad_norm": 1.6634747470252707, "learning_rate": 1.7471011268227104e-05, "loss": 0.7263, "step": 10273 }, { "epoch": 0.7635823114083984, "grad_norm": 2.1027076610846542, "learning_rate": 1.7470477901662773e-05, "loss": 0.8652, "step": 10274 }, { "epoch": 0.7636566332218506, "grad_norm": 1.657793765849372, "learning_rate": 1.7469944487003898e-05, "loss": 0.6497, "step": 10275 }, { "epoch": 0.7637309550353029, "grad_norm": 1.9837395116495908, "learning_rate": 1.7469411024253903e-05, "loss": 0.8468, "step": 10276 }, { "epoch": 0.7638052768487551, "grad_norm": 2.023724399341516, "learning_rate": 1.746887751341623e-05, "loss": 0.8832, "step": 10277 }, { "epoch": 0.7638795986622073, "grad_norm": 1.9194950983600034, "learning_rate": 1.7468343954494307e-05, "loss": 0.87, "step": 10278 }, { "epoch": 0.7639539204756596, "grad_norm": 1.8728687524741359, "learning_rate": 1.7467810347491577e-05, "loss": 0.8454, "step": 10279 }, { "epoch": 0.7640282422891118, "grad_norm": 1.8864987930957855, "learning_rate": 1.746727669241147e-05, "loss": 1.085, "step": 10280 }, { "epoch": 0.764102564102564, "grad_norm": 2.067782248637887, "learning_rate": 1.7466742989257424e-05, "loss": 0.8564, "step": 10281 }, { "epoch": 0.7641768859160164, "grad_norm": 2.381784770628815, "learning_rate": 1.7466209238032874e-05, "loss": 0.7447, "step": 10282 }, { "epoch": 0.7642512077294686, "grad_norm": 1.98211293570532, "learning_rate": 1.7465675438741257e-05, "loss": 0.8516, "step": 10283 }, { "epoch": 0.7643255295429209, "grad_norm": 2.1165513594898524, "learning_rate": 1.746514159138601e-05, "loss": 0.7397, "step": 10284 }, { "epoch": 0.7643998513563731, "grad_norm": 1.9689907683162582, "learning_rate": 1.7464607695970562e-05, "loss": 0.8896, "step": 10285 }, { "epoch": 0.7644741731698254, "grad_norm": 1.9161067919770414, "learning_rate": 1.7464073752498362e-05, "loss": 0.8362, "step": 10286 }, { "epoch": 0.7645484949832776, "grad_norm": 1.6512750059381516, "learning_rate": 1.7463539760972844e-05, "loss": 0.8584, "step": 10287 }, { "epoch": 0.7646228167967298, "grad_norm": 2.6377951042725285, "learning_rate": 1.7463005721397445e-05, "loss": 0.8019, "step": 10288 }, { "epoch": 0.7646971386101821, "grad_norm": 2.244737726669579, "learning_rate": 1.7462471633775596e-05, "loss": 0.9242, "step": 10289 }, { "epoch": 0.7647714604236343, "grad_norm": 2.292599934668861, "learning_rate": 1.746193749811075e-05, "loss": 0.8928, "step": 10290 }, { "epoch": 0.7648457822370865, "grad_norm": 2.0648788655261425, "learning_rate": 1.746140331440633e-05, "loss": 1.0086, "step": 10291 }, { "epoch": 0.7649201040505388, "grad_norm": 1.9753499710109585, "learning_rate": 1.7460869082665787e-05, "loss": 0.9571, "step": 10292 }, { "epoch": 0.7649944258639911, "grad_norm": 2.2285251475745467, "learning_rate": 1.7460334802892552e-05, "loss": 0.7239, "step": 10293 }, { "epoch": 0.7650687476774434, "grad_norm": 1.8172698163864098, "learning_rate": 1.7459800475090075e-05, "loss": 0.9727, "step": 10294 }, { "epoch": 0.7651430694908956, "grad_norm": 1.7549708498294683, "learning_rate": 1.7459266099261783e-05, "loss": 0.8872, "step": 10295 }, { "epoch": 0.7652173913043478, "grad_norm": 2.1139756780724053, "learning_rate": 1.7458731675411127e-05, "loss": 1.0221, "step": 10296 }, { "epoch": 0.7652917131178001, "grad_norm": 1.7642427381598305, "learning_rate": 1.7458197203541543e-05, "loss": 0.9475, "step": 10297 }, { "epoch": 0.7653660349312523, "grad_norm": 2.1364592140003307, "learning_rate": 1.7457662683656473e-05, "loss": 1.0463, "step": 10298 }, { "epoch": 0.7654403567447046, "grad_norm": 3.444208461719466, "learning_rate": 1.7457128115759357e-05, "loss": 0.9791, "step": 10299 }, { "epoch": 0.7655146785581568, "grad_norm": 2.1840555470577896, "learning_rate": 1.7456593499853637e-05, "loss": 0.838, "step": 10300 }, { "epoch": 0.765589000371609, "grad_norm": 1.987418304417003, "learning_rate": 1.7456058835942754e-05, "loss": 0.6321, "step": 10301 }, { "epoch": 0.7656633221850613, "grad_norm": 1.845016525957018, "learning_rate": 1.7455524124030148e-05, "loss": 0.8946, "step": 10302 }, { "epoch": 0.7657376439985135, "grad_norm": 2.3229797262794993, "learning_rate": 1.745498936411927e-05, "loss": 1.0172, "step": 10303 }, { "epoch": 0.7658119658119659, "grad_norm": 1.8243555788411319, "learning_rate": 1.7454454556213558e-05, "loss": 0.7818, "step": 10304 }, { "epoch": 0.7658862876254181, "grad_norm": 1.517636334359498, "learning_rate": 1.745391970031645e-05, "loss": 0.7015, "step": 10305 }, { "epoch": 0.7659606094388703, "grad_norm": 2.2913981045573184, "learning_rate": 1.7453384796431393e-05, "loss": 1.0091, "step": 10306 }, { "epoch": 0.7660349312523226, "grad_norm": 2.1464297856293655, "learning_rate": 1.7452849844561838e-05, "loss": 0.6604, "step": 10307 }, { "epoch": 0.7661092530657748, "grad_norm": 1.823527086248671, "learning_rate": 1.7452314844711217e-05, "loss": 0.7978, "step": 10308 }, { "epoch": 0.7661835748792271, "grad_norm": 1.8731501973428952, "learning_rate": 1.7451779796882983e-05, "loss": 0.8615, "step": 10309 }, { "epoch": 0.7662578966926793, "grad_norm": 1.9127468716579803, "learning_rate": 1.745124470108057e-05, "loss": 0.9465, "step": 10310 }, { "epoch": 0.7663322185061315, "grad_norm": 1.8841651077790522, "learning_rate": 1.745070955730744e-05, "loss": 0.9471, "step": 10311 }, { "epoch": 0.7664065403195838, "grad_norm": 1.775952615996408, "learning_rate": 1.7450174365567023e-05, "loss": 0.6968, "step": 10312 }, { "epoch": 0.766480862133036, "grad_norm": 1.5274884114676366, "learning_rate": 1.744963912586277e-05, "loss": 0.7712, "step": 10313 }, { "epoch": 0.7665551839464882, "grad_norm": 1.6498966834358975, "learning_rate": 1.744910383819813e-05, "loss": 0.7422, "step": 10314 }, { "epoch": 0.7666295057599405, "grad_norm": 2.154581050993442, "learning_rate": 1.744856850257654e-05, "loss": 0.9008, "step": 10315 }, { "epoch": 0.7667038275733928, "grad_norm": 1.7266731367007495, "learning_rate": 1.744803311900146e-05, "loss": 0.8492, "step": 10316 }, { "epoch": 0.7667781493868451, "grad_norm": 1.7706159168000526, "learning_rate": 1.7447497687476325e-05, "loss": 0.8781, "step": 10317 }, { "epoch": 0.7668524712002973, "grad_norm": 1.72652388777271, "learning_rate": 1.7446962208004587e-05, "loss": 0.7054, "step": 10318 }, { "epoch": 0.7669267930137496, "grad_norm": 1.845219836028818, "learning_rate": 1.744642668058969e-05, "loss": 0.9155, "step": 10319 }, { "epoch": 0.7670011148272018, "grad_norm": 2.1087966448085376, "learning_rate": 1.7445891105235086e-05, "loss": 1.0203, "step": 10320 }, { "epoch": 0.767075436640654, "grad_norm": 1.9083464625860729, "learning_rate": 1.7445355481944225e-05, "loss": 0.8949, "step": 10321 }, { "epoch": 0.7671497584541063, "grad_norm": 1.8226976735719025, "learning_rate": 1.7444819810720548e-05, "loss": 0.7606, "step": 10322 }, { "epoch": 0.7672240802675585, "grad_norm": 2.3017569477924513, "learning_rate": 1.744428409156751e-05, "loss": 0.8221, "step": 10323 }, { "epoch": 0.7672984020810107, "grad_norm": 1.9066842837541644, "learning_rate": 1.7443748324488556e-05, "loss": 0.8826, "step": 10324 }, { "epoch": 0.767372723894463, "grad_norm": 2.0293649571745123, "learning_rate": 1.7443212509487135e-05, "loss": 0.7273, "step": 10325 }, { "epoch": 0.7674470457079152, "grad_norm": 1.8151510444208974, "learning_rate": 1.7442676646566702e-05, "loss": 0.9501, "step": 10326 }, { "epoch": 0.7675213675213676, "grad_norm": 1.6330964615219445, "learning_rate": 1.7442140735730697e-05, "loss": 0.7704, "step": 10327 }, { "epoch": 0.7675956893348198, "grad_norm": 2.10029841174978, "learning_rate": 1.744160477698258e-05, "loss": 0.711, "step": 10328 }, { "epoch": 0.767670011148272, "grad_norm": 1.824665070095767, "learning_rate": 1.7441068770325797e-05, "loss": 0.7733, "step": 10329 }, { "epoch": 0.7677443329617243, "grad_norm": 1.7419171022056152, "learning_rate": 1.7440532715763797e-05, "loss": 0.714, "step": 10330 }, { "epoch": 0.7678186547751765, "grad_norm": 1.8684918401410346, "learning_rate": 1.7439996613300038e-05, "loss": 0.8683, "step": 10331 }, { "epoch": 0.7678929765886288, "grad_norm": 2.4979896336306027, "learning_rate": 1.743946046293796e-05, "loss": 1.0358, "step": 10332 }, { "epoch": 0.767967298402081, "grad_norm": 4.417773403085052, "learning_rate": 1.743892426468103e-05, "loss": 0.7975, "step": 10333 }, { "epoch": 0.7680416202155332, "grad_norm": 1.989701453480613, "learning_rate": 1.7438388018532687e-05, "loss": 0.7791, "step": 10334 }, { "epoch": 0.7681159420289855, "grad_norm": 1.9344393222957246, "learning_rate": 1.743785172449639e-05, "loss": 1.0246, "step": 10335 }, { "epoch": 0.7681902638424377, "grad_norm": 2.1795977390879044, "learning_rate": 1.7437315382575584e-05, "loss": 0.9325, "step": 10336 }, { "epoch": 0.76826458565589, "grad_norm": 1.913337019093885, "learning_rate": 1.7436778992773736e-05, "loss": 0.9473, "step": 10337 }, { "epoch": 0.7683389074693423, "grad_norm": 2.005405804762888, "learning_rate": 1.7436242555094286e-05, "loss": 0.8793, "step": 10338 }, { "epoch": 0.7684132292827945, "grad_norm": 2.139058386759865, "learning_rate": 1.743570606954069e-05, "loss": 1.173, "step": 10339 }, { "epoch": 0.7684875510962468, "grad_norm": 1.8552663740776563, "learning_rate": 1.7435169536116406e-05, "loss": 0.7517, "step": 10340 }, { "epoch": 0.768561872909699, "grad_norm": 1.992527556039777, "learning_rate": 1.7434632954824888e-05, "loss": 0.7524, "step": 10341 }, { "epoch": 0.7686361947231513, "grad_norm": 2.1395673692907864, "learning_rate": 1.7434096325669588e-05, "loss": 0.7775, "step": 10342 }, { "epoch": 0.7687105165366035, "grad_norm": 7.535958484316721, "learning_rate": 1.743355964865396e-05, "loss": 0.8399, "step": 10343 }, { "epoch": 0.7687848383500557, "grad_norm": 1.9162263840850606, "learning_rate": 1.743302292378146e-05, "loss": 0.8496, "step": 10344 }, { "epoch": 0.768859160163508, "grad_norm": 2.23002657845238, "learning_rate": 1.743248615105555e-05, "loss": 0.8897, "step": 10345 }, { "epoch": 0.7689334819769602, "grad_norm": 1.7008032665578028, "learning_rate": 1.7431949330479675e-05, "loss": 0.7732, "step": 10346 }, { "epoch": 0.7690078037904124, "grad_norm": 1.9111467552368053, "learning_rate": 1.74314124620573e-05, "loss": 0.9949, "step": 10347 }, { "epoch": 0.7690821256038647, "grad_norm": 1.3564610462754412, "learning_rate": 1.743087554579187e-05, "loss": 0.62, "step": 10348 }, { "epoch": 0.769156447417317, "grad_norm": 2.108710298284778, "learning_rate": 1.743033858168685e-05, "loss": 0.9992, "step": 10349 }, { "epoch": 0.7692307692307693, "grad_norm": 1.85280612100487, "learning_rate": 1.74298015697457e-05, "loss": 0.874, "step": 10350 }, { "epoch": 0.7693050910442215, "grad_norm": 2.3059267418829426, "learning_rate": 1.7429264509971874e-05, "loss": 0.896, "step": 10351 }, { "epoch": 0.7693794128576738, "grad_norm": 2.2024842513818705, "learning_rate": 1.7428727402368824e-05, "loss": 0.9642, "step": 10352 }, { "epoch": 0.769453734671126, "grad_norm": 1.8403499960648182, "learning_rate": 1.7428190246940014e-05, "loss": 0.7565, "step": 10353 }, { "epoch": 0.7695280564845782, "grad_norm": 2.1983313908489386, "learning_rate": 1.7427653043688903e-05, "loss": 0.9165, "step": 10354 }, { "epoch": 0.7696023782980305, "grad_norm": 1.8833121425588073, "learning_rate": 1.7427115792618947e-05, "loss": 0.9288, "step": 10355 }, { "epoch": 0.7696767001114827, "grad_norm": 3.0884501809750353, "learning_rate": 1.74265784937336e-05, "loss": 1.2164, "step": 10356 }, { "epoch": 0.7697510219249349, "grad_norm": 1.9983567522401988, "learning_rate": 1.742604114703633e-05, "loss": 0.8512, "step": 10357 }, { "epoch": 0.7698253437383872, "grad_norm": 3.174778174625539, "learning_rate": 1.7425503752530592e-05, "loss": 0.8815, "step": 10358 }, { "epoch": 0.7698996655518394, "grad_norm": 2.4951650160626277, "learning_rate": 1.7424966310219844e-05, "loss": 0.8236, "step": 10359 }, { "epoch": 0.7699739873652918, "grad_norm": 2.115871325115527, "learning_rate": 1.742442882010755e-05, "loss": 0.9886, "step": 10360 }, { "epoch": 0.770048309178744, "grad_norm": 1.874275934113328, "learning_rate": 1.7423891282197168e-05, "loss": 0.9721, "step": 10361 }, { "epoch": 0.7701226309921962, "grad_norm": 2.102794997747718, "learning_rate": 1.742335369649216e-05, "loss": 0.9451, "step": 10362 }, { "epoch": 0.7701969528056485, "grad_norm": 2.079479623725553, "learning_rate": 1.7422816062995984e-05, "loss": 1.091, "step": 10363 }, { "epoch": 0.7702712746191007, "grad_norm": 2.2722039234326745, "learning_rate": 1.7422278381712105e-05, "loss": 0.9629, "step": 10364 }, { "epoch": 0.770345596432553, "grad_norm": 1.854673160445391, "learning_rate": 1.7421740652643984e-05, "loss": 0.8263, "step": 10365 }, { "epoch": 0.7704199182460052, "grad_norm": 2.1241403660201486, "learning_rate": 1.7421202875795076e-05, "loss": 0.8367, "step": 10366 }, { "epoch": 0.7704942400594574, "grad_norm": 1.7332178091355288, "learning_rate": 1.7420665051168855e-05, "loss": 0.7705, "step": 10367 }, { "epoch": 0.7705685618729097, "grad_norm": 5.20329190093022, "learning_rate": 1.7420127178768777e-05, "loss": 0.7819, "step": 10368 }, { "epoch": 0.7706428836863619, "grad_norm": 1.6548033491023, "learning_rate": 1.7419589258598303e-05, "loss": 0.7506, "step": 10369 }, { "epoch": 0.7707172054998142, "grad_norm": 1.6301349142468857, "learning_rate": 1.74190512906609e-05, "loss": 0.7655, "step": 10370 }, { "epoch": 0.7707915273132664, "grad_norm": 2.730875260157999, "learning_rate": 1.741851327496003e-05, "loss": 0.9592, "step": 10371 }, { "epoch": 0.7708658491267187, "grad_norm": 1.8829213035976393, "learning_rate": 1.7417975211499153e-05, "loss": 0.7382, "step": 10372 }, { "epoch": 0.770940170940171, "grad_norm": 2.317845291706144, "learning_rate": 1.7417437100281743e-05, "loss": 0.9616, "step": 10373 }, { "epoch": 0.7710144927536232, "grad_norm": 2.3817954689670504, "learning_rate": 1.7416898941311252e-05, "loss": 0.8309, "step": 10374 }, { "epoch": 0.7710888145670755, "grad_norm": 2.092426561923455, "learning_rate": 1.7416360734591152e-05, "loss": 0.8922, "step": 10375 }, { "epoch": 0.7711631363805277, "grad_norm": 1.9245048927002715, "learning_rate": 1.7415822480124906e-05, "loss": 0.7384, "step": 10376 }, { "epoch": 0.7712374581939799, "grad_norm": 1.966930553820716, "learning_rate": 1.741528417791598e-05, "loss": 0.9528, "step": 10377 }, { "epoch": 0.7713117800074322, "grad_norm": 1.4851789960296486, "learning_rate": 1.741474582796784e-05, "loss": 0.7172, "step": 10378 }, { "epoch": 0.7713861018208844, "grad_norm": 1.95156498489735, "learning_rate": 1.741420743028395e-05, "loss": 0.8665, "step": 10379 }, { "epoch": 0.7714604236343366, "grad_norm": 1.9266867103084877, "learning_rate": 1.7413668984867778e-05, "loss": 0.8876, "step": 10380 }, { "epoch": 0.7715347454477889, "grad_norm": 1.8043549184532435, "learning_rate": 1.7413130491722792e-05, "loss": 0.8774, "step": 10381 }, { "epoch": 0.7716090672612411, "grad_norm": 2.0605884554317915, "learning_rate": 1.7412591950852453e-05, "loss": 0.7987, "step": 10382 }, { "epoch": 0.7716833890746935, "grad_norm": 2.444392203622085, "learning_rate": 1.741205336226023e-05, "loss": 0.9056, "step": 10383 }, { "epoch": 0.7717577108881457, "grad_norm": 1.6250654823696289, "learning_rate": 1.7411514725949597e-05, "loss": 0.6691, "step": 10384 }, { "epoch": 0.771832032701598, "grad_norm": 1.8109667588107365, "learning_rate": 1.7410976041924015e-05, "loss": 0.8572, "step": 10385 }, { "epoch": 0.7719063545150502, "grad_norm": 1.900855129319542, "learning_rate": 1.7410437310186955e-05, "loss": 0.8637, "step": 10386 }, { "epoch": 0.7719806763285024, "grad_norm": 1.7171325872327896, "learning_rate": 1.740989853074188e-05, "loss": 0.8931, "step": 10387 }, { "epoch": 0.7720549981419547, "grad_norm": 2.3559164030788975, "learning_rate": 1.7409359703592262e-05, "loss": 0.9205, "step": 10388 }, { "epoch": 0.7721293199554069, "grad_norm": 1.9364577705662502, "learning_rate": 1.740882082874158e-05, "loss": 0.7627, "step": 10389 }, { "epoch": 0.7722036417688591, "grad_norm": 2.0405160853861006, "learning_rate": 1.7408281906193285e-05, "loss": 0.9924, "step": 10390 }, { "epoch": 0.7722779635823114, "grad_norm": 2.3564391439652903, "learning_rate": 1.740774293595086e-05, "loss": 0.8664, "step": 10391 }, { "epoch": 0.7723522853957636, "grad_norm": 2.014058348534812, "learning_rate": 1.7407203918017764e-05, "loss": 0.7682, "step": 10392 }, { "epoch": 0.7724266072092159, "grad_norm": 1.6753870420318435, "learning_rate": 1.740666485239748e-05, "loss": 0.8861, "step": 10393 }, { "epoch": 0.7725009290226682, "grad_norm": 2.135037520145124, "learning_rate": 1.740612573909347e-05, "loss": 0.918, "step": 10394 }, { "epoch": 0.7725752508361204, "grad_norm": 2.1437832314549903, "learning_rate": 1.7405586578109205e-05, "loss": 0.8848, "step": 10395 }, { "epoch": 0.7726495726495727, "grad_norm": 2.0846095862876366, "learning_rate": 1.740504736944816e-05, "loss": 0.9259, "step": 10396 }, { "epoch": 0.7727238944630249, "grad_norm": 1.834481623495128, "learning_rate": 1.7404508113113804e-05, "loss": 0.9054, "step": 10397 }, { "epoch": 0.7727982162764772, "grad_norm": 1.7529355812208178, "learning_rate": 1.740396880910961e-05, "loss": 0.8774, "step": 10398 }, { "epoch": 0.7728725380899294, "grad_norm": 1.815518273401679, "learning_rate": 1.7403429457439048e-05, "loss": 0.9034, "step": 10399 }, { "epoch": 0.7729468599033816, "grad_norm": 2.1269039136129013, "learning_rate": 1.740289005810559e-05, "loss": 0.8619, "step": 10400 }, { "epoch": 0.7730211817168339, "grad_norm": 1.8144545285678653, "learning_rate": 1.7402350611112714e-05, "loss": 0.7572, "step": 10401 }, { "epoch": 0.7730955035302861, "grad_norm": 2.8470376880659867, "learning_rate": 1.7401811116463886e-05, "loss": 0.995, "step": 10402 }, { "epoch": 0.7731698253437383, "grad_norm": 1.911905367858261, "learning_rate": 1.740127157416258e-05, "loss": 0.7756, "step": 10403 }, { "epoch": 0.7732441471571906, "grad_norm": 2.371843198547572, "learning_rate": 1.7400731984212273e-05, "loss": 1.1146, "step": 10404 }, { "epoch": 0.7733184689706429, "grad_norm": 1.7924028664941816, "learning_rate": 1.7400192346616437e-05, "loss": 0.7531, "step": 10405 }, { "epoch": 0.7733927907840952, "grad_norm": 2.132178090403572, "learning_rate": 1.739965266137855e-05, "loss": 1.0463, "step": 10406 }, { "epoch": 0.7734671125975474, "grad_norm": 2.274200932676114, "learning_rate": 1.739911292850208e-05, "loss": 0.9581, "step": 10407 }, { "epoch": 0.7735414344109997, "grad_norm": 2.1578782869239594, "learning_rate": 1.7398573147990506e-05, "loss": 0.8054, "step": 10408 }, { "epoch": 0.7736157562244519, "grad_norm": 1.6183091374444099, "learning_rate": 1.7398033319847304e-05, "loss": 0.9302, "step": 10409 }, { "epoch": 0.7736900780379041, "grad_norm": 2.0897917146283067, "learning_rate": 1.7397493444075944e-05, "loss": 1.092, "step": 10410 }, { "epoch": 0.7737643998513564, "grad_norm": 1.9079310635892572, "learning_rate": 1.7396953520679905e-05, "loss": 0.8194, "step": 10411 }, { "epoch": 0.7738387216648086, "grad_norm": 1.8247585480717001, "learning_rate": 1.7396413549662665e-05, "loss": 0.8787, "step": 10412 }, { "epoch": 0.7739130434782608, "grad_norm": 2.701733896098732, "learning_rate": 1.7395873531027696e-05, "loss": 1.0625, "step": 10413 }, { "epoch": 0.7739873652917131, "grad_norm": 2.0519908282177055, "learning_rate": 1.739533346477848e-05, "loss": 0.8273, "step": 10414 }, { "epoch": 0.7740616871051653, "grad_norm": 1.6684659024351607, "learning_rate": 1.739479335091849e-05, "loss": 0.6785, "step": 10415 }, { "epoch": 0.7741360089186177, "grad_norm": 2.660248684938978, "learning_rate": 1.7394253189451203e-05, "loss": 0.9173, "step": 10416 }, { "epoch": 0.7742103307320699, "grad_norm": 2.300154511950289, "learning_rate": 1.7393712980380096e-05, "loss": 0.9448, "step": 10417 }, { "epoch": 0.7742846525455221, "grad_norm": 1.8141282820871294, "learning_rate": 1.739317272370865e-05, "loss": 0.8741, "step": 10418 }, { "epoch": 0.7743589743589744, "grad_norm": 1.6715295554799916, "learning_rate": 1.7392632419440345e-05, "loss": 0.7238, "step": 10419 }, { "epoch": 0.7744332961724266, "grad_norm": 1.738728744410303, "learning_rate": 1.739209206757865e-05, "loss": 0.79, "step": 10420 }, { "epoch": 0.7745076179858789, "grad_norm": 1.7317643728424466, "learning_rate": 1.7391551668127053e-05, "loss": 0.7987, "step": 10421 }, { "epoch": 0.7745819397993311, "grad_norm": 1.7419123665758065, "learning_rate": 1.7391011221089033e-05, "loss": 0.8212, "step": 10422 }, { "epoch": 0.7746562616127833, "grad_norm": 3.1171582718666886, "learning_rate": 1.7390470726468063e-05, "loss": 0.8663, "step": 10423 }, { "epoch": 0.7747305834262356, "grad_norm": 1.7363499758966585, "learning_rate": 1.7389930184267626e-05, "loss": 0.8163, "step": 10424 }, { "epoch": 0.7748049052396878, "grad_norm": 1.9219726535758022, "learning_rate": 1.73893895944912e-05, "loss": 1.0979, "step": 10425 }, { "epoch": 0.77487922705314, "grad_norm": 2.062573450127347, "learning_rate": 1.738884895714227e-05, "loss": 0.8869, "step": 10426 }, { "epoch": 0.7749535488665923, "grad_norm": 1.7419220220004739, "learning_rate": 1.7388308272224313e-05, "loss": 0.8159, "step": 10427 }, { "epoch": 0.7750278706800446, "grad_norm": 1.6687691071391122, "learning_rate": 1.738776753974081e-05, "loss": 0.6997, "step": 10428 }, { "epoch": 0.7751021924934969, "grad_norm": 1.7098315744040287, "learning_rate": 1.7387226759695244e-05, "loss": 0.785, "step": 10429 }, { "epoch": 0.7751765143069491, "grad_norm": 1.6579047646533607, "learning_rate": 1.7386685932091098e-05, "loss": 0.6354, "step": 10430 }, { "epoch": 0.7752508361204014, "grad_norm": 1.7891066633521533, "learning_rate": 1.7386145056931845e-05, "loss": 0.9269, "step": 10431 }, { "epoch": 0.7753251579338536, "grad_norm": 1.8733397790024158, "learning_rate": 1.7385604134220976e-05, "loss": 0.8984, "step": 10432 }, { "epoch": 0.7753994797473058, "grad_norm": 1.6720477507351814, "learning_rate": 1.738506316396197e-05, "loss": 0.9533, "step": 10433 }, { "epoch": 0.7754738015607581, "grad_norm": 2.180284184796758, "learning_rate": 1.7384522146158313e-05, "loss": 0.9922, "step": 10434 }, { "epoch": 0.7755481233742103, "grad_norm": 1.970803125102582, "learning_rate": 1.7383981080813485e-05, "loss": 0.8546, "step": 10435 }, { "epoch": 0.7756224451876625, "grad_norm": 2.5717394958138944, "learning_rate": 1.738343996793097e-05, "loss": 1.0697, "step": 10436 }, { "epoch": 0.7756967670011148, "grad_norm": 1.8857936642570172, "learning_rate": 1.738289880751425e-05, "loss": 0.557, "step": 10437 }, { "epoch": 0.775771088814567, "grad_norm": 2.1360856626716367, "learning_rate": 1.7382357599566814e-05, "loss": 0.7152, "step": 10438 }, { "epoch": 0.7758454106280194, "grad_norm": 1.8910186343743443, "learning_rate": 1.7381816344092136e-05, "loss": 0.9227, "step": 10439 }, { "epoch": 0.7759197324414716, "grad_norm": 2.0804660471393386, "learning_rate": 1.7381275041093713e-05, "loss": 0.8597, "step": 10440 }, { "epoch": 0.7759940542549238, "grad_norm": 1.8519432030576861, "learning_rate": 1.7380733690575023e-05, "loss": 0.8946, "step": 10441 }, { "epoch": 0.7760683760683761, "grad_norm": 1.9961952088566235, "learning_rate": 1.7380192292539554e-05, "loss": 0.9869, "step": 10442 }, { "epoch": 0.7761426978818283, "grad_norm": 2.2494747712551386, "learning_rate": 1.7379650846990788e-05, "loss": 0.8916, "step": 10443 }, { "epoch": 0.7762170196952806, "grad_norm": 2.009705758269734, "learning_rate": 1.7379109353932215e-05, "loss": 0.7637, "step": 10444 }, { "epoch": 0.7762913415087328, "grad_norm": 1.8452944792218433, "learning_rate": 1.7378567813367314e-05, "loss": 0.8031, "step": 10445 }, { "epoch": 0.776365663322185, "grad_norm": 1.973486108489292, "learning_rate": 1.7378026225299583e-05, "loss": 0.7742, "step": 10446 }, { "epoch": 0.7764399851356373, "grad_norm": 2.4435028986442116, "learning_rate": 1.7377484589732497e-05, "loss": 0.8303, "step": 10447 }, { "epoch": 0.7765143069490895, "grad_norm": 2.0109569827549816, "learning_rate": 1.737694290666955e-05, "loss": 1.0708, "step": 10448 }, { "epoch": 0.7765886287625418, "grad_norm": 2.1345216377517833, "learning_rate": 1.7376401176114224e-05, "loss": 0.8073, "step": 10449 }, { "epoch": 0.7766629505759941, "grad_norm": 2.574781072948418, "learning_rate": 1.7375859398070013e-05, "loss": 0.9928, "step": 10450 }, { "epoch": 0.7767372723894463, "grad_norm": 1.708623080105233, "learning_rate": 1.7375317572540404e-05, "loss": 1.0217, "step": 10451 }, { "epoch": 0.7768115942028986, "grad_norm": 7.777826972282766, "learning_rate": 1.7374775699528878e-05, "loss": 0.9183, "step": 10452 }, { "epoch": 0.7768859160163508, "grad_norm": 2.0424306137753985, "learning_rate": 1.737423377903893e-05, "loss": 0.8911, "step": 10453 }, { "epoch": 0.7769602378298031, "grad_norm": 1.9954373199995104, "learning_rate": 1.7373691811074052e-05, "loss": 0.9034, "step": 10454 }, { "epoch": 0.7770345596432553, "grad_norm": 2.1662847354056023, "learning_rate": 1.7373149795637724e-05, "loss": 1.1109, "step": 10455 }, { "epoch": 0.7771088814567075, "grad_norm": 1.58003259572494, "learning_rate": 1.7372607732733443e-05, "loss": 0.7532, "step": 10456 }, { "epoch": 0.7771832032701598, "grad_norm": 9.748293240173089, "learning_rate": 1.7372065622364695e-05, "loss": 1.0061, "step": 10457 }, { "epoch": 0.777257525083612, "grad_norm": 2.1211533826426647, "learning_rate": 1.737152346453497e-05, "loss": 0.9298, "step": 10458 }, { "epoch": 0.7773318468970642, "grad_norm": 1.9193224692150888, "learning_rate": 1.737098125924776e-05, "loss": 0.8588, "step": 10459 }, { "epoch": 0.7774061687105165, "grad_norm": 1.8357060845496955, "learning_rate": 1.7370439006506554e-05, "loss": 0.9171, "step": 10460 }, { "epoch": 0.7774804905239688, "grad_norm": 1.9358738712721795, "learning_rate": 1.7369896706314846e-05, "loss": 0.6939, "step": 10461 }, { "epoch": 0.7775548123374211, "grad_norm": 2.221772106754715, "learning_rate": 1.7369354358676126e-05, "loss": 0.876, "step": 10462 }, { "epoch": 0.7776291341508733, "grad_norm": 1.9923252065576114, "learning_rate": 1.7368811963593882e-05, "loss": 0.8522, "step": 10463 }, { "epoch": 0.7777034559643256, "grad_norm": 3.8839734053284394, "learning_rate": 1.736826952107161e-05, "loss": 0.7593, "step": 10464 }, { "epoch": 0.7777777777777778, "grad_norm": 1.9666461312203973, "learning_rate": 1.7367727031112805e-05, "loss": 0.8253, "step": 10465 }, { "epoch": 0.77785209959123, "grad_norm": 2.048957051256206, "learning_rate": 1.736718449372095e-05, "loss": 0.9082, "step": 10466 }, { "epoch": 0.7779264214046823, "grad_norm": 1.7939747621140882, "learning_rate": 1.7366641908899545e-05, "loss": 0.7145, "step": 10467 }, { "epoch": 0.7780007432181345, "grad_norm": 2.0194003281029365, "learning_rate": 1.7366099276652084e-05, "loss": 0.9008, "step": 10468 }, { "epoch": 0.7780750650315867, "grad_norm": 2.231295369230262, "learning_rate": 1.7365556596982057e-05, "loss": 0.8536, "step": 10469 }, { "epoch": 0.778149386845039, "grad_norm": 2.5317238716559487, "learning_rate": 1.736501386989296e-05, "loss": 0.8774, "step": 10470 }, { "epoch": 0.7782237086584912, "grad_norm": 1.9119977997509998, "learning_rate": 1.7364471095388284e-05, "loss": 0.9664, "step": 10471 }, { "epoch": 0.7782980304719436, "grad_norm": 2.1357495747521966, "learning_rate": 1.7363928273471524e-05, "loss": 1.0773, "step": 10472 }, { "epoch": 0.7783723522853958, "grad_norm": 1.6691233131480077, "learning_rate": 1.7363385404146178e-05, "loss": 0.8004, "step": 10473 }, { "epoch": 0.778446674098848, "grad_norm": 2.071257976324895, "learning_rate": 1.736284248741574e-05, "loss": 0.8906, "step": 10474 }, { "epoch": 0.7785209959123003, "grad_norm": 2.076398170913958, "learning_rate": 1.73622995232837e-05, "loss": 1.0167, "step": 10475 }, { "epoch": 0.7785953177257525, "grad_norm": 2.1212815285232405, "learning_rate": 1.7361756511753563e-05, "loss": 0.9111, "step": 10476 }, { "epoch": 0.7786696395392048, "grad_norm": 2.0521853558748853, "learning_rate": 1.7361213452828812e-05, "loss": 1.0135, "step": 10477 }, { "epoch": 0.778743961352657, "grad_norm": 1.8597967931627026, "learning_rate": 1.7360670346512955e-05, "loss": 0.7467, "step": 10478 }, { "epoch": 0.7788182831661092, "grad_norm": 1.9763918546430834, "learning_rate": 1.7360127192809487e-05, "loss": 0.6781, "step": 10479 }, { "epoch": 0.7788926049795615, "grad_norm": 1.8256876951673544, "learning_rate": 1.73595839917219e-05, "loss": 0.6951, "step": 10480 }, { "epoch": 0.7789669267930137, "grad_norm": 1.9001268089139451, "learning_rate": 1.7359040743253692e-05, "loss": 0.907, "step": 10481 }, { "epoch": 0.779041248606466, "grad_norm": 2.011442388278857, "learning_rate": 1.735849744740836e-05, "loss": 0.8558, "step": 10482 }, { "epoch": 0.7791155704199182, "grad_norm": 2.046279634330208, "learning_rate": 1.7357954104189406e-05, "loss": 0.8993, "step": 10483 }, { "epoch": 0.7791898922333705, "grad_norm": 1.7393868874292193, "learning_rate": 1.7357410713600326e-05, "loss": 0.8949, "step": 10484 }, { "epoch": 0.7792642140468228, "grad_norm": 1.845377661061857, "learning_rate": 1.735686727564462e-05, "loss": 1.059, "step": 10485 }, { "epoch": 0.779338535860275, "grad_norm": 2.884815510445426, "learning_rate": 1.7356323790325777e-05, "loss": 0.8293, "step": 10486 }, { "epoch": 0.7794128576737273, "grad_norm": 2.7296466369688113, "learning_rate": 1.7355780257647307e-05, "loss": 0.825, "step": 10487 }, { "epoch": 0.7794871794871795, "grad_norm": 1.7770825398016128, "learning_rate": 1.7355236677612705e-05, "loss": 0.9399, "step": 10488 }, { "epoch": 0.7795615013006317, "grad_norm": 1.9592964868285287, "learning_rate": 1.7354693050225472e-05, "loss": 0.784, "step": 10489 }, { "epoch": 0.779635823114084, "grad_norm": 1.9637998861518329, "learning_rate": 1.7354149375489107e-05, "loss": 0.8491, "step": 10490 }, { "epoch": 0.7797101449275362, "grad_norm": 1.9469470731076386, "learning_rate": 1.7353605653407107e-05, "loss": 0.9483, "step": 10491 }, { "epoch": 0.7797844667409884, "grad_norm": 1.6910978346386283, "learning_rate": 1.7353061883982976e-05, "loss": 0.8867, "step": 10492 }, { "epoch": 0.7798587885544407, "grad_norm": 2.0640304670704985, "learning_rate": 1.7352518067220216e-05, "loss": 0.9454, "step": 10493 }, { "epoch": 0.7799331103678929, "grad_norm": 1.813572094661442, "learning_rate": 1.7351974203122324e-05, "loss": 0.8943, "step": 10494 }, { "epoch": 0.7800074321813453, "grad_norm": 1.7374161344822163, "learning_rate": 1.7351430291692804e-05, "loss": 0.841, "step": 10495 }, { "epoch": 0.7800817539947975, "grad_norm": 2.1680257041104247, "learning_rate": 1.735088633293516e-05, "loss": 0.9892, "step": 10496 }, { "epoch": 0.7801560758082497, "grad_norm": 1.9065741514083534, "learning_rate": 1.7350342326852888e-05, "loss": 0.8307, "step": 10497 }, { "epoch": 0.780230397621702, "grad_norm": 1.701515730125158, "learning_rate": 1.7349798273449495e-05, "loss": 0.8661, "step": 10498 }, { "epoch": 0.7803047194351542, "grad_norm": 2.096445036196154, "learning_rate": 1.734925417272848e-05, "loss": 0.7364, "step": 10499 }, { "epoch": 0.7803790412486065, "grad_norm": 1.9402336655402912, "learning_rate": 1.734871002469335e-05, "loss": 0.7256, "step": 10500 }, { "epoch": 0.7804533630620587, "grad_norm": 2.036900459034549, "learning_rate": 1.7348165829347606e-05, "loss": 0.6852, "step": 10501 }, { "epoch": 0.7805276848755109, "grad_norm": 2.0440055545560734, "learning_rate": 1.7347621586694752e-05, "loss": 0.8155, "step": 10502 }, { "epoch": 0.7806020066889632, "grad_norm": 1.9224600370627611, "learning_rate": 1.7347077296738294e-05, "loss": 0.6298, "step": 10503 }, { "epoch": 0.7806763285024154, "grad_norm": 1.776544325383402, "learning_rate": 1.734653295948173e-05, "loss": 0.6927, "step": 10504 }, { "epoch": 0.7807506503158677, "grad_norm": 2.164578479707083, "learning_rate": 1.7345988574928567e-05, "loss": 0.9482, "step": 10505 }, { "epoch": 0.78082497212932, "grad_norm": 1.664827588475789, "learning_rate": 1.7345444143082312e-05, "loss": 0.8499, "step": 10506 }, { "epoch": 0.7808992939427722, "grad_norm": 1.7119724235330267, "learning_rate": 1.7344899663946472e-05, "loss": 0.6939, "step": 10507 }, { "epoch": 0.7809736157562245, "grad_norm": 2.138939148073119, "learning_rate": 1.7344355137524543e-05, "loss": 0.7857, "step": 10508 }, { "epoch": 0.7810479375696767, "grad_norm": 1.906105351573203, "learning_rate": 1.734381056382004e-05, "loss": 1.0255, "step": 10509 }, { "epoch": 0.781122259383129, "grad_norm": 1.6312125461561404, "learning_rate": 1.7343265942836464e-05, "loss": 0.7545, "step": 10510 }, { "epoch": 0.7811965811965812, "grad_norm": 1.8833626732444222, "learning_rate": 1.7342721274577324e-05, "loss": 0.7402, "step": 10511 }, { "epoch": 0.7812709030100334, "grad_norm": 1.931762975220415, "learning_rate": 1.7342176559046125e-05, "loss": 0.8829, "step": 10512 }, { "epoch": 0.7813452248234857, "grad_norm": 2.4842127626081023, "learning_rate": 1.7341631796246378e-05, "loss": 0.8184, "step": 10513 }, { "epoch": 0.7814195466369379, "grad_norm": 1.7467064803617256, "learning_rate": 1.7341086986181583e-05, "loss": 0.7246, "step": 10514 }, { "epoch": 0.7814938684503902, "grad_norm": 1.98887451863552, "learning_rate": 1.734054212885525e-05, "loss": 0.8713, "step": 10515 }, { "epoch": 0.7815681902638424, "grad_norm": 2.2331848272701658, "learning_rate": 1.7339997224270887e-05, "loss": 0.9131, "step": 10516 }, { "epoch": 0.7816425120772947, "grad_norm": 1.9338031442449746, "learning_rate": 1.7339452272432007e-05, "loss": 0.8293, "step": 10517 }, { "epoch": 0.781716833890747, "grad_norm": 2.0010337613024336, "learning_rate": 1.7338907273342113e-05, "loss": 0.8566, "step": 10518 }, { "epoch": 0.7817911557041992, "grad_norm": 2.238424566791784, "learning_rate": 1.7338362227004714e-05, "loss": 0.8835, "step": 10519 }, { "epoch": 0.7818654775176515, "grad_norm": 1.8185351462476238, "learning_rate": 1.7337817133423317e-05, "loss": 0.966, "step": 10520 }, { "epoch": 0.7819397993311037, "grad_norm": 1.7632241886115685, "learning_rate": 1.7337271992601436e-05, "loss": 0.699, "step": 10521 }, { "epoch": 0.7820141211445559, "grad_norm": 1.9695393354872404, "learning_rate": 1.7336726804542577e-05, "loss": 0.8863, "step": 10522 }, { "epoch": 0.7820884429580082, "grad_norm": 2.4797171037867836, "learning_rate": 1.7336181569250254e-05, "loss": 0.9032, "step": 10523 }, { "epoch": 0.7821627647714604, "grad_norm": 2.268144098701673, "learning_rate": 1.7335636286727977e-05, "loss": 0.764, "step": 10524 }, { "epoch": 0.7822370865849126, "grad_norm": 1.7549814386408682, "learning_rate": 1.733509095697925e-05, "loss": 0.8905, "step": 10525 }, { "epoch": 0.7823114083983649, "grad_norm": 1.8942011971004555, "learning_rate": 1.733454558000759e-05, "loss": 0.9113, "step": 10526 }, { "epoch": 0.7823857302118171, "grad_norm": 2.046656996650031, "learning_rate": 1.7334000155816505e-05, "loss": 0.8418, "step": 10527 }, { "epoch": 0.7824600520252695, "grad_norm": 2.742942432751112, "learning_rate": 1.7333454684409506e-05, "loss": 0.9737, "step": 10528 }, { "epoch": 0.7825343738387217, "grad_norm": 2.2984095431279656, "learning_rate": 1.7332909165790107e-05, "loss": 0.8996, "step": 10529 }, { "epoch": 0.782608695652174, "grad_norm": 1.7941563685972397, "learning_rate": 1.7332363599961825e-05, "loss": 0.8764, "step": 10530 }, { "epoch": 0.7826830174656262, "grad_norm": 2.1280359245964378, "learning_rate": 1.7331817986928162e-05, "loss": 0.9384, "step": 10531 }, { "epoch": 0.7827573392790784, "grad_norm": 2.1677674827163336, "learning_rate": 1.7331272326692633e-05, "loss": 1.0162, "step": 10532 }, { "epoch": 0.7828316610925307, "grad_norm": 2.0399914262070915, "learning_rate": 1.7330726619258755e-05, "loss": 0.933, "step": 10533 }, { "epoch": 0.7829059829059829, "grad_norm": 1.8813479406455274, "learning_rate": 1.733018086463004e-05, "loss": 0.8658, "step": 10534 }, { "epoch": 0.7829803047194351, "grad_norm": 1.5900856342845038, "learning_rate": 1.732963506281e-05, "loss": 0.824, "step": 10535 }, { "epoch": 0.7830546265328874, "grad_norm": 3.2222387840165503, "learning_rate": 1.7329089213802154e-05, "loss": 0.9527, "step": 10536 }, { "epoch": 0.7831289483463396, "grad_norm": 2.020526765553082, "learning_rate": 1.7328543317610006e-05, "loss": 1.033, "step": 10537 }, { "epoch": 0.7832032701597919, "grad_norm": 2.0397032713144, "learning_rate": 1.732799737423708e-05, "loss": 0.8278, "step": 10538 }, { "epoch": 0.7832775919732441, "grad_norm": 2.19902426829851, "learning_rate": 1.7327451383686886e-05, "loss": 1.0216, "step": 10539 }, { "epoch": 0.7833519137866964, "grad_norm": 1.7994297152235277, "learning_rate": 1.732690534596294e-05, "loss": 0.7937, "step": 10540 }, { "epoch": 0.7834262356001487, "grad_norm": 1.7305664675732741, "learning_rate": 1.7326359261068756e-05, "loss": 0.7933, "step": 10541 }, { "epoch": 0.7835005574136009, "grad_norm": 1.6644076485173072, "learning_rate": 1.7325813129007856e-05, "loss": 0.8954, "step": 10542 }, { "epoch": 0.7835748792270532, "grad_norm": 1.845566024983391, "learning_rate": 1.7325266949783745e-05, "loss": 1.0565, "step": 10543 }, { "epoch": 0.7836492010405054, "grad_norm": 2.0373582418785716, "learning_rate": 1.732472072339995e-05, "loss": 0.9861, "step": 10544 }, { "epoch": 0.7837235228539576, "grad_norm": 2.3081212359292937, "learning_rate": 1.7324174449859982e-05, "loss": 0.9307, "step": 10545 }, { "epoch": 0.7837978446674099, "grad_norm": 2.8100911742583254, "learning_rate": 1.732362812916736e-05, "loss": 0.9164, "step": 10546 }, { "epoch": 0.7838721664808621, "grad_norm": 1.7571712332749783, "learning_rate": 1.7323081761325598e-05, "loss": 0.8414, "step": 10547 }, { "epoch": 0.7839464882943143, "grad_norm": 2.151748294936517, "learning_rate": 1.7322535346338214e-05, "loss": 0.8745, "step": 10548 }, { "epoch": 0.7840208101077666, "grad_norm": 1.9430060001725353, "learning_rate": 1.732198888420873e-05, "loss": 0.736, "step": 10549 }, { "epoch": 0.7840951319212188, "grad_norm": 2.1018185961015163, "learning_rate": 1.7321442374940664e-05, "loss": 0.8411, "step": 10550 }, { "epoch": 0.7841694537346712, "grad_norm": 2.0726448010636855, "learning_rate": 1.7320895818537527e-05, "loss": 0.9439, "step": 10551 }, { "epoch": 0.7842437755481234, "grad_norm": 2.006244226503227, "learning_rate": 1.7320349215002842e-05, "loss": 1.0934, "step": 10552 }, { "epoch": 0.7843180973615757, "grad_norm": 1.555533907113647, "learning_rate": 1.7319802564340135e-05, "loss": 0.7389, "step": 10553 }, { "epoch": 0.7843924191750279, "grad_norm": 1.6048872915473307, "learning_rate": 1.7319255866552912e-05, "loss": 0.7321, "step": 10554 }, { "epoch": 0.7844667409884801, "grad_norm": 2.1256106612691577, "learning_rate": 1.7318709121644704e-05, "loss": 0.8947, "step": 10555 }, { "epoch": 0.7845410628019324, "grad_norm": 1.9583758559065194, "learning_rate": 1.7318162329619025e-05, "loss": 0.8634, "step": 10556 }, { "epoch": 0.7846153846153846, "grad_norm": 1.9298791881285287, "learning_rate": 1.7317615490479394e-05, "loss": 0.7496, "step": 10557 }, { "epoch": 0.7846897064288368, "grad_norm": 1.7986637061580788, "learning_rate": 1.731706860422934e-05, "loss": 0.7696, "step": 10558 }, { "epoch": 0.7847640282422891, "grad_norm": 2.09650505528334, "learning_rate": 1.7316521670872375e-05, "loss": 1.0137, "step": 10559 }, { "epoch": 0.7848383500557413, "grad_norm": 1.6639754882103654, "learning_rate": 1.731597469041202e-05, "loss": 0.793, "step": 10560 }, { "epoch": 0.7849126718691936, "grad_norm": 2.431571823640443, "learning_rate": 1.7315427662851803e-05, "loss": 0.9255, "step": 10561 }, { "epoch": 0.7849869936826459, "grad_norm": 2.0601646898538, "learning_rate": 1.731488058819524e-05, "loss": 1.0255, "step": 10562 }, { "epoch": 0.7850613154960981, "grad_norm": 1.6707286286275567, "learning_rate": 1.731433346644586e-05, "loss": 0.7473, "step": 10563 }, { "epoch": 0.7851356373095504, "grad_norm": 2.0298905757106653, "learning_rate": 1.731378629760718e-05, "loss": 0.8106, "step": 10564 }, { "epoch": 0.7852099591230026, "grad_norm": 1.6615157838819077, "learning_rate": 1.7313239081682716e-05, "loss": 0.731, "step": 10565 }, { "epoch": 0.7852842809364549, "grad_norm": 1.6559524377125954, "learning_rate": 1.7312691818676005e-05, "loss": 0.8202, "step": 10566 }, { "epoch": 0.7853586027499071, "grad_norm": 1.9619173191545658, "learning_rate": 1.731214450859056e-05, "loss": 0.8803, "step": 10567 }, { "epoch": 0.7854329245633593, "grad_norm": 1.8272104189923626, "learning_rate": 1.731159715142991e-05, "loss": 0.9548, "step": 10568 }, { "epoch": 0.7855072463768116, "grad_norm": 2.333441479647601, "learning_rate": 1.7311049747197576e-05, "loss": 0.7816, "step": 10569 }, { "epoch": 0.7855815681902638, "grad_norm": 1.5760111057651094, "learning_rate": 1.7310502295897084e-05, "loss": 0.7581, "step": 10570 }, { "epoch": 0.785655890003716, "grad_norm": 1.7314680169473875, "learning_rate": 1.730995479753196e-05, "loss": 0.9654, "step": 10571 }, { "epoch": 0.7857302118171683, "grad_norm": 1.5086208878665315, "learning_rate": 1.7309407252105723e-05, "loss": 0.7801, "step": 10572 }, { "epoch": 0.7858045336306206, "grad_norm": 2.2378109596990003, "learning_rate": 1.73088596596219e-05, "loss": 0.759, "step": 10573 }, { "epoch": 0.7858788554440729, "grad_norm": 2.1724750367639722, "learning_rate": 1.730831202008402e-05, "loss": 0.9974, "step": 10574 }, { "epoch": 0.7859531772575251, "grad_norm": 2.0538333792367744, "learning_rate": 1.7307764333495606e-05, "loss": 1.0847, "step": 10575 }, { "epoch": 0.7860274990709774, "grad_norm": 1.8860424658179642, "learning_rate": 1.7307216599860185e-05, "loss": 0.9094, "step": 10576 }, { "epoch": 0.7861018208844296, "grad_norm": 1.9941043775258558, "learning_rate": 1.730666881918128e-05, "loss": 0.9169, "step": 10577 }, { "epoch": 0.7861761426978818, "grad_norm": 2.147371234571379, "learning_rate": 1.7306120991462424e-05, "loss": 0.7886, "step": 10578 }, { "epoch": 0.7862504645113341, "grad_norm": 9.963618944681764, "learning_rate": 1.730557311670714e-05, "loss": 1.035, "step": 10579 }, { "epoch": 0.7863247863247863, "grad_norm": 2.2952629116777046, "learning_rate": 1.7305025194918954e-05, "loss": 1.053, "step": 10580 }, { "epoch": 0.7863991081382385, "grad_norm": 2.0368427267778735, "learning_rate": 1.7304477226101392e-05, "loss": 0.8902, "step": 10581 }, { "epoch": 0.7864734299516908, "grad_norm": 3.7961105608092574, "learning_rate": 1.730392921025799e-05, "loss": 0.7207, "step": 10582 }, { "epoch": 0.786547751765143, "grad_norm": 1.637324808828942, "learning_rate": 1.730338114739227e-05, "loss": 0.8119, "step": 10583 }, { "epoch": 0.7866220735785954, "grad_norm": 2.1706970027913424, "learning_rate": 1.7302833037507757e-05, "loss": 1.0227, "step": 10584 }, { "epoch": 0.7866963953920476, "grad_norm": 1.920072990629757, "learning_rate": 1.7302284880607983e-05, "loss": 0.7832, "step": 10585 }, { "epoch": 0.7867707172054998, "grad_norm": 1.9681443435308035, "learning_rate": 1.7301736676696484e-05, "loss": 0.8645, "step": 10586 }, { "epoch": 0.7868450390189521, "grad_norm": 7.872098742831211, "learning_rate": 1.7301188425776778e-05, "loss": 0.7724, "step": 10587 }, { "epoch": 0.7869193608324043, "grad_norm": 1.8026984773879606, "learning_rate": 1.73006401278524e-05, "loss": 0.8176, "step": 10588 }, { "epoch": 0.7869936826458566, "grad_norm": 1.5848655023597944, "learning_rate": 1.730009178292688e-05, "loss": 0.6612, "step": 10589 }, { "epoch": 0.7870680044593088, "grad_norm": 2.076834580175785, "learning_rate": 1.729954339100375e-05, "loss": 0.8854, "step": 10590 }, { "epoch": 0.787142326272761, "grad_norm": 1.7811939989883034, "learning_rate": 1.7298994952086542e-05, "loss": 0.9451, "step": 10591 }, { "epoch": 0.7872166480862133, "grad_norm": 1.7735392776811396, "learning_rate": 1.7298446466178778e-05, "loss": 0.7947, "step": 10592 }, { "epoch": 0.7872909698996655, "grad_norm": 2.215025469532591, "learning_rate": 1.7297897933283992e-05, "loss": 0.8014, "step": 10593 }, { "epoch": 0.7873652917131178, "grad_norm": 1.7156772550890502, "learning_rate": 1.7297349353405722e-05, "loss": 0.7717, "step": 10594 }, { "epoch": 0.7874396135265701, "grad_norm": 2.1395048318930736, "learning_rate": 1.7296800726547495e-05, "loss": 0.7109, "step": 10595 }, { "epoch": 0.7875139353400223, "grad_norm": 1.8850539005476719, "learning_rate": 1.7296252052712847e-05, "loss": 0.8611, "step": 10596 }, { "epoch": 0.7875882571534746, "grad_norm": 1.9340516154974583, "learning_rate": 1.72957033319053e-05, "loss": 0.6342, "step": 10597 }, { "epoch": 0.7876625789669268, "grad_norm": 5.5884215761918785, "learning_rate": 1.7295154564128396e-05, "loss": 0.8004, "step": 10598 }, { "epoch": 0.7877369007803791, "grad_norm": 14.836157537834284, "learning_rate": 1.7294605749385667e-05, "loss": 1.0214, "step": 10599 }, { "epoch": 0.7878112225938313, "grad_norm": 1.814454830824174, "learning_rate": 1.7294056887680643e-05, "loss": 0.7226, "step": 10600 }, { "epoch": 0.7878855444072835, "grad_norm": 1.9960010103226793, "learning_rate": 1.7293507979016858e-05, "loss": 0.8702, "step": 10601 }, { "epoch": 0.7879598662207358, "grad_norm": 2.1622110125349345, "learning_rate": 1.729295902339785e-05, "loss": 0.8403, "step": 10602 }, { "epoch": 0.788034188034188, "grad_norm": 1.9446995502431295, "learning_rate": 1.729241002082715e-05, "loss": 0.7445, "step": 10603 }, { "epoch": 0.7881085098476402, "grad_norm": 2.124330373866471, "learning_rate": 1.7291860971308293e-05, "loss": 0.9481, "step": 10604 }, { "epoch": 0.7881828316610925, "grad_norm": 1.7512415359409699, "learning_rate": 1.7291311874844808e-05, "loss": 0.7753, "step": 10605 }, { "epoch": 0.7882571534745447, "grad_norm": 2.2649577888174113, "learning_rate": 1.729076273144024e-05, "loss": 0.8216, "step": 10606 }, { "epoch": 0.7883314752879971, "grad_norm": 1.7690141858735506, "learning_rate": 1.729021354109812e-05, "loss": 0.9101, "step": 10607 }, { "epoch": 0.7884057971014493, "grad_norm": 2.0241387327825406, "learning_rate": 1.7289664303821983e-05, "loss": 0.7857, "step": 10608 }, { "epoch": 0.7884801189149016, "grad_norm": 2.1383632125145176, "learning_rate": 1.7289115019615367e-05, "loss": 1.0851, "step": 10609 }, { "epoch": 0.7885544407283538, "grad_norm": 2.867817398605661, "learning_rate": 1.7288565688481803e-05, "loss": 0.7306, "step": 10610 }, { "epoch": 0.788628762541806, "grad_norm": 1.9052557509616257, "learning_rate": 1.728801631042483e-05, "loss": 0.9752, "step": 10611 }, { "epoch": 0.7887030843552583, "grad_norm": 2.128518365860971, "learning_rate": 1.7287466885447992e-05, "loss": 0.8279, "step": 10612 }, { "epoch": 0.7887774061687105, "grad_norm": 2.121310874715369, "learning_rate": 1.728691741355482e-05, "loss": 0.8874, "step": 10613 }, { "epoch": 0.7888517279821627, "grad_norm": 2.003747498305491, "learning_rate": 1.7286367894748845e-05, "loss": 0.4288, "step": 10614 }, { "epoch": 0.788926049795615, "grad_norm": 1.8809139785996116, "learning_rate": 1.728581832903362e-05, "loss": 0.9064, "step": 10615 }, { "epoch": 0.7890003716090672, "grad_norm": 2.0411515755014493, "learning_rate": 1.728526871641267e-05, "loss": 0.6664, "step": 10616 }, { "epoch": 0.7890746934225195, "grad_norm": 2.095277437765582, "learning_rate": 1.7284719056889537e-05, "loss": 0.7858, "step": 10617 }, { "epoch": 0.7891490152359718, "grad_norm": 2.0178641766636605, "learning_rate": 1.7284169350467764e-05, "loss": 1.0561, "step": 10618 }, { "epoch": 0.789223337049424, "grad_norm": 1.8267196816965905, "learning_rate": 1.7283619597150886e-05, "loss": 0.855, "step": 10619 }, { "epoch": 0.7892976588628763, "grad_norm": 2.0180015613023756, "learning_rate": 1.728306979694244e-05, "loss": 1.0116, "step": 10620 }, { "epoch": 0.7893719806763285, "grad_norm": 2.1118988476148877, "learning_rate": 1.728251994984597e-05, "loss": 0.9773, "step": 10621 }, { "epoch": 0.7894463024897808, "grad_norm": 1.6144808472143968, "learning_rate": 1.7281970055865014e-05, "loss": 0.7835, "step": 10622 }, { "epoch": 0.789520624303233, "grad_norm": 1.8238334275968444, "learning_rate": 1.7281420115003115e-05, "loss": 0.8528, "step": 10623 }, { "epoch": 0.7895949461166852, "grad_norm": 2.0037615296305726, "learning_rate": 1.728087012726381e-05, "loss": 0.6867, "step": 10624 }, { "epoch": 0.7896692679301375, "grad_norm": 2.450336446090132, "learning_rate": 1.7280320092650638e-05, "loss": 0.6202, "step": 10625 }, { "epoch": 0.7897435897435897, "grad_norm": 1.7092569739332248, "learning_rate": 1.7279770011167148e-05, "loss": 0.7198, "step": 10626 }, { "epoch": 0.789817911557042, "grad_norm": 1.9981492356791313, "learning_rate": 1.7279219882816875e-05, "loss": 0.9266, "step": 10627 }, { "epoch": 0.7898922333704942, "grad_norm": 1.6996423888342131, "learning_rate": 1.7278669707603358e-05, "loss": 0.8074, "step": 10628 }, { "epoch": 0.7899665551839465, "grad_norm": 2.397323272995284, "learning_rate": 1.727811948553015e-05, "loss": 1.0208, "step": 10629 }, { "epoch": 0.7900408769973988, "grad_norm": 1.9189427472779599, "learning_rate": 1.727756921660078e-05, "loss": 0.7703, "step": 10630 }, { "epoch": 0.790115198810851, "grad_norm": 2.1487563541411285, "learning_rate": 1.72770189008188e-05, "loss": 0.6746, "step": 10631 }, { "epoch": 0.7901895206243033, "grad_norm": 2.3659436986987417, "learning_rate": 1.727646853818775e-05, "loss": 0.776, "step": 10632 }, { "epoch": 0.7902638424377555, "grad_norm": 2.0398737674275886, "learning_rate": 1.727591812871117e-05, "loss": 0.902, "step": 10633 }, { "epoch": 0.7903381642512077, "grad_norm": 1.743954958898236, "learning_rate": 1.7275367672392613e-05, "loss": 0.8448, "step": 10634 }, { "epoch": 0.79041248606466, "grad_norm": 1.747231752637505, "learning_rate": 1.727481716923561e-05, "loss": 0.7747, "step": 10635 }, { "epoch": 0.7904868078781122, "grad_norm": 2.113346927344701, "learning_rate": 1.7274266619243713e-05, "loss": 0.9748, "step": 10636 }, { "epoch": 0.7905611296915644, "grad_norm": 1.982736006911301, "learning_rate": 1.7273716022420468e-05, "loss": 0.8971, "step": 10637 }, { "epoch": 0.7906354515050167, "grad_norm": 1.8613583540892853, "learning_rate": 1.7273165378769414e-05, "loss": 0.8653, "step": 10638 }, { "epoch": 0.7907097733184689, "grad_norm": 1.790801546174261, "learning_rate": 1.72726146882941e-05, "loss": 0.8623, "step": 10639 }, { "epoch": 0.7907840951319213, "grad_norm": 1.8486513279710532, "learning_rate": 1.7272063950998072e-05, "loss": 0.8523, "step": 10640 }, { "epoch": 0.7908584169453735, "grad_norm": 2.0696462688167396, "learning_rate": 1.727151316688487e-05, "loss": 0.9028, "step": 10641 }, { "epoch": 0.7909327387588257, "grad_norm": 1.9280531694112042, "learning_rate": 1.727096233595804e-05, "loss": 0.7901, "step": 10642 }, { "epoch": 0.791007060572278, "grad_norm": 2.141417809216473, "learning_rate": 1.7270411458221138e-05, "loss": 0.9243, "step": 10643 }, { "epoch": 0.7910813823857302, "grad_norm": 1.8271389651758243, "learning_rate": 1.7269860533677702e-05, "loss": 0.8695, "step": 10644 }, { "epoch": 0.7911557041991825, "grad_norm": 1.710581239245638, "learning_rate": 1.726930956233128e-05, "loss": 0.8562, "step": 10645 }, { "epoch": 0.7912300260126347, "grad_norm": 2.101605397573824, "learning_rate": 1.7268758544185426e-05, "loss": 0.8153, "step": 10646 }, { "epoch": 0.7913043478260869, "grad_norm": 1.539669864772829, "learning_rate": 1.7268207479243673e-05, "loss": 0.7532, "step": 10647 }, { "epoch": 0.7913786696395392, "grad_norm": 1.6493118339444812, "learning_rate": 1.7267656367509585e-05, "loss": 0.9889, "step": 10648 }, { "epoch": 0.7914529914529914, "grad_norm": 1.8995413759822515, "learning_rate": 1.7267105208986697e-05, "loss": 0.6283, "step": 10649 }, { "epoch": 0.7915273132664437, "grad_norm": 2.0662755082774775, "learning_rate": 1.726655400367857e-05, "loss": 0.8288, "step": 10650 }, { "epoch": 0.791601635079896, "grad_norm": 2.023888828731629, "learning_rate": 1.726600275158874e-05, "loss": 0.8498, "step": 10651 }, { "epoch": 0.7916759568933482, "grad_norm": 2.0847491960772664, "learning_rate": 1.726545145272076e-05, "loss": 1.0385, "step": 10652 }, { "epoch": 0.7917502787068005, "grad_norm": 1.6811047451544594, "learning_rate": 1.726490010707818e-05, "loss": 0.8553, "step": 10653 }, { "epoch": 0.7918246005202527, "grad_norm": 2.1477405320084073, "learning_rate": 1.7264348714664555e-05, "loss": 0.9089, "step": 10654 }, { "epoch": 0.791898922333705, "grad_norm": 2.022908400661995, "learning_rate": 1.7263797275483426e-05, "loss": 1.0053, "step": 10655 }, { "epoch": 0.7919732441471572, "grad_norm": 1.7261166568065043, "learning_rate": 1.7263245789538347e-05, "loss": 0.9702, "step": 10656 }, { "epoch": 0.7920475659606094, "grad_norm": 2.5402300924990726, "learning_rate": 1.7262694256832868e-05, "loss": 0.8972, "step": 10657 }, { "epoch": 0.7921218877740617, "grad_norm": 2.0980995235653914, "learning_rate": 1.7262142677370544e-05, "loss": 1.0491, "step": 10658 }, { "epoch": 0.7921962095875139, "grad_norm": 1.7306556712013113, "learning_rate": 1.726159105115492e-05, "loss": 0.7939, "step": 10659 }, { "epoch": 0.7922705314009661, "grad_norm": 2.0027271238135733, "learning_rate": 1.7261039378189548e-05, "loss": 0.9371, "step": 10660 }, { "epoch": 0.7923448532144184, "grad_norm": 2.0068911542135504, "learning_rate": 1.726048765847798e-05, "loss": 1.0075, "step": 10661 }, { "epoch": 0.7924191750278706, "grad_norm": 1.9675020698891457, "learning_rate": 1.7259935892023776e-05, "loss": 0.9823, "step": 10662 }, { "epoch": 0.792493496841323, "grad_norm": 2.2319665676307316, "learning_rate": 1.7259384078830475e-05, "loss": 0.9502, "step": 10663 }, { "epoch": 0.7925678186547752, "grad_norm": 2.122236077755437, "learning_rate": 1.725883221890164e-05, "loss": 0.8639, "step": 10664 }, { "epoch": 0.7926421404682275, "grad_norm": 2.122142344462172, "learning_rate": 1.725828031224082e-05, "loss": 0.9764, "step": 10665 }, { "epoch": 0.7927164622816797, "grad_norm": 2.19790958100908, "learning_rate": 1.7257728358851565e-05, "loss": 0.7792, "step": 10666 }, { "epoch": 0.7927907840951319, "grad_norm": 1.9867316015008, "learning_rate": 1.725717635873743e-05, "loss": 0.795, "step": 10667 }, { "epoch": 0.7928651059085842, "grad_norm": 1.7037396166906709, "learning_rate": 1.7256624311901976e-05, "loss": 0.7851, "step": 10668 }, { "epoch": 0.7929394277220364, "grad_norm": 1.8976893145357399, "learning_rate": 1.725607221834875e-05, "loss": 0.6873, "step": 10669 }, { "epoch": 0.7930137495354886, "grad_norm": 1.6689285934398579, "learning_rate": 1.7255520078081302e-05, "loss": 0.8973, "step": 10670 }, { "epoch": 0.7930880713489409, "grad_norm": 1.7989925122731012, "learning_rate": 1.7254967891103197e-05, "loss": 0.7775, "step": 10671 }, { "epoch": 0.7931623931623931, "grad_norm": 1.9285161840287663, "learning_rate": 1.7254415657417984e-05, "loss": 0.9723, "step": 10672 }, { "epoch": 0.7932367149758454, "grad_norm": 2.0145335544630507, "learning_rate": 1.7253863377029218e-05, "loss": 0.8612, "step": 10673 }, { "epoch": 0.7933110367892977, "grad_norm": 1.8375978940244497, "learning_rate": 1.725331104994046e-05, "loss": 0.9956, "step": 10674 }, { "epoch": 0.79338535860275, "grad_norm": 2.1615281601612857, "learning_rate": 1.725275867615526e-05, "loss": 0.816, "step": 10675 }, { "epoch": 0.7934596804162022, "grad_norm": 1.6211751618815164, "learning_rate": 1.725220625567717e-05, "loss": 0.8103, "step": 10676 }, { "epoch": 0.7935340022296544, "grad_norm": 2.623294645791727, "learning_rate": 1.7251653788509758e-05, "loss": 0.7835, "step": 10677 }, { "epoch": 0.7936083240431067, "grad_norm": 1.8138583558236006, "learning_rate": 1.7251101274656577e-05, "loss": 0.8926, "step": 10678 }, { "epoch": 0.7936826458565589, "grad_norm": 2.127964885441536, "learning_rate": 1.7250548714121177e-05, "loss": 0.9102, "step": 10679 }, { "epoch": 0.7937569676700111, "grad_norm": 2.0125707170361222, "learning_rate": 1.7249996106907127e-05, "loss": 0.8953, "step": 10680 }, { "epoch": 0.7938312894834634, "grad_norm": 1.8675386945101042, "learning_rate": 1.7249443453017973e-05, "loss": 0.9471, "step": 10681 }, { "epoch": 0.7939056112969156, "grad_norm": 2.0503934307644247, "learning_rate": 1.724889075245728e-05, "loss": 0.7612, "step": 10682 }, { "epoch": 0.7939799331103679, "grad_norm": 1.710772516167271, "learning_rate": 1.7248338005228605e-05, "loss": 0.8419, "step": 10683 }, { "epoch": 0.7940542549238201, "grad_norm": 1.693482484090167, "learning_rate": 1.7247785211335503e-05, "loss": 0.6664, "step": 10684 }, { "epoch": 0.7941285767372724, "grad_norm": 2.3373544005091245, "learning_rate": 1.7247232370781538e-05, "loss": 0.8109, "step": 10685 }, { "epoch": 0.7942028985507247, "grad_norm": 2.4023679356093073, "learning_rate": 1.7246679483570267e-05, "loss": 0.8744, "step": 10686 }, { "epoch": 0.7942772203641769, "grad_norm": 2.2241382034216293, "learning_rate": 1.7246126549705252e-05, "loss": 1.1064, "step": 10687 }, { "epoch": 0.7943515421776292, "grad_norm": 1.8370529041742694, "learning_rate": 1.7245573569190046e-05, "loss": 0.9052, "step": 10688 }, { "epoch": 0.7944258639910814, "grad_norm": 1.7149049156989236, "learning_rate": 1.7245020542028217e-05, "loss": 0.7422, "step": 10689 }, { "epoch": 0.7945001858045336, "grad_norm": 1.7587716964540172, "learning_rate": 1.7244467468223317e-05, "loss": 0.7123, "step": 10690 }, { "epoch": 0.7945745076179859, "grad_norm": 1.810979941555554, "learning_rate": 1.7243914347778915e-05, "loss": 0.8202, "step": 10691 }, { "epoch": 0.7946488294314381, "grad_norm": 1.7719249421814347, "learning_rate": 1.7243361180698567e-05, "loss": 1.0218, "step": 10692 }, { "epoch": 0.7947231512448903, "grad_norm": 1.717213480951741, "learning_rate": 1.7242807966985836e-05, "loss": 0.9083, "step": 10693 }, { "epoch": 0.7947974730583426, "grad_norm": 2.1335732473115914, "learning_rate": 1.7242254706644287e-05, "loss": 0.9651, "step": 10694 }, { "epoch": 0.7948717948717948, "grad_norm": 1.7964710591184547, "learning_rate": 1.724170139967747e-05, "loss": 0.8484, "step": 10695 }, { "epoch": 0.7949461166852472, "grad_norm": 1.8914375344932202, "learning_rate": 1.724114804608896e-05, "loss": 0.8999, "step": 10696 }, { "epoch": 0.7950204384986994, "grad_norm": 2.2184199787371517, "learning_rate": 1.7240594645882317e-05, "loss": 0.854, "step": 10697 }, { "epoch": 0.7950947603121516, "grad_norm": 1.8505491632748592, "learning_rate": 1.7240041199061094e-05, "loss": 0.8402, "step": 10698 }, { "epoch": 0.7951690821256039, "grad_norm": 2.130230678486554, "learning_rate": 1.7239487705628865e-05, "loss": 0.8954, "step": 10699 }, { "epoch": 0.7952434039390561, "grad_norm": 2.062592925460172, "learning_rate": 1.7238934165589193e-05, "loss": 0.9318, "step": 10700 }, { "epoch": 0.7953177257525084, "grad_norm": 1.8231621638605433, "learning_rate": 1.7238380578945637e-05, "loss": 0.876, "step": 10701 }, { "epoch": 0.7953920475659606, "grad_norm": 2.0583533561725624, "learning_rate": 1.7237826945701763e-05, "loss": 0.901, "step": 10702 }, { "epoch": 0.7954663693794128, "grad_norm": 2.09757444188707, "learning_rate": 1.723727326586113e-05, "loss": 0.9682, "step": 10703 }, { "epoch": 0.7955406911928651, "grad_norm": 2.1591449362589192, "learning_rate": 1.723671953942731e-05, "loss": 0.9729, "step": 10704 }, { "epoch": 0.7956150130063173, "grad_norm": 2.209452192933538, "learning_rate": 1.7236165766403864e-05, "loss": 0.9042, "step": 10705 }, { "epoch": 0.7956893348197696, "grad_norm": 1.6788352971500504, "learning_rate": 1.7235611946794358e-05, "loss": 0.9547, "step": 10706 }, { "epoch": 0.7957636566332219, "grad_norm": 2.00471003822913, "learning_rate": 1.7235058080602363e-05, "loss": 0.8656, "step": 10707 }, { "epoch": 0.7958379784466741, "grad_norm": 1.7364645952082483, "learning_rate": 1.7234504167831433e-05, "loss": 0.7544, "step": 10708 }, { "epoch": 0.7959123002601264, "grad_norm": 2.090331989474634, "learning_rate": 1.7233950208485147e-05, "loss": 1.0728, "step": 10709 }, { "epoch": 0.7959866220735786, "grad_norm": 2.0524320519344776, "learning_rate": 1.7233396202567058e-05, "loss": 0.9312, "step": 10710 }, { "epoch": 0.7960609438870309, "grad_norm": 1.8031896573426647, "learning_rate": 1.7232842150080744e-05, "loss": 0.7805, "step": 10711 }, { "epoch": 0.7961352657004831, "grad_norm": 1.8230884374224567, "learning_rate": 1.7232288051029766e-05, "loss": 0.6985, "step": 10712 }, { "epoch": 0.7962095875139353, "grad_norm": 1.7044129892868038, "learning_rate": 1.7231733905417695e-05, "loss": 0.917, "step": 10713 }, { "epoch": 0.7962839093273876, "grad_norm": 2.041833718600444, "learning_rate": 1.7231179713248092e-05, "loss": 0.8298, "step": 10714 }, { "epoch": 0.7963582311408398, "grad_norm": 2.0199842460056425, "learning_rate": 1.7230625474524532e-05, "loss": 0.9646, "step": 10715 }, { "epoch": 0.796432552954292, "grad_norm": 2.0826001486802066, "learning_rate": 1.723007118925058e-05, "loss": 0.8264, "step": 10716 }, { "epoch": 0.7965068747677443, "grad_norm": 2.1376804588689633, "learning_rate": 1.7229516857429804e-05, "loss": 0.7589, "step": 10717 }, { "epoch": 0.7965811965811965, "grad_norm": 2.115386740324393, "learning_rate": 1.7228962479065775e-05, "loss": 0.8017, "step": 10718 }, { "epoch": 0.7966555183946489, "grad_norm": 1.7610313004744995, "learning_rate": 1.722840805416206e-05, "loss": 1.0008, "step": 10719 }, { "epoch": 0.7967298402081011, "grad_norm": 2.546199844335361, "learning_rate": 1.7227853582722228e-05, "loss": 1.0151, "step": 10720 }, { "epoch": 0.7968041620215534, "grad_norm": 1.6104003247988492, "learning_rate": 1.7227299064749852e-05, "loss": 0.6383, "step": 10721 }, { "epoch": 0.7968784838350056, "grad_norm": 1.605431020013958, "learning_rate": 1.72267445002485e-05, "loss": 0.9239, "step": 10722 }, { "epoch": 0.7969528056484578, "grad_norm": 3.329976573335058, "learning_rate": 1.722618988922174e-05, "loss": 0.8013, "step": 10723 }, { "epoch": 0.7970271274619101, "grad_norm": 2.0927357334260015, "learning_rate": 1.7225635231673142e-05, "loss": 1.0409, "step": 10724 }, { "epoch": 0.7971014492753623, "grad_norm": 1.858441068577465, "learning_rate": 1.7225080527606283e-05, "loss": 0.7438, "step": 10725 }, { "epoch": 0.7971757710888145, "grad_norm": 1.7892245831841043, "learning_rate": 1.722452577702473e-05, "loss": 0.6776, "step": 10726 }, { "epoch": 0.7972500929022668, "grad_norm": 1.7120120908611702, "learning_rate": 1.7223970979932055e-05, "loss": 0.5975, "step": 10727 }, { "epoch": 0.797324414715719, "grad_norm": 1.96510728819169, "learning_rate": 1.722341613633183e-05, "loss": 0.8149, "step": 10728 }, { "epoch": 0.7973987365291713, "grad_norm": 1.9972977355090298, "learning_rate": 1.7222861246227623e-05, "loss": 0.742, "step": 10729 }, { "epoch": 0.7974730583426236, "grad_norm": 2.0447171366223, "learning_rate": 1.7222306309623013e-05, "loss": 0.9331, "step": 10730 }, { "epoch": 0.7975473801560758, "grad_norm": 1.8297080466552926, "learning_rate": 1.722175132652157e-05, "loss": 1.0512, "step": 10731 }, { "epoch": 0.7976217019695281, "grad_norm": 1.8981392965701605, "learning_rate": 1.7221196296926864e-05, "loss": 0.8169, "step": 10732 }, { "epoch": 0.7976960237829803, "grad_norm": 2.2099614000506276, "learning_rate": 1.7220641220842473e-05, "loss": 1.11, "step": 10733 }, { "epoch": 0.7977703455964326, "grad_norm": 2.1536813996822715, "learning_rate": 1.722008609827197e-05, "loss": 0.9705, "step": 10734 }, { "epoch": 0.7978446674098848, "grad_norm": 1.7342525854825936, "learning_rate": 1.7219530929218927e-05, "loss": 0.7227, "step": 10735 }, { "epoch": 0.797918989223337, "grad_norm": 2.7862268714395024, "learning_rate": 1.7218975713686916e-05, "loss": 1.0159, "step": 10736 }, { "epoch": 0.7979933110367893, "grad_norm": 1.7571941674793277, "learning_rate": 1.7218420451679515e-05, "loss": 0.924, "step": 10737 }, { "epoch": 0.7980676328502415, "grad_norm": 1.8497754372112953, "learning_rate": 1.7217865143200296e-05, "loss": 0.9257, "step": 10738 }, { "epoch": 0.7981419546636938, "grad_norm": 1.92801418786807, "learning_rate": 1.721730978825284e-05, "loss": 0.7836, "step": 10739 }, { "epoch": 0.798216276477146, "grad_norm": 1.553845230604717, "learning_rate": 1.7216754386840714e-05, "loss": 0.7579, "step": 10740 }, { "epoch": 0.7982905982905983, "grad_norm": 1.6979420836597658, "learning_rate": 1.7216198938967502e-05, "loss": 0.9392, "step": 10741 }, { "epoch": 0.7983649201040506, "grad_norm": 1.7155001392574154, "learning_rate": 1.7215643444636772e-05, "loss": 0.7832, "step": 10742 }, { "epoch": 0.7984392419175028, "grad_norm": 1.86195096427288, "learning_rate": 1.7215087903852102e-05, "loss": 0.9822, "step": 10743 }, { "epoch": 0.7985135637309551, "grad_norm": 1.600560020289003, "learning_rate": 1.7214532316617077e-05, "loss": 0.6835, "step": 10744 }, { "epoch": 0.7985878855444073, "grad_norm": 2.721786667491525, "learning_rate": 1.721397668293526e-05, "loss": 1.1562, "step": 10745 }, { "epoch": 0.7986622073578595, "grad_norm": 2.0234922213547275, "learning_rate": 1.721342100281024e-05, "loss": 0.9222, "step": 10746 }, { "epoch": 0.7987365291713118, "grad_norm": 3.1142902035775917, "learning_rate": 1.721286527624559e-05, "loss": 0.7545, "step": 10747 }, { "epoch": 0.798810850984764, "grad_norm": 3.5530258703175135, "learning_rate": 1.721230950324489e-05, "loss": 0.7465, "step": 10748 }, { "epoch": 0.7988851727982162, "grad_norm": 1.9375836800286559, "learning_rate": 1.7211753683811706e-05, "loss": 0.9348, "step": 10749 }, { "epoch": 0.7989594946116685, "grad_norm": 1.6205108982787257, "learning_rate": 1.7211197817949635e-05, "loss": 0.876, "step": 10750 }, { "epoch": 0.7990338164251207, "grad_norm": 2.058295486991422, "learning_rate": 1.7210641905662243e-05, "loss": 0.6392, "step": 10751 }, { "epoch": 0.7991081382385731, "grad_norm": 2.442284039660091, "learning_rate": 1.7210085946953113e-05, "loss": 0.9864, "step": 10752 }, { "epoch": 0.7991824600520253, "grad_norm": 1.959844074153725, "learning_rate": 1.7209529941825825e-05, "loss": 0.7721, "step": 10753 }, { "epoch": 0.7992567818654776, "grad_norm": 1.8174902888610878, "learning_rate": 1.7208973890283957e-05, "loss": 0.8481, "step": 10754 }, { "epoch": 0.7993311036789298, "grad_norm": 2.0022406000073456, "learning_rate": 1.7208417792331088e-05, "loss": 0.873, "step": 10755 }, { "epoch": 0.799405425492382, "grad_norm": 2.102012109662064, "learning_rate": 1.72078616479708e-05, "loss": 0.7507, "step": 10756 }, { "epoch": 0.7994797473058343, "grad_norm": 1.8156040489445968, "learning_rate": 1.720730545720667e-05, "loss": 0.8792, "step": 10757 }, { "epoch": 0.7995540691192865, "grad_norm": 2.088165541334774, "learning_rate": 1.7206749220042283e-05, "loss": 0.8742, "step": 10758 }, { "epoch": 0.7996283909327387, "grad_norm": 1.9621421807428023, "learning_rate": 1.7206192936481222e-05, "loss": 0.8376, "step": 10759 }, { "epoch": 0.799702712746191, "grad_norm": 2.4660169795617057, "learning_rate": 1.7205636606527065e-05, "loss": 0.8727, "step": 10760 }, { "epoch": 0.7997770345596432, "grad_norm": 2.1456431997802126, "learning_rate": 1.720508023018339e-05, "loss": 0.8653, "step": 10761 }, { "epoch": 0.7998513563730955, "grad_norm": 1.9056520990786996, "learning_rate": 1.720452380745378e-05, "loss": 1.0617, "step": 10762 }, { "epoch": 0.7999256781865478, "grad_norm": 2.1842551486297097, "learning_rate": 1.7203967338341825e-05, "loss": 0.9913, "step": 10763 }, { "epoch": 0.8, "grad_norm": 1.912868268174689, "learning_rate": 1.7203410822851098e-05, "loss": 0.6725, "step": 10764 }, { "epoch": 0.8000743218134523, "grad_norm": 2.0265923070302363, "learning_rate": 1.7202854260985188e-05, "loss": 0.8985, "step": 10765 }, { "epoch": 0.8001486436269045, "grad_norm": 2.0154215717847634, "learning_rate": 1.7202297652747673e-05, "loss": 0.8638, "step": 10766 }, { "epoch": 0.8002229654403568, "grad_norm": 1.5204719184320958, "learning_rate": 1.7201740998142146e-05, "loss": 0.79, "step": 10767 }, { "epoch": 0.800297287253809, "grad_norm": 1.799605537564201, "learning_rate": 1.720118429717218e-05, "loss": 0.7187, "step": 10768 }, { "epoch": 0.8003716090672612, "grad_norm": 1.8306859620271116, "learning_rate": 1.720062754984136e-05, "loss": 0.9521, "step": 10769 }, { "epoch": 0.8004459308807135, "grad_norm": 2.470942460449551, "learning_rate": 1.7200070756153277e-05, "loss": 0.9286, "step": 10770 }, { "epoch": 0.8005202526941657, "grad_norm": 2.0869856849097994, "learning_rate": 1.7199513916111512e-05, "loss": 0.9454, "step": 10771 }, { "epoch": 0.800594574507618, "grad_norm": 1.8713985451457331, "learning_rate": 1.719895702971965e-05, "loss": 0.8854, "step": 10772 }, { "epoch": 0.8006688963210702, "grad_norm": 2.3826425944780256, "learning_rate": 1.7198400096981276e-05, "loss": 0.8938, "step": 10773 }, { "epoch": 0.8007432181345224, "grad_norm": 1.8423333956267596, "learning_rate": 1.7197843117899974e-05, "loss": 0.8273, "step": 10774 }, { "epoch": 0.8008175399479748, "grad_norm": 1.7224285400187043, "learning_rate": 1.7197286092479332e-05, "loss": 0.89, "step": 10775 }, { "epoch": 0.800891861761427, "grad_norm": 2.132728647755024, "learning_rate": 1.719672902072294e-05, "loss": 0.7216, "step": 10776 }, { "epoch": 0.8009661835748793, "grad_norm": 1.6799286249381329, "learning_rate": 1.7196171902634376e-05, "loss": 0.7518, "step": 10777 }, { "epoch": 0.8010405053883315, "grad_norm": 1.8235927043439955, "learning_rate": 1.719561473821723e-05, "loss": 0.7699, "step": 10778 }, { "epoch": 0.8011148272017837, "grad_norm": 1.6880839123876639, "learning_rate": 1.719505752747509e-05, "loss": 0.8417, "step": 10779 }, { "epoch": 0.801189149015236, "grad_norm": 2.1078757657293683, "learning_rate": 1.7194500270411544e-05, "loss": 1.0091, "step": 10780 }, { "epoch": 0.8012634708286882, "grad_norm": 1.9033264931144926, "learning_rate": 1.7193942967030177e-05, "loss": 0.9247, "step": 10781 }, { "epoch": 0.8013377926421404, "grad_norm": 2.3078558257688098, "learning_rate": 1.7193385617334582e-05, "loss": 0.8441, "step": 10782 }, { "epoch": 0.8014121144555927, "grad_norm": 1.8244674212007672, "learning_rate": 1.7192828221328343e-05, "loss": 0.7106, "step": 10783 }, { "epoch": 0.8014864362690449, "grad_norm": 1.679896578198416, "learning_rate": 1.7192270779015046e-05, "loss": 0.5773, "step": 10784 }, { "epoch": 0.8015607580824972, "grad_norm": 1.6533216895213814, "learning_rate": 1.7191713290398282e-05, "loss": 0.6595, "step": 10785 }, { "epoch": 0.8016350798959495, "grad_norm": 1.82937752684412, "learning_rate": 1.7191155755481644e-05, "loss": 0.7896, "step": 10786 }, { "epoch": 0.8017094017094017, "grad_norm": 1.9163593718940453, "learning_rate": 1.7190598174268717e-05, "loss": 0.8023, "step": 10787 }, { "epoch": 0.801783723522854, "grad_norm": 2.0349658278645624, "learning_rate": 1.7190040546763094e-05, "loss": 0.92, "step": 10788 }, { "epoch": 0.8018580453363062, "grad_norm": 1.8775520574113278, "learning_rate": 1.718948287296836e-05, "loss": 0.8821, "step": 10789 }, { "epoch": 0.8019323671497585, "grad_norm": 1.6107225083949892, "learning_rate": 1.718892515288811e-05, "loss": 0.775, "step": 10790 }, { "epoch": 0.8020066889632107, "grad_norm": 1.7732041100869573, "learning_rate": 1.7188367386525934e-05, "loss": 0.7474, "step": 10791 }, { "epoch": 0.8020810107766629, "grad_norm": 1.767414522825799, "learning_rate": 1.718780957388542e-05, "loss": 0.9452, "step": 10792 }, { "epoch": 0.8021553325901152, "grad_norm": 1.8609924997397471, "learning_rate": 1.7187251714970163e-05, "loss": 0.7483, "step": 10793 }, { "epoch": 0.8022296544035674, "grad_norm": 3.732701560127004, "learning_rate": 1.718669380978375e-05, "loss": 0.9008, "step": 10794 }, { "epoch": 0.8023039762170197, "grad_norm": 1.7841192223130966, "learning_rate": 1.718613585832978e-05, "loss": 0.6716, "step": 10795 }, { "epoch": 0.8023782980304719, "grad_norm": 2.088297371901975, "learning_rate": 1.7185577860611835e-05, "loss": 0.7394, "step": 10796 }, { "epoch": 0.8024526198439242, "grad_norm": 1.686155115100922, "learning_rate": 1.7185019816633512e-05, "loss": 0.8813, "step": 10797 }, { "epoch": 0.8025269416573765, "grad_norm": 1.821512453583969, "learning_rate": 1.7184461726398408e-05, "loss": 0.7508, "step": 10798 }, { "epoch": 0.8026012634708287, "grad_norm": 2.748901783720247, "learning_rate": 1.718390358991011e-05, "loss": 0.8875, "step": 10799 }, { "epoch": 0.802675585284281, "grad_norm": 1.808015486143117, "learning_rate": 1.7183345407172212e-05, "loss": 0.8596, "step": 10800 }, { "epoch": 0.8027499070977332, "grad_norm": 1.9203793633701087, "learning_rate": 1.718278717818831e-05, "loss": 0.7565, "step": 10801 }, { "epoch": 0.8028242289111854, "grad_norm": 2.562963429146493, "learning_rate": 1.7182228902962e-05, "loss": 0.9361, "step": 10802 }, { "epoch": 0.8028985507246377, "grad_norm": 3.6704296029020282, "learning_rate": 1.718167058149687e-05, "loss": 0.7105, "step": 10803 }, { "epoch": 0.8029728725380899, "grad_norm": 1.932231589741551, "learning_rate": 1.7181112213796517e-05, "loss": 0.7964, "step": 10804 }, { "epoch": 0.8030471943515421, "grad_norm": 2.730652326230779, "learning_rate": 1.718055379986454e-05, "loss": 0.8658, "step": 10805 }, { "epoch": 0.8031215161649944, "grad_norm": 2.2455786782057054, "learning_rate": 1.7179995339704526e-05, "loss": 0.9974, "step": 10806 }, { "epoch": 0.8031958379784466, "grad_norm": 2.171817556736617, "learning_rate": 1.7179436833320075e-05, "loss": 0.915, "step": 10807 }, { "epoch": 0.803270159791899, "grad_norm": 2.1092326520044593, "learning_rate": 1.7178878280714785e-05, "loss": 0.8611, "step": 10808 }, { "epoch": 0.8033444816053512, "grad_norm": 4.566550729877806, "learning_rate": 1.7178319681892246e-05, "loss": 0.7603, "step": 10809 }, { "epoch": 0.8034188034188035, "grad_norm": 2.212660054777628, "learning_rate": 1.7177761036856057e-05, "loss": 0.6942, "step": 10810 }, { "epoch": 0.8034931252322557, "grad_norm": 1.8377117257693178, "learning_rate": 1.717720234560982e-05, "loss": 0.8842, "step": 10811 }, { "epoch": 0.8035674470457079, "grad_norm": 1.683640409144636, "learning_rate": 1.7176643608157122e-05, "loss": 0.6995, "step": 10812 }, { "epoch": 0.8036417688591602, "grad_norm": 2.3973812116562176, "learning_rate": 1.7176084824501565e-05, "loss": 0.8217, "step": 10813 }, { "epoch": 0.8037160906726124, "grad_norm": 2.0086277928543885, "learning_rate": 1.7175525994646748e-05, "loss": 0.8525, "step": 10814 }, { "epoch": 0.8037904124860646, "grad_norm": 1.9735501752418059, "learning_rate": 1.7174967118596265e-05, "loss": 0.8159, "step": 10815 }, { "epoch": 0.8038647342995169, "grad_norm": 2.505642366321037, "learning_rate": 1.7174408196353717e-05, "loss": 0.871, "step": 10816 }, { "epoch": 0.8039390561129691, "grad_norm": 1.76431548556319, "learning_rate": 1.7173849227922704e-05, "loss": 0.7178, "step": 10817 }, { "epoch": 0.8040133779264214, "grad_norm": 2.1956896875381253, "learning_rate": 1.7173290213306816e-05, "loss": 0.676, "step": 10818 }, { "epoch": 0.8040876997398737, "grad_norm": 2.369309480870791, "learning_rate": 1.717273115250966e-05, "loss": 0.8816, "step": 10819 }, { "epoch": 0.804162021553326, "grad_norm": 2.199995165182051, "learning_rate": 1.7172172045534836e-05, "loss": 1.0619, "step": 10820 }, { "epoch": 0.8042363433667782, "grad_norm": 1.8539658522893985, "learning_rate": 1.717161289238594e-05, "loss": 0.8719, "step": 10821 }, { "epoch": 0.8043106651802304, "grad_norm": 1.7473390240851823, "learning_rate": 1.717105369306657e-05, "loss": 0.8134, "step": 10822 }, { "epoch": 0.8043849869936827, "grad_norm": 2.0680686084530735, "learning_rate": 1.7170494447580332e-05, "loss": 0.8768, "step": 10823 }, { "epoch": 0.8044593088071349, "grad_norm": 2.22579772794519, "learning_rate": 1.7169935155930816e-05, "loss": 1.0247, "step": 10824 }, { "epoch": 0.8045336306205871, "grad_norm": 1.6444001032116284, "learning_rate": 1.7169375818121636e-05, "loss": 0.8188, "step": 10825 }, { "epoch": 0.8046079524340394, "grad_norm": 2.0450491025955606, "learning_rate": 1.7168816434156384e-05, "loss": 0.7973, "step": 10826 }, { "epoch": 0.8046822742474916, "grad_norm": 2.0954870549573283, "learning_rate": 1.7168257004038665e-05, "loss": 0.8706, "step": 10827 }, { "epoch": 0.8047565960609439, "grad_norm": 1.7644451086918946, "learning_rate": 1.7167697527772077e-05, "loss": 0.6844, "step": 10828 }, { "epoch": 0.8048309178743961, "grad_norm": 1.8407373101838056, "learning_rate": 1.7167138005360225e-05, "loss": 0.9058, "step": 10829 }, { "epoch": 0.8049052396878483, "grad_norm": 1.6989021546290592, "learning_rate": 1.716657843680671e-05, "loss": 0.7832, "step": 10830 }, { "epoch": 0.8049795615013007, "grad_norm": 1.8692061336902563, "learning_rate": 1.7166018822115136e-05, "loss": 0.8066, "step": 10831 }, { "epoch": 0.8050538833147529, "grad_norm": 2.2772280486485164, "learning_rate": 1.7165459161289105e-05, "loss": 0.8136, "step": 10832 }, { "epoch": 0.8051282051282052, "grad_norm": 2.186920502862317, "learning_rate": 1.716489945433222e-05, "loss": 0.8761, "step": 10833 }, { "epoch": 0.8052025269416574, "grad_norm": 1.9737543197881176, "learning_rate": 1.716433970124808e-05, "loss": 1.1798, "step": 10834 }, { "epoch": 0.8052768487551096, "grad_norm": 2.069406469633996, "learning_rate": 1.71637799020403e-05, "loss": 0.9165, "step": 10835 }, { "epoch": 0.8053511705685619, "grad_norm": 2.369812002737709, "learning_rate": 1.716322005671247e-05, "loss": 0.7312, "step": 10836 }, { "epoch": 0.8054254923820141, "grad_norm": 1.7101380303791467, "learning_rate": 1.7162660165268205e-05, "loss": 0.873, "step": 10837 }, { "epoch": 0.8054998141954663, "grad_norm": 2.240627402156909, "learning_rate": 1.7162100227711106e-05, "loss": 0.857, "step": 10838 }, { "epoch": 0.8055741360089186, "grad_norm": 1.6363373344942456, "learning_rate": 1.7161540244044772e-05, "loss": 0.8442, "step": 10839 }, { "epoch": 0.8056484578223708, "grad_norm": 2.1945278440010902, "learning_rate": 1.716098021427282e-05, "loss": 0.9564, "step": 10840 }, { "epoch": 0.8057227796358231, "grad_norm": 1.693000879425587, "learning_rate": 1.7160420138398845e-05, "loss": 0.7249, "step": 10841 }, { "epoch": 0.8057971014492754, "grad_norm": 2.0182173607403096, "learning_rate": 1.7159860016426458e-05, "loss": 0.8098, "step": 10842 }, { "epoch": 0.8058714232627276, "grad_norm": 1.9659319058665334, "learning_rate": 1.7159299848359263e-05, "loss": 0.5966, "step": 10843 }, { "epoch": 0.8059457450761799, "grad_norm": 1.9630901941542587, "learning_rate": 1.7158739634200868e-05, "loss": 0.9733, "step": 10844 }, { "epoch": 0.8060200668896321, "grad_norm": 2.099913563863609, "learning_rate": 1.7158179373954875e-05, "loss": 0.8272, "step": 10845 }, { "epoch": 0.8060943887030844, "grad_norm": 2.0291671446481834, "learning_rate": 1.7157619067624897e-05, "loss": 0.8754, "step": 10846 }, { "epoch": 0.8061687105165366, "grad_norm": 1.6230494593080136, "learning_rate": 1.715705871521454e-05, "loss": 0.8594, "step": 10847 }, { "epoch": 0.8062430323299888, "grad_norm": 2.36498907746341, "learning_rate": 1.715649831672741e-05, "loss": 1.1549, "step": 10848 }, { "epoch": 0.8063173541434411, "grad_norm": 1.6832846899114235, "learning_rate": 1.7155937872167116e-05, "loss": 0.8991, "step": 10849 }, { "epoch": 0.8063916759568933, "grad_norm": 1.623619606968565, "learning_rate": 1.7155377381537262e-05, "loss": 0.7059, "step": 10850 }, { "epoch": 0.8064659977703456, "grad_norm": 1.9722429451100842, "learning_rate": 1.7154816844841464e-05, "loss": 0.9658, "step": 10851 }, { "epoch": 0.8065403195837978, "grad_norm": 1.6726710763992563, "learning_rate": 1.7154256262083323e-05, "loss": 0.9234, "step": 10852 }, { "epoch": 0.8066146413972501, "grad_norm": 1.8979991075428309, "learning_rate": 1.7153695633266454e-05, "loss": 0.9782, "step": 10853 }, { "epoch": 0.8066889632107024, "grad_norm": 2.115748050400608, "learning_rate": 1.7153134958394463e-05, "loss": 0.8678, "step": 10854 }, { "epoch": 0.8067632850241546, "grad_norm": 1.7720257098382368, "learning_rate": 1.715257423747096e-05, "loss": 0.8617, "step": 10855 }, { "epoch": 0.8068376068376069, "grad_norm": 1.9195352899822733, "learning_rate": 1.7152013470499555e-05, "loss": 0.8202, "step": 10856 }, { "epoch": 0.8069119286510591, "grad_norm": 2.494314425597582, "learning_rate": 1.715145265748386e-05, "loss": 0.958, "step": 10857 }, { "epoch": 0.8069862504645113, "grad_norm": 1.90425914403187, "learning_rate": 1.7150891798427477e-05, "loss": 0.9953, "step": 10858 }, { "epoch": 0.8070605722779636, "grad_norm": 1.6707567241937147, "learning_rate": 1.715033089333403e-05, "loss": 0.9165, "step": 10859 }, { "epoch": 0.8071348940914158, "grad_norm": 1.8537159820133016, "learning_rate": 1.7149769942207123e-05, "loss": 0.7134, "step": 10860 }, { "epoch": 0.807209215904868, "grad_norm": 2.228921805783793, "learning_rate": 1.7149208945050367e-05, "loss": 0.8313, "step": 10861 }, { "epoch": 0.8072835377183203, "grad_norm": 1.6687464460653212, "learning_rate": 1.7148647901867376e-05, "loss": 0.6422, "step": 10862 }, { "epoch": 0.8073578595317725, "grad_norm": 1.9982347051478042, "learning_rate": 1.714808681266176e-05, "loss": 0.7172, "step": 10863 }, { "epoch": 0.8074321813452249, "grad_norm": 1.9901232972911675, "learning_rate": 1.714752567743713e-05, "loss": 0.9352, "step": 10864 }, { "epoch": 0.8075065031586771, "grad_norm": 1.88524141037145, "learning_rate": 1.7146964496197107e-05, "loss": 0.6515, "step": 10865 }, { "epoch": 0.8075808249721294, "grad_norm": 1.7035032485887198, "learning_rate": 1.714640326894529e-05, "loss": 0.9077, "step": 10866 }, { "epoch": 0.8076551467855816, "grad_norm": 1.8232842654750827, "learning_rate": 1.71458419956853e-05, "loss": 0.7086, "step": 10867 }, { "epoch": 0.8077294685990338, "grad_norm": 3.8630507338693074, "learning_rate": 1.7145280676420754e-05, "loss": 0.8791, "step": 10868 }, { "epoch": 0.8078037904124861, "grad_norm": 1.6601921592718512, "learning_rate": 1.714471931115526e-05, "loss": 0.7512, "step": 10869 }, { "epoch": 0.8078781122259383, "grad_norm": 1.7333678507869976, "learning_rate": 1.714415789989243e-05, "loss": 0.9177, "step": 10870 }, { "epoch": 0.8079524340393905, "grad_norm": 1.6874766388441989, "learning_rate": 1.714359644263589e-05, "loss": 0.7751, "step": 10871 }, { "epoch": 0.8080267558528428, "grad_norm": 2.1597231326367585, "learning_rate": 1.7143034939389244e-05, "loss": 1.0807, "step": 10872 }, { "epoch": 0.808101077666295, "grad_norm": 6.589785070153977, "learning_rate": 1.7142473390156108e-05, "loss": 0.786, "step": 10873 }, { "epoch": 0.8081753994797473, "grad_norm": 2.2160675982947424, "learning_rate": 1.7141911794940102e-05, "loss": 1.1202, "step": 10874 }, { "epoch": 0.8082497212931996, "grad_norm": 1.8982505006151253, "learning_rate": 1.7141350153744833e-05, "loss": 0.8996, "step": 10875 }, { "epoch": 0.8083240431066518, "grad_norm": 1.5497793933275923, "learning_rate": 1.7140788466573924e-05, "loss": 0.8338, "step": 10876 }, { "epoch": 0.8083983649201041, "grad_norm": 2.146940058933402, "learning_rate": 1.7140226733430993e-05, "loss": 0.8725, "step": 10877 }, { "epoch": 0.8084726867335563, "grad_norm": 1.9897391765066141, "learning_rate": 1.713966495431965e-05, "loss": 0.8268, "step": 10878 }, { "epoch": 0.8085470085470086, "grad_norm": 1.9138127235619853, "learning_rate": 1.7139103129243514e-05, "loss": 0.7838, "step": 10879 }, { "epoch": 0.8086213303604608, "grad_norm": 2.2028717044559203, "learning_rate": 1.7138541258206204e-05, "loss": 1.0045, "step": 10880 }, { "epoch": 0.808695652173913, "grad_norm": 1.8419079014011601, "learning_rate": 1.7137979341211336e-05, "loss": 0.9031, "step": 10881 }, { "epoch": 0.8087699739873653, "grad_norm": 5.286335568107413, "learning_rate": 1.7137417378262525e-05, "loss": 1.0493, "step": 10882 }, { "epoch": 0.8088442958008175, "grad_norm": 2.2281555421728356, "learning_rate": 1.7136855369363393e-05, "loss": 0.9729, "step": 10883 }, { "epoch": 0.8089186176142698, "grad_norm": 2.178778880170538, "learning_rate": 1.7136293314517555e-05, "loss": 0.9452, "step": 10884 }, { "epoch": 0.808992939427722, "grad_norm": 1.8490198540543965, "learning_rate": 1.7135731213728633e-05, "loss": 0.8205, "step": 10885 }, { "epoch": 0.8090672612411742, "grad_norm": 1.8942528436023995, "learning_rate": 1.7135169067000245e-05, "loss": 0.9426, "step": 10886 }, { "epoch": 0.8091415830546266, "grad_norm": 2.0533446504134254, "learning_rate": 1.7134606874336004e-05, "loss": 0.8588, "step": 10887 }, { "epoch": 0.8092159048680788, "grad_norm": 2.3268771982192162, "learning_rate": 1.7134044635739538e-05, "loss": 0.908, "step": 10888 }, { "epoch": 0.809290226681531, "grad_norm": 1.8193456545559543, "learning_rate": 1.7133482351214458e-05, "loss": 0.7311, "step": 10889 }, { "epoch": 0.8093645484949833, "grad_norm": 1.9259058849248702, "learning_rate": 1.713292002076439e-05, "loss": 0.9013, "step": 10890 }, { "epoch": 0.8094388703084355, "grad_norm": 1.884399097739959, "learning_rate": 1.7132357644392957e-05, "loss": 0.7663, "step": 10891 }, { "epoch": 0.8095131921218878, "grad_norm": 2.4641890073426227, "learning_rate": 1.7131795222103776e-05, "loss": 0.7504, "step": 10892 }, { "epoch": 0.80958751393534, "grad_norm": 1.8255446911337843, "learning_rate": 1.7131232753900466e-05, "loss": 0.9098, "step": 10893 }, { "epoch": 0.8096618357487922, "grad_norm": 1.7810443117228472, "learning_rate": 1.7130670239786645e-05, "loss": 0.8117, "step": 10894 }, { "epoch": 0.8097361575622445, "grad_norm": 1.6653263716195248, "learning_rate": 1.7130107679765946e-05, "loss": 0.9559, "step": 10895 }, { "epoch": 0.8098104793756967, "grad_norm": 2.309404352508975, "learning_rate": 1.7129545073841977e-05, "loss": 0.7408, "step": 10896 }, { "epoch": 0.809884801189149, "grad_norm": 1.9764276317959906, "learning_rate": 1.7128982422018373e-05, "loss": 0.8056, "step": 10897 }, { "epoch": 0.8099591230026013, "grad_norm": 1.8767108264156378, "learning_rate": 1.7128419724298746e-05, "loss": 0.8507, "step": 10898 }, { "epoch": 0.8100334448160535, "grad_norm": 2.3668971168311375, "learning_rate": 1.7127856980686722e-05, "loss": 0.8093, "step": 10899 }, { "epoch": 0.8101077666295058, "grad_norm": 1.9645166610385538, "learning_rate": 1.712729419118593e-05, "loss": 0.8911, "step": 10900 }, { "epoch": 0.810182088442958, "grad_norm": 2.170877785799553, "learning_rate": 1.7126731355799982e-05, "loss": 0.8603, "step": 10901 }, { "epoch": 0.8102564102564103, "grad_norm": 1.59348030346207, "learning_rate": 1.712616847453251e-05, "loss": 0.768, "step": 10902 }, { "epoch": 0.8103307320698625, "grad_norm": 1.7097571226726225, "learning_rate": 1.7125605547387135e-05, "loss": 0.9223, "step": 10903 }, { "epoch": 0.8104050538833147, "grad_norm": 2.2732392495330025, "learning_rate": 1.712504257436748e-05, "loss": 0.9017, "step": 10904 }, { "epoch": 0.810479375696767, "grad_norm": 1.8544994252729528, "learning_rate": 1.712447955547717e-05, "loss": 0.9275, "step": 10905 }, { "epoch": 0.8105536975102192, "grad_norm": 1.7639479308837331, "learning_rate": 1.7123916490719835e-05, "loss": 0.7487, "step": 10906 }, { "epoch": 0.8106280193236715, "grad_norm": 2.1060960489572564, "learning_rate": 1.712335338009909e-05, "loss": 0.6167, "step": 10907 }, { "epoch": 0.8107023411371237, "grad_norm": 2.0381906100393965, "learning_rate": 1.712279022361857e-05, "loss": 0.8091, "step": 10908 }, { "epoch": 0.810776662950576, "grad_norm": 1.6120622458298834, "learning_rate": 1.7122227021281893e-05, "loss": 0.882, "step": 10909 }, { "epoch": 0.8108509847640283, "grad_norm": 1.9653897125678759, "learning_rate": 1.7121663773092688e-05, "loss": 0.9282, "step": 10910 }, { "epoch": 0.8109253065774805, "grad_norm": 9.4213814114656, "learning_rate": 1.7121100479054582e-05, "loss": 0.8907, "step": 10911 }, { "epoch": 0.8109996283909328, "grad_norm": 1.736153582899248, "learning_rate": 1.71205371391712e-05, "loss": 0.7367, "step": 10912 }, { "epoch": 0.811073950204385, "grad_norm": 2.4875657124561243, "learning_rate": 1.711997375344617e-05, "loss": 1.0978, "step": 10913 }, { "epoch": 0.8111482720178372, "grad_norm": 1.7549636885125415, "learning_rate": 1.711941032188312e-05, "loss": 0.6555, "step": 10914 }, { "epoch": 0.8112225938312895, "grad_norm": 1.656632435610753, "learning_rate": 1.7118846844485674e-05, "loss": 0.8811, "step": 10915 }, { "epoch": 0.8112969156447417, "grad_norm": 2.004199144115483, "learning_rate": 1.7118283321257457e-05, "loss": 1.0191, "step": 10916 }, { "epoch": 0.811371237458194, "grad_norm": 1.950554623241075, "learning_rate": 1.711771975220211e-05, "loss": 0.8965, "step": 10917 }, { "epoch": 0.8114455592716462, "grad_norm": 2.8075612716402243, "learning_rate": 1.7117156137323244e-05, "loss": 0.7963, "step": 10918 }, { "epoch": 0.8115198810850984, "grad_norm": 2.5763079824834803, "learning_rate": 1.7116592476624504e-05, "loss": 1.0496, "step": 10919 }, { "epoch": 0.8115942028985508, "grad_norm": 1.6678536959777692, "learning_rate": 1.7116028770109508e-05, "loss": 0.8714, "step": 10920 }, { "epoch": 0.811668524712003, "grad_norm": 2.369790539663249, "learning_rate": 1.7115465017781885e-05, "loss": 1.027, "step": 10921 }, { "epoch": 0.8117428465254553, "grad_norm": 2.1262142120900487, "learning_rate": 1.711490121964527e-05, "loss": 0.9962, "step": 10922 }, { "epoch": 0.8118171683389075, "grad_norm": 2.059192995762954, "learning_rate": 1.711433737570329e-05, "loss": 0.7617, "step": 10923 }, { "epoch": 0.8118914901523597, "grad_norm": 1.9278587535838958, "learning_rate": 1.711377348595957e-05, "loss": 0.9402, "step": 10924 }, { "epoch": 0.811965811965812, "grad_norm": 1.7130529792529043, "learning_rate": 1.7113209550417756e-05, "loss": 0.8263, "step": 10925 }, { "epoch": 0.8120401337792642, "grad_norm": 1.8507847860685263, "learning_rate": 1.711264556908146e-05, "loss": 0.9971, "step": 10926 }, { "epoch": 0.8121144555927164, "grad_norm": 2.1043833673044343, "learning_rate": 1.7112081541954322e-05, "loss": 0.8382, "step": 10927 }, { "epoch": 0.8121887774061687, "grad_norm": 1.8605243287237359, "learning_rate": 1.7111517469039975e-05, "loss": 0.8799, "step": 10928 }, { "epoch": 0.8122630992196209, "grad_norm": 1.7752426137843094, "learning_rate": 1.7110953350342047e-05, "loss": 0.7549, "step": 10929 }, { "epoch": 0.8123374210330732, "grad_norm": 2.223519126473951, "learning_rate": 1.7110389185864166e-05, "loss": 0.9114, "step": 10930 }, { "epoch": 0.8124117428465255, "grad_norm": 2.9418647145703565, "learning_rate": 1.7109824975609973e-05, "loss": 0.7691, "step": 10931 }, { "epoch": 0.8124860646599777, "grad_norm": 1.931265057447373, "learning_rate": 1.7109260719583094e-05, "loss": 0.8982, "step": 10932 }, { "epoch": 0.81256038647343, "grad_norm": 1.7555580159687558, "learning_rate": 1.7108696417787163e-05, "loss": 0.9048, "step": 10933 }, { "epoch": 0.8126347082868822, "grad_norm": 2.254529072585656, "learning_rate": 1.7108132070225815e-05, "loss": 0.9409, "step": 10934 }, { "epoch": 0.8127090301003345, "grad_norm": 1.985918038403862, "learning_rate": 1.710756767690268e-05, "loss": 0.7504, "step": 10935 }, { "epoch": 0.8127833519137867, "grad_norm": 2.2003389756759644, "learning_rate": 1.7107003237821392e-05, "loss": 0.8685, "step": 10936 }, { "epoch": 0.8128576737272389, "grad_norm": 1.59941504380956, "learning_rate": 1.7106438752985588e-05, "loss": 0.6508, "step": 10937 }, { "epoch": 0.8129319955406912, "grad_norm": 2.7763706540111164, "learning_rate": 1.7105874222398896e-05, "loss": 0.6711, "step": 10938 }, { "epoch": 0.8130063173541434, "grad_norm": 1.7208257865295613, "learning_rate": 1.710530964606496e-05, "loss": 0.7038, "step": 10939 }, { "epoch": 0.8130806391675957, "grad_norm": 1.7980799913207555, "learning_rate": 1.7104745023987406e-05, "loss": 0.9099, "step": 10940 }, { "epoch": 0.8131549609810479, "grad_norm": 2.73717825545169, "learning_rate": 1.7104180356169873e-05, "loss": 0.9145, "step": 10941 }, { "epoch": 0.8132292827945002, "grad_norm": 1.6991171661849926, "learning_rate": 1.7103615642615997e-05, "loss": 0.9635, "step": 10942 }, { "epoch": 0.8133036046079525, "grad_norm": 1.8417375700555716, "learning_rate": 1.710305088332941e-05, "loss": 0.9133, "step": 10943 }, { "epoch": 0.8133779264214047, "grad_norm": 2.097739570875618, "learning_rate": 1.7102486078313752e-05, "loss": 0.9273, "step": 10944 }, { "epoch": 0.813452248234857, "grad_norm": 2.091522358913293, "learning_rate": 1.7101921227572654e-05, "loss": 0.85, "step": 10945 }, { "epoch": 0.8135265700483092, "grad_norm": 1.663392942068948, "learning_rate": 1.7101356331109758e-05, "loss": 0.7248, "step": 10946 }, { "epoch": 0.8136008918617614, "grad_norm": 2.6874869429683064, "learning_rate": 1.7100791388928696e-05, "loss": 0.8708, "step": 10947 }, { "epoch": 0.8136752136752137, "grad_norm": 2.237776462468451, "learning_rate": 1.710022640103311e-05, "loss": 0.9231, "step": 10948 }, { "epoch": 0.8137495354886659, "grad_norm": 1.7358969407964586, "learning_rate": 1.7099661367426634e-05, "loss": 0.8243, "step": 10949 }, { "epoch": 0.8138238573021181, "grad_norm": 1.847944107475104, "learning_rate": 1.709909628811291e-05, "loss": 0.8054, "step": 10950 }, { "epoch": 0.8138981791155704, "grad_norm": 2.1399300762253284, "learning_rate": 1.7098531163095568e-05, "loss": 0.6434, "step": 10951 }, { "epoch": 0.8139725009290226, "grad_norm": 1.64364150372804, "learning_rate": 1.709796599237825e-05, "loss": 0.744, "step": 10952 }, { "epoch": 0.8140468227424749, "grad_norm": 2.0072550859122695, "learning_rate": 1.7097400775964596e-05, "loss": 0.8817, "step": 10953 }, { "epoch": 0.8141211445559272, "grad_norm": 2.263048810316231, "learning_rate": 1.7096835513858248e-05, "loss": 0.9214, "step": 10954 }, { "epoch": 0.8141954663693795, "grad_norm": 1.862708062376881, "learning_rate": 1.7096270206062837e-05, "loss": 0.8484, "step": 10955 }, { "epoch": 0.8142697881828317, "grad_norm": 1.876095059818382, "learning_rate": 1.7095704852582006e-05, "loss": 0.8892, "step": 10956 }, { "epoch": 0.8143441099962839, "grad_norm": 1.7955035037925096, "learning_rate": 1.7095139453419398e-05, "loss": 0.7966, "step": 10957 }, { "epoch": 0.8144184318097362, "grad_norm": 1.7193923607412722, "learning_rate": 1.7094574008578648e-05, "loss": 0.6679, "step": 10958 }, { "epoch": 0.8144927536231884, "grad_norm": 1.9658839780931285, "learning_rate": 1.7094008518063404e-05, "loss": 0.9208, "step": 10959 }, { "epoch": 0.8145670754366406, "grad_norm": 1.6078607145448731, "learning_rate": 1.7093442981877295e-05, "loss": 0.8197, "step": 10960 }, { "epoch": 0.8146413972500929, "grad_norm": 1.8499903110983962, "learning_rate": 1.709287740002397e-05, "loss": 0.8598, "step": 10961 }, { "epoch": 0.8147157190635451, "grad_norm": 2.006128996640425, "learning_rate": 1.709231177250707e-05, "loss": 0.8887, "step": 10962 }, { "epoch": 0.8147900408769974, "grad_norm": 1.9102750520160214, "learning_rate": 1.7091746099330233e-05, "loss": 0.6933, "step": 10963 }, { "epoch": 0.8148643626904496, "grad_norm": 2.096125031069862, "learning_rate": 1.7091180380497102e-05, "loss": 1.0828, "step": 10964 }, { "epoch": 0.8149386845039019, "grad_norm": 2.140274413058429, "learning_rate": 1.709061461601132e-05, "loss": 1.0274, "step": 10965 }, { "epoch": 0.8150130063173542, "grad_norm": 2.2586533007449847, "learning_rate": 1.7090048805876533e-05, "loss": 1.0227, "step": 10966 }, { "epoch": 0.8150873281308064, "grad_norm": 1.6359163254427116, "learning_rate": 1.7089482950096376e-05, "loss": 0.7099, "step": 10967 }, { "epoch": 0.8151616499442587, "grad_norm": 2.389132235665146, "learning_rate": 1.7088917048674496e-05, "loss": 0.9614, "step": 10968 }, { "epoch": 0.8152359717577109, "grad_norm": 1.992973703446302, "learning_rate": 1.7088351101614534e-05, "loss": 1.0869, "step": 10969 }, { "epoch": 0.8153102935711631, "grad_norm": 1.8958635317602848, "learning_rate": 1.7087785108920138e-05, "loss": 0.827, "step": 10970 }, { "epoch": 0.8153846153846154, "grad_norm": 2.2408277764055113, "learning_rate": 1.7087219070594948e-05, "loss": 0.8361, "step": 10971 }, { "epoch": 0.8154589371980676, "grad_norm": 2.139576260822588, "learning_rate": 1.708665298664261e-05, "loss": 0.8358, "step": 10972 }, { "epoch": 0.8155332590115199, "grad_norm": 1.9928482640778564, "learning_rate": 1.7086086857066768e-05, "loss": 0.9113, "step": 10973 }, { "epoch": 0.8156075808249721, "grad_norm": 1.6315398690156866, "learning_rate": 1.708552068187107e-05, "loss": 0.5832, "step": 10974 }, { "epoch": 0.8156819026384243, "grad_norm": 2.19185454284479, "learning_rate": 1.708495446105915e-05, "loss": 0.7702, "step": 10975 }, { "epoch": 0.8157562244518767, "grad_norm": 2.4628976346857225, "learning_rate": 1.7084388194634665e-05, "loss": 1.0802, "step": 10976 }, { "epoch": 0.8158305462653289, "grad_norm": 2.27614789851662, "learning_rate": 1.7083821882601258e-05, "loss": 1.0553, "step": 10977 }, { "epoch": 0.8159048680787812, "grad_norm": 2.5006156291056936, "learning_rate": 1.708325552496257e-05, "loss": 0.9852, "step": 10978 }, { "epoch": 0.8159791898922334, "grad_norm": 1.5343928103221895, "learning_rate": 1.708268912172225e-05, "loss": 0.7991, "step": 10979 }, { "epoch": 0.8160535117056856, "grad_norm": 2.1384037787878927, "learning_rate": 1.7082122672883946e-05, "loss": 0.9736, "step": 10980 }, { "epoch": 0.8161278335191379, "grad_norm": 1.9933497628270083, "learning_rate": 1.7081556178451308e-05, "loss": 0.8885, "step": 10981 }, { "epoch": 0.8162021553325901, "grad_norm": 1.9369061822054803, "learning_rate": 1.7080989638427975e-05, "loss": 0.8801, "step": 10982 }, { "epoch": 0.8162764771460423, "grad_norm": 1.7409168164204494, "learning_rate": 1.7080423052817596e-05, "loss": 0.771, "step": 10983 }, { "epoch": 0.8163507989594946, "grad_norm": 1.4925729744293519, "learning_rate": 1.7079856421623824e-05, "loss": 0.5848, "step": 10984 }, { "epoch": 0.8164251207729468, "grad_norm": 1.734766801226447, "learning_rate": 1.7079289744850302e-05, "loss": 0.8574, "step": 10985 }, { "epoch": 0.8164994425863991, "grad_norm": 1.6862417468914217, "learning_rate": 1.707872302250068e-05, "loss": 0.7459, "step": 10986 }, { "epoch": 0.8165737643998514, "grad_norm": 1.68360637141958, "learning_rate": 1.707815625457861e-05, "loss": 0.7877, "step": 10987 }, { "epoch": 0.8166480862133036, "grad_norm": 2.351305618501562, "learning_rate": 1.7077589441087732e-05, "loss": 0.8218, "step": 10988 }, { "epoch": 0.8167224080267559, "grad_norm": 2.0560267612729537, "learning_rate": 1.7077022582031703e-05, "loss": 0.9156, "step": 10989 }, { "epoch": 0.8167967298402081, "grad_norm": 2.0092140481122955, "learning_rate": 1.707645567741417e-05, "loss": 0.7135, "step": 10990 }, { "epoch": 0.8168710516536604, "grad_norm": 1.888284854041001, "learning_rate": 1.707588872723878e-05, "loss": 0.7171, "step": 10991 }, { "epoch": 0.8169453734671126, "grad_norm": 2.0243406245316407, "learning_rate": 1.7075321731509186e-05, "loss": 0.961, "step": 10992 }, { "epoch": 0.8170196952805648, "grad_norm": 2.102555277330914, "learning_rate": 1.707475469022904e-05, "loss": 0.7376, "step": 10993 }, { "epoch": 0.8170940170940171, "grad_norm": 1.7294408318994536, "learning_rate": 1.707418760340199e-05, "loss": 0.8151, "step": 10994 }, { "epoch": 0.8171683389074693, "grad_norm": 1.8116061596939315, "learning_rate": 1.7073620471031688e-05, "loss": 0.8359, "step": 10995 }, { "epoch": 0.8172426607209216, "grad_norm": 1.954150864609749, "learning_rate": 1.7073053293121785e-05, "loss": 0.7773, "step": 10996 }, { "epoch": 0.8173169825343738, "grad_norm": 1.487766549132458, "learning_rate": 1.7072486069675932e-05, "loss": 0.6812, "step": 10997 }, { "epoch": 0.8173913043478261, "grad_norm": 1.9945014763040505, "learning_rate": 1.7071918800697776e-05, "loss": 0.9207, "step": 10998 }, { "epoch": 0.8174656261612784, "grad_norm": 2.1630216069304695, "learning_rate": 1.7071351486190976e-05, "loss": 0.7844, "step": 10999 }, { "epoch": 0.8175399479747306, "grad_norm": 1.9666145149891257, "learning_rate": 1.7070784126159187e-05, "loss": 1.0584, "step": 11000 }, { "epoch": 0.8176142697881829, "grad_norm": 2.2507352016160036, "learning_rate": 1.707021672060605e-05, "loss": 0.8121, "step": 11001 }, { "epoch": 0.8176885916016351, "grad_norm": 1.7315122196925272, "learning_rate": 1.7069649269535226e-05, "loss": 0.9453, "step": 11002 }, { "epoch": 0.8177629134150873, "grad_norm": 1.9651960502241879, "learning_rate": 1.706908177295037e-05, "loss": 0.8613, "step": 11003 }, { "epoch": 0.8178372352285396, "grad_norm": 2.4799417421261096, "learning_rate": 1.706851423085513e-05, "loss": 0.7816, "step": 11004 }, { "epoch": 0.8179115570419918, "grad_norm": 2.2552906039480223, "learning_rate": 1.706794664325316e-05, "loss": 0.961, "step": 11005 }, { "epoch": 0.817985878855444, "grad_norm": 1.6279913390311993, "learning_rate": 1.706737901014812e-05, "loss": 0.8212, "step": 11006 }, { "epoch": 0.8180602006688963, "grad_norm": 2.142732713343424, "learning_rate": 1.706681133154366e-05, "loss": 1.1252, "step": 11007 }, { "epoch": 0.8181345224823485, "grad_norm": 1.884794457428757, "learning_rate": 1.7066243607443434e-05, "loss": 0.8401, "step": 11008 }, { "epoch": 0.8182088442958008, "grad_norm": 1.7609415910321726, "learning_rate": 1.70656758378511e-05, "loss": 0.9659, "step": 11009 }, { "epoch": 0.8182831661092531, "grad_norm": 1.8022126504322131, "learning_rate": 1.7065108022770308e-05, "loss": 0.7411, "step": 11010 }, { "epoch": 0.8183574879227054, "grad_norm": 1.84578053922727, "learning_rate": 1.706454016220472e-05, "loss": 0.8459, "step": 11011 }, { "epoch": 0.8184318097361576, "grad_norm": 2.0286815956329556, "learning_rate": 1.706397225615799e-05, "loss": 0.7766, "step": 11012 }, { "epoch": 0.8185061315496098, "grad_norm": 1.9937846595855238, "learning_rate": 1.706340430463377e-05, "loss": 0.8945, "step": 11013 }, { "epoch": 0.8185804533630621, "grad_norm": 1.7607140779112849, "learning_rate": 1.7062836307635723e-05, "loss": 0.8213, "step": 11014 }, { "epoch": 0.8186547751765143, "grad_norm": 2.3336101898194803, "learning_rate": 1.70622682651675e-05, "loss": 1.0477, "step": 11015 }, { "epoch": 0.8187290969899665, "grad_norm": 1.6151379763715883, "learning_rate": 1.706170017723276e-05, "loss": 0.8731, "step": 11016 }, { "epoch": 0.8188034188034188, "grad_norm": 1.8957864698983105, "learning_rate": 1.7061132043835164e-05, "loss": 0.8667, "step": 11017 }, { "epoch": 0.818877740616871, "grad_norm": 1.7517827384625029, "learning_rate": 1.7060563864978363e-05, "loss": 0.9511, "step": 11018 }, { "epoch": 0.8189520624303233, "grad_norm": 2.55848468932499, "learning_rate": 1.7059995640666016e-05, "loss": 0.9691, "step": 11019 }, { "epoch": 0.8190263842437755, "grad_norm": 2.2344709884282294, "learning_rate": 1.7059427370901787e-05, "loss": 1.0489, "step": 11020 }, { "epoch": 0.8191007060572278, "grad_norm": 1.3509924506946298, "learning_rate": 1.705885905568933e-05, "loss": 0.63, "step": 11021 }, { "epoch": 0.8191750278706801, "grad_norm": 1.694636044601238, "learning_rate": 1.7058290695032304e-05, "loss": 0.7931, "step": 11022 }, { "epoch": 0.8192493496841323, "grad_norm": 1.793027950500092, "learning_rate": 1.7057722288934374e-05, "loss": 0.6945, "step": 11023 }, { "epoch": 0.8193236714975846, "grad_norm": 1.9176462471064284, "learning_rate": 1.705715383739919e-05, "loss": 0.8268, "step": 11024 }, { "epoch": 0.8193979933110368, "grad_norm": 1.8278107957393113, "learning_rate": 1.7056585340430413e-05, "loss": 0.8235, "step": 11025 }, { "epoch": 0.819472315124489, "grad_norm": 2.628999637760311, "learning_rate": 1.705601679803171e-05, "loss": 1.0552, "step": 11026 }, { "epoch": 0.8195466369379413, "grad_norm": 2.2783723214066955, "learning_rate": 1.7055448210206734e-05, "loss": 0.8606, "step": 11027 }, { "epoch": 0.8196209587513935, "grad_norm": 2.7980761320095917, "learning_rate": 1.705487957695915e-05, "loss": 0.902, "step": 11028 }, { "epoch": 0.8196952805648458, "grad_norm": 1.3886943127444011, "learning_rate": 1.7054310898292618e-05, "loss": 0.7267, "step": 11029 }, { "epoch": 0.819769602378298, "grad_norm": 1.6563268885420313, "learning_rate": 1.7053742174210795e-05, "loss": 0.8214, "step": 11030 }, { "epoch": 0.8198439241917502, "grad_norm": 1.9278665662001755, "learning_rate": 1.705317340471735e-05, "loss": 0.8255, "step": 11031 }, { "epoch": 0.8199182460052026, "grad_norm": 2.320214666198716, "learning_rate": 1.7052604589815937e-05, "loss": 0.9989, "step": 11032 }, { "epoch": 0.8199925678186548, "grad_norm": 1.7949455104420546, "learning_rate": 1.7052035729510224e-05, "loss": 0.87, "step": 11033 }, { "epoch": 0.820066889632107, "grad_norm": 1.954681359744159, "learning_rate": 1.705146682380387e-05, "loss": 0.8912, "step": 11034 }, { "epoch": 0.8201412114455593, "grad_norm": 2.4537237069433226, "learning_rate": 1.7050897872700537e-05, "loss": 0.7914, "step": 11035 }, { "epoch": 0.8202155332590115, "grad_norm": 2.6938352983266527, "learning_rate": 1.7050328876203893e-05, "loss": 0.7642, "step": 11036 }, { "epoch": 0.8202898550724638, "grad_norm": 2.1971604396931195, "learning_rate": 1.70497598343176e-05, "loss": 1.0283, "step": 11037 }, { "epoch": 0.820364176885916, "grad_norm": 2.0723422050637796, "learning_rate": 1.704919074704531e-05, "loss": 0.9752, "step": 11038 }, { "epoch": 0.8204384986993682, "grad_norm": 1.5883115823445915, "learning_rate": 1.7048621614390702e-05, "loss": 1.0027, "step": 11039 }, { "epoch": 0.8205128205128205, "grad_norm": 2.0355666825546233, "learning_rate": 1.704805243635743e-05, "loss": 0.7178, "step": 11040 }, { "epoch": 0.8205871423262727, "grad_norm": 9.020901347398935, "learning_rate": 1.7047483212949164e-05, "loss": 1.1051, "step": 11041 }, { "epoch": 0.820661464139725, "grad_norm": 3.2235071805347313, "learning_rate": 1.704691394416957e-05, "loss": 0.7044, "step": 11042 }, { "epoch": 0.8207357859531773, "grad_norm": 2.0196629363461804, "learning_rate": 1.704634463002231e-05, "loss": 0.7515, "step": 11043 }, { "epoch": 0.8208101077666295, "grad_norm": 2.931228297027055, "learning_rate": 1.704577527051104e-05, "loss": 0.9982, "step": 11044 }, { "epoch": 0.8208844295800818, "grad_norm": 2.2975620612334087, "learning_rate": 1.7045205865639443e-05, "loss": 0.8574, "step": 11045 }, { "epoch": 0.820958751393534, "grad_norm": 1.8859189615119425, "learning_rate": 1.7044636415411173e-05, "loss": 0.7806, "step": 11046 }, { "epoch": 0.8210330732069863, "grad_norm": 1.78148266287282, "learning_rate": 1.7044066919829898e-05, "loss": 0.8514, "step": 11047 }, { "epoch": 0.8211073950204385, "grad_norm": 1.7078599424439111, "learning_rate": 1.704349737889929e-05, "loss": 0.6828, "step": 11048 }, { "epoch": 0.8211817168338907, "grad_norm": 3.2058973939269606, "learning_rate": 1.7042927792623006e-05, "loss": 0.9575, "step": 11049 }, { "epoch": 0.821256038647343, "grad_norm": 1.5482699517169871, "learning_rate": 1.7042358161004724e-05, "loss": 0.6022, "step": 11050 }, { "epoch": 0.8213303604607952, "grad_norm": 2.0644263441644055, "learning_rate": 1.70417884840481e-05, "loss": 0.9424, "step": 11051 }, { "epoch": 0.8214046822742475, "grad_norm": 1.8183863421283208, "learning_rate": 1.704121876175681e-05, "loss": 0.8744, "step": 11052 }, { "epoch": 0.8214790040876997, "grad_norm": 1.936201338492705, "learning_rate": 1.7040648994134515e-05, "loss": 0.7356, "step": 11053 }, { "epoch": 0.821553325901152, "grad_norm": 1.9693281363353667, "learning_rate": 1.7040079181184893e-05, "loss": 0.9627, "step": 11054 }, { "epoch": 0.8216276477146043, "grad_norm": 1.9003830369508479, "learning_rate": 1.7039509322911605e-05, "loss": 1.0276, "step": 11055 }, { "epoch": 0.8217019695280565, "grad_norm": 2.910633664321065, "learning_rate": 1.703893941931832e-05, "loss": 0.9084, "step": 11056 }, { "epoch": 0.8217762913415088, "grad_norm": 1.416640469822503, "learning_rate": 1.7038369470408707e-05, "loss": 0.6687, "step": 11057 }, { "epoch": 0.821850613154961, "grad_norm": 2.0076614912049284, "learning_rate": 1.703779947618644e-05, "loss": 0.9121, "step": 11058 }, { "epoch": 0.8219249349684132, "grad_norm": 1.7302923610260739, "learning_rate": 1.7037229436655184e-05, "loss": 0.8591, "step": 11059 }, { "epoch": 0.8219992567818655, "grad_norm": 1.6992645183575528, "learning_rate": 1.703665935181861e-05, "loss": 0.7992, "step": 11060 }, { "epoch": 0.8220735785953177, "grad_norm": 2.5086614443972497, "learning_rate": 1.7036089221680385e-05, "loss": 0.8742, "step": 11061 }, { "epoch": 0.82214790040877, "grad_norm": 1.770573740592771, "learning_rate": 1.7035519046244186e-05, "loss": 0.8617, "step": 11062 }, { "epoch": 0.8222222222222222, "grad_norm": 2.2243067657024778, "learning_rate": 1.703494882551368e-05, "loss": 0.9876, "step": 11063 }, { "epoch": 0.8222965440356744, "grad_norm": 1.7327955973066318, "learning_rate": 1.7034378559492537e-05, "loss": 0.8901, "step": 11064 }, { "epoch": 0.8223708658491267, "grad_norm": 1.9995403579526652, "learning_rate": 1.7033808248184433e-05, "loss": 0.7627, "step": 11065 }, { "epoch": 0.822445187662579, "grad_norm": 3.732130592736196, "learning_rate": 1.7033237891593032e-05, "loss": 0.7054, "step": 11066 }, { "epoch": 0.8225195094760313, "grad_norm": 1.8409795623404424, "learning_rate": 1.7032667489722016e-05, "loss": 0.9162, "step": 11067 }, { "epoch": 0.8225938312894835, "grad_norm": 2.0793972769301816, "learning_rate": 1.7032097042575048e-05, "loss": 0.9611, "step": 11068 }, { "epoch": 0.8226681531029357, "grad_norm": 2.3259549734323546, "learning_rate": 1.7031526550155803e-05, "loss": 0.8157, "step": 11069 }, { "epoch": 0.822742474916388, "grad_norm": 2.797830987613635, "learning_rate": 1.7030956012467955e-05, "loss": 0.9113, "step": 11070 }, { "epoch": 0.8228167967298402, "grad_norm": 1.7772396138807764, "learning_rate": 1.703038542951518e-05, "loss": 0.9189, "step": 11071 }, { "epoch": 0.8228911185432924, "grad_norm": 1.8842006659989534, "learning_rate": 1.7029814801301146e-05, "loss": 0.7061, "step": 11072 }, { "epoch": 0.8229654403567447, "grad_norm": 2.0339309923955677, "learning_rate": 1.7029244127829526e-05, "loss": 0.8843, "step": 11073 }, { "epoch": 0.8230397621701969, "grad_norm": 2.375377794666477, "learning_rate": 1.7028673409104003e-05, "loss": 1.0066, "step": 11074 }, { "epoch": 0.8231140839836492, "grad_norm": 2.0124449748324613, "learning_rate": 1.7028102645128244e-05, "loss": 0.865, "step": 11075 }, { "epoch": 0.8231884057971014, "grad_norm": 2.74998520945539, "learning_rate": 1.7027531835905923e-05, "loss": 0.9788, "step": 11076 }, { "epoch": 0.8232627276105537, "grad_norm": 2.1343633888516163, "learning_rate": 1.7026960981440717e-05, "loss": 0.9502, "step": 11077 }, { "epoch": 0.823337049424006, "grad_norm": 1.587344765165907, "learning_rate": 1.7026390081736304e-05, "loss": 0.7622, "step": 11078 }, { "epoch": 0.8234113712374582, "grad_norm": 1.8411603735621653, "learning_rate": 1.702581913679635e-05, "loss": 0.8857, "step": 11079 }, { "epoch": 0.8234856930509105, "grad_norm": 1.652299233806679, "learning_rate": 1.7025248146624543e-05, "loss": 0.8163, "step": 11080 }, { "epoch": 0.8235600148643627, "grad_norm": 2.2568287139563745, "learning_rate": 1.702467711122455e-05, "loss": 0.9875, "step": 11081 }, { "epoch": 0.8236343366778149, "grad_norm": 3.5747799588027886, "learning_rate": 1.7024106030600053e-05, "loss": 0.5901, "step": 11082 }, { "epoch": 0.8237086584912672, "grad_norm": 2.063385507555431, "learning_rate": 1.7023534904754723e-05, "loss": 0.8963, "step": 11083 }, { "epoch": 0.8237829803047194, "grad_norm": 1.6494639723655997, "learning_rate": 1.702296373369224e-05, "loss": 0.7111, "step": 11084 }, { "epoch": 0.8238573021181717, "grad_norm": 2.0222189677496827, "learning_rate": 1.702239251741628e-05, "loss": 0.8956, "step": 11085 }, { "epoch": 0.8239316239316239, "grad_norm": 1.9313972502848757, "learning_rate": 1.702182125593052e-05, "loss": 0.7882, "step": 11086 }, { "epoch": 0.8240059457450761, "grad_norm": 1.8407661275120855, "learning_rate": 1.7021249949238643e-05, "loss": 0.7538, "step": 11087 }, { "epoch": 0.8240802675585285, "grad_norm": 2.079166261567188, "learning_rate": 1.7020678597344322e-05, "loss": 0.6127, "step": 11088 }, { "epoch": 0.8241545893719807, "grad_norm": 1.8621363069608101, "learning_rate": 1.7020107200251236e-05, "loss": 0.7056, "step": 11089 }, { "epoch": 0.824228911185433, "grad_norm": 1.582874191574686, "learning_rate": 1.7019535757963063e-05, "loss": 0.8723, "step": 11090 }, { "epoch": 0.8243032329988852, "grad_norm": 2.2146056910213514, "learning_rate": 1.7018964270483487e-05, "loss": 0.8493, "step": 11091 }, { "epoch": 0.8243775548123374, "grad_norm": 1.8965474203278918, "learning_rate": 1.701839273781618e-05, "loss": 0.886, "step": 11092 }, { "epoch": 0.8244518766257897, "grad_norm": 2.143182840941976, "learning_rate": 1.7017821159964824e-05, "loss": 0.6766, "step": 11093 }, { "epoch": 0.8245261984392419, "grad_norm": 2.262400016733421, "learning_rate": 1.7017249536933102e-05, "loss": 1.0025, "step": 11094 }, { "epoch": 0.8246005202526941, "grad_norm": 2.0005236215292124, "learning_rate": 1.701667786872469e-05, "loss": 0.6752, "step": 11095 }, { "epoch": 0.8246748420661464, "grad_norm": 2.34656123425483, "learning_rate": 1.7016106155343272e-05, "loss": 1.161, "step": 11096 }, { "epoch": 0.8247491638795986, "grad_norm": 1.7394313485513508, "learning_rate": 1.7015534396792523e-05, "loss": 0.7916, "step": 11097 }, { "epoch": 0.8248234856930509, "grad_norm": 2.5202308272222624, "learning_rate": 1.701496259307613e-05, "loss": 1.0411, "step": 11098 }, { "epoch": 0.8248978075065032, "grad_norm": 2.183852252377042, "learning_rate": 1.7014390744197774e-05, "loss": 0.9596, "step": 11099 }, { "epoch": 0.8249721293199554, "grad_norm": 2.0175245761730385, "learning_rate": 1.701381885016113e-05, "loss": 0.9813, "step": 11100 }, { "epoch": 0.8250464511334077, "grad_norm": 1.9602641061245778, "learning_rate": 1.7013246910969885e-05, "loss": 0.8683, "step": 11101 }, { "epoch": 0.8251207729468599, "grad_norm": 1.796890833081079, "learning_rate": 1.701267492662772e-05, "loss": 0.6565, "step": 11102 }, { "epoch": 0.8251950947603122, "grad_norm": 1.91834547661028, "learning_rate": 1.701210289713832e-05, "loss": 0.744, "step": 11103 }, { "epoch": 0.8252694165737644, "grad_norm": 2.3064317897529047, "learning_rate": 1.7011530822505363e-05, "loss": 1.0718, "step": 11104 }, { "epoch": 0.8253437383872166, "grad_norm": 2.7617684136490492, "learning_rate": 1.7010958702732535e-05, "loss": 0.8714, "step": 11105 }, { "epoch": 0.8254180602006689, "grad_norm": 1.9432433405968075, "learning_rate": 1.701038653782352e-05, "loss": 0.9836, "step": 11106 }, { "epoch": 0.8254923820141211, "grad_norm": 1.7276505302739251, "learning_rate": 1.7009814327782e-05, "loss": 0.9403, "step": 11107 }, { "epoch": 0.8255667038275734, "grad_norm": 1.9907639630123215, "learning_rate": 1.7009242072611657e-05, "loss": 0.8362, "step": 11108 }, { "epoch": 0.8256410256410256, "grad_norm": 1.5971109006706974, "learning_rate": 1.7008669772316178e-05, "loss": 0.5567, "step": 11109 }, { "epoch": 0.8257153474544779, "grad_norm": 1.8724940223702784, "learning_rate": 1.7008097426899248e-05, "loss": 0.723, "step": 11110 }, { "epoch": 0.8257896692679302, "grad_norm": 1.9435163907397683, "learning_rate": 1.700752503636455e-05, "loss": 0.8445, "step": 11111 }, { "epoch": 0.8258639910813824, "grad_norm": 1.913830443870726, "learning_rate": 1.700695260071577e-05, "loss": 0.7127, "step": 11112 }, { "epoch": 0.8259383128948347, "grad_norm": 1.7056699127594057, "learning_rate": 1.700638011995659e-05, "loss": 0.7668, "step": 11113 }, { "epoch": 0.8260126347082869, "grad_norm": 1.5822214437474882, "learning_rate": 1.70058075940907e-05, "loss": 0.7847, "step": 11114 }, { "epoch": 0.8260869565217391, "grad_norm": 1.9527817353074794, "learning_rate": 1.7005235023121787e-05, "loss": 0.7004, "step": 11115 }, { "epoch": 0.8261612783351914, "grad_norm": 1.5912010979694484, "learning_rate": 1.700466240705353e-05, "loss": 0.775, "step": 11116 }, { "epoch": 0.8262356001486436, "grad_norm": 1.6953745015747246, "learning_rate": 1.7004089745889623e-05, "loss": 0.8802, "step": 11117 }, { "epoch": 0.8263099219620959, "grad_norm": 1.7037274585887465, "learning_rate": 1.7003517039633746e-05, "loss": 0.6069, "step": 11118 }, { "epoch": 0.8263842437755481, "grad_norm": 1.7716776762500894, "learning_rate": 1.7002944288289595e-05, "loss": 0.7372, "step": 11119 }, { "epoch": 0.8264585655890003, "grad_norm": 2.1878351349752734, "learning_rate": 1.7002371491860844e-05, "loss": 0.7462, "step": 11120 }, { "epoch": 0.8265328874024526, "grad_norm": 1.8457917631397716, "learning_rate": 1.7001798650351197e-05, "loss": 0.8839, "step": 11121 }, { "epoch": 0.8266072092159049, "grad_norm": 2.465809695777865, "learning_rate": 1.700122576376433e-05, "loss": 0.8113, "step": 11122 }, { "epoch": 0.8266815310293572, "grad_norm": 1.7444814527718229, "learning_rate": 1.7000652832103934e-05, "loss": 0.8505, "step": 11123 }, { "epoch": 0.8267558528428094, "grad_norm": 2.143476519260393, "learning_rate": 1.70000798553737e-05, "loss": 0.7426, "step": 11124 }, { "epoch": 0.8268301746562616, "grad_norm": 1.7138370518853443, "learning_rate": 1.6999506833577316e-05, "loss": 0.8647, "step": 11125 }, { "epoch": 0.8269044964697139, "grad_norm": 2.140602630496281, "learning_rate": 1.6998933766718467e-05, "loss": 1.1675, "step": 11126 }, { "epoch": 0.8269788182831661, "grad_norm": 2.0212271162151554, "learning_rate": 1.699836065480085e-05, "loss": 0.7542, "step": 11127 }, { "epoch": 0.8270531400966183, "grad_norm": 1.8493688112913547, "learning_rate": 1.6997787497828144e-05, "loss": 0.9157, "step": 11128 }, { "epoch": 0.8271274619100706, "grad_norm": 1.8385326740363541, "learning_rate": 1.699721429580405e-05, "loss": 0.8174, "step": 11129 }, { "epoch": 0.8272017837235228, "grad_norm": 2.0874550614367213, "learning_rate": 1.6996641048732253e-05, "loss": 0.8845, "step": 11130 }, { "epoch": 0.8272761055369751, "grad_norm": 1.4787205137936574, "learning_rate": 1.699606775661644e-05, "loss": 0.5029, "step": 11131 }, { "epoch": 0.8273504273504273, "grad_norm": 1.8895763302203081, "learning_rate": 1.699549441946031e-05, "loss": 0.8331, "step": 11132 }, { "epoch": 0.8274247491638796, "grad_norm": 1.5611950163343953, "learning_rate": 1.6994921037267552e-05, "loss": 0.6428, "step": 11133 }, { "epoch": 0.8274990709773319, "grad_norm": 2.035944457289108, "learning_rate": 1.699434761004185e-05, "loss": 0.9381, "step": 11134 }, { "epoch": 0.8275733927907841, "grad_norm": 1.579866390593609, "learning_rate": 1.6993774137786902e-05, "loss": 0.6809, "step": 11135 }, { "epoch": 0.8276477146042364, "grad_norm": 1.4837377019125615, "learning_rate": 1.69932006205064e-05, "loss": 0.8663, "step": 11136 }, { "epoch": 0.8277220364176886, "grad_norm": 1.8132112954977146, "learning_rate": 1.6992627058204036e-05, "loss": 0.7102, "step": 11137 }, { "epoch": 0.8277963582311408, "grad_norm": 2.114674720478639, "learning_rate": 1.6992053450883503e-05, "loss": 0.8166, "step": 11138 }, { "epoch": 0.8278706800445931, "grad_norm": 1.7801895015363407, "learning_rate": 1.699147979854849e-05, "loss": 0.9887, "step": 11139 }, { "epoch": 0.8279450018580453, "grad_norm": 2.167366074776053, "learning_rate": 1.699090610120269e-05, "loss": 1.0871, "step": 11140 }, { "epoch": 0.8280193236714976, "grad_norm": 2.0103710609728593, "learning_rate": 1.6990332358849804e-05, "loss": 0.9365, "step": 11141 }, { "epoch": 0.8280936454849498, "grad_norm": 1.9354883132550285, "learning_rate": 1.698975857149352e-05, "loss": 0.8835, "step": 11142 }, { "epoch": 0.828167967298402, "grad_norm": 1.5118253229142302, "learning_rate": 1.698918473913753e-05, "loss": 0.5618, "step": 11143 }, { "epoch": 0.8282422891118544, "grad_norm": 2.055155685738483, "learning_rate": 1.6988610861785536e-05, "loss": 0.9931, "step": 11144 }, { "epoch": 0.8283166109253066, "grad_norm": 1.4771410676544074, "learning_rate": 1.6988036939441225e-05, "loss": 0.7327, "step": 11145 }, { "epoch": 0.8283909327387589, "grad_norm": 1.7944134692823808, "learning_rate": 1.6987462972108298e-05, "loss": 0.9024, "step": 11146 }, { "epoch": 0.8284652545522111, "grad_norm": 2.413112650442228, "learning_rate": 1.6986888959790443e-05, "loss": 1.088, "step": 11147 }, { "epoch": 0.8285395763656633, "grad_norm": 1.5888690957273905, "learning_rate": 1.6986314902491364e-05, "loss": 0.7389, "step": 11148 }, { "epoch": 0.8286138981791156, "grad_norm": 1.4621204405139292, "learning_rate": 1.6985740800214748e-05, "loss": 0.6813, "step": 11149 }, { "epoch": 0.8286882199925678, "grad_norm": 2.197473525728485, "learning_rate": 1.6985166652964297e-05, "loss": 0.824, "step": 11150 }, { "epoch": 0.82876254180602, "grad_norm": 1.6985290752580453, "learning_rate": 1.6984592460743705e-05, "loss": 0.7136, "step": 11151 }, { "epoch": 0.8288368636194723, "grad_norm": 1.6828360551587485, "learning_rate": 1.698401822355667e-05, "loss": 0.8959, "step": 11152 }, { "epoch": 0.8289111854329245, "grad_norm": 2.4619350484301856, "learning_rate": 1.6983443941406883e-05, "loss": 0.9405, "step": 11153 }, { "epoch": 0.8289855072463768, "grad_norm": 1.7950524925946956, "learning_rate": 1.6982869614298053e-05, "loss": 0.7658, "step": 11154 }, { "epoch": 0.8290598290598291, "grad_norm": 2.0228670954693015, "learning_rate": 1.6982295242233867e-05, "loss": 0.7878, "step": 11155 }, { "epoch": 0.8291341508732814, "grad_norm": 1.8711142694451282, "learning_rate": 1.698172082521803e-05, "loss": 0.8234, "step": 11156 }, { "epoch": 0.8292084726867336, "grad_norm": 2.214880911381382, "learning_rate": 1.698114636325423e-05, "loss": 0.8276, "step": 11157 }, { "epoch": 0.8292827945001858, "grad_norm": 2.6718396780159255, "learning_rate": 1.6980571856346178e-05, "loss": 0.8787, "step": 11158 }, { "epoch": 0.8293571163136381, "grad_norm": 2.2776928175869466, "learning_rate": 1.6979997304497565e-05, "loss": 0.8475, "step": 11159 }, { "epoch": 0.8294314381270903, "grad_norm": 2.2764792379725614, "learning_rate": 1.697942270771209e-05, "loss": 0.813, "step": 11160 }, { "epoch": 0.8295057599405425, "grad_norm": 1.934355591520825, "learning_rate": 1.6978848065993457e-05, "loss": 0.7753, "step": 11161 }, { "epoch": 0.8295800817539948, "grad_norm": 1.9485613480038781, "learning_rate": 1.6978273379345358e-05, "loss": 0.9546, "step": 11162 }, { "epoch": 0.829654403567447, "grad_norm": 1.7828568010882817, "learning_rate": 1.6977698647771502e-05, "loss": 0.9032, "step": 11163 }, { "epoch": 0.8297287253808993, "grad_norm": 2.0112794289430997, "learning_rate": 1.697712387127558e-05, "loss": 0.8096, "step": 11164 }, { "epoch": 0.8298030471943515, "grad_norm": 1.8087274399982605, "learning_rate": 1.6976549049861296e-05, "loss": 0.781, "step": 11165 }, { "epoch": 0.8298773690078038, "grad_norm": 2.0034454858145896, "learning_rate": 1.6975974183532355e-05, "loss": 0.9031, "step": 11166 }, { "epoch": 0.8299516908212561, "grad_norm": 2.054796461702397, "learning_rate": 1.6975399272292453e-05, "loss": 1.0264, "step": 11167 }, { "epoch": 0.8300260126347083, "grad_norm": 1.8306044907547978, "learning_rate": 1.697482431614529e-05, "loss": 0.8795, "step": 11168 }, { "epoch": 0.8301003344481606, "grad_norm": 1.9129903763736595, "learning_rate": 1.6974249315094574e-05, "loss": 0.9634, "step": 11169 }, { "epoch": 0.8301746562616128, "grad_norm": 1.99069690346596, "learning_rate": 1.6973674269144e-05, "loss": 0.716, "step": 11170 }, { "epoch": 0.830248978075065, "grad_norm": 1.8960181939203717, "learning_rate": 1.6973099178297273e-05, "loss": 0.6528, "step": 11171 }, { "epoch": 0.8303232998885173, "grad_norm": 2.250680305283807, "learning_rate": 1.6972524042558096e-05, "loss": 0.8578, "step": 11172 }, { "epoch": 0.8303976217019695, "grad_norm": 1.9612321642952886, "learning_rate": 1.697194886193017e-05, "loss": 0.9616, "step": 11173 }, { "epoch": 0.8304719435154218, "grad_norm": 1.7495417363469903, "learning_rate": 1.69713736364172e-05, "loss": 0.7636, "step": 11174 }, { "epoch": 0.830546265328874, "grad_norm": 1.9651209630331687, "learning_rate": 1.697079836602289e-05, "loss": 0.7952, "step": 11175 }, { "epoch": 0.8306205871423262, "grad_norm": 1.6652722215831048, "learning_rate": 1.6970223050750936e-05, "loss": 0.8452, "step": 11176 }, { "epoch": 0.8306949089557785, "grad_norm": 2.915900118710013, "learning_rate": 1.696964769060505e-05, "loss": 0.8989, "step": 11177 }, { "epoch": 0.8307692307692308, "grad_norm": 2.0212805804650493, "learning_rate": 1.6969072285588935e-05, "loss": 0.926, "step": 11178 }, { "epoch": 0.830843552582683, "grad_norm": 1.9602993303783423, "learning_rate": 1.69684968357063e-05, "loss": 1.1029, "step": 11179 }, { "epoch": 0.8309178743961353, "grad_norm": 1.9384676295079366, "learning_rate": 1.6967921340960836e-05, "loss": 0.7974, "step": 11180 }, { "epoch": 0.8309921962095875, "grad_norm": 1.8735425997365873, "learning_rate": 1.6967345801356258e-05, "loss": 0.7632, "step": 11181 }, { "epoch": 0.8310665180230398, "grad_norm": 2.1318578087109192, "learning_rate": 1.696677021689627e-05, "loss": 0.9451, "step": 11182 }, { "epoch": 0.831140839836492, "grad_norm": 1.8886358821473355, "learning_rate": 1.6966194587584577e-05, "loss": 0.7402, "step": 11183 }, { "epoch": 0.8312151616499442, "grad_norm": 1.945677890085678, "learning_rate": 1.6965618913424885e-05, "loss": 0.8433, "step": 11184 }, { "epoch": 0.8312894834633965, "grad_norm": 1.932305258567609, "learning_rate": 1.6965043194420897e-05, "loss": 0.676, "step": 11185 }, { "epoch": 0.8313638052768487, "grad_norm": 1.6862737721879149, "learning_rate": 1.6964467430576324e-05, "loss": 0.85, "step": 11186 }, { "epoch": 0.831438127090301, "grad_norm": 2.1285626161467355, "learning_rate": 1.6963891621894874e-05, "loss": 0.7942, "step": 11187 }, { "epoch": 0.8315124489037532, "grad_norm": 2.0668430690171067, "learning_rate": 1.6963315768380244e-05, "loss": 0.8097, "step": 11188 }, { "epoch": 0.8315867707172055, "grad_norm": 2.1406313984226317, "learning_rate": 1.6962739870036156e-05, "loss": 0.9341, "step": 11189 }, { "epoch": 0.8316610925306578, "grad_norm": 4.007716516798672, "learning_rate": 1.6962163926866308e-05, "loss": 0.7608, "step": 11190 }, { "epoch": 0.83173541434411, "grad_norm": 2.128669965975489, "learning_rate": 1.6961587938874404e-05, "loss": 0.8167, "step": 11191 }, { "epoch": 0.8318097361575623, "grad_norm": 2.097163279440986, "learning_rate": 1.6961011906064163e-05, "loss": 0.8817, "step": 11192 }, { "epoch": 0.8318840579710145, "grad_norm": 2.1441690325927367, "learning_rate": 1.6960435828439287e-05, "loss": 0.8927, "step": 11193 }, { "epoch": 0.8319583797844667, "grad_norm": 2.138799351806095, "learning_rate": 1.695985970600349e-05, "loss": 0.7383, "step": 11194 }, { "epoch": 0.832032701597919, "grad_norm": 1.9651518127145946, "learning_rate": 1.6959283538760475e-05, "loss": 0.9084, "step": 11195 }, { "epoch": 0.8321070234113712, "grad_norm": 1.8347445721058158, "learning_rate": 1.6958707326713954e-05, "loss": 0.8246, "step": 11196 }, { "epoch": 0.8321813452248235, "grad_norm": 2.0483957381962954, "learning_rate": 1.6958131069867636e-05, "loss": 0.6277, "step": 11197 }, { "epoch": 0.8322556670382757, "grad_norm": 1.5576253190050027, "learning_rate": 1.695755476822523e-05, "loss": 0.826, "step": 11198 }, { "epoch": 0.8323299888517279, "grad_norm": 2.147500100516931, "learning_rate": 1.6956978421790448e-05, "loss": 1.0716, "step": 11199 }, { "epoch": 0.8324043106651803, "grad_norm": 1.7929920697925814, "learning_rate": 1.6956402030567e-05, "loss": 0.7604, "step": 11200 }, { "epoch": 0.8324786324786325, "grad_norm": 1.6968485888176648, "learning_rate": 1.69558255945586e-05, "loss": 0.7654, "step": 11201 }, { "epoch": 0.8325529542920848, "grad_norm": 2.0265430223648453, "learning_rate": 1.695524911376895e-05, "loss": 0.9575, "step": 11202 }, { "epoch": 0.832627276105537, "grad_norm": 1.7331940040802691, "learning_rate": 1.695467258820177e-05, "loss": 0.7101, "step": 11203 }, { "epoch": 0.8327015979189892, "grad_norm": 1.881422802789866, "learning_rate": 1.6954096017860767e-05, "loss": 0.821, "step": 11204 }, { "epoch": 0.8327759197324415, "grad_norm": 2.0247924012072023, "learning_rate": 1.6953519402749657e-05, "loss": 0.9696, "step": 11205 }, { "epoch": 0.8328502415458937, "grad_norm": 1.84811696561312, "learning_rate": 1.695294274287215e-05, "loss": 0.6825, "step": 11206 }, { "epoch": 0.832924563359346, "grad_norm": 3.4330821065796715, "learning_rate": 1.6952366038231958e-05, "loss": 0.8586, "step": 11207 }, { "epoch": 0.8329988851727982, "grad_norm": 2.1710518562821037, "learning_rate": 1.6951789288832793e-05, "loss": 0.944, "step": 11208 }, { "epoch": 0.8330732069862504, "grad_norm": 2.2544106128098935, "learning_rate": 1.6951212494678372e-05, "loss": 0.9655, "step": 11209 }, { "epoch": 0.8331475287997027, "grad_norm": 2.2356014494941765, "learning_rate": 1.6950635655772402e-05, "loss": 0.5385, "step": 11210 }, { "epoch": 0.833221850613155, "grad_norm": 1.536827308837875, "learning_rate": 1.69500587721186e-05, "loss": 0.6198, "step": 11211 }, { "epoch": 0.8332961724266073, "grad_norm": 1.9096462741598685, "learning_rate": 1.6949481843720685e-05, "loss": 0.9047, "step": 11212 }, { "epoch": 0.8333704942400595, "grad_norm": 1.8876143068232731, "learning_rate": 1.694890487058236e-05, "loss": 0.9196, "step": 11213 }, { "epoch": 0.8334448160535117, "grad_norm": 1.9147488468046128, "learning_rate": 1.6948327852707352e-05, "loss": 0.8119, "step": 11214 }, { "epoch": 0.833519137866964, "grad_norm": 2.781232889008969, "learning_rate": 1.6947750790099367e-05, "loss": 0.8826, "step": 11215 }, { "epoch": 0.8335934596804162, "grad_norm": 2.664069255804042, "learning_rate": 1.694717368276212e-05, "loss": 0.7505, "step": 11216 }, { "epoch": 0.8336677814938684, "grad_norm": 1.872258291747585, "learning_rate": 1.6946596530699335e-05, "loss": 0.8082, "step": 11217 }, { "epoch": 0.8337421033073207, "grad_norm": 1.9764980971278558, "learning_rate": 1.6946019333914718e-05, "loss": 0.7692, "step": 11218 }, { "epoch": 0.8338164251207729, "grad_norm": 1.6900396401165292, "learning_rate": 1.6945442092411993e-05, "loss": 0.8563, "step": 11219 }, { "epoch": 0.8338907469342252, "grad_norm": 2.100364027637587, "learning_rate": 1.694486480619487e-05, "loss": 0.7394, "step": 11220 }, { "epoch": 0.8339650687476774, "grad_norm": 1.7890415058302525, "learning_rate": 1.6944287475267064e-05, "loss": 0.7041, "step": 11221 }, { "epoch": 0.8340393905611297, "grad_norm": 13.591080795139263, "learning_rate": 1.69437100996323e-05, "loss": 0.9402, "step": 11222 }, { "epoch": 0.834113712374582, "grad_norm": 3.998660661752107, "learning_rate": 1.6943132679294292e-05, "loss": 0.8532, "step": 11223 }, { "epoch": 0.8341880341880342, "grad_norm": 1.6938673843271743, "learning_rate": 1.6942555214256753e-05, "loss": 0.7991, "step": 11224 }, { "epoch": 0.8342623560014865, "grad_norm": 1.916464627542662, "learning_rate": 1.6941977704523403e-05, "loss": 0.8724, "step": 11225 }, { "epoch": 0.8343366778149387, "grad_norm": 2.0916422065092055, "learning_rate": 1.6941400150097962e-05, "loss": 0.8873, "step": 11226 }, { "epoch": 0.8344109996283909, "grad_norm": 1.7148012774516508, "learning_rate": 1.6940822550984147e-05, "loss": 0.618, "step": 11227 }, { "epoch": 0.8344853214418432, "grad_norm": 2.604531585799863, "learning_rate": 1.6940244907185678e-05, "loss": 0.783, "step": 11228 }, { "epoch": 0.8345596432552954, "grad_norm": 1.7245239934995207, "learning_rate": 1.693966721870627e-05, "loss": 0.8851, "step": 11229 }, { "epoch": 0.8346339650687477, "grad_norm": 7.292986606196151, "learning_rate": 1.6939089485549645e-05, "loss": 0.9178, "step": 11230 }, { "epoch": 0.8347082868821999, "grad_norm": 1.8866835323989555, "learning_rate": 1.693851170771952e-05, "loss": 0.7802, "step": 11231 }, { "epoch": 0.8347826086956521, "grad_norm": 1.8741216488312589, "learning_rate": 1.693793388521962e-05, "loss": 0.6596, "step": 11232 }, { "epoch": 0.8348569305091044, "grad_norm": 2.0970597207297175, "learning_rate": 1.6937356018053665e-05, "loss": 0.7376, "step": 11233 }, { "epoch": 0.8349312523225567, "grad_norm": 2.096117981084867, "learning_rate": 1.6936778106225365e-05, "loss": 0.8476, "step": 11234 }, { "epoch": 0.835005574136009, "grad_norm": 2.193621565371228, "learning_rate": 1.693620014973845e-05, "loss": 1.0033, "step": 11235 }, { "epoch": 0.8350798959494612, "grad_norm": 1.5667728033253432, "learning_rate": 1.693562214859664e-05, "loss": 0.7188, "step": 11236 }, { "epoch": 0.8351542177629134, "grad_norm": 2.1387125362113997, "learning_rate": 1.6935044102803654e-05, "loss": 1.1074, "step": 11237 }, { "epoch": 0.8352285395763657, "grad_norm": 1.8442776044051397, "learning_rate": 1.693446601236321e-05, "loss": 0.7618, "step": 11238 }, { "epoch": 0.8353028613898179, "grad_norm": 1.81826128567143, "learning_rate": 1.6933887877279035e-05, "loss": 0.9152, "step": 11239 }, { "epoch": 0.8353771832032701, "grad_norm": 2.1208652038870803, "learning_rate": 1.6933309697554855e-05, "loss": 0.8529, "step": 11240 }, { "epoch": 0.8354515050167224, "grad_norm": 1.8107008855210693, "learning_rate": 1.6932731473194383e-05, "loss": 0.8852, "step": 11241 }, { "epoch": 0.8355258268301746, "grad_norm": 2.5323947953317174, "learning_rate": 1.6932153204201346e-05, "loss": 1.0165, "step": 11242 }, { "epoch": 0.8356001486436269, "grad_norm": 1.9060967263773436, "learning_rate": 1.6931574890579465e-05, "loss": 0.9792, "step": 11243 }, { "epoch": 0.8356744704570791, "grad_norm": 2.0035420093825156, "learning_rate": 1.6930996532332465e-05, "loss": 0.8301, "step": 11244 }, { "epoch": 0.8357487922705314, "grad_norm": 2.22731922402109, "learning_rate": 1.693041812946407e-05, "loss": 0.8853, "step": 11245 }, { "epoch": 0.8358231140839837, "grad_norm": 2.7380608222981206, "learning_rate": 1.6929839681978e-05, "loss": 0.6679, "step": 11246 }, { "epoch": 0.8358974358974359, "grad_norm": 1.8333988880186032, "learning_rate": 1.6929261189877984e-05, "loss": 0.9558, "step": 11247 }, { "epoch": 0.8359717577108882, "grad_norm": 2.141096435376113, "learning_rate": 1.6928682653167743e-05, "loss": 0.7737, "step": 11248 }, { "epoch": 0.8360460795243404, "grad_norm": 1.8165482933522648, "learning_rate": 1.6928104071851005e-05, "loss": 0.7697, "step": 11249 }, { "epoch": 0.8361204013377926, "grad_norm": 1.8897305975936403, "learning_rate": 1.6927525445931488e-05, "loss": 0.8588, "step": 11250 }, { "epoch": 0.8361947231512449, "grad_norm": 1.8718648021805122, "learning_rate": 1.6926946775412928e-05, "loss": 0.9131, "step": 11251 }, { "epoch": 0.8362690449646971, "grad_norm": 2.0342023800187783, "learning_rate": 1.6926368060299042e-05, "loss": 0.8001, "step": 11252 }, { "epoch": 0.8363433667781494, "grad_norm": 1.8962697659271976, "learning_rate": 1.6925789300593554e-05, "loss": 0.7436, "step": 11253 }, { "epoch": 0.8364176885916016, "grad_norm": 1.4602876157366385, "learning_rate": 1.6925210496300195e-05, "loss": 0.7423, "step": 11254 }, { "epoch": 0.8364920104050538, "grad_norm": 2.5256065390910902, "learning_rate": 1.692463164742269e-05, "loss": 0.8998, "step": 11255 }, { "epoch": 0.8365663322185062, "grad_norm": 2.1644757147499476, "learning_rate": 1.6924052753964767e-05, "loss": 0.7284, "step": 11256 }, { "epoch": 0.8366406540319584, "grad_norm": 2.145038129840273, "learning_rate": 1.6923473815930153e-05, "loss": 0.9059, "step": 11257 }, { "epoch": 0.8367149758454107, "grad_norm": 2.5882898756493304, "learning_rate": 1.692289483332257e-05, "loss": 0.8957, "step": 11258 }, { "epoch": 0.8367892976588629, "grad_norm": 1.6877924587657682, "learning_rate": 1.6922315806145752e-05, "loss": 0.8475, "step": 11259 }, { "epoch": 0.8368636194723151, "grad_norm": 2.002496372646739, "learning_rate": 1.6921736734403426e-05, "loss": 0.8243, "step": 11260 }, { "epoch": 0.8369379412857674, "grad_norm": 2.5894976945832737, "learning_rate": 1.6921157618099314e-05, "loss": 0.8025, "step": 11261 }, { "epoch": 0.8370122630992196, "grad_norm": 1.7422958381622657, "learning_rate": 1.6920578457237152e-05, "loss": 0.8771, "step": 11262 }, { "epoch": 0.8370865849126718, "grad_norm": 2.1003626497602434, "learning_rate": 1.6919999251820664e-05, "loss": 0.9137, "step": 11263 }, { "epoch": 0.8371609067261241, "grad_norm": 2.962320574181659, "learning_rate": 1.6919420001853576e-05, "loss": 1.0353, "step": 11264 }, { "epoch": 0.8372352285395763, "grad_norm": 1.9520839804757342, "learning_rate": 1.6918840707339624e-05, "loss": 0.7771, "step": 11265 }, { "epoch": 0.8373095503530286, "grad_norm": 2.1828783578343183, "learning_rate": 1.6918261368282536e-05, "loss": 0.904, "step": 11266 }, { "epoch": 0.8373838721664809, "grad_norm": 1.4950696736781426, "learning_rate": 1.6917681984686037e-05, "loss": 0.7482, "step": 11267 }, { "epoch": 0.8374581939799332, "grad_norm": 2.0250254164199126, "learning_rate": 1.6917102556553864e-05, "loss": 0.9598, "step": 11268 }, { "epoch": 0.8375325157933854, "grad_norm": 2.1108250895780873, "learning_rate": 1.6916523083889743e-05, "loss": 1.0586, "step": 11269 }, { "epoch": 0.8376068376068376, "grad_norm": 1.6192142122516315, "learning_rate": 1.6915943566697405e-05, "loss": 0.6302, "step": 11270 }, { "epoch": 0.8376811594202899, "grad_norm": 2.2670159572927844, "learning_rate": 1.6915364004980576e-05, "loss": 0.739, "step": 11271 }, { "epoch": 0.8377554812337421, "grad_norm": 2.172729458831285, "learning_rate": 1.6914784398743e-05, "loss": 0.9615, "step": 11272 }, { "epoch": 0.8378298030471943, "grad_norm": 2.0598093165811053, "learning_rate": 1.6914204747988394e-05, "loss": 0.9018, "step": 11273 }, { "epoch": 0.8379041248606466, "grad_norm": 2.0579468043757227, "learning_rate": 1.6913625052720505e-05, "loss": 0.7862, "step": 11274 }, { "epoch": 0.8379784466740988, "grad_norm": 2.722532920555504, "learning_rate": 1.691304531294305e-05, "loss": 0.779, "step": 11275 }, { "epoch": 0.8380527684875511, "grad_norm": 2.4425481271356833, "learning_rate": 1.6912465528659766e-05, "loss": 0.8828, "step": 11276 }, { "epoch": 0.8381270903010033, "grad_norm": 1.9840890475976483, "learning_rate": 1.6911885699874392e-05, "loss": 0.972, "step": 11277 }, { "epoch": 0.8382014121144556, "grad_norm": 2.191322560478295, "learning_rate": 1.6911305826590656e-05, "loss": 0.9864, "step": 11278 }, { "epoch": 0.8382757339279079, "grad_norm": 2.2682844653563716, "learning_rate": 1.691072590881229e-05, "loss": 0.8786, "step": 11279 }, { "epoch": 0.8383500557413601, "grad_norm": 1.9866177252966446, "learning_rate": 1.6910145946543034e-05, "loss": 0.7666, "step": 11280 }, { "epoch": 0.8384243775548124, "grad_norm": 1.6733626212428288, "learning_rate": 1.690956593978661e-05, "loss": 0.7033, "step": 11281 }, { "epoch": 0.8384986993682646, "grad_norm": 2.248460683521383, "learning_rate": 1.6908985888546766e-05, "loss": 0.8214, "step": 11282 }, { "epoch": 0.8385730211817168, "grad_norm": 1.9509863125727513, "learning_rate": 1.6908405792827226e-05, "loss": 0.8904, "step": 11283 }, { "epoch": 0.8386473429951691, "grad_norm": 1.8101303715203638, "learning_rate": 1.6907825652631723e-05, "loss": 0.7957, "step": 11284 }, { "epoch": 0.8387216648086213, "grad_norm": 2.099456616914485, "learning_rate": 1.6907245467964005e-05, "loss": 0.813, "step": 11285 }, { "epoch": 0.8387959866220736, "grad_norm": 2.7935871307842604, "learning_rate": 1.6906665238827797e-05, "loss": 0.6121, "step": 11286 }, { "epoch": 0.8388703084355258, "grad_norm": 2.587775275403348, "learning_rate": 1.690608496522683e-05, "loss": 1.0467, "step": 11287 }, { "epoch": 0.838944630248978, "grad_norm": 2.8344395188567253, "learning_rate": 1.6905504647164854e-05, "loss": 0.8386, "step": 11288 }, { "epoch": 0.8390189520624304, "grad_norm": 1.8699787345859131, "learning_rate": 1.6904924284645595e-05, "loss": 0.6464, "step": 11289 }, { "epoch": 0.8390932738758826, "grad_norm": 2.1001023347055288, "learning_rate": 1.690434387767279e-05, "loss": 0.9333, "step": 11290 }, { "epoch": 0.8391675956893349, "grad_norm": 1.859914019052363, "learning_rate": 1.6903763426250182e-05, "loss": 0.8348, "step": 11291 }, { "epoch": 0.8392419175027871, "grad_norm": 2.51763699798005, "learning_rate": 1.69031829303815e-05, "loss": 1.043, "step": 11292 }, { "epoch": 0.8393162393162393, "grad_norm": 1.9778360815018192, "learning_rate": 1.6902602390070485e-05, "loss": 0.7875, "step": 11293 }, { "epoch": 0.8393905611296916, "grad_norm": 1.613918628057569, "learning_rate": 1.6902021805320873e-05, "loss": 0.6624, "step": 11294 }, { "epoch": 0.8394648829431438, "grad_norm": 1.8976334120633247, "learning_rate": 1.6901441176136404e-05, "loss": 0.7124, "step": 11295 }, { "epoch": 0.839539204756596, "grad_norm": 1.9771524221477832, "learning_rate": 1.6900860502520812e-05, "loss": 0.8581, "step": 11296 }, { "epoch": 0.8396135265700483, "grad_norm": 2.551017004263979, "learning_rate": 1.6900279784477844e-05, "loss": 0.9712, "step": 11297 }, { "epoch": 0.8396878483835005, "grad_norm": 2.395443852985983, "learning_rate": 1.689969902201123e-05, "loss": 0.7584, "step": 11298 }, { "epoch": 0.8397621701969528, "grad_norm": 2.286940776405646, "learning_rate": 1.6899118215124706e-05, "loss": 0.9152, "step": 11299 }, { "epoch": 0.839836492010405, "grad_norm": 1.807070044598178, "learning_rate": 1.6898537363822022e-05, "loss": 0.9418, "step": 11300 }, { "epoch": 0.8399108138238573, "grad_norm": 2.087287714269989, "learning_rate": 1.6897956468106917e-05, "loss": 0.936, "step": 11301 }, { "epoch": 0.8399851356373096, "grad_norm": 2.042530036434439, "learning_rate": 1.689737552798312e-05, "loss": 0.8437, "step": 11302 }, { "epoch": 0.8400594574507618, "grad_norm": 2.2826600316148964, "learning_rate": 1.6896794543454378e-05, "loss": 1.0719, "step": 11303 }, { "epoch": 0.8401337792642141, "grad_norm": 1.4572102033037098, "learning_rate": 1.6896213514524433e-05, "loss": 0.4993, "step": 11304 }, { "epoch": 0.8402081010776663, "grad_norm": 4.193491437426046, "learning_rate": 1.6895632441197018e-05, "loss": 0.8283, "step": 11305 }, { "epoch": 0.8402824228911185, "grad_norm": 2.8118788458318766, "learning_rate": 1.6895051323475882e-05, "loss": 0.8262, "step": 11306 }, { "epoch": 0.8403567447045708, "grad_norm": 2.2843286064812656, "learning_rate": 1.6894470161364764e-05, "loss": 0.8343, "step": 11307 }, { "epoch": 0.840431066518023, "grad_norm": 2.4817318677445614, "learning_rate": 1.6893888954867405e-05, "loss": 0.9074, "step": 11308 }, { "epoch": 0.8405053883314753, "grad_norm": 1.760739464350571, "learning_rate": 1.6893307703987547e-05, "loss": 0.7819, "step": 11309 }, { "epoch": 0.8405797101449275, "grad_norm": 2.0808845071004676, "learning_rate": 1.689272640872893e-05, "loss": 0.8116, "step": 11310 }, { "epoch": 0.8406540319583797, "grad_norm": 2.1357605661807444, "learning_rate": 1.68921450690953e-05, "loss": 0.861, "step": 11311 }, { "epoch": 0.8407283537718321, "grad_norm": 2.0426396721996163, "learning_rate": 1.6891563685090397e-05, "loss": 0.8413, "step": 11312 }, { "epoch": 0.8408026755852843, "grad_norm": 2.405802179778469, "learning_rate": 1.6890982256717965e-05, "loss": 0.93, "step": 11313 }, { "epoch": 0.8408769973987366, "grad_norm": 2.2554954144828017, "learning_rate": 1.6890400783981746e-05, "loss": 0.9806, "step": 11314 }, { "epoch": 0.8409513192121888, "grad_norm": 2.5022394016500153, "learning_rate": 1.6889819266885483e-05, "loss": 1.0866, "step": 11315 }, { "epoch": 0.841025641025641, "grad_norm": 1.8337004746325214, "learning_rate": 1.6889237705432924e-05, "loss": 1.1133, "step": 11316 }, { "epoch": 0.8410999628390933, "grad_norm": 1.9919948313349227, "learning_rate": 1.6888656099627808e-05, "loss": 0.929, "step": 11317 }, { "epoch": 0.8411742846525455, "grad_norm": 1.7475719072287612, "learning_rate": 1.688807444947388e-05, "loss": 0.9174, "step": 11318 }, { "epoch": 0.8412486064659978, "grad_norm": 2.263621237197472, "learning_rate": 1.688749275497489e-05, "loss": 0.7775, "step": 11319 }, { "epoch": 0.84132292827945, "grad_norm": 2.121428252466313, "learning_rate": 1.6886911016134578e-05, "loss": 0.7399, "step": 11320 }, { "epoch": 0.8413972500929022, "grad_norm": 2.465137709072466, "learning_rate": 1.6886329232956687e-05, "loss": 0.9326, "step": 11321 }, { "epoch": 0.8414715719063545, "grad_norm": 1.8303463926315455, "learning_rate": 1.6885747405444967e-05, "loss": 0.9827, "step": 11322 }, { "epoch": 0.8415458937198068, "grad_norm": 1.699298614797971, "learning_rate": 1.6885165533603164e-05, "loss": 0.836, "step": 11323 }, { "epoch": 0.841620215533259, "grad_norm": 1.7361825452303234, "learning_rate": 1.6884583617435022e-05, "loss": 0.9151, "step": 11324 }, { "epoch": 0.8416945373467113, "grad_norm": 1.9730776451682517, "learning_rate": 1.6884001656944286e-05, "loss": 1.0965, "step": 11325 }, { "epoch": 0.8417688591601635, "grad_norm": 1.7365366763098866, "learning_rate": 1.688341965213471e-05, "loss": 0.8501, "step": 11326 }, { "epoch": 0.8418431809736158, "grad_norm": 1.8332493843919677, "learning_rate": 1.6882837603010028e-05, "loss": 0.8656, "step": 11327 }, { "epoch": 0.841917502787068, "grad_norm": 6.793528281298378, "learning_rate": 1.6882255509574e-05, "loss": 0.7567, "step": 11328 }, { "epoch": 0.8419918246005202, "grad_norm": 1.7700636166688708, "learning_rate": 1.6881673371830368e-05, "loss": 0.9621, "step": 11329 }, { "epoch": 0.8420661464139725, "grad_norm": 1.929859659688825, "learning_rate": 1.6881091189782877e-05, "loss": 0.6595, "step": 11330 }, { "epoch": 0.8421404682274247, "grad_norm": 2.148427459826937, "learning_rate": 1.6880508963435282e-05, "loss": 0.8241, "step": 11331 }, { "epoch": 0.842214790040877, "grad_norm": 1.5008413947927832, "learning_rate": 1.6879926692791323e-05, "loss": 0.5613, "step": 11332 }, { "epoch": 0.8422891118543292, "grad_norm": 1.991386438155936, "learning_rate": 1.6879344377854753e-05, "loss": 0.9883, "step": 11333 }, { "epoch": 0.8423634336677815, "grad_norm": 1.4079427720663278, "learning_rate": 1.6878762018629325e-05, "loss": 0.6834, "step": 11334 }, { "epoch": 0.8424377554812338, "grad_norm": 1.8591195428163754, "learning_rate": 1.687817961511878e-05, "loss": 0.6876, "step": 11335 }, { "epoch": 0.842512077294686, "grad_norm": 2.0290793758324743, "learning_rate": 1.6877597167326874e-05, "loss": 1.0264, "step": 11336 }, { "epoch": 0.8425863991081383, "grad_norm": 1.9452546804449216, "learning_rate": 1.6877014675257352e-05, "loss": 0.8598, "step": 11337 }, { "epoch": 0.8426607209215905, "grad_norm": 1.7351064513981347, "learning_rate": 1.6876432138913967e-05, "loss": 0.7073, "step": 11338 }, { "epoch": 0.8427350427350427, "grad_norm": 2.1733083123551116, "learning_rate": 1.687584955830047e-05, "loss": 1.0683, "step": 11339 }, { "epoch": 0.842809364548495, "grad_norm": 1.789530310383705, "learning_rate": 1.687526693342061e-05, "loss": 0.8165, "step": 11340 }, { "epoch": 0.8428836863619472, "grad_norm": 1.9464841533644657, "learning_rate": 1.6874684264278138e-05, "loss": 0.8815, "step": 11341 }, { "epoch": 0.8429580081753995, "grad_norm": 2.2223456268847106, "learning_rate": 1.6874101550876806e-05, "loss": 0.6778, "step": 11342 }, { "epoch": 0.8430323299888517, "grad_norm": 2.0650202717548014, "learning_rate": 1.687351879322036e-05, "loss": 0.7637, "step": 11343 }, { "epoch": 0.8431066518023039, "grad_norm": 1.6691164917519978, "learning_rate": 1.6872935991312563e-05, "loss": 0.8358, "step": 11344 }, { "epoch": 0.8431809736157563, "grad_norm": 1.8369135269487666, "learning_rate": 1.687235314515716e-05, "loss": 0.6466, "step": 11345 }, { "epoch": 0.8432552954292085, "grad_norm": 1.730115143042963, "learning_rate": 1.6871770254757903e-05, "loss": 0.8846, "step": 11346 }, { "epoch": 0.8433296172426608, "grad_norm": 1.744611476455062, "learning_rate": 1.6871187320118545e-05, "loss": 0.7754, "step": 11347 }, { "epoch": 0.843403939056113, "grad_norm": 2.364897907485604, "learning_rate": 1.687060434124284e-05, "loss": 0.8708, "step": 11348 }, { "epoch": 0.8434782608695652, "grad_norm": 1.665035480224421, "learning_rate": 1.687002131813454e-05, "loss": 0.7344, "step": 11349 }, { "epoch": 0.8435525826830175, "grad_norm": 1.6285552884877077, "learning_rate": 1.68694382507974e-05, "loss": 0.6625, "step": 11350 }, { "epoch": 0.8436269044964697, "grad_norm": 2.2930392447103087, "learning_rate": 1.686885513923517e-05, "loss": 0.9024, "step": 11351 }, { "epoch": 0.843701226309922, "grad_norm": 1.9162632776351167, "learning_rate": 1.686827198345161e-05, "loss": 0.843, "step": 11352 }, { "epoch": 0.8437755481233742, "grad_norm": 1.8904001120958456, "learning_rate": 1.686768878345047e-05, "loss": 0.8199, "step": 11353 }, { "epoch": 0.8438498699368264, "grad_norm": 1.8754903945717971, "learning_rate": 1.686710553923551e-05, "loss": 0.7068, "step": 11354 }, { "epoch": 0.8439241917502787, "grad_norm": 1.5088242249290909, "learning_rate": 1.6866522250810476e-05, "loss": 0.8168, "step": 11355 }, { "epoch": 0.8439985135637309, "grad_norm": 1.7880819694711905, "learning_rate": 1.6865938918179132e-05, "loss": 0.8759, "step": 11356 }, { "epoch": 0.8440728353771833, "grad_norm": 1.7488400786532718, "learning_rate": 1.6865355541345224e-05, "loss": 0.61, "step": 11357 }, { "epoch": 0.8441471571906355, "grad_norm": 1.9080477677440326, "learning_rate": 1.6864772120312517e-05, "loss": 0.6154, "step": 11358 }, { "epoch": 0.8442214790040877, "grad_norm": 2.435605884829533, "learning_rate": 1.6864188655084765e-05, "loss": 0.7452, "step": 11359 }, { "epoch": 0.84429580081754, "grad_norm": 2.1328348752689092, "learning_rate": 1.686360514566572e-05, "loss": 0.9983, "step": 11360 }, { "epoch": 0.8443701226309922, "grad_norm": 1.7517247596455903, "learning_rate": 1.6863021592059142e-05, "loss": 0.9252, "step": 11361 }, { "epoch": 0.8444444444444444, "grad_norm": 1.6732319716264743, "learning_rate": 1.686243799426879e-05, "loss": 0.8351, "step": 11362 }, { "epoch": 0.8445187662578967, "grad_norm": 1.917154527137083, "learning_rate": 1.6861854352298414e-05, "loss": 0.8175, "step": 11363 }, { "epoch": 0.8445930880713489, "grad_norm": 1.7709099345851658, "learning_rate": 1.6861270666151774e-05, "loss": 0.852, "step": 11364 }, { "epoch": 0.8446674098848012, "grad_norm": 1.8813627333352894, "learning_rate": 1.6860686935832633e-05, "loss": 0.8191, "step": 11365 }, { "epoch": 0.8447417316982534, "grad_norm": 1.769157207697481, "learning_rate": 1.6860103161344746e-05, "loss": 0.703, "step": 11366 }, { "epoch": 0.8448160535117056, "grad_norm": 2.1540615151888987, "learning_rate": 1.6859519342691867e-05, "loss": 0.9873, "step": 11367 }, { "epoch": 0.844890375325158, "grad_norm": 1.4466997024221973, "learning_rate": 1.6858935479877763e-05, "loss": 0.6294, "step": 11368 }, { "epoch": 0.8449646971386102, "grad_norm": 2.275700378588268, "learning_rate": 1.6858351572906184e-05, "loss": 1.1842, "step": 11369 }, { "epoch": 0.8450390189520625, "grad_norm": 1.717050126211752, "learning_rate": 1.68577676217809e-05, "loss": 0.7068, "step": 11370 }, { "epoch": 0.8451133407655147, "grad_norm": 1.9533256492024578, "learning_rate": 1.6857183626505658e-05, "loss": 0.8235, "step": 11371 }, { "epoch": 0.8451876625789669, "grad_norm": 1.624657314650646, "learning_rate": 1.6856599587084222e-05, "loss": 0.8179, "step": 11372 }, { "epoch": 0.8452619843924192, "grad_norm": 2.4708573365671196, "learning_rate": 1.685601550352036e-05, "loss": 0.9566, "step": 11373 }, { "epoch": 0.8453363062058714, "grad_norm": 2.0715076841422735, "learning_rate": 1.6855431375817822e-05, "loss": 0.8863, "step": 11374 }, { "epoch": 0.8454106280193237, "grad_norm": 2.501527614983886, "learning_rate": 1.6854847203980372e-05, "loss": 0.9984, "step": 11375 }, { "epoch": 0.8454849498327759, "grad_norm": 2.255130404616316, "learning_rate": 1.6854262988011774e-05, "loss": 1.034, "step": 11376 }, { "epoch": 0.8455592716462281, "grad_norm": 1.946630090477418, "learning_rate": 1.6853678727915782e-05, "loss": 0.831, "step": 11377 }, { "epoch": 0.8456335934596804, "grad_norm": 2.0316197275355385, "learning_rate": 1.6853094423696166e-05, "loss": 0.5805, "step": 11378 }, { "epoch": 0.8457079152731327, "grad_norm": 1.5088616204379828, "learning_rate": 1.685251007535668e-05, "loss": 0.7943, "step": 11379 }, { "epoch": 0.845782237086585, "grad_norm": 1.9493343560317042, "learning_rate": 1.6851925682901095e-05, "loss": 1.0193, "step": 11380 }, { "epoch": 0.8458565589000372, "grad_norm": 1.715679138298461, "learning_rate": 1.6851341246333168e-05, "loss": 1.0396, "step": 11381 }, { "epoch": 0.8459308807134894, "grad_norm": 2.103210116752609, "learning_rate": 1.6850756765656657e-05, "loss": 1.0548, "step": 11382 }, { "epoch": 0.8460052025269417, "grad_norm": 1.8433913724828026, "learning_rate": 1.685017224087533e-05, "loss": 0.7315, "step": 11383 }, { "epoch": 0.8460795243403939, "grad_norm": 7.310393678962175, "learning_rate": 1.684958767199295e-05, "loss": 1.0652, "step": 11384 }, { "epoch": 0.8461538461538461, "grad_norm": 1.5529790971918729, "learning_rate": 1.6849003059013282e-05, "loss": 0.7502, "step": 11385 }, { "epoch": 0.8462281679672984, "grad_norm": 1.678100012632091, "learning_rate": 1.6848418401940087e-05, "loss": 0.7717, "step": 11386 }, { "epoch": 0.8463024897807506, "grad_norm": 1.8297897706913442, "learning_rate": 1.684783370077713e-05, "loss": 0.7444, "step": 11387 }, { "epoch": 0.8463768115942029, "grad_norm": 1.7665338534959378, "learning_rate": 1.6847248955528173e-05, "loss": 0.8184, "step": 11388 }, { "epoch": 0.8464511334076551, "grad_norm": 2.3845042758043737, "learning_rate": 1.6846664166196986e-05, "loss": 0.9246, "step": 11389 }, { "epoch": 0.8465254552211074, "grad_norm": 1.8040890605901578, "learning_rate": 1.6846079332787323e-05, "loss": 0.762, "step": 11390 }, { "epoch": 0.8465997770345597, "grad_norm": 1.4831059443401955, "learning_rate": 1.6845494455302964e-05, "loss": 0.715, "step": 11391 }, { "epoch": 0.8466740988480119, "grad_norm": 2.149918876815248, "learning_rate": 1.684490953374766e-05, "loss": 1.0482, "step": 11392 }, { "epoch": 0.8467484206614642, "grad_norm": 1.8117217858366268, "learning_rate": 1.684432456812519e-05, "loss": 0.8622, "step": 11393 }, { "epoch": 0.8468227424749164, "grad_norm": 1.937789724608027, "learning_rate": 1.6843739558439314e-05, "loss": 0.869, "step": 11394 }, { "epoch": 0.8468970642883686, "grad_norm": 2.3882547707619346, "learning_rate": 1.6843154504693796e-05, "loss": 0.8104, "step": 11395 }, { "epoch": 0.8469713861018209, "grad_norm": 2.0876931280409208, "learning_rate": 1.68425694068924e-05, "loss": 0.875, "step": 11396 }, { "epoch": 0.8470457079152731, "grad_norm": 2.1897629033808275, "learning_rate": 1.68419842650389e-05, "loss": 0.9362, "step": 11397 }, { "epoch": 0.8471200297287254, "grad_norm": 6.944868057662067, "learning_rate": 1.6841399079137063e-05, "loss": 0.8653, "step": 11398 }, { "epoch": 0.8471943515421776, "grad_norm": 1.8571620693815516, "learning_rate": 1.684081384919065e-05, "loss": 0.8656, "step": 11399 }, { "epoch": 0.8472686733556298, "grad_norm": 2.2480605039412396, "learning_rate": 1.6840228575203437e-05, "loss": 0.7593, "step": 11400 }, { "epoch": 0.8473429951690822, "grad_norm": 2.3460154124612433, "learning_rate": 1.6839643257179184e-05, "loss": 0.8833, "step": 11401 }, { "epoch": 0.8474173169825344, "grad_norm": 2.326347302425256, "learning_rate": 1.6839057895121662e-05, "loss": 0.7879, "step": 11402 }, { "epoch": 0.8474916387959867, "grad_norm": 2.2764179658488586, "learning_rate": 1.6838472489034643e-05, "loss": 0.8287, "step": 11403 }, { "epoch": 0.8475659606094389, "grad_norm": 2.2961597797154836, "learning_rate": 1.683788703892189e-05, "loss": 0.8979, "step": 11404 }, { "epoch": 0.8476402824228911, "grad_norm": 1.675242521264999, "learning_rate": 1.683730154478718e-05, "loss": 0.5563, "step": 11405 }, { "epoch": 0.8477146042363434, "grad_norm": 1.8464963897199758, "learning_rate": 1.6836716006634272e-05, "loss": 0.8729, "step": 11406 }, { "epoch": 0.8477889260497956, "grad_norm": 1.9123801825519955, "learning_rate": 1.683613042446694e-05, "loss": 0.8487, "step": 11407 }, { "epoch": 0.8478632478632478, "grad_norm": 2.7631498901352485, "learning_rate": 1.6835544798288958e-05, "loss": 0.8545, "step": 11408 }, { "epoch": 0.8479375696767001, "grad_norm": 2.1843997275819786, "learning_rate": 1.6834959128104092e-05, "loss": 1.043, "step": 11409 }, { "epoch": 0.8480118914901523, "grad_norm": 2.167313609267023, "learning_rate": 1.6834373413916116e-05, "loss": 0.9737, "step": 11410 }, { "epoch": 0.8480862133036046, "grad_norm": 1.6339131274141, "learning_rate": 1.6833787655728797e-05, "loss": 0.5871, "step": 11411 }, { "epoch": 0.8481605351170568, "grad_norm": 2.0338041874295305, "learning_rate": 1.6833201853545912e-05, "loss": 0.9397, "step": 11412 }, { "epoch": 0.8482348569305092, "grad_norm": 1.758121393523219, "learning_rate": 1.6832616007371222e-05, "loss": 0.888, "step": 11413 }, { "epoch": 0.8483091787439614, "grad_norm": 1.9408850004373601, "learning_rate": 1.6832030117208505e-05, "loss": 0.8186, "step": 11414 }, { "epoch": 0.8483835005574136, "grad_norm": 1.791082524395934, "learning_rate": 1.6831444183061536e-05, "loss": 0.9473, "step": 11415 }, { "epoch": 0.8484578223708659, "grad_norm": 1.6267981801478877, "learning_rate": 1.6830858204934082e-05, "loss": 0.767, "step": 11416 }, { "epoch": 0.8485321441843181, "grad_norm": 1.9403105855808354, "learning_rate": 1.6830272182829918e-05, "loss": 0.9037, "step": 11417 }, { "epoch": 0.8486064659977703, "grad_norm": 2.027654537044272, "learning_rate": 1.6829686116752813e-05, "loss": 0.8529, "step": 11418 }, { "epoch": 0.8486807878112226, "grad_norm": 1.7905356936966537, "learning_rate": 1.6829100006706547e-05, "loss": 0.6484, "step": 11419 }, { "epoch": 0.8487551096246748, "grad_norm": 2.057296641389278, "learning_rate": 1.6828513852694887e-05, "loss": 0.8803, "step": 11420 }, { "epoch": 0.8488294314381271, "grad_norm": 1.6537800319830867, "learning_rate": 1.6827927654721614e-05, "loss": 0.748, "step": 11421 }, { "epoch": 0.8489037532515793, "grad_norm": 2.098124116827336, "learning_rate": 1.682734141279049e-05, "loss": 0.8565, "step": 11422 }, { "epoch": 0.8489780750650315, "grad_norm": 2.00707597275141, "learning_rate": 1.68267551269053e-05, "loss": 0.7992, "step": 11423 }, { "epoch": 0.8490523968784839, "grad_norm": 2.180756992407916, "learning_rate": 1.6826168797069815e-05, "loss": 0.6694, "step": 11424 }, { "epoch": 0.8491267186919361, "grad_norm": 1.7868133442734786, "learning_rate": 1.682558242328781e-05, "loss": 1.0067, "step": 11425 }, { "epoch": 0.8492010405053884, "grad_norm": 2.2116636269563625, "learning_rate": 1.6824996005563056e-05, "loss": 0.7171, "step": 11426 }, { "epoch": 0.8492753623188406, "grad_norm": 2.0331021968762903, "learning_rate": 1.6824409543899336e-05, "loss": 0.9204, "step": 11427 }, { "epoch": 0.8493496841322928, "grad_norm": 2.5103615896040474, "learning_rate": 1.6823823038300417e-05, "loss": 1.0556, "step": 11428 }, { "epoch": 0.8494240059457451, "grad_norm": 1.7815248117614093, "learning_rate": 1.6823236488770083e-05, "loss": 0.9061, "step": 11429 }, { "epoch": 0.8494983277591973, "grad_norm": 1.7321342798184056, "learning_rate": 1.6822649895312105e-05, "loss": 0.9128, "step": 11430 }, { "epoch": 0.8495726495726496, "grad_norm": 1.7935723364315466, "learning_rate": 1.6822063257930262e-05, "loss": 0.9865, "step": 11431 }, { "epoch": 0.8496469713861018, "grad_norm": 1.6556679606199396, "learning_rate": 1.6821476576628327e-05, "loss": 0.8293, "step": 11432 }, { "epoch": 0.849721293199554, "grad_norm": 2.730772664171164, "learning_rate": 1.682088985141008e-05, "loss": 0.5891, "step": 11433 }, { "epoch": 0.8497956150130063, "grad_norm": 1.7965025709430265, "learning_rate": 1.6820303082279304e-05, "loss": 0.7524, "step": 11434 }, { "epoch": 0.8498699368264586, "grad_norm": 2.3873840512463573, "learning_rate": 1.6819716269239768e-05, "loss": 1.1246, "step": 11435 }, { "epoch": 0.8499442586399109, "grad_norm": 1.9200307993094896, "learning_rate": 1.6819129412295248e-05, "loss": 0.6468, "step": 11436 }, { "epoch": 0.8500185804533631, "grad_norm": 1.9410507031878323, "learning_rate": 1.681854251144953e-05, "loss": 0.8978, "step": 11437 }, { "epoch": 0.8500929022668153, "grad_norm": 1.6689023565142662, "learning_rate": 1.681795556670639e-05, "loss": 0.723, "step": 11438 }, { "epoch": 0.8501672240802676, "grad_norm": 2.4828350918644926, "learning_rate": 1.6817368578069604e-05, "loss": 0.8595, "step": 11439 }, { "epoch": 0.8502415458937198, "grad_norm": 1.6527508334129735, "learning_rate": 1.6816781545542957e-05, "loss": 0.914, "step": 11440 }, { "epoch": 0.850315867707172, "grad_norm": 2.196743610720119, "learning_rate": 1.6816194469130218e-05, "loss": 0.8552, "step": 11441 }, { "epoch": 0.8503901895206243, "grad_norm": 3.4770804010312895, "learning_rate": 1.6815607348835176e-05, "loss": 0.8746, "step": 11442 }, { "epoch": 0.8504645113340765, "grad_norm": 2.841020014744808, "learning_rate": 1.6815020184661604e-05, "loss": 0.7989, "step": 11443 }, { "epoch": 0.8505388331475288, "grad_norm": 2.204823888548254, "learning_rate": 1.6814432976613288e-05, "loss": 0.9483, "step": 11444 }, { "epoch": 0.850613154960981, "grad_norm": 1.9929852673293362, "learning_rate": 1.6813845724694008e-05, "loss": 0.9373, "step": 11445 }, { "epoch": 0.8506874767744333, "grad_norm": 1.910069847951069, "learning_rate": 1.681325842890754e-05, "loss": 0.9235, "step": 11446 }, { "epoch": 0.8507617985878856, "grad_norm": 1.9000822122043355, "learning_rate": 1.6812671089257672e-05, "loss": 0.9877, "step": 11447 }, { "epoch": 0.8508361204013378, "grad_norm": 1.8797340324600935, "learning_rate": 1.6812083705748175e-05, "loss": 0.915, "step": 11448 }, { "epoch": 0.8509104422147901, "grad_norm": 1.854663018438596, "learning_rate": 1.6811496278382842e-05, "loss": 0.809, "step": 11449 }, { "epoch": 0.8509847640282423, "grad_norm": 1.8980884145086616, "learning_rate": 1.6810908807165445e-05, "loss": 0.8508, "step": 11450 }, { "epoch": 0.8510590858416945, "grad_norm": 2.062236420585546, "learning_rate": 1.6810321292099774e-05, "loss": 0.8209, "step": 11451 }, { "epoch": 0.8511334076551468, "grad_norm": 1.8079313849250622, "learning_rate": 1.6809733733189605e-05, "loss": 0.6684, "step": 11452 }, { "epoch": 0.851207729468599, "grad_norm": 1.7890719960255883, "learning_rate": 1.6809146130438724e-05, "loss": 0.8138, "step": 11453 }, { "epoch": 0.8512820512820513, "grad_norm": 1.7592685071150034, "learning_rate": 1.6808558483850914e-05, "loss": 0.7804, "step": 11454 }, { "epoch": 0.8513563730955035, "grad_norm": 2.1192680277352953, "learning_rate": 1.6807970793429958e-05, "loss": 0.872, "step": 11455 }, { "epoch": 0.8514306949089557, "grad_norm": 2.1926132437750483, "learning_rate": 1.6807383059179642e-05, "loss": 1.0059, "step": 11456 }, { "epoch": 0.8515050167224081, "grad_norm": 1.89482411733873, "learning_rate": 1.6806795281103743e-05, "loss": 0.7839, "step": 11457 }, { "epoch": 0.8515793385358603, "grad_norm": 1.973471322413375, "learning_rate": 1.6806207459206052e-05, "loss": 0.9201, "step": 11458 }, { "epoch": 0.8516536603493126, "grad_norm": 6.284209831802708, "learning_rate": 1.680561959349035e-05, "loss": 0.9146, "step": 11459 }, { "epoch": 0.8517279821627648, "grad_norm": 1.8868029145433218, "learning_rate": 1.680503168396042e-05, "loss": 0.7856, "step": 11460 }, { "epoch": 0.851802303976217, "grad_norm": 1.7724014422437888, "learning_rate": 1.680444373062005e-05, "loss": 0.8398, "step": 11461 }, { "epoch": 0.8518766257896693, "grad_norm": 1.4977094690706136, "learning_rate": 1.6803855733473026e-05, "loss": 0.7021, "step": 11462 }, { "epoch": 0.8519509476031215, "grad_norm": 1.9754612615096172, "learning_rate": 1.680326769252313e-05, "loss": 0.9021, "step": 11463 }, { "epoch": 0.8520252694165737, "grad_norm": 1.9295173866539026, "learning_rate": 1.680267960777415e-05, "loss": 0.8724, "step": 11464 }, { "epoch": 0.852099591230026, "grad_norm": 1.5794494667338002, "learning_rate": 1.680209147922987e-05, "loss": 0.8034, "step": 11465 }, { "epoch": 0.8521739130434782, "grad_norm": 1.7399610154043186, "learning_rate": 1.6801503306894084e-05, "loss": 0.9129, "step": 11466 }, { "epoch": 0.8522482348569305, "grad_norm": 2.1940225030808675, "learning_rate": 1.6800915090770566e-05, "loss": 0.8097, "step": 11467 }, { "epoch": 0.8523225566703827, "grad_norm": 2.1497137204151913, "learning_rate": 1.6800326830863112e-05, "loss": 0.8179, "step": 11468 }, { "epoch": 0.852396878483835, "grad_norm": 1.9948668837327985, "learning_rate": 1.679973852717551e-05, "loss": 0.8891, "step": 11469 }, { "epoch": 0.8524712002972873, "grad_norm": 1.8451120474567428, "learning_rate": 1.6799150179711538e-05, "loss": 0.928, "step": 11470 }, { "epoch": 0.8525455221107395, "grad_norm": 1.7567665666643277, "learning_rate": 1.6798561788474993e-05, "loss": 0.9993, "step": 11471 }, { "epoch": 0.8526198439241918, "grad_norm": 5.344631328921872, "learning_rate": 1.679797335346966e-05, "loss": 0.5386, "step": 11472 }, { "epoch": 0.852694165737644, "grad_norm": 1.588594765107116, "learning_rate": 1.6797384874699326e-05, "loss": 0.652, "step": 11473 }, { "epoch": 0.8527684875510962, "grad_norm": 2.2726828549063574, "learning_rate": 1.6796796352167782e-05, "loss": 0.8579, "step": 11474 }, { "epoch": 0.8528428093645485, "grad_norm": 2.4875441670278433, "learning_rate": 1.6796207785878818e-05, "loss": 0.8037, "step": 11475 }, { "epoch": 0.8529171311780007, "grad_norm": 1.641360443496091, "learning_rate": 1.6795619175836218e-05, "loss": 0.7698, "step": 11476 }, { "epoch": 0.852991452991453, "grad_norm": 2.0391492919525476, "learning_rate": 1.6795030522043773e-05, "loss": 0.7187, "step": 11477 }, { "epoch": 0.8530657748049052, "grad_norm": 3.0720780921456345, "learning_rate": 1.6794441824505278e-05, "loss": 0.7485, "step": 11478 }, { "epoch": 0.8531400966183574, "grad_norm": 2.03690364512927, "learning_rate": 1.6793853083224518e-05, "loss": 0.6776, "step": 11479 }, { "epoch": 0.8532144184318098, "grad_norm": 2.591886007477682, "learning_rate": 1.679326429820528e-05, "loss": 0.8749, "step": 11480 }, { "epoch": 0.853288740245262, "grad_norm": 1.6586449914484327, "learning_rate": 1.6792675469451363e-05, "loss": 0.5808, "step": 11481 }, { "epoch": 0.8533630620587143, "grad_norm": 2.784951870968742, "learning_rate": 1.6792086596966554e-05, "loss": 0.798, "step": 11482 }, { "epoch": 0.8534373838721665, "grad_norm": 2.032058884967939, "learning_rate": 1.6791497680754646e-05, "loss": 1.0676, "step": 11483 }, { "epoch": 0.8535117056856187, "grad_norm": 1.976814786944358, "learning_rate": 1.6790908720819425e-05, "loss": 0.7119, "step": 11484 }, { "epoch": 0.853586027499071, "grad_norm": 1.886640599104251, "learning_rate": 1.6790319717164683e-05, "loss": 0.8616, "step": 11485 }, { "epoch": 0.8536603493125232, "grad_norm": 1.767116637197302, "learning_rate": 1.678973066979422e-05, "loss": 0.9515, "step": 11486 }, { "epoch": 0.8537346711259755, "grad_norm": 2.5154292658892192, "learning_rate": 1.678914157871182e-05, "loss": 0.8184, "step": 11487 }, { "epoch": 0.8538089929394277, "grad_norm": 1.976966863408298, "learning_rate": 1.678855244392128e-05, "loss": 0.9824, "step": 11488 }, { "epoch": 0.8538833147528799, "grad_norm": 4.383923877549068, "learning_rate": 1.678796326542639e-05, "loss": 0.9, "step": 11489 }, { "epoch": 0.8539576365663322, "grad_norm": 1.7417227164114437, "learning_rate": 1.678737404323095e-05, "loss": 0.7572, "step": 11490 }, { "epoch": 0.8540319583797845, "grad_norm": 1.9334211626848907, "learning_rate": 1.6786784777338744e-05, "loss": 0.886, "step": 11491 }, { "epoch": 0.8541062801932368, "grad_norm": 1.802443040187828, "learning_rate": 1.678619546775357e-05, "loss": 0.9328, "step": 11492 }, { "epoch": 0.854180602006689, "grad_norm": 1.8248210222627772, "learning_rate": 1.6785606114479224e-05, "loss": 0.6368, "step": 11493 }, { "epoch": 0.8542549238201412, "grad_norm": 1.9911332255802976, "learning_rate": 1.6785016717519495e-05, "loss": 0.8407, "step": 11494 }, { "epoch": 0.8543292456335935, "grad_norm": 2.355559203265731, "learning_rate": 1.6784427276878183e-05, "loss": 0.9767, "step": 11495 }, { "epoch": 0.8544035674470457, "grad_norm": 1.9284762686038337, "learning_rate": 1.6783837792559077e-05, "loss": 0.653, "step": 11496 }, { "epoch": 0.854477889260498, "grad_norm": 1.6616004652256877, "learning_rate": 1.6783248264565978e-05, "loss": 0.8284, "step": 11497 }, { "epoch": 0.8545522110739502, "grad_norm": 2.083918187664324, "learning_rate": 1.6782658692902674e-05, "loss": 1.0766, "step": 11498 }, { "epoch": 0.8546265328874024, "grad_norm": 1.8758701684823162, "learning_rate": 1.678206907757297e-05, "loss": 0.8766, "step": 11499 }, { "epoch": 0.8547008547008547, "grad_norm": 3.6723847855632283, "learning_rate": 1.6781479418580658e-05, "loss": 0.7514, "step": 11500 }, { "epoch": 0.8547751765143069, "grad_norm": 1.8974800738452238, "learning_rate": 1.678088971592953e-05, "loss": 1.0184, "step": 11501 }, { "epoch": 0.8548494983277592, "grad_norm": 2.0163925370094073, "learning_rate": 1.678029996962339e-05, "loss": 1.0769, "step": 11502 }, { "epoch": 0.8549238201412115, "grad_norm": 1.7784649027005224, "learning_rate": 1.6779710179666024e-05, "loss": 0.7619, "step": 11503 }, { "epoch": 0.8549981419546637, "grad_norm": 1.9934803082876977, "learning_rate": 1.677912034606124e-05, "loss": 0.7925, "step": 11504 }, { "epoch": 0.855072463768116, "grad_norm": 2.1975321995241677, "learning_rate": 1.6778530468812832e-05, "loss": 0.9097, "step": 11505 }, { "epoch": 0.8551467855815682, "grad_norm": 1.8541394569503669, "learning_rate": 1.6777940547924597e-05, "loss": 0.7954, "step": 11506 }, { "epoch": 0.8552211073950204, "grad_norm": 1.8844083391939006, "learning_rate": 1.677735058340033e-05, "loss": 0.6976, "step": 11507 }, { "epoch": 0.8552954292084727, "grad_norm": 1.940446216335413, "learning_rate": 1.677676057524383e-05, "loss": 0.904, "step": 11508 }, { "epoch": 0.8553697510219249, "grad_norm": 1.8788853211465868, "learning_rate": 1.67761705234589e-05, "loss": 0.9911, "step": 11509 }, { "epoch": 0.8554440728353772, "grad_norm": 2.1836196563148533, "learning_rate": 1.6775580428049334e-05, "loss": 1.1159, "step": 11510 }, { "epoch": 0.8555183946488294, "grad_norm": 2.129241776824128, "learning_rate": 1.6774990289018935e-05, "loss": 1.0489, "step": 11511 }, { "epoch": 0.8555927164622816, "grad_norm": 1.958293671891989, "learning_rate": 1.67744001063715e-05, "loss": 0.9423, "step": 11512 }, { "epoch": 0.855667038275734, "grad_norm": 1.7193924753296823, "learning_rate": 1.6773809880110824e-05, "loss": 0.8048, "step": 11513 }, { "epoch": 0.8557413600891862, "grad_norm": 1.7780673823910225, "learning_rate": 1.6773219610240715e-05, "loss": 0.8568, "step": 11514 }, { "epoch": 0.8558156819026385, "grad_norm": 1.7529275622977374, "learning_rate": 1.677262929676497e-05, "loss": 0.7552, "step": 11515 }, { "epoch": 0.8558900037160907, "grad_norm": 3.201512736449494, "learning_rate": 1.6772038939687386e-05, "loss": 0.8796, "step": 11516 }, { "epoch": 0.8559643255295429, "grad_norm": 1.7569594619271725, "learning_rate": 1.6771448539011772e-05, "loss": 0.7146, "step": 11517 }, { "epoch": 0.8560386473429952, "grad_norm": 2.298789636088071, "learning_rate": 1.677085809474192e-05, "loss": 0.8234, "step": 11518 }, { "epoch": 0.8561129691564474, "grad_norm": 2.379858808706777, "learning_rate": 1.6770267606881635e-05, "loss": 1.0233, "step": 11519 }, { "epoch": 0.8561872909698997, "grad_norm": 1.7054323747139823, "learning_rate": 1.6769677075434718e-05, "loss": 0.6493, "step": 11520 }, { "epoch": 0.8562616127833519, "grad_norm": 2.33125874768159, "learning_rate": 1.6769086500404975e-05, "loss": 0.8969, "step": 11521 }, { "epoch": 0.8563359345968041, "grad_norm": 1.7555483243594874, "learning_rate": 1.67684958817962e-05, "loss": 0.8758, "step": 11522 }, { "epoch": 0.8564102564102564, "grad_norm": 2.026554182629885, "learning_rate": 1.67679052196122e-05, "loss": 0.8026, "step": 11523 }, { "epoch": 0.8564845782237086, "grad_norm": 2.5862708921366258, "learning_rate": 1.676731451385678e-05, "loss": 0.9872, "step": 11524 }, { "epoch": 0.856558900037161, "grad_norm": 11.0163546823112, "learning_rate": 1.676672376453374e-05, "loss": 1.0529, "step": 11525 }, { "epoch": 0.8566332218506132, "grad_norm": 2.049274053311364, "learning_rate": 1.6766132971646882e-05, "loss": 0.8652, "step": 11526 }, { "epoch": 0.8567075436640654, "grad_norm": 1.7778562094909423, "learning_rate": 1.6765542135200013e-05, "loss": 0.9049, "step": 11527 }, { "epoch": 0.8567818654775177, "grad_norm": 1.6879250279470268, "learning_rate": 1.676495125519693e-05, "loss": 0.7404, "step": 11528 }, { "epoch": 0.8568561872909699, "grad_norm": 1.5786040685920946, "learning_rate": 1.6764360331641448e-05, "loss": 0.7897, "step": 11529 }, { "epoch": 0.8569305091044221, "grad_norm": 1.8991924755008958, "learning_rate": 1.676376936453736e-05, "loss": 0.7881, "step": 11530 }, { "epoch": 0.8570048309178744, "grad_norm": 1.856005000477168, "learning_rate": 1.6763178353888477e-05, "loss": 0.7078, "step": 11531 }, { "epoch": 0.8570791527313266, "grad_norm": 2.2819347624154016, "learning_rate": 1.6762587299698606e-05, "loss": 0.7441, "step": 11532 }, { "epoch": 0.8571534745447789, "grad_norm": 2.0722825668083025, "learning_rate": 1.6761996201971546e-05, "loss": 1.0668, "step": 11533 }, { "epoch": 0.8572277963582311, "grad_norm": 2.0205405619816332, "learning_rate": 1.6761405060711106e-05, "loss": 0.8456, "step": 11534 }, { "epoch": 0.8573021181716833, "grad_norm": 1.602418697359053, "learning_rate": 1.676081387592109e-05, "loss": 0.6399, "step": 11535 }, { "epoch": 0.8573764399851357, "grad_norm": 1.4954409483517763, "learning_rate": 1.6760222647605307e-05, "loss": 0.7657, "step": 11536 }, { "epoch": 0.8574507617985879, "grad_norm": 1.7967201742042982, "learning_rate": 1.675963137576756e-05, "loss": 0.8976, "step": 11537 }, { "epoch": 0.8575250836120402, "grad_norm": 1.8419539782802203, "learning_rate": 1.6759040060411656e-05, "loss": 0.7104, "step": 11538 }, { "epoch": 0.8575994054254924, "grad_norm": 1.6956145257373592, "learning_rate": 1.6758448701541405e-05, "loss": 0.7752, "step": 11539 }, { "epoch": 0.8576737272389446, "grad_norm": 1.789930382658988, "learning_rate": 1.6757857299160612e-05, "loss": 0.887, "step": 11540 }, { "epoch": 0.8577480490523969, "grad_norm": 1.494852634609331, "learning_rate": 1.675726585327308e-05, "loss": 0.7659, "step": 11541 }, { "epoch": 0.8578223708658491, "grad_norm": 2.0424712621939656, "learning_rate": 1.6756674363882627e-05, "loss": 0.8317, "step": 11542 }, { "epoch": 0.8578966926793014, "grad_norm": 2.1565312049396055, "learning_rate": 1.675608283099305e-05, "loss": 0.9464, "step": 11543 }, { "epoch": 0.8579710144927536, "grad_norm": 2.0822415203243687, "learning_rate": 1.6755491254608168e-05, "loss": 1.0515, "step": 11544 }, { "epoch": 0.8580453363062058, "grad_norm": 2.0277091511479832, "learning_rate": 1.675489963473178e-05, "loss": 0.7648, "step": 11545 }, { "epoch": 0.8581196581196581, "grad_norm": 2.380379478734217, "learning_rate": 1.67543079713677e-05, "loss": 0.6766, "step": 11546 }, { "epoch": 0.8581939799331104, "grad_norm": 2.221471905718567, "learning_rate": 1.6753716264519732e-05, "loss": 0.8594, "step": 11547 }, { "epoch": 0.8582683017465627, "grad_norm": 1.8008364201749056, "learning_rate": 1.6753124514191696e-05, "loss": 0.7713, "step": 11548 }, { "epoch": 0.8583426235600149, "grad_norm": 3.1887962301827004, "learning_rate": 1.675253272038739e-05, "loss": 0.5697, "step": 11549 }, { "epoch": 0.8584169453734671, "grad_norm": 2.145534511255611, "learning_rate": 1.6751940883110626e-05, "loss": 0.8562, "step": 11550 }, { "epoch": 0.8584912671869194, "grad_norm": 2.034929338032615, "learning_rate": 1.675134900236522e-05, "loss": 0.8665, "step": 11551 }, { "epoch": 0.8585655890003716, "grad_norm": 2.2956548103648973, "learning_rate": 1.6750757078154983e-05, "loss": 0.8207, "step": 11552 }, { "epoch": 0.8586399108138238, "grad_norm": 1.8770795490390257, "learning_rate": 1.6750165110483717e-05, "loss": 0.8537, "step": 11553 }, { "epoch": 0.8587142326272761, "grad_norm": 1.5383933080584866, "learning_rate": 1.674957309935524e-05, "loss": 0.6331, "step": 11554 }, { "epoch": 0.8587885544407283, "grad_norm": 2.0456638497008113, "learning_rate": 1.6748981044773362e-05, "loss": 0.9481, "step": 11555 }, { "epoch": 0.8588628762541806, "grad_norm": 2.0955257573666493, "learning_rate": 1.6748388946741896e-05, "loss": 1.1208, "step": 11556 }, { "epoch": 0.8589371980676328, "grad_norm": 1.8356080417949736, "learning_rate": 1.674779680526465e-05, "loss": 0.9036, "step": 11557 }, { "epoch": 0.8590115198810852, "grad_norm": 1.8441272311855794, "learning_rate": 1.674720462034544e-05, "loss": 0.8355, "step": 11558 }, { "epoch": 0.8590858416945374, "grad_norm": 1.9764619658490796, "learning_rate": 1.6746612391988074e-05, "loss": 1.0462, "step": 11559 }, { "epoch": 0.8591601635079896, "grad_norm": 2.2237660146156566, "learning_rate": 1.6746020120196367e-05, "loss": 0.7734, "step": 11560 }, { "epoch": 0.8592344853214419, "grad_norm": 1.9334217414342687, "learning_rate": 1.6745427804974134e-05, "loss": 0.9112, "step": 11561 }, { "epoch": 0.8593088071348941, "grad_norm": 1.8117096894672284, "learning_rate": 1.674483544632519e-05, "loss": 0.7647, "step": 11562 }, { "epoch": 0.8593831289483463, "grad_norm": 1.5817550198817827, "learning_rate": 1.674424304425334e-05, "loss": 0.7913, "step": 11563 }, { "epoch": 0.8594574507617986, "grad_norm": 2.0596145854284162, "learning_rate": 1.6743650598762407e-05, "loss": 0.9017, "step": 11564 }, { "epoch": 0.8595317725752508, "grad_norm": 1.8241299982409127, "learning_rate": 1.67430581098562e-05, "loss": 1.051, "step": 11565 }, { "epoch": 0.8596060943887031, "grad_norm": 2.1570184334671274, "learning_rate": 1.6742465577538534e-05, "loss": 0.8449, "step": 11566 }, { "epoch": 0.8596804162021553, "grad_norm": 1.6872913236436886, "learning_rate": 1.6741873001813226e-05, "loss": 0.5743, "step": 11567 }, { "epoch": 0.8597547380156075, "grad_norm": 1.7048667285663766, "learning_rate": 1.674128038268409e-05, "loss": 1.0353, "step": 11568 }, { "epoch": 0.8598290598290599, "grad_norm": 1.954311510674331, "learning_rate": 1.6740687720154935e-05, "loss": 0.926, "step": 11569 }, { "epoch": 0.8599033816425121, "grad_norm": 1.861346409740398, "learning_rate": 1.6740095014229587e-05, "loss": 0.8236, "step": 11570 }, { "epoch": 0.8599777034559644, "grad_norm": 1.718946601120322, "learning_rate": 1.6739502264911858e-05, "loss": 0.702, "step": 11571 }, { "epoch": 0.8600520252694166, "grad_norm": 1.89835879781232, "learning_rate": 1.673890947220556e-05, "loss": 0.7292, "step": 11572 }, { "epoch": 0.8601263470828688, "grad_norm": 2.1303303715468753, "learning_rate": 1.6738316636114516e-05, "loss": 0.8221, "step": 11573 }, { "epoch": 0.8602006688963211, "grad_norm": 1.8609300396133739, "learning_rate": 1.6737723756642538e-05, "loss": 0.8594, "step": 11574 }, { "epoch": 0.8602749907097733, "grad_norm": 4.804656793547536, "learning_rate": 1.673713083379344e-05, "loss": 0.7916, "step": 11575 }, { "epoch": 0.8603493125232256, "grad_norm": 1.9404762683310488, "learning_rate": 1.673653786757105e-05, "loss": 0.8006, "step": 11576 }, { "epoch": 0.8604236343366778, "grad_norm": 4.088698561464513, "learning_rate": 1.673594485797917e-05, "loss": 0.8316, "step": 11577 }, { "epoch": 0.86049795615013, "grad_norm": 2.1317229245352873, "learning_rate": 1.6735351805021633e-05, "loss": 0.9671, "step": 11578 }, { "epoch": 0.8605722779635823, "grad_norm": 2.120050009948111, "learning_rate": 1.673475870870225e-05, "loss": 1.0046, "step": 11579 }, { "epoch": 0.8606465997770345, "grad_norm": 3.3141260845953484, "learning_rate": 1.673416556902484e-05, "loss": 0.8204, "step": 11580 }, { "epoch": 0.8607209215904869, "grad_norm": 1.8023542394861076, "learning_rate": 1.673357238599322e-05, "loss": 0.6968, "step": 11581 }, { "epoch": 0.8607952434039391, "grad_norm": 1.9616839781163598, "learning_rate": 1.6732979159611212e-05, "loss": 0.8482, "step": 11582 }, { "epoch": 0.8608695652173913, "grad_norm": 2.3288574881925483, "learning_rate": 1.673238588988263e-05, "loss": 0.8481, "step": 11583 }, { "epoch": 0.8609438870308436, "grad_norm": 2.16816940973232, "learning_rate": 1.67317925768113e-05, "loss": 1.0371, "step": 11584 }, { "epoch": 0.8610182088442958, "grad_norm": 7.874408883821924, "learning_rate": 1.6731199220401043e-05, "loss": 0.9563, "step": 11585 }, { "epoch": 0.861092530657748, "grad_norm": 2.1469639441888986, "learning_rate": 1.6730605820655666e-05, "loss": 0.9699, "step": 11586 }, { "epoch": 0.8611668524712003, "grad_norm": 1.9665187841161575, "learning_rate": 1.6730012377579004e-05, "loss": 0.8814, "step": 11587 }, { "epoch": 0.8612411742846525, "grad_norm": 2.059552079548789, "learning_rate": 1.6729418891174865e-05, "loss": 0.8849, "step": 11588 }, { "epoch": 0.8613154960981048, "grad_norm": 2.6463483024889576, "learning_rate": 1.672882536144708e-05, "loss": 0.7129, "step": 11589 }, { "epoch": 0.861389817911557, "grad_norm": 1.802281960498484, "learning_rate": 1.672823178839947e-05, "loss": 0.8802, "step": 11590 }, { "epoch": 0.8614641397250092, "grad_norm": 1.7220387413069715, "learning_rate": 1.672763817203585e-05, "loss": 0.8873, "step": 11591 }, { "epoch": 0.8615384615384616, "grad_norm": 2.1954874382813983, "learning_rate": 1.6727044512360046e-05, "loss": 0.7011, "step": 11592 }, { "epoch": 0.8616127833519138, "grad_norm": 1.6512130794517788, "learning_rate": 1.6726450809375875e-05, "loss": 0.7009, "step": 11593 }, { "epoch": 0.8616871051653661, "grad_norm": 2.208663185833077, "learning_rate": 1.6725857063087167e-05, "loss": 1.0702, "step": 11594 }, { "epoch": 0.8617614269788183, "grad_norm": 1.830822769095657, "learning_rate": 1.6725263273497736e-05, "loss": 0.7782, "step": 11595 }, { "epoch": 0.8618357487922705, "grad_norm": 2.449214522873224, "learning_rate": 1.6724669440611416e-05, "loss": 1.0078, "step": 11596 }, { "epoch": 0.8619100706057228, "grad_norm": 1.9209302375472084, "learning_rate": 1.6724075564432015e-05, "loss": 0.7957, "step": 11597 }, { "epoch": 0.861984392419175, "grad_norm": 1.6975175005532832, "learning_rate": 1.6723481644963366e-05, "loss": 0.7682, "step": 11598 }, { "epoch": 0.8620587142326273, "grad_norm": 1.7661802896555643, "learning_rate": 1.6722887682209294e-05, "loss": 0.7594, "step": 11599 }, { "epoch": 0.8621330360460795, "grad_norm": 2.3631943551090555, "learning_rate": 1.6722293676173617e-05, "loss": 0.8876, "step": 11600 }, { "epoch": 0.8622073578595317, "grad_norm": 1.6379089027859808, "learning_rate": 1.6721699626860163e-05, "loss": 0.7399, "step": 11601 }, { "epoch": 0.862281679672984, "grad_norm": 2.0745857676480144, "learning_rate": 1.6721105534272755e-05, "loss": 0.8508, "step": 11602 }, { "epoch": 0.8623560014864363, "grad_norm": 3.215703409134143, "learning_rate": 1.6720511398415222e-05, "loss": 1.0455, "step": 11603 }, { "epoch": 0.8624303232998886, "grad_norm": 1.5242736894132245, "learning_rate": 1.671991721929138e-05, "loss": 0.6798, "step": 11604 }, { "epoch": 0.8625046451133408, "grad_norm": 1.7435064891374035, "learning_rate": 1.6719322996905064e-05, "loss": 0.5551, "step": 11605 }, { "epoch": 0.862578966926793, "grad_norm": 2.0494720677841523, "learning_rate": 1.671872873126009e-05, "loss": 0.8245, "step": 11606 }, { "epoch": 0.8626532887402453, "grad_norm": 1.564496833947448, "learning_rate": 1.6718134422360295e-05, "loss": 0.6893, "step": 11607 }, { "epoch": 0.8627276105536975, "grad_norm": 1.7114418668945082, "learning_rate": 1.6717540070209497e-05, "loss": 0.8273, "step": 11608 }, { "epoch": 0.8628019323671497, "grad_norm": 1.976403421071258, "learning_rate": 1.6716945674811524e-05, "loss": 0.6695, "step": 11609 }, { "epoch": 0.862876254180602, "grad_norm": 2.0266528954956065, "learning_rate": 1.67163512361702e-05, "loss": 0.8395, "step": 11610 }, { "epoch": 0.8629505759940542, "grad_norm": 1.8378469421509471, "learning_rate": 1.671575675428936e-05, "loss": 0.7994, "step": 11611 }, { "epoch": 0.8630248978075065, "grad_norm": 1.8073095665656909, "learning_rate": 1.6715162229172826e-05, "loss": 0.7868, "step": 11612 }, { "epoch": 0.8630992196209587, "grad_norm": 2.999225554253908, "learning_rate": 1.6714567660824427e-05, "loss": 0.7564, "step": 11613 }, { "epoch": 0.863173541434411, "grad_norm": 1.8685735805970163, "learning_rate": 1.6713973049247985e-05, "loss": 0.7157, "step": 11614 }, { "epoch": 0.8632478632478633, "grad_norm": 1.6616123956747, "learning_rate": 1.6713378394447338e-05, "loss": 0.7644, "step": 11615 }, { "epoch": 0.8633221850613155, "grad_norm": 1.6103253642531477, "learning_rate": 1.671278369642631e-05, "loss": 0.7118, "step": 11616 }, { "epoch": 0.8633965068747678, "grad_norm": 1.7982319033241987, "learning_rate": 1.671218895518872e-05, "loss": 0.8538, "step": 11617 }, { "epoch": 0.86347082868822, "grad_norm": 1.5383425196542642, "learning_rate": 1.6711594170738418e-05, "loss": 0.7896, "step": 11618 }, { "epoch": 0.8635451505016722, "grad_norm": 1.723551070077912, "learning_rate": 1.6710999343079213e-05, "loss": 0.687, "step": 11619 }, { "epoch": 0.8636194723151245, "grad_norm": 1.621550222132602, "learning_rate": 1.671040447221494e-05, "loss": 0.9152, "step": 11620 }, { "epoch": 0.8636937941285767, "grad_norm": 2.272549904701449, "learning_rate": 1.670980955814944e-05, "loss": 0.9783, "step": 11621 }, { "epoch": 0.863768115942029, "grad_norm": 3.6055620333409277, "learning_rate": 1.670921460088653e-05, "loss": 0.8882, "step": 11622 }, { "epoch": 0.8638424377554812, "grad_norm": 1.5743005466582507, "learning_rate": 1.670861960043004e-05, "loss": 0.705, "step": 11623 }, { "epoch": 0.8639167595689334, "grad_norm": 2.29442025952434, "learning_rate": 1.6708024556783812e-05, "loss": 0.9403, "step": 11624 }, { "epoch": 0.8639910813823858, "grad_norm": 2.1925662109713424, "learning_rate": 1.6707429469951665e-05, "loss": 0.7614, "step": 11625 }, { "epoch": 0.864065403195838, "grad_norm": 2.1015488209264586, "learning_rate": 1.6706834339937436e-05, "loss": 0.7973, "step": 11626 }, { "epoch": 0.8641397250092903, "grad_norm": 1.9364526453037005, "learning_rate": 1.670623916674496e-05, "loss": 1.0539, "step": 11627 }, { "epoch": 0.8642140468227425, "grad_norm": 1.821447722166894, "learning_rate": 1.6705643950378057e-05, "loss": 0.7407, "step": 11628 }, { "epoch": 0.8642883686361947, "grad_norm": 2.163471952467928, "learning_rate": 1.670504869084057e-05, "loss": 0.8028, "step": 11629 }, { "epoch": 0.864362690449647, "grad_norm": 1.6154387773401373, "learning_rate": 1.670445338813633e-05, "loss": 0.7035, "step": 11630 }, { "epoch": 0.8644370122630992, "grad_norm": 1.7432851648622703, "learning_rate": 1.6703858042269163e-05, "loss": 0.6678, "step": 11631 }, { "epoch": 0.8645113340765515, "grad_norm": 1.6213619533839756, "learning_rate": 1.6703262653242908e-05, "loss": 0.847, "step": 11632 }, { "epoch": 0.8645856558900037, "grad_norm": 1.6445570512025758, "learning_rate": 1.6702667221061398e-05, "loss": 0.7594, "step": 11633 }, { "epoch": 0.8646599777034559, "grad_norm": 2.040680447876991, "learning_rate": 1.670207174572846e-05, "loss": 0.9125, "step": 11634 }, { "epoch": 0.8647342995169082, "grad_norm": 2.4893276806114906, "learning_rate": 1.6701476227247932e-05, "loss": 0.7052, "step": 11635 }, { "epoch": 0.8648086213303604, "grad_norm": 1.9184623355275605, "learning_rate": 1.670088066562365e-05, "loss": 0.8182, "step": 11636 }, { "epoch": 0.8648829431438128, "grad_norm": 1.8860351693923223, "learning_rate": 1.670028506085944e-05, "loss": 0.7911, "step": 11637 }, { "epoch": 0.864957264957265, "grad_norm": 1.8198409984611295, "learning_rate": 1.6699689412959152e-05, "loss": 0.9247, "step": 11638 }, { "epoch": 0.8650315867707172, "grad_norm": 1.9566860870015514, "learning_rate": 1.6699093721926606e-05, "loss": 0.9601, "step": 11639 }, { "epoch": 0.8651059085841695, "grad_norm": 1.893683426611647, "learning_rate": 1.6698497987765643e-05, "loss": 0.938, "step": 11640 }, { "epoch": 0.8651802303976217, "grad_norm": 1.7528984255600804, "learning_rate": 1.6697902210480098e-05, "loss": 0.7674, "step": 11641 }, { "epoch": 0.865254552211074, "grad_norm": 1.6149104515953063, "learning_rate": 1.6697306390073804e-05, "loss": 0.5553, "step": 11642 }, { "epoch": 0.8653288740245262, "grad_norm": 1.6848552514863342, "learning_rate": 1.66967105265506e-05, "loss": 0.7554, "step": 11643 }, { "epoch": 0.8654031958379784, "grad_norm": 1.6566500185301685, "learning_rate": 1.669611461991432e-05, "loss": 0.8916, "step": 11644 }, { "epoch": 0.8654775176514307, "grad_norm": 1.7997765283924692, "learning_rate": 1.6695518670168804e-05, "loss": 0.9746, "step": 11645 }, { "epoch": 0.8655518394648829, "grad_norm": 2.310260993743536, "learning_rate": 1.6694922677317884e-05, "loss": 0.7946, "step": 11646 }, { "epoch": 0.8656261612783351, "grad_norm": 1.9169729044349209, "learning_rate": 1.66943266413654e-05, "loss": 0.7732, "step": 11647 }, { "epoch": 0.8657004830917875, "grad_norm": 2.3761727435077087, "learning_rate": 1.669373056231519e-05, "loss": 0.7017, "step": 11648 }, { "epoch": 0.8657748049052397, "grad_norm": 1.9248795519095478, "learning_rate": 1.6693134440171087e-05, "loss": 1.0613, "step": 11649 }, { "epoch": 0.865849126718692, "grad_norm": 1.7498667857905283, "learning_rate": 1.6692538274936937e-05, "loss": 0.8503, "step": 11650 }, { "epoch": 0.8659234485321442, "grad_norm": 2.0954686689983686, "learning_rate": 1.669194206661657e-05, "loss": 0.9915, "step": 11651 }, { "epoch": 0.8659977703455964, "grad_norm": 2.1259603944503485, "learning_rate": 1.6691345815213823e-05, "loss": 0.8499, "step": 11652 }, { "epoch": 0.8660720921590487, "grad_norm": 1.97802969658355, "learning_rate": 1.6690749520732542e-05, "loss": 0.8074, "step": 11653 }, { "epoch": 0.8661464139725009, "grad_norm": 2.3225396446826925, "learning_rate": 1.6690153183176563e-05, "loss": 0.95, "step": 11654 }, { "epoch": 0.8662207357859532, "grad_norm": 2.384335815094022, "learning_rate": 1.6689556802549724e-05, "loss": 0.7682, "step": 11655 }, { "epoch": 0.8662950575994054, "grad_norm": 1.6274824348597294, "learning_rate": 1.6688960378855868e-05, "loss": 0.7732, "step": 11656 }, { "epoch": 0.8663693794128576, "grad_norm": 1.683356158644698, "learning_rate": 1.668836391209883e-05, "loss": 0.7375, "step": 11657 }, { "epoch": 0.8664437012263099, "grad_norm": 1.9932655857955495, "learning_rate": 1.6687767402282455e-05, "loss": 0.6477, "step": 11658 }, { "epoch": 0.8665180230397622, "grad_norm": 1.7987871201917955, "learning_rate": 1.6687170849410575e-05, "loss": 0.995, "step": 11659 }, { "epoch": 0.8665923448532145, "grad_norm": 1.6382059642568196, "learning_rate": 1.668657425348704e-05, "loss": 0.9601, "step": 11660 }, { "epoch": 0.8666666666666667, "grad_norm": 1.9082124115993995, "learning_rate": 1.6685977614515688e-05, "loss": 0.8021, "step": 11661 }, { "epoch": 0.8667409884801189, "grad_norm": 1.6224256389247074, "learning_rate": 1.6685380932500357e-05, "loss": 0.7114, "step": 11662 }, { "epoch": 0.8668153102935712, "grad_norm": 1.914997463818202, "learning_rate": 1.6684784207444887e-05, "loss": 0.9071, "step": 11663 }, { "epoch": 0.8668896321070234, "grad_norm": 2.2395462396008923, "learning_rate": 1.668418743935313e-05, "loss": 0.7655, "step": 11664 }, { "epoch": 0.8669639539204756, "grad_norm": 2.159557373701406, "learning_rate": 1.6683590628228918e-05, "loss": 1.0505, "step": 11665 }, { "epoch": 0.8670382757339279, "grad_norm": 2.1220773210253325, "learning_rate": 1.6682993774076096e-05, "loss": 0.8962, "step": 11666 }, { "epoch": 0.8671125975473801, "grad_norm": 1.7116059498156424, "learning_rate": 1.6682396876898506e-05, "loss": 0.7184, "step": 11667 }, { "epoch": 0.8671869193608324, "grad_norm": 2.184253336221984, "learning_rate": 1.668179993669999e-05, "loss": 0.7122, "step": 11668 }, { "epoch": 0.8672612411742846, "grad_norm": 1.7967455017605345, "learning_rate": 1.6681202953484397e-05, "loss": 0.7995, "step": 11669 }, { "epoch": 0.867335562987737, "grad_norm": 1.9995892051330904, "learning_rate": 1.6680605927255564e-05, "loss": 1.0009, "step": 11670 }, { "epoch": 0.8674098848011892, "grad_norm": 1.6943290631462111, "learning_rate": 1.668000885801734e-05, "loss": 0.6714, "step": 11671 }, { "epoch": 0.8674842066146414, "grad_norm": 2.012376479824131, "learning_rate": 1.6679411745773562e-05, "loss": 0.7923, "step": 11672 }, { "epoch": 0.8675585284280937, "grad_norm": 1.9520870125515777, "learning_rate": 1.6678814590528076e-05, "loss": 0.7934, "step": 11673 }, { "epoch": 0.8676328502415459, "grad_norm": 2.0909373061116123, "learning_rate": 1.667821739228473e-05, "loss": 0.9186, "step": 11674 }, { "epoch": 0.8677071720549981, "grad_norm": 1.6705278505011398, "learning_rate": 1.667762015104737e-05, "loss": 0.7704, "step": 11675 }, { "epoch": 0.8677814938684504, "grad_norm": 1.5564942878779284, "learning_rate": 1.6677022866819833e-05, "loss": 0.7856, "step": 11676 }, { "epoch": 0.8678558156819026, "grad_norm": 1.5031011138786365, "learning_rate": 1.6676425539605975e-05, "loss": 0.6188, "step": 11677 }, { "epoch": 0.8679301374953549, "grad_norm": 2.3139063922211265, "learning_rate": 1.667582816940963e-05, "loss": 0.816, "step": 11678 }, { "epoch": 0.8680044593088071, "grad_norm": 2.072088816999972, "learning_rate": 1.6675230756234652e-05, "loss": 0.9402, "step": 11679 }, { "epoch": 0.8680787811222593, "grad_norm": 2.0770124126550398, "learning_rate": 1.6674633300084883e-05, "loss": 0.9845, "step": 11680 }, { "epoch": 0.8681531029357117, "grad_norm": 2.101008192489861, "learning_rate": 1.6674035800964176e-05, "loss": 0.751, "step": 11681 }, { "epoch": 0.8682274247491639, "grad_norm": 1.7995191715458383, "learning_rate": 1.6673438258876368e-05, "loss": 0.8056, "step": 11682 }, { "epoch": 0.8683017465626162, "grad_norm": 2.51350312472264, "learning_rate": 1.667284067382531e-05, "loss": 0.8726, "step": 11683 }, { "epoch": 0.8683760683760684, "grad_norm": 1.9617078408712174, "learning_rate": 1.6672243045814854e-05, "loss": 0.7748, "step": 11684 }, { "epoch": 0.8684503901895206, "grad_norm": 2.0881137977738553, "learning_rate": 1.6671645374848845e-05, "loss": 0.8654, "step": 11685 }, { "epoch": 0.8685247120029729, "grad_norm": 2.6272661317875, "learning_rate": 1.6671047660931124e-05, "loss": 0.7217, "step": 11686 }, { "epoch": 0.8685990338164251, "grad_norm": 1.9942726424376163, "learning_rate": 1.667044990406555e-05, "loss": 0.957, "step": 11687 }, { "epoch": 0.8686733556298774, "grad_norm": 1.7640420591608748, "learning_rate": 1.666985210425596e-05, "loss": 0.9184, "step": 11688 }, { "epoch": 0.8687476774433296, "grad_norm": 2.0972744944040866, "learning_rate": 1.666925426150621e-05, "loss": 0.98, "step": 11689 }, { "epoch": 0.8688219992567818, "grad_norm": 2.0567332392853976, "learning_rate": 1.6668656375820145e-05, "loss": 0.8089, "step": 11690 }, { "epoch": 0.8688963210702341, "grad_norm": 1.5875266056625008, "learning_rate": 1.666805844720162e-05, "loss": 0.7885, "step": 11691 }, { "epoch": 0.8689706428836864, "grad_norm": 1.6530521151401936, "learning_rate": 1.666746047565448e-05, "loss": 0.7608, "step": 11692 }, { "epoch": 0.8690449646971387, "grad_norm": 2.0618196834059876, "learning_rate": 1.6666862461182573e-05, "loss": 0.8842, "step": 11693 }, { "epoch": 0.8691192865105909, "grad_norm": 1.5716673876919294, "learning_rate": 1.6666264403789755e-05, "loss": 0.6439, "step": 11694 }, { "epoch": 0.8691936083240431, "grad_norm": 1.596468500310378, "learning_rate": 1.666566630347987e-05, "loss": 0.7453, "step": 11695 }, { "epoch": 0.8692679301374954, "grad_norm": 2.1182419649316353, "learning_rate": 1.6665068160256773e-05, "loss": 0.8451, "step": 11696 }, { "epoch": 0.8693422519509476, "grad_norm": 2.3643312989567615, "learning_rate": 1.666446997412431e-05, "loss": 0.8807, "step": 11697 }, { "epoch": 0.8694165737643998, "grad_norm": 1.9366616277787267, "learning_rate": 1.666387174508634e-05, "loss": 0.6776, "step": 11698 }, { "epoch": 0.8694908955778521, "grad_norm": 1.663636428810862, "learning_rate": 1.6663273473146705e-05, "loss": 0.8435, "step": 11699 }, { "epoch": 0.8695652173913043, "grad_norm": 2.2900020253398607, "learning_rate": 1.6662675158309262e-05, "loss": 0.8099, "step": 11700 }, { "epoch": 0.8696395392047566, "grad_norm": 1.7852883971532514, "learning_rate": 1.666207680057786e-05, "loss": 0.9358, "step": 11701 }, { "epoch": 0.8697138610182088, "grad_norm": 1.9409059393120511, "learning_rate": 1.666147839995636e-05, "loss": 0.7364, "step": 11702 }, { "epoch": 0.869788182831661, "grad_norm": 2.0695701436758545, "learning_rate": 1.6660879956448602e-05, "loss": 0.9504, "step": 11703 }, { "epoch": 0.8698625046451134, "grad_norm": 1.9993887010738913, "learning_rate": 1.666028147005845e-05, "loss": 0.7704, "step": 11704 }, { "epoch": 0.8699368264585656, "grad_norm": 2.40919608168463, "learning_rate": 1.6659682940789747e-05, "loss": 0.8886, "step": 11705 }, { "epoch": 0.8700111482720179, "grad_norm": 1.9259434925278305, "learning_rate": 1.665908436864635e-05, "loss": 0.8906, "step": 11706 }, { "epoch": 0.8700854700854701, "grad_norm": 1.936317827588754, "learning_rate": 1.6658485753632114e-05, "loss": 0.9847, "step": 11707 }, { "epoch": 0.8701597918989223, "grad_norm": 1.7351290828650352, "learning_rate": 1.6657887095750894e-05, "loss": 0.7272, "step": 11708 }, { "epoch": 0.8702341137123746, "grad_norm": 1.885944515134207, "learning_rate": 1.6657288395006542e-05, "loss": 0.7717, "step": 11709 }, { "epoch": 0.8703084355258268, "grad_norm": 1.6214217739426966, "learning_rate": 1.665668965140291e-05, "loss": 0.7358, "step": 11710 }, { "epoch": 0.8703827573392791, "grad_norm": 1.3502485672479505, "learning_rate": 1.665609086494386e-05, "loss": 0.5975, "step": 11711 }, { "epoch": 0.8704570791527313, "grad_norm": 1.3861924367869165, "learning_rate": 1.6655492035633238e-05, "loss": 0.7313, "step": 11712 }, { "epoch": 0.8705314009661835, "grad_norm": 1.7062750032322909, "learning_rate": 1.6654893163474905e-05, "loss": 0.7271, "step": 11713 }, { "epoch": 0.8706057227796358, "grad_norm": 1.592371067193721, "learning_rate": 1.6654294248472715e-05, "loss": 0.9008, "step": 11714 }, { "epoch": 0.8706800445930881, "grad_norm": 2.782498922440373, "learning_rate": 1.665369529063053e-05, "loss": 0.892, "step": 11715 }, { "epoch": 0.8707543664065404, "grad_norm": 1.7988244654118364, "learning_rate": 1.665309628995219e-05, "loss": 1.1044, "step": 11716 }, { "epoch": 0.8708286882199926, "grad_norm": 3.134690818380676, "learning_rate": 1.6652497246441565e-05, "loss": 0.9323, "step": 11717 }, { "epoch": 0.8709030100334448, "grad_norm": 1.821625415573044, "learning_rate": 1.665189816010251e-05, "loss": 0.928, "step": 11718 }, { "epoch": 0.8709773318468971, "grad_norm": 2.01331256366024, "learning_rate": 1.6651299030938876e-05, "loss": 0.8831, "step": 11719 }, { "epoch": 0.8710516536603493, "grad_norm": 1.8425685715664866, "learning_rate": 1.665069985895453e-05, "loss": 0.8053, "step": 11720 }, { "epoch": 0.8711259754738016, "grad_norm": 1.8219314084739437, "learning_rate": 1.665010064415332e-05, "loss": 0.9024, "step": 11721 }, { "epoch": 0.8712002972872538, "grad_norm": 1.7037687395547174, "learning_rate": 1.6649501386539104e-05, "loss": 0.756, "step": 11722 }, { "epoch": 0.871274619100706, "grad_norm": 1.7776594983677834, "learning_rate": 1.6648902086115747e-05, "loss": 0.9108, "step": 11723 }, { "epoch": 0.8713489409141583, "grad_norm": 1.6455002064711859, "learning_rate": 1.66483027428871e-05, "loss": 0.8204, "step": 11724 }, { "epoch": 0.8714232627276105, "grad_norm": 1.8346856275469114, "learning_rate": 1.664770335685703e-05, "loss": 0.8887, "step": 11725 }, { "epoch": 0.8714975845410629, "grad_norm": 1.7617839499883687, "learning_rate": 1.6647103928029385e-05, "loss": 0.8414, "step": 11726 }, { "epoch": 0.8715719063545151, "grad_norm": 1.9887490941131378, "learning_rate": 1.6646504456408035e-05, "loss": 0.9632, "step": 11727 }, { "epoch": 0.8716462281679673, "grad_norm": 1.7948788483955762, "learning_rate": 1.664590494199683e-05, "loss": 0.7314, "step": 11728 }, { "epoch": 0.8717205499814196, "grad_norm": 2.517294276876819, "learning_rate": 1.6645305384799636e-05, "loss": 0.9351, "step": 11729 }, { "epoch": 0.8717948717948718, "grad_norm": 2.166750926168123, "learning_rate": 1.664470578482031e-05, "loss": 1.0074, "step": 11730 }, { "epoch": 0.871869193608324, "grad_norm": 1.9675751370706003, "learning_rate": 1.6644106142062714e-05, "loss": 0.798, "step": 11731 }, { "epoch": 0.8719435154217763, "grad_norm": 1.8945487738901745, "learning_rate": 1.6643506456530708e-05, "loss": 1.0691, "step": 11732 }, { "epoch": 0.8720178372352285, "grad_norm": 2.5689908150643626, "learning_rate": 1.6642906728228146e-05, "loss": 0.8777, "step": 11733 }, { "epoch": 0.8720921590486808, "grad_norm": 2.0548184838457795, "learning_rate": 1.6642306957158904e-05, "loss": 0.9701, "step": 11734 }, { "epoch": 0.872166480862133, "grad_norm": 1.583864584081229, "learning_rate": 1.6641707143326827e-05, "loss": 0.9493, "step": 11735 }, { "epoch": 0.8722408026755852, "grad_norm": 1.8351122543827452, "learning_rate": 1.664110728673579e-05, "loss": 0.7697, "step": 11736 }, { "epoch": 0.8723151244890376, "grad_norm": 3.1007937810685218, "learning_rate": 1.6640507387389648e-05, "loss": 0.9439, "step": 11737 }, { "epoch": 0.8723894463024898, "grad_norm": 1.8399565303541414, "learning_rate": 1.663990744529226e-05, "loss": 0.8563, "step": 11738 }, { "epoch": 0.8724637681159421, "grad_norm": 1.7460067269749464, "learning_rate": 1.6639307460447497e-05, "loss": 0.7182, "step": 11739 }, { "epoch": 0.8725380899293943, "grad_norm": 2.2550968096806225, "learning_rate": 1.6638707432859213e-05, "loss": 0.7936, "step": 11740 }, { "epoch": 0.8726124117428465, "grad_norm": 2.5775117230372016, "learning_rate": 1.663810736253128e-05, "loss": 0.9472, "step": 11741 }, { "epoch": 0.8726867335562988, "grad_norm": 1.3866119634221599, "learning_rate": 1.663750724946755e-05, "loss": 0.6877, "step": 11742 }, { "epoch": 0.872761055369751, "grad_norm": 1.9345997263424322, "learning_rate": 1.6636907093671895e-05, "loss": 0.7178, "step": 11743 }, { "epoch": 0.8728353771832033, "grad_norm": 1.7304474025271679, "learning_rate": 1.6636306895148178e-05, "loss": 0.8202, "step": 11744 }, { "epoch": 0.8729096989966555, "grad_norm": 1.7724326013536447, "learning_rate": 1.663570665390026e-05, "loss": 0.8229, "step": 11745 }, { "epoch": 0.8729840208101077, "grad_norm": 2.642585779946794, "learning_rate": 1.663510636993201e-05, "loss": 0.8008, "step": 11746 }, { "epoch": 0.87305834262356, "grad_norm": 1.8042006476551244, "learning_rate": 1.6634506043247288e-05, "loss": 0.7654, "step": 11747 }, { "epoch": 0.8731326644370123, "grad_norm": 2.2830481109701255, "learning_rate": 1.6633905673849957e-05, "loss": 0.9007, "step": 11748 }, { "epoch": 0.8732069862504646, "grad_norm": 1.9515817989173003, "learning_rate": 1.663330526174389e-05, "loss": 0.7882, "step": 11749 }, { "epoch": 0.8732813080639168, "grad_norm": 1.684219661315038, "learning_rate": 1.6632704806932946e-05, "loss": 0.7746, "step": 11750 }, { "epoch": 0.873355629877369, "grad_norm": 2.173628891054735, "learning_rate": 1.663210430942099e-05, "loss": 0.7627, "step": 11751 }, { "epoch": 0.8734299516908213, "grad_norm": 1.7518980914803817, "learning_rate": 1.6631503769211893e-05, "loss": 0.8426, "step": 11752 }, { "epoch": 0.8735042735042735, "grad_norm": 1.6435201651690208, "learning_rate": 1.663090318630952e-05, "loss": 0.7753, "step": 11753 }, { "epoch": 0.8735785953177257, "grad_norm": 3.747392335596246, "learning_rate": 1.6630302560717737e-05, "loss": 0.7949, "step": 11754 }, { "epoch": 0.873652917131178, "grad_norm": 1.824523874464839, "learning_rate": 1.6629701892440405e-05, "loss": 0.8175, "step": 11755 }, { "epoch": 0.8737272389446302, "grad_norm": 1.7865292059347133, "learning_rate": 1.66291011814814e-05, "loss": 0.7903, "step": 11756 }, { "epoch": 0.8738015607580825, "grad_norm": 1.874579684082546, "learning_rate": 1.6628500427844583e-05, "loss": 0.7844, "step": 11757 }, { "epoch": 0.8738758825715347, "grad_norm": 1.9057998422609608, "learning_rate": 1.662789963153383e-05, "loss": 0.725, "step": 11758 }, { "epoch": 0.8739502043849869, "grad_norm": 2.156326209218419, "learning_rate": 1.6627298792552998e-05, "loss": 1.097, "step": 11759 }, { "epoch": 0.8740245261984393, "grad_norm": 2.135055504508441, "learning_rate": 1.6626697910905963e-05, "loss": 0.7513, "step": 11760 }, { "epoch": 0.8740988480118915, "grad_norm": 2.082746676415977, "learning_rate": 1.662609698659659e-05, "loss": 0.9264, "step": 11761 }, { "epoch": 0.8741731698253438, "grad_norm": 1.8090995008079884, "learning_rate": 1.6625496019628746e-05, "loss": 0.7282, "step": 11762 }, { "epoch": 0.874247491638796, "grad_norm": 1.8644399656732773, "learning_rate": 1.6624895010006302e-05, "loss": 0.8234, "step": 11763 }, { "epoch": 0.8743218134522482, "grad_norm": 1.7932957180852331, "learning_rate": 1.662429395773313e-05, "loss": 0.7246, "step": 11764 }, { "epoch": 0.8743961352657005, "grad_norm": 1.8455353409283928, "learning_rate": 1.66236928628131e-05, "loss": 0.7129, "step": 11765 }, { "epoch": 0.8744704570791527, "grad_norm": 1.8340882849400306, "learning_rate": 1.6623091725250076e-05, "loss": 1.0143, "step": 11766 }, { "epoch": 0.874544778892605, "grad_norm": 1.793455559419051, "learning_rate": 1.662249054504793e-05, "loss": 0.8247, "step": 11767 }, { "epoch": 0.8746191007060572, "grad_norm": 1.8587254936964652, "learning_rate": 1.6621889322210534e-05, "loss": 0.8753, "step": 11768 }, { "epoch": 0.8746934225195094, "grad_norm": 1.7632918100735255, "learning_rate": 1.662128805674176e-05, "loss": 0.6096, "step": 11769 }, { "epoch": 0.8747677443329617, "grad_norm": 2.261045797265405, "learning_rate": 1.6620686748645473e-05, "loss": 0.6317, "step": 11770 }, { "epoch": 0.874842066146414, "grad_norm": 1.8346572681211988, "learning_rate": 1.6620085397925554e-05, "loss": 0.8581, "step": 11771 }, { "epoch": 0.8749163879598663, "grad_norm": 1.9197936206192239, "learning_rate": 1.6619484004585865e-05, "loss": 0.8065, "step": 11772 }, { "epoch": 0.8749907097733185, "grad_norm": 1.70901976105802, "learning_rate": 1.6618882568630282e-05, "loss": 0.8917, "step": 11773 }, { "epoch": 0.8750650315867707, "grad_norm": 2.0548845929851334, "learning_rate": 1.6618281090062677e-05, "loss": 0.8131, "step": 11774 }, { "epoch": 0.875139353400223, "grad_norm": 1.742826433629596, "learning_rate": 1.6617679568886922e-05, "loss": 0.7576, "step": 11775 }, { "epoch": 0.8752136752136752, "grad_norm": 1.6320846420376924, "learning_rate": 1.6617078005106887e-05, "loss": 0.9378, "step": 11776 }, { "epoch": 0.8752879970271275, "grad_norm": 1.5410560402703524, "learning_rate": 1.6616476398726452e-05, "loss": 0.6417, "step": 11777 }, { "epoch": 0.8753623188405797, "grad_norm": 1.852238869813113, "learning_rate": 1.661587474974948e-05, "loss": 0.903, "step": 11778 }, { "epoch": 0.8754366406540319, "grad_norm": 2.5351292540898234, "learning_rate": 1.661527305817985e-05, "loss": 0.9819, "step": 11779 }, { "epoch": 0.8755109624674842, "grad_norm": 1.9902825646993922, "learning_rate": 1.6614671324021435e-05, "loss": 0.8276, "step": 11780 }, { "epoch": 0.8755852842809364, "grad_norm": 2.2164805554314553, "learning_rate": 1.6614069547278114e-05, "loss": 0.6912, "step": 11781 }, { "epoch": 0.8756596060943888, "grad_norm": 1.8217367471756276, "learning_rate": 1.6613467727953755e-05, "loss": 0.9179, "step": 11782 }, { "epoch": 0.875733927907841, "grad_norm": 2.002280150064127, "learning_rate": 1.661286586605223e-05, "loss": 1.0214, "step": 11783 }, { "epoch": 0.8758082497212932, "grad_norm": 2.364054338142764, "learning_rate": 1.6612263961577422e-05, "loss": 1.0326, "step": 11784 }, { "epoch": 0.8758825715347455, "grad_norm": 1.7746071211813277, "learning_rate": 1.66116620145332e-05, "loss": 0.9127, "step": 11785 }, { "epoch": 0.8759568933481977, "grad_norm": 1.9934612588103766, "learning_rate": 1.661106002492344e-05, "loss": 0.9591, "step": 11786 }, { "epoch": 0.87603121516165, "grad_norm": 2.1627675097952754, "learning_rate": 1.661045799275202e-05, "loss": 0.753, "step": 11787 }, { "epoch": 0.8761055369751022, "grad_norm": 1.8926745099360283, "learning_rate": 1.6609855918022814e-05, "loss": 0.8854, "step": 11788 }, { "epoch": 0.8761798587885544, "grad_norm": 2.314615538082833, "learning_rate": 1.6609253800739698e-05, "loss": 0.9105, "step": 11789 }, { "epoch": 0.8762541806020067, "grad_norm": 1.705667648973039, "learning_rate": 1.660865164090655e-05, "loss": 0.9785, "step": 11790 }, { "epoch": 0.8763285024154589, "grad_norm": 1.960937348488335, "learning_rate": 1.6608049438527244e-05, "loss": 0.7442, "step": 11791 }, { "epoch": 0.8764028242289111, "grad_norm": 2.071414407717187, "learning_rate": 1.6607447193605658e-05, "loss": 1.0782, "step": 11792 }, { "epoch": 0.8764771460423635, "grad_norm": 2.0996239176112472, "learning_rate": 1.6606844906145674e-05, "loss": 0.8437, "step": 11793 }, { "epoch": 0.8765514678558157, "grad_norm": 2.410993248925818, "learning_rate": 1.6606242576151162e-05, "loss": 0.6102, "step": 11794 }, { "epoch": 0.876625789669268, "grad_norm": 1.8542559970922754, "learning_rate": 1.6605640203626004e-05, "loss": 0.752, "step": 11795 }, { "epoch": 0.8767001114827202, "grad_norm": 2.123056165195829, "learning_rate": 1.660503778857408e-05, "loss": 0.7253, "step": 11796 }, { "epoch": 0.8767744332961724, "grad_norm": 1.8663590496522882, "learning_rate": 1.660443533099926e-05, "loss": 0.8715, "step": 11797 }, { "epoch": 0.8768487551096247, "grad_norm": 1.956115390783169, "learning_rate": 1.6603832830905433e-05, "loss": 0.9733, "step": 11798 }, { "epoch": 0.8769230769230769, "grad_norm": 1.6698930001872858, "learning_rate": 1.660323028829647e-05, "loss": 0.7907, "step": 11799 }, { "epoch": 0.8769973987365292, "grad_norm": 1.540148615097064, "learning_rate": 1.6602627703176253e-05, "loss": 0.8251, "step": 11800 }, { "epoch": 0.8770717205499814, "grad_norm": 1.9902280173810867, "learning_rate": 1.6602025075548657e-05, "loss": 0.7888, "step": 11801 }, { "epoch": 0.8771460423634336, "grad_norm": 1.7421273071864352, "learning_rate": 1.6601422405417572e-05, "loss": 0.8701, "step": 11802 }, { "epoch": 0.8772203641768859, "grad_norm": 1.9238645679825146, "learning_rate": 1.6600819692786873e-05, "loss": 1.0531, "step": 11803 }, { "epoch": 0.8772946859903382, "grad_norm": 2.9329891721474617, "learning_rate": 1.6600216937660436e-05, "loss": 0.9348, "step": 11804 }, { "epoch": 0.8773690078037905, "grad_norm": 1.854493333740105, "learning_rate": 1.6599614140042147e-05, "loss": 0.8131, "step": 11805 }, { "epoch": 0.8774433296172427, "grad_norm": 1.7296260868970703, "learning_rate": 1.6599011299935883e-05, "loss": 0.6551, "step": 11806 }, { "epoch": 0.8775176514306949, "grad_norm": 2.138235021687955, "learning_rate": 1.659840841734553e-05, "loss": 0.7806, "step": 11807 }, { "epoch": 0.8775919732441472, "grad_norm": 2.6419462658305233, "learning_rate": 1.6597805492274962e-05, "loss": 0.893, "step": 11808 }, { "epoch": 0.8776662950575994, "grad_norm": 1.9589512296957559, "learning_rate": 1.6597202524728064e-05, "loss": 0.7738, "step": 11809 }, { "epoch": 0.8777406168710516, "grad_norm": 1.7643370977819988, "learning_rate": 1.659659951470872e-05, "loss": 0.69, "step": 11810 }, { "epoch": 0.8778149386845039, "grad_norm": 2.108890172677622, "learning_rate": 1.6595996462220815e-05, "loss": 0.8467, "step": 11811 }, { "epoch": 0.8778892604979561, "grad_norm": 2.09438141171623, "learning_rate": 1.6595393367268222e-05, "loss": 1.0429, "step": 11812 }, { "epoch": 0.8779635823114084, "grad_norm": 1.873171409353892, "learning_rate": 1.6594790229854826e-05, "loss": 0.7042, "step": 11813 }, { "epoch": 0.8780379041248606, "grad_norm": 2.0569924121179453, "learning_rate": 1.6594187049984518e-05, "loss": 0.6546, "step": 11814 }, { "epoch": 0.8781122259383128, "grad_norm": 1.7161857524589477, "learning_rate": 1.659358382766117e-05, "loss": 0.7661, "step": 11815 }, { "epoch": 0.8781865477517652, "grad_norm": 1.841657870724717, "learning_rate": 1.659298056288868e-05, "loss": 0.8138, "step": 11816 }, { "epoch": 0.8782608695652174, "grad_norm": 1.8247674925131099, "learning_rate": 1.659237725567092e-05, "loss": 0.8125, "step": 11817 }, { "epoch": 0.8783351913786697, "grad_norm": 1.6031860636041477, "learning_rate": 1.6591773906011776e-05, "loss": 0.706, "step": 11818 }, { "epoch": 0.8784095131921219, "grad_norm": 1.9503455753332808, "learning_rate": 1.6591170513915132e-05, "loss": 0.735, "step": 11819 }, { "epoch": 0.8784838350055741, "grad_norm": 1.8490534251852824, "learning_rate": 1.6590567079384876e-05, "loss": 0.7706, "step": 11820 }, { "epoch": 0.8785581568190264, "grad_norm": 1.8951956424851264, "learning_rate": 1.6589963602424893e-05, "loss": 0.9141, "step": 11821 }, { "epoch": 0.8786324786324786, "grad_norm": 1.8468252991398646, "learning_rate": 1.6589360083039063e-05, "loss": 0.7254, "step": 11822 }, { "epoch": 0.8787068004459309, "grad_norm": 1.8169835843719209, "learning_rate": 1.6588756521231278e-05, "loss": 0.7485, "step": 11823 }, { "epoch": 0.8787811222593831, "grad_norm": 1.7303950306390907, "learning_rate": 1.6588152917005418e-05, "loss": 0.7958, "step": 11824 }, { "epoch": 0.8788554440728353, "grad_norm": 1.9973091150043856, "learning_rate": 1.6587549270365372e-05, "loss": 0.7831, "step": 11825 }, { "epoch": 0.8789297658862876, "grad_norm": 1.8688501507154123, "learning_rate": 1.6586945581315027e-05, "loss": 0.8085, "step": 11826 }, { "epoch": 0.8790040876997399, "grad_norm": 1.678515696649809, "learning_rate": 1.6586341849858266e-05, "loss": 0.7935, "step": 11827 }, { "epoch": 0.8790784095131922, "grad_norm": 1.5305413832353325, "learning_rate": 1.6585738075998978e-05, "loss": 0.7845, "step": 11828 }, { "epoch": 0.8791527313266444, "grad_norm": 1.7356727455448457, "learning_rate": 1.658513425974105e-05, "loss": 0.8456, "step": 11829 }, { "epoch": 0.8792270531400966, "grad_norm": 2.0592249796639486, "learning_rate": 1.658453040108837e-05, "loss": 0.9492, "step": 11830 }, { "epoch": 0.8793013749535489, "grad_norm": 1.959653011069971, "learning_rate": 1.6583926500044823e-05, "loss": 0.6559, "step": 11831 }, { "epoch": 0.8793756967670011, "grad_norm": 2.0419070924496108, "learning_rate": 1.6583322556614302e-05, "loss": 0.8349, "step": 11832 }, { "epoch": 0.8794500185804534, "grad_norm": 1.8574347699225742, "learning_rate": 1.658271857080069e-05, "loss": 0.8392, "step": 11833 }, { "epoch": 0.8795243403939056, "grad_norm": 1.697454451598382, "learning_rate": 1.658211454260788e-05, "loss": 0.9812, "step": 11834 }, { "epoch": 0.8795986622073578, "grad_norm": 1.8067490189455142, "learning_rate": 1.6581510472039753e-05, "loss": 0.8571, "step": 11835 }, { "epoch": 0.8796729840208101, "grad_norm": 2.248182386234111, "learning_rate": 1.6580906359100206e-05, "loss": 0.8126, "step": 11836 }, { "epoch": 0.8797473058342623, "grad_norm": 2.0605778181123457, "learning_rate": 1.6580302203793125e-05, "loss": 0.977, "step": 11837 }, { "epoch": 0.8798216276477147, "grad_norm": 1.6011046993980054, "learning_rate": 1.65796980061224e-05, "loss": 0.7378, "step": 11838 }, { "epoch": 0.8798959494611669, "grad_norm": 1.7688534629616421, "learning_rate": 1.657909376609192e-05, "loss": 0.9387, "step": 11839 }, { "epoch": 0.8799702712746191, "grad_norm": 1.77160914109398, "learning_rate": 1.6578489483705578e-05, "loss": 0.7484, "step": 11840 }, { "epoch": 0.8800445930880714, "grad_norm": 1.7012604433628713, "learning_rate": 1.657788515896726e-05, "loss": 0.7182, "step": 11841 }, { "epoch": 0.8801189149015236, "grad_norm": 2.091906244162199, "learning_rate": 1.657728079188086e-05, "loss": 0.9224, "step": 11842 }, { "epoch": 0.8801932367149758, "grad_norm": 1.7542126713326873, "learning_rate": 1.6576676382450268e-05, "loss": 0.7859, "step": 11843 }, { "epoch": 0.8802675585284281, "grad_norm": 1.9460875249183298, "learning_rate": 1.6576071930679375e-05, "loss": 0.7677, "step": 11844 }, { "epoch": 0.8803418803418803, "grad_norm": 1.8304363377069444, "learning_rate": 1.657546743657207e-05, "loss": 0.8699, "step": 11845 }, { "epoch": 0.8804162021553326, "grad_norm": 2.5464513716113975, "learning_rate": 1.6574862900132246e-05, "loss": 1.0022, "step": 11846 }, { "epoch": 0.8804905239687848, "grad_norm": 1.657535233214276, "learning_rate": 1.6574258321363795e-05, "loss": 0.8474, "step": 11847 }, { "epoch": 0.880564845782237, "grad_norm": 2.1020900764497585, "learning_rate": 1.6573653700270613e-05, "loss": 0.9067, "step": 11848 }, { "epoch": 0.8806391675956894, "grad_norm": 2.191401948197404, "learning_rate": 1.6573049036856588e-05, "loss": 0.7973, "step": 11849 }, { "epoch": 0.8807134894091416, "grad_norm": 1.995014757890333, "learning_rate": 1.6572444331125617e-05, "loss": 0.9275, "step": 11850 }, { "epoch": 0.8807878112225939, "grad_norm": 1.7977556653418119, "learning_rate": 1.657183958308159e-05, "loss": 0.7543, "step": 11851 }, { "epoch": 0.8808621330360461, "grad_norm": 2.368183066426918, "learning_rate": 1.6571234792728396e-05, "loss": 0.952, "step": 11852 }, { "epoch": 0.8809364548494983, "grad_norm": 3.5872868496538493, "learning_rate": 1.6570629960069937e-05, "loss": 0.9844, "step": 11853 }, { "epoch": 0.8810107766629506, "grad_norm": 2.0408052497591944, "learning_rate": 1.6570025085110105e-05, "loss": 0.7649, "step": 11854 }, { "epoch": 0.8810850984764028, "grad_norm": 1.8974293376865734, "learning_rate": 1.656942016785279e-05, "loss": 0.8107, "step": 11855 }, { "epoch": 0.881159420289855, "grad_norm": 1.66542585964204, "learning_rate": 1.656881520830189e-05, "loss": 0.7303, "step": 11856 }, { "epoch": 0.8812337421033073, "grad_norm": 2.1446484887276456, "learning_rate": 1.65682102064613e-05, "loss": 0.9995, "step": 11857 }, { "epoch": 0.8813080639167595, "grad_norm": 1.9594876546710385, "learning_rate": 1.6567605162334905e-05, "loss": 1.0653, "step": 11858 }, { "epoch": 0.8813823857302118, "grad_norm": 1.9344508718869313, "learning_rate": 1.6567000075926616e-05, "loss": 0.9411, "step": 11859 }, { "epoch": 0.8814567075436641, "grad_norm": 1.357961080707177, "learning_rate": 1.656639494724032e-05, "loss": 0.6738, "step": 11860 }, { "epoch": 0.8815310293571164, "grad_norm": 1.4296780297467153, "learning_rate": 1.6565789776279916e-05, "loss": 0.6652, "step": 11861 }, { "epoch": 0.8816053511705686, "grad_norm": 1.3418801753787135, "learning_rate": 1.6565184563049297e-05, "loss": 0.6703, "step": 11862 }, { "epoch": 0.8816796729840208, "grad_norm": 1.6208776866040735, "learning_rate": 1.6564579307552357e-05, "loss": 0.8188, "step": 11863 }, { "epoch": 0.8817539947974731, "grad_norm": 2.0251716590805997, "learning_rate": 1.6563974009793e-05, "loss": 0.7902, "step": 11864 }, { "epoch": 0.8818283166109253, "grad_norm": 1.6728559968590406, "learning_rate": 1.656336866977512e-05, "loss": 0.5747, "step": 11865 }, { "epoch": 0.8819026384243775, "grad_norm": 1.9844769257272026, "learning_rate": 1.656276328750261e-05, "loss": 0.9801, "step": 11866 }, { "epoch": 0.8819769602378298, "grad_norm": 1.6918685093506038, "learning_rate": 1.656215786297937e-05, "loss": 0.7701, "step": 11867 }, { "epoch": 0.882051282051282, "grad_norm": 2.2326186157142636, "learning_rate": 1.65615523962093e-05, "loss": 0.9264, "step": 11868 }, { "epoch": 0.8821256038647343, "grad_norm": 1.8879281749138634, "learning_rate": 1.6560946887196294e-05, "loss": 0.8026, "step": 11869 }, { "epoch": 0.8821999256781865, "grad_norm": 1.7424647648425198, "learning_rate": 1.6560341335944255e-05, "loss": 0.9075, "step": 11870 }, { "epoch": 0.8822742474916387, "grad_norm": 1.952404742569013, "learning_rate": 1.6559735742457076e-05, "loss": 0.8028, "step": 11871 }, { "epoch": 0.8823485693050911, "grad_norm": 1.762103974038524, "learning_rate": 1.655913010673866e-05, "loss": 0.9001, "step": 11872 }, { "epoch": 0.8824228911185433, "grad_norm": 1.7921700934342402, "learning_rate": 1.6558524428792906e-05, "loss": 0.7843, "step": 11873 }, { "epoch": 0.8824972129319956, "grad_norm": 2.0378260944241684, "learning_rate": 1.655791870862371e-05, "loss": 0.8609, "step": 11874 }, { "epoch": 0.8825715347454478, "grad_norm": 2.0034831431101523, "learning_rate": 1.6557312946234977e-05, "loss": 0.795, "step": 11875 }, { "epoch": 0.8826458565589, "grad_norm": 1.8912052002258026, "learning_rate": 1.65567071416306e-05, "loss": 0.7275, "step": 11876 }, { "epoch": 0.8827201783723523, "grad_norm": 2.0036869941296587, "learning_rate": 1.6556101294814484e-05, "loss": 0.8685, "step": 11877 }, { "epoch": 0.8827945001858045, "grad_norm": 1.9077935399997938, "learning_rate": 1.6555495405790528e-05, "loss": 0.8286, "step": 11878 }, { "epoch": 0.8828688219992568, "grad_norm": 2.2865723314830353, "learning_rate": 1.6554889474562636e-05, "loss": 0.6636, "step": 11879 }, { "epoch": 0.882943143812709, "grad_norm": 2.1514507866131756, "learning_rate": 1.65542835011347e-05, "loss": 1.0619, "step": 11880 }, { "epoch": 0.8830174656261612, "grad_norm": 2.208335067752297, "learning_rate": 1.6553677485510626e-05, "loss": 0.9101, "step": 11881 }, { "epoch": 0.8830917874396135, "grad_norm": 1.9352770092559326, "learning_rate": 1.655307142769432e-05, "loss": 0.8931, "step": 11882 }, { "epoch": 0.8831661092530658, "grad_norm": 2.7796446746334356, "learning_rate": 1.6552465327689683e-05, "loss": 0.9637, "step": 11883 }, { "epoch": 0.8832404310665181, "grad_norm": 1.9362887558995951, "learning_rate": 1.655185918550061e-05, "loss": 0.8107, "step": 11884 }, { "epoch": 0.8833147528799703, "grad_norm": 1.909945216753145, "learning_rate": 1.6551253001131006e-05, "loss": 1.0203, "step": 11885 }, { "epoch": 0.8833890746934225, "grad_norm": 2.3290164248355536, "learning_rate": 1.655064677458478e-05, "loss": 0.8541, "step": 11886 }, { "epoch": 0.8834633965068748, "grad_norm": 1.744251141770162, "learning_rate": 1.6550040505865825e-05, "loss": 0.8554, "step": 11887 }, { "epoch": 0.883537718320327, "grad_norm": 1.7840051974720645, "learning_rate": 1.6549434194978054e-05, "loss": 0.863, "step": 11888 }, { "epoch": 0.8836120401337793, "grad_norm": 1.8129342111597946, "learning_rate": 1.6548827841925363e-05, "loss": 1.005, "step": 11889 }, { "epoch": 0.8836863619472315, "grad_norm": 1.7058754934138467, "learning_rate": 1.6548221446711657e-05, "loss": 0.892, "step": 11890 }, { "epoch": 0.8837606837606837, "grad_norm": 2.3475192912438363, "learning_rate": 1.6547615009340844e-05, "loss": 0.8007, "step": 11891 }, { "epoch": 0.883835005574136, "grad_norm": 1.6082204375645603, "learning_rate": 1.6547008529816823e-05, "loss": 0.5705, "step": 11892 }, { "epoch": 0.8839093273875882, "grad_norm": 1.940897823716847, "learning_rate": 1.6546402008143497e-05, "loss": 0.7902, "step": 11893 }, { "epoch": 0.8839836492010406, "grad_norm": 2.425531580404638, "learning_rate": 1.6545795444324783e-05, "loss": 0.9938, "step": 11894 }, { "epoch": 0.8840579710144928, "grad_norm": 1.8325101747153438, "learning_rate": 1.6545188838364574e-05, "loss": 0.6717, "step": 11895 }, { "epoch": 0.884132292827945, "grad_norm": 2.2103368884863923, "learning_rate": 1.6544582190266776e-05, "loss": 0.9518, "step": 11896 }, { "epoch": 0.8842066146413973, "grad_norm": 2.3762736325281737, "learning_rate": 1.65439755000353e-05, "loss": 0.9092, "step": 11897 }, { "epoch": 0.8842809364548495, "grad_norm": 2.8474252531498854, "learning_rate": 1.6543368767674048e-05, "loss": 0.8222, "step": 11898 }, { "epoch": 0.8843552582683017, "grad_norm": 2.783176701124698, "learning_rate": 1.654276199318693e-05, "loss": 1.1789, "step": 11899 }, { "epoch": 0.884429580081754, "grad_norm": 2.956391967392857, "learning_rate": 1.6542155176577847e-05, "loss": 0.8401, "step": 11900 }, { "epoch": 0.8845039018952062, "grad_norm": 1.922584870403905, "learning_rate": 1.654154831785071e-05, "loss": 0.941, "step": 11901 }, { "epoch": 0.8845782237086585, "grad_norm": 1.7922743151723377, "learning_rate": 1.6540941417009422e-05, "loss": 0.8531, "step": 11902 }, { "epoch": 0.8846525455221107, "grad_norm": 1.8706388498532032, "learning_rate": 1.6540334474057897e-05, "loss": 0.8951, "step": 11903 }, { "epoch": 0.8847268673355629, "grad_norm": 2.055301944849234, "learning_rate": 1.6539727489000036e-05, "loss": 0.5904, "step": 11904 }, { "epoch": 0.8848011891490153, "grad_norm": 7.085869637435627, "learning_rate": 1.6539120461839748e-05, "loss": 0.7247, "step": 11905 }, { "epoch": 0.8848755109624675, "grad_norm": 2.039977517529235, "learning_rate": 1.6538513392580943e-05, "loss": 1.0663, "step": 11906 }, { "epoch": 0.8849498327759198, "grad_norm": 2.2802538689957754, "learning_rate": 1.6537906281227525e-05, "loss": 0.9233, "step": 11907 }, { "epoch": 0.885024154589372, "grad_norm": 2.3442215604236902, "learning_rate": 1.653729912778341e-05, "loss": 1.2015, "step": 11908 }, { "epoch": 0.8850984764028242, "grad_norm": 1.6810280765400147, "learning_rate": 1.65366919322525e-05, "loss": 0.9018, "step": 11909 }, { "epoch": 0.8851727982162765, "grad_norm": 3.3743720602971625, "learning_rate": 1.6536084694638705e-05, "loss": 0.9125, "step": 11910 }, { "epoch": 0.8852471200297287, "grad_norm": 1.601839930572591, "learning_rate": 1.653547741494594e-05, "loss": 0.8272, "step": 11911 }, { "epoch": 0.885321441843181, "grad_norm": 2.4395827536565418, "learning_rate": 1.6534870093178108e-05, "loss": 0.6592, "step": 11912 }, { "epoch": 0.8853957636566332, "grad_norm": 2.1444051680304743, "learning_rate": 1.653426272933912e-05, "loss": 0.896, "step": 11913 }, { "epoch": 0.8854700854700854, "grad_norm": 1.6499767494602697, "learning_rate": 1.6533655323432888e-05, "loss": 0.8152, "step": 11914 }, { "epoch": 0.8855444072835377, "grad_norm": 1.7125415068851138, "learning_rate": 1.653304787546332e-05, "loss": 0.7124, "step": 11915 }, { "epoch": 0.88561872909699, "grad_norm": 1.8455567820489558, "learning_rate": 1.6532440385434334e-05, "loss": 0.774, "step": 11916 }, { "epoch": 0.8856930509104423, "grad_norm": 1.94145532423553, "learning_rate": 1.6531832853349835e-05, "loss": 0.7631, "step": 11917 }, { "epoch": 0.8857673727238945, "grad_norm": 3.5071699684997006, "learning_rate": 1.6531225279213732e-05, "loss": 0.6844, "step": 11918 }, { "epoch": 0.8858416945373467, "grad_norm": 1.9495773478504332, "learning_rate": 1.653061766302994e-05, "loss": 0.6423, "step": 11919 }, { "epoch": 0.885916016350799, "grad_norm": 1.9574043118557956, "learning_rate": 1.6530010004802375e-05, "loss": 0.8654, "step": 11920 }, { "epoch": 0.8859903381642512, "grad_norm": 2.008403352293503, "learning_rate": 1.6529402304534938e-05, "loss": 0.6904, "step": 11921 }, { "epoch": 0.8860646599777035, "grad_norm": 1.6950516913044114, "learning_rate": 1.6528794562231553e-05, "loss": 0.8477, "step": 11922 }, { "epoch": 0.8861389817911557, "grad_norm": 2.0527894597121787, "learning_rate": 1.6528186777896126e-05, "loss": 0.9643, "step": 11923 }, { "epoch": 0.8862133036046079, "grad_norm": 2.7932932880424093, "learning_rate": 1.6527578951532568e-05, "loss": 0.9673, "step": 11924 }, { "epoch": 0.8862876254180602, "grad_norm": 1.8860826245681392, "learning_rate": 1.6526971083144795e-05, "loss": 0.7475, "step": 11925 }, { "epoch": 0.8863619472315124, "grad_norm": 1.9506962496391302, "learning_rate": 1.6526363172736727e-05, "loss": 0.7411, "step": 11926 }, { "epoch": 0.8864362690449646, "grad_norm": 2.281912734108093, "learning_rate": 1.6525755220312264e-05, "loss": 1.0814, "step": 11927 }, { "epoch": 0.886510590858417, "grad_norm": 2.339512823875977, "learning_rate": 1.6525147225875332e-05, "loss": 0.8009, "step": 11928 }, { "epoch": 0.8865849126718692, "grad_norm": 2.912411291501558, "learning_rate": 1.6524539189429843e-05, "loss": 0.9261, "step": 11929 }, { "epoch": 0.8866592344853215, "grad_norm": 2.227363663612419, "learning_rate": 1.6523931110979704e-05, "loss": 0.949, "step": 11930 }, { "epoch": 0.8867335562987737, "grad_norm": 1.5478318761166612, "learning_rate": 1.6523322990528838e-05, "loss": 0.8162, "step": 11931 }, { "epoch": 0.886807878112226, "grad_norm": 2.1363807602391214, "learning_rate": 1.6522714828081156e-05, "loss": 1.022, "step": 11932 }, { "epoch": 0.8868821999256782, "grad_norm": 1.8932238458985036, "learning_rate": 1.6522106623640576e-05, "loss": 0.7253, "step": 11933 }, { "epoch": 0.8869565217391304, "grad_norm": 1.5829459614652812, "learning_rate": 1.652149837721101e-05, "loss": 0.7897, "step": 11934 }, { "epoch": 0.8870308435525827, "grad_norm": 1.7405786746690404, "learning_rate": 1.6520890088796377e-05, "loss": 0.951, "step": 11935 }, { "epoch": 0.8871051653660349, "grad_norm": 1.9812835288644102, "learning_rate": 1.652028175840059e-05, "loss": 0.7544, "step": 11936 }, { "epoch": 0.8871794871794871, "grad_norm": 1.58208210518309, "learning_rate": 1.651967338602757e-05, "loss": 0.7186, "step": 11937 }, { "epoch": 0.8872538089929394, "grad_norm": 2.2228581056235526, "learning_rate": 1.6519064971681232e-05, "loss": 0.8126, "step": 11938 }, { "epoch": 0.8873281308063917, "grad_norm": 1.9698839282316682, "learning_rate": 1.6518456515365485e-05, "loss": 0.7653, "step": 11939 }, { "epoch": 0.887402452619844, "grad_norm": 2.165024022887668, "learning_rate": 1.6517848017084262e-05, "loss": 0.6348, "step": 11940 }, { "epoch": 0.8874767744332962, "grad_norm": 1.7780919262967168, "learning_rate": 1.651723947684147e-05, "loss": 0.743, "step": 11941 }, { "epoch": 0.8875510962467484, "grad_norm": 1.5746086763881961, "learning_rate": 1.6516630894641023e-05, "loss": 0.6958, "step": 11942 }, { "epoch": 0.8876254180602007, "grad_norm": 2.3063782968195685, "learning_rate": 1.651602227048685e-05, "loss": 0.9229, "step": 11943 }, { "epoch": 0.8876997398736529, "grad_norm": 2.1074789407247954, "learning_rate": 1.651541360438286e-05, "loss": 1.0134, "step": 11944 }, { "epoch": 0.8877740616871052, "grad_norm": 2.153405498527682, "learning_rate": 1.6514804896332973e-05, "loss": 0.9105, "step": 11945 }, { "epoch": 0.8878483835005574, "grad_norm": 1.5841689153087584, "learning_rate": 1.6514196146341118e-05, "loss": 0.7744, "step": 11946 }, { "epoch": 0.8879227053140096, "grad_norm": 2.0584777574369664, "learning_rate": 1.65135873544112e-05, "loss": 0.813, "step": 11947 }, { "epoch": 0.8879970271274619, "grad_norm": 1.5881557643447528, "learning_rate": 1.6512978520547146e-05, "loss": 0.8115, "step": 11948 }, { "epoch": 0.8880713489409141, "grad_norm": 2.686197318685157, "learning_rate": 1.6512369644752873e-05, "loss": 0.8593, "step": 11949 }, { "epoch": 0.8881456707543665, "grad_norm": 1.7198609244801293, "learning_rate": 1.6511760727032306e-05, "loss": 0.695, "step": 11950 }, { "epoch": 0.8882199925678187, "grad_norm": 2.0926334864961316, "learning_rate": 1.651115176738936e-05, "loss": 0.7427, "step": 11951 }, { "epoch": 0.8882943143812709, "grad_norm": 1.9761540297783455, "learning_rate": 1.6510542765827955e-05, "loss": 0.8452, "step": 11952 }, { "epoch": 0.8883686361947232, "grad_norm": 1.7320809072013883, "learning_rate": 1.6509933722352014e-05, "loss": 0.7093, "step": 11953 }, { "epoch": 0.8884429580081754, "grad_norm": 1.638106111135333, "learning_rate": 1.650932463696546e-05, "loss": 0.8695, "step": 11954 }, { "epoch": 0.8885172798216276, "grad_norm": 2.1805617889486046, "learning_rate": 1.6508715509672207e-05, "loss": 0.8286, "step": 11955 }, { "epoch": 0.8885916016350799, "grad_norm": 1.5700579725124502, "learning_rate": 1.6508106340476184e-05, "loss": 0.7931, "step": 11956 }, { "epoch": 0.8886659234485321, "grad_norm": 2.3869704044569877, "learning_rate": 1.650749712938131e-05, "loss": 0.8712, "step": 11957 }, { "epoch": 0.8887402452619844, "grad_norm": 2.1171311421104027, "learning_rate": 1.6506887876391506e-05, "loss": 0.777, "step": 11958 }, { "epoch": 0.8888145670754366, "grad_norm": 2.339332109916961, "learning_rate": 1.6506278581510698e-05, "loss": 1.0406, "step": 11959 }, { "epoch": 0.8888888888888888, "grad_norm": 2.249391846874928, "learning_rate": 1.6505669244742803e-05, "loss": 1.1277, "step": 11960 }, { "epoch": 0.8889632107023412, "grad_norm": 2.0165283482207643, "learning_rate": 1.6505059866091748e-05, "loss": 0.9056, "step": 11961 }, { "epoch": 0.8890375325157934, "grad_norm": 1.8902740559983626, "learning_rate": 1.6504450445561456e-05, "loss": 0.8762, "step": 11962 }, { "epoch": 0.8891118543292457, "grad_norm": 2.257500054789042, "learning_rate": 1.650384098315585e-05, "loss": 0.8258, "step": 11963 }, { "epoch": 0.8891861761426979, "grad_norm": 1.4826389121960273, "learning_rate": 1.6503231478878855e-05, "loss": 0.7574, "step": 11964 }, { "epoch": 0.8892604979561501, "grad_norm": 1.6231623584265142, "learning_rate": 1.650262193273439e-05, "loss": 0.8371, "step": 11965 }, { "epoch": 0.8893348197696024, "grad_norm": 2.135109484588638, "learning_rate": 1.650201234472638e-05, "loss": 0.7245, "step": 11966 }, { "epoch": 0.8894091415830546, "grad_norm": 1.733476998802615, "learning_rate": 1.6501402714858758e-05, "loss": 0.931, "step": 11967 }, { "epoch": 0.8894834633965069, "grad_norm": 1.6003589565213554, "learning_rate": 1.6500793043135438e-05, "loss": 0.7392, "step": 11968 }, { "epoch": 0.8895577852099591, "grad_norm": 2.104211194346003, "learning_rate": 1.6500183329560352e-05, "loss": 0.7458, "step": 11969 }, { "epoch": 0.8896321070234113, "grad_norm": 1.7245954205545686, "learning_rate": 1.649957357413742e-05, "loss": 0.6681, "step": 11970 }, { "epoch": 0.8897064288368636, "grad_norm": 2.3853053142443987, "learning_rate": 1.6498963776870573e-05, "loss": 0.9077, "step": 11971 }, { "epoch": 0.8897807506503159, "grad_norm": 1.8290751762036335, "learning_rate": 1.6498353937763734e-05, "loss": 0.6794, "step": 11972 }, { "epoch": 0.8898550724637682, "grad_norm": 1.9895571786555184, "learning_rate": 1.649774405682083e-05, "loss": 0.8729, "step": 11973 }, { "epoch": 0.8899293942772204, "grad_norm": 2.9388245999788762, "learning_rate": 1.6497134134045788e-05, "loss": 0.7273, "step": 11974 }, { "epoch": 0.8900037160906726, "grad_norm": 1.7312416920061515, "learning_rate": 1.649652416944253e-05, "loss": 1.0727, "step": 11975 }, { "epoch": 0.8900780379041249, "grad_norm": 2.072743378811907, "learning_rate": 1.649591416301499e-05, "loss": 0.8793, "step": 11976 }, { "epoch": 0.8901523597175771, "grad_norm": 2.367761552466086, "learning_rate": 1.6495304114767088e-05, "loss": 0.6469, "step": 11977 }, { "epoch": 0.8902266815310294, "grad_norm": 2.1612797820430427, "learning_rate": 1.6494694024702757e-05, "loss": 0.8524, "step": 11978 }, { "epoch": 0.8903010033444816, "grad_norm": 1.6682504361976178, "learning_rate": 1.6494083892825922e-05, "loss": 0.8611, "step": 11979 }, { "epoch": 0.8903753251579338, "grad_norm": 1.720282949862239, "learning_rate": 1.6493473719140517e-05, "loss": 0.7686, "step": 11980 }, { "epoch": 0.8904496469713861, "grad_norm": 1.771738073263514, "learning_rate": 1.649286350365046e-05, "loss": 0.9114, "step": 11981 }, { "epoch": 0.8905239687848383, "grad_norm": 1.8995451740214535, "learning_rate": 1.6492253246359688e-05, "loss": 0.8038, "step": 11982 }, { "epoch": 0.8905982905982905, "grad_norm": 1.822046722537362, "learning_rate": 1.6491642947272122e-05, "loss": 0.8077, "step": 11983 }, { "epoch": 0.8906726124117429, "grad_norm": 1.8081451196126928, "learning_rate": 1.6491032606391696e-05, "loss": 0.8228, "step": 11984 }, { "epoch": 0.8907469342251951, "grad_norm": 2.355176939735567, "learning_rate": 1.649042222372234e-05, "loss": 0.7469, "step": 11985 }, { "epoch": 0.8908212560386474, "grad_norm": 1.8852047748335676, "learning_rate": 1.6489811799267986e-05, "loss": 0.6973, "step": 11986 }, { "epoch": 0.8908955778520996, "grad_norm": 1.9018826436290974, "learning_rate": 1.648920133303256e-05, "loss": 0.9032, "step": 11987 }, { "epoch": 0.8909698996655518, "grad_norm": 1.6126601024261504, "learning_rate": 1.6488590825019987e-05, "loss": 0.7756, "step": 11988 }, { "epoch": 0.8910442214790041, "grad_norm": 1.813753529781324, "learning_rate": 1.648798027523421e-05, "loss": 0.8053, "step": 11989 }, { "epoch": 0.8911185432924563, "grad_norm": 1.8767012906653697, "learning_rate": 1.6487369683679143e-05, "loss": 0.8456, "step": 11990 }, { "epoch": 0.8911928651059086, "grad_norm": 1.5621001141330697, "learning_rate": 1.6486759050358735e-05, "loss": 0.5977, "step": 11991 }, { "epoch": 0.8912671869193608, "grad_norm": 1.500479753867686, "learning_rate": 1.6486148375276906e-05, "loss": 0.6978, "step": 11992 }, { "epoch": 0.891341508732813, "grad_norm": 2.3046063343233625, "learning_rate": 1.648553765843759e-05, "loss": 0.8315, "step": 11993 }, { "epoch": 0.8914158305462653, "grad_norm": 1.9759880808667722, "learning_rate": 1.648492689984472e-05, "loss": 0.8971, "step": 11994 }, { "epoch": 0.8914901523597176, "grad_norm": 2.108606709025188, "learning_rate": 1.6484316099502225e-05, "loss": 0.8857, "step": 11995 }, { "epoch": 0.8915644741731699, "grad_norm": 2.2337464764594284, "learning_rate": 1.648370525741404e-05, "loss": 1.0267, "step": 11996 }, { "epoch": 0.8916387959866221, "grad_norm": 1.944224541421802, "learning_rate": 1.64830943735841e-05, "loss": 0.717, "step": 11997 }, { "epoch": 0.8917131178000743, "grad_norm": 1.8131480483512157, "learning_rate": 1.6482483448016333e-05, "loss": 0.8247, "step": 11998 }, { "epoch": 0.8917874396135266, "grad_norm": 1.933589374209032, "learning_rate": 1.6481872480714673e-05, "loss": 0.628, "step": 11999 }, { "epoch": 0.8918617614269788, "grad_norm": 2.050905540579622, "learning_rate": 1.6481261471683057e-05, "loss": 0.9876, "step": 12000 }, { "epoch": 0.891936083240431, "grad_norm": 1.889764626092637, "learning_rate": 1.6480650420925413e-05, "loss": 0.9847, "step": 12001 }, { "epoch": 0.8920104050538833, "grad_norm": 2.063959484947378, "learning_rate": 1.6480039328445677e-05, "loss": 0.8112, "step": 12002 }, { "epoch": 0.8920847268673355, "grad_norm": 1.6684770996550988, "learning_rate": 1.647942819424779e-05, "loss": 0.7037, "step": 12003 }, { "epoch": 0.8921590486807878, "grad_norm": 2.084593210655635, "learning_rate": 1.6478817018335673e-05, "loss": 0.7187, "step": 12004 }, { "epoch": 0.89223337049424, "grad_norm": 2.175652008041238, "learning_rate": 1.647820580071327e-05, "loss": 0.9337, "step": 12005 }, { "epoch": 0.8923076923076924, "grad_norm": 2.367286347755324, "learning_rate": 1.6477594541384517e-05, "loss": 0.9799, "step": 12006 }, { "epoch": 0.8923820141211446, "grad_norm": 1.8297645934953242, "learning_rate": 1.6476983240353342e-05, "loss": 1.0723, "step": 12007 }, { "epoch": 0.8924563359345968, "grad_norm": 1.7838446967739734, "learning_rate": 1.6476371897623692e-05, "loss": 0.8535, "step": 12008 }, { "epoch": 0.8925306577480491, "grad_norm": 1.9883167970698654, "learning_rate": 1.647576051319949e-05, "loss": 0.7451, "step": 12009 }, { "epoch": 0.8926049795615013, "grad_norm": 3.2661598835899146, "learning_rate": 1.6475149087084682e-05, "loss": 0.8666, "step": 12010 }, { "epoch": 0.8926793013749535, "grad_norm": 1.6585766378700497, "learning_rate": 1.6474537619283197e-05, "loss": 0.7446, "step": 12011 }, { "epoch": 0.8927536231884058, "grad_norm": 1.720417133172764, "learning_rate": 1.6473926109798973e-05, "loss": 0.7432, "step": 12012 }, { "epoch": 0.892827945001858, "grad_norm": 1.560132217154042, "learning_rate": 1.6473314558635954e-05, "loss": 0.7616, "step": 12013 }, { "epoch": 0.8929022668153103, "grad_norm": 1.794495291791386, "learning_rate": 1.6472702965798067e-05, "loss": 0.7706, "step": 12014 }, { "epoch": 0.8929765886287625, "grad_norm": 1.483946276384048, "learning_rate": 1.6472091331289256e-05, "loss": 0.6058, "step": 12015 }, { "epoch": 0.8930509104422147, "grad_norm": 4.725053701723367, "learning_rate": 1.6471479655113458e-05, "loss": 0.7487, "step": 12016 }, { "epoch": 0.8931252322556671, "grad_norm": 1.3375761352742463, "learning_rate": 1.647086793727461e-05, "loss": 0.5838, "step": 12017 }, { "epoch": 0.8931995540691193, "grad_norm": 2.9410922076565282, "learning_rate": 1.6470256177776644e-05, "loss": 1.0653, "step": 12018 }, { "epoch": 0.8932738758825716, "grad_norm": 4.279960178975187, "learning_rate": 1.646964437662351e-05, "loss": 1.0429, "step": 12019 }, { "epoch": 0.8933481976960238, "grad_norm": 2.2976642079776126, "learning_rate": 1.6469032533819142e-05, "loss": 0.8509, "step": 12020 }, { "epoch": 0.893422519509476, "grad_norm": 1.9908009934348532, "learning_rate": 1.6468420649367476e-05, "loss": 0.846, "step": 12021 }, { "epoch": 0.8934968413229283, "grad_norm": 1.5458563862515269, "learning_rate": 1.6467808723272452e-05, "loss": 0.7252, "step": 12022 }, { "epoch": 0.8935711631363805, "grad_norm": 2.0435713159417084, "learning_rate": 1.6467196755538017e-05, "loss": 0.9239, "step": 12023 }, { "epoch": 0.8936454849498328, "grad_norm": 1.82229805568605, "learning_rate": 1.64665847461681e-05, "loss": 0.8313, "step": 12024 }, { "epoch": 0.893719806763285, "grad_norm": 1.7939229705749613, "learning_rate": 1.6465972695166646e-05, "loss": 0.7029, "step": 12025 }, { "epoch": 0.8937941285767372, "grad_norm": 1.9385060285547624, "learning_rate": 1.6465360602537596e-05, "loss": 0.9023, "step": 12026 }, { "epoch": 0.8938684503901895, "grad_norm": 1.9648978762727105, "learning_rate": 1.646474846828489e-05, "loss": 0.7527, "step": 12027 }, { "epoch": 0.8939427722036418, "grad_norm": 3.0709126256938806, "learning_rate": 1.6464136292412466e-05, "loss": 0.9306, "step": 12028 }, { "epoch": 0.8940170940170941, "grad_norm": 1.8766752367679576, "learning_rate": 1.6463524074924274e-05, "loss": 0.8145, "step": 12029 }, { "epoch": 0.8940914158305463, "grad_norm": 1.7771888392817574, "learning_rate": 1.6462911815824247e-05, "loss": 0.6768, "step": 12030 }, { "epoch": 0.8941657376439985, "grad_norm": 1.6252474860357071, "learning_rate": 1.6462299515116328e-05, "loss": 0.6867, "step": 12031 }, { "epoch": 0.8942400594574508, "grad_norm": 2.529658850737742, "learning_rate": 1.646168717280446e-05, "loss": 0.8325, "step": 12032 }, { "epoch": 0.894314381270903, "grad_norm": 1.8432427946235872, "learning_rate": 1.6461074788892585e-05, "loss": 0.8627, "step": 12033 }, { "epoch": 0.8943887030843553, "grad_norm": 1.6357749269241713, "learning_rate": 1.6460462363384645e-05, "loss": 0.6097, "step": 12034 }, { "epoch": 0.8944630248978075, "grad_norm": 2.0256192080057613, "learning_rate": 1.6459849896284585e-05, "loss": 0.7939, "step": 12035 }, { "epoch": 0.8945373467112597, "grad_norm": 1.772540152960766, "learning_rate": 1.645923738759635e-05, "loss": 0.8019, "step": 12036 }, { "epoch": 0.894611668524712, "grad_norm": 2.0576728421256707, "learning_rate": 1.6458624837323874e-05, "loss": 0.9569, "step": 12037 }, { "epoch": 0.8946859903381642, "grad_norm": 1.763777040670484, "learning_rate": 1.6458012245471107e-05, "loss": 0.7103, "step": 12038 }, { "epoch": 0.8947603121516166, "grad_norm": 2.1563461997205606, "learning_rate": 1.6457399612041993e-05, "loss": 0.9376, "step": 12039 }, { "epoch": 0.8948346339650688, "grad_norm": 2.0070741508421195, "learning_rate": 1.6456786937040474e-05, "loss": 0.9097, "step": 12040 }, { "epoch": 0.894908955778521, "grad_norm": 1.5643057155412539, "learning_rate": 1.64561742204705e-05, "loss": 0.7016, "step": 12041 }, { "epoch": 0.8949832775919733, "grad_norm": 1.7737077631272111, "learning_rate": 1.645556146233601e-05, "loss": 0.8111, "step": 12042 }, { "epoch": 0.8950575994054255, "grad_norm": 1.4848094153870475, "learning_rate": 1.6454948662640948e-05, "loss": 0.6687, "step": 12043 }, { "epoch": 0.8951319212188777, "grad_norm": 1.9944192845302768, "learning_rate": 1.6454335821389264e-05, "loss": 0.9205, "step": 12044 }, { "epoch": 0.89520624303233, "grad_norm": 1.790089695335859, "learning_rate": 1.6453722938584898e-05, "loss": 0.9694, "step": 12045 }, { "epoch": 0.8952805648457822, "grad_norm": 1.9393605715824926, "learning_rate": 1.6453110014231798e-05, "loss": 0.8719, "step": 12046 }, { "epoch": 0.8953548866592345, "grad_norm": 2.080639107652014, "learning_rate": 1.6452497048333916e-05, "loss": 0.9865, "step": 12047 }, { "epoch": 0.8954292084726867, "grad_norm": 2.1881829324747777, "learning_rate": 1.6451884040895188e-05, "loss": 0.8137, "step": 12048 }, { "epoch": 0.8955035302861389, "grad_norm": 1.842743817474371, "learning_rate": 1.645127099191957e-05, "loss": 1.0836, "step": 12049 }, { "epoch": 0.8955778520995912, "grad_norm": 1.6875402310955314, "learning_rate": 1.6450657901411e-05, "loss": 0.7282, "step": 12050 }, { "epoch": 0.8956521739130435, "grad_norm": 1.5003234838160289, "learning_rate": 1.6450044769373426e-05, "loss": 0.754, "step": 12051 }, { "epoch": 0.8957264957264958, "grad_norm": 2.396583255881838, "learning_rate": 1.6449431595810805e-05, "loss": 0.844, "step": 12052 }, { "epoch": 0.895800817539948, "grad_norm": 3.1201199439210616, "learning_rate": 1.6448818380727076e-05, "loss": 1.0539, "step": 12053 }, { "epoch": 0.8958751393534002, "grad_norm": 1.7032694980807976, "learning_rate": 1.644820512412619e-05, "loss": 0.6619, "step": 12054 }, { "epoch": 0.8959494611668525, "grad_norm": 1.6360837576181742, "learning_rate": 1.644759182601209e-05, "loss": 0.9688, "step": 12055 }, { "epoch": 0.8960237829803047, "grad_norm": 2.0167200315264293, "learning_rate": 1.644697848638873e-05, "loss": 1.0273, "step": 12056 }, { "epoch": 0.896098104793757, "grad_norm": 1.9006857856085424, "learning_rate": 1.644636510526006e-05, "loss": 0.9375, "step": 12057 }, { "epoch": 0.8961724266072092, "grad_norm": 1.7989414338428593, "learning_rate": 1.6445751682630026e-05, "loss": 0.8999, "step": 12058 }, { "epoch": 0.8962467484206614, "grad_norm": 1.7049491135509003, "learning_rate": 1.644513821850257e-05, "loss": 0.7342, "step": 12059 }, { "epoch": 0.8963210702341137, "grad_norm": 1.990811984741385, "learning_rate": 1.644452471288166e-05, "loss": 0.8509, "step": 12060 }, { "epoch": 0.8963953920475659, "grad_norm": 1.7798963105734158, "learning_rate": 1.6443911165771226e-05, "loss": 0.7413, "step": 12061 }, { "epoch": 0.8964697138610183, "grad_norm": 3.2414418637155293, "learning_rate": 1.6443297577175227e-05, "loss": 0.9354, "step": 12062 }, { "epoch": 0.8965440356744705, "grad_norm": 1.8560119884374744, "learning_rate": 1.6442683947097613e-05, "loss": 0.9186, "step": 12063 }, { "epoch": 0.8966183574879227, "grad_norm": 2.678184413621863, "learning_rate": 1.6442070275542336e-05, "loss": 0.9656, "step": 12064 }, { "epoch": 0.896692679301375, "grad_norm": 2.115537596967814, "learning_rate": 1.644145656251334e-05, "loss": 0.9574, "step": 12065 }, { "epoch": 0.8967670011148272, "grad_norm": 2.039427469927372, "learning_rate": 1.6440842808014586e-05, "loss": 0.9536, "step": 12066 }, { "epoch": 0.8968413229282794, "grad_norm": 1.5537804259085684, "learning_rate": 1.6440229012050018e-05, "loss": 0.6582, "step": 12067 }, { "epoch": 0.8969156447417317, "grad_norm": 1.470428062144996, "learning_rate": 1.643961517462359e-05, "loss": 0.8181, "step": 12068 }, { "epoch": 0.8969899665551839, "grad_norm": 2.018278052246929, "learning_rate": 1.6439001295739255e-05, "loss": 0.8752, "step": 12069 }, { "epoch": 0.8970642883686362, "grad_norm": 1.7743002213400205, "learning_rate": 1.6438387375400963e-05, "loss": 0.9121, "step": 12070 }, { "epoch": 0.8971386101820884, "grad_norm": 2.179879024981926, "learning_rate": 1.6437773413612666e-05, "loss": 0.901, "step": 12071 }, { "epoch": 0.8972129319955406, "grad_norm": 1.8649147411007725, "learning_rate": 1.643715941037832e-05, "loss": 0.6367, "step": 12072 }, { "epoch": 0.897287253808993, "grad_norm": 2.2448409378214413, "learning_rate": 1.643654536570187e-05, "loss": 1.0224, "step": 12073 }, { "epoch": 0.8973615756224452, "grad_norm": 2.08105622080624, "learning_rate": 1.643593127958728e-05, "loss": 1.0658, "step": 12074 }, { "epoch": 0.8974358974358975, "grad_norm": 2.0771219495490945, "learning_rate": 1.64353171520385e-05, "loss": 1.0923, "step": 12075 }, { "epoch": 0.8975102192493497, "grad_norm": 1.4971547199009259, "learning_rate": 1.643470298305948e-05, "loss": 0.9575, "step": 12076 }, { "epoch": 0.8975845410628019, "grad_norm": 1.8520252151041219, "learning_rate": 1.6434088772654174e-05, "loss": 0.8203, "step": 12077 }, { "epoch": 0.8976588628762542, "grad_norm": 2.0220682311748504, "learning_rate": 1.643347452082654e-05, "loss": 0.7525, "step": 12078 }, { "epoch": 0.8977331846897064, "grad_norm": 2.0842540863548633, "learning_rate": 1.643286022758053e-05, "loss": 1.0985, "step": 12079 }, { "epoch": 0.8978075065031587, "grad_norm": 1.682329893487526, "learning_rate": 1.64322458929201e-05, "loss": 0.7349, "step": 12080 }, { "epoch": 0.8978818283166109, "grad_norm": 1.936782898297364, "learning_rate": 1.6431631516849206e-05, "loss": 0.8353, "step": 12081 }, { "epoch": 0.8979561501300631, "grad_norm": 1.5771899403220786, "learning_rate": 1.6431017099371804e-05, "loss": 0.6519, "step": 12082 }, { "epoch": 0.8980304719435154, "grad_norm": 1.3024551512466018, "learning_rate": 1.6430402640491844e-05, "loss": 0.6397, "step": 12083 }, { "epoch": 0.8981047937569677, "grad_norm": 2.4077161598284755, "learning_rate": 1.6429788140213284e-05, "loss": 0.874, "step": 12084 }, { "epoch": 0.89817911557042, "grad_norm": 2.152640517498735, "learning_rate": 1.6429173598540085e-05, "loss": 0.8596, "step": 12085 }, { "epoch": 0.8982534373838722, "grad_norm": 1.7297834120629774, "learning_rate": 1.64285590154762e-05, "loss": 0.9142, "step": 12086 }, { "epoch": 0.8983277591973244, "grad_norm": 1.877720291403227, "learning_rate": 1.6427944391025583e-05, "loss": 0.7782, "step": 12087 }, { "epoch": 0.8984020810107767, "grad_norm": 1.7632094290112228, "learning_rate": 1.6427329725192195e-05, "loss": 0.7403, "step": 12088 }, { "epoch": 0.8984764028242289, "grad_norm": 1.8716971321919997, "learning_rate": 1.6426715017979993e-05, "loss": 0.8885, "step": 12089 }, { "epoch": 0.8985507246376812, "grad_norm": 1.7119127229983582, "learning_rate": 1.6426100269392932e-05, "loss": 0.9152, "step": 12090 }, { "epoch": 0.8986250464511334, "grad_norm": 3.4491989373251006, "learning_rate": 1.642548547943497e-05, "loss": 0.772, "step": 12091 }, { "epoch": 0.8986993682645856, "grad_norm": 1.8507561176991787, "learning_rate": 1.6424870648110065e-05, "loss": 0.7508, "step": 12092 }, { "epoch": 0.8987736900780379, "grad_norm": 2.275240775305881, "learning_rate": 1.642425577542218e-05, "loss": 0.9633, "step": 12093 }, { "epoch": 0.8988480118914901, "grad_norm": 1.5853449189750939, "learning_rate": 1.6423640861375267e-05, "loss": 0.7964, "step": 12094 }, { "epoch": 0.8989223337049425, "grad_norm": 1.7553973249396502, "learning_rate": 1.6423025905973288e-05, "loss": 0.8127, "step": 12095 }, { "epoch": 0.8989966555183947, "grad_norm": 2.4327093704627423, "learning_rate": 1.64224109092202e-05, "loss": 0.5752, "step": 12096 }, { "epoch": 0.8990709773318469, "grad_norm": 3.0420324014925906, "learning_rate": 1.6421795871119964e-05, "loss": 0.9841, "step": 12097 }, { "epoch": 0.8991452991452992, "grad_norm": 1.838741877979497, "learning_rate": 1.6421180791676542e-05, "loss": 0.8185, "step": 12098 }, { "epoch": 0.8992196209587514, "grad_norm": 2.038879981300224, "learning_rate": 1.642056567089389e-05, "loss": 0.7328, "step": 12099 }, { "epoch": 0.8992939427722036, "grad_norm": 2.1891442512819914, "learning_rate": 1.641995050877597e-05, "loss": 0.925, "step": 12100 }, { "epoch": 0.8993682645856559, "grad_norm": 3.710351274679117, "learning_rate": 1.641933530532674e-05, "loss": 0.7072, "step": 12101 }, { "epoch": 0.8994425863991081, "grad_norm": 1.6830550012831709, "learning_rate": 1.641872006055016e-05, "loss": 0.8753, "step": 12102 }, { "epoch": 0.8995169082125604, "grad_norm": 1.6462770699568807, "learning_rate": 1.6418104774450198e-05, "loss": 0.7176, "step": 12103 }, { "epoch": 0.8995912300260126, "grad_norm": 1.8234855174995845, "learning_rate": 1.641748944703081e-05, "loss": 0.9647, "step": 12104 }, { "epoch": 0.8996655518394648, "grad_norm": 1.855580894494975, "learning_rate": 1.6416874078295956e-05, "loss": 0.8515, "step": 12105 }, { "epoch": 0.8997398736529171, "grad_norm": 1.9945014925175466, "learning_rate": 1.6416258668249604e-05, "loss": 0.6383, "step": 12106 }, { "epoch": 0.8998141954663694, "grad_norm": 1.8415884851915483, "learning_rate": 1.6415643216895705e-05, "loss": 0.8519, "step": 12107 }, { "epoch": 0.8998885172798217, "grad_norm": 2.0913604648769453, "learning_rate": 1.641502772423823e-05, "loss": 0.9708, "step": 12108 }, { "epoch": 0.8999628390932739, "grad_norm": 1.9625239107498909, "learning_rate": 1.641441219028114e-05, "loss": 0.9985, "step": 12109 }, { "epoch": 0.9000371609067261, "grad_norm": 1.6720708880857733, "learning_rate": 1.6413796615028398e-05, "loss": 0.7034, "step": 12110 }, { "epoch": 0.9001114827201784, "grad_norm": 1.852973391393234, "learning_rate": 1.641318099848397e-05, "loss": 0.9191, "step": 12111 }, { "epoch": 0.9001858045336306, "grad_norm": 1.6600903005291796, "learning_rate": 1.641256534065181e-05, "loss": 0.7473, "step": 12112 }, { "epoch": 0.9002601263470829, "grad_norm": 3.0284945799484304, "learning_rate": 1.641194964153589e-05, "loss": 0.705, "step": 12113 }, { "epoch": 0.9003344481605351, "grad_norm": 1.878593920032044, "learning_rate": 1.6411333901140165e-05, "loss": 0.8859, "step": 12114 }, { "epoch": 0.9004087699739873, "grad_norm": 2.3098339376001396, "learning_rate": 1.6410718119468608e-05, "loss": 0.9204, "step": 12115 }, { "epoch": 0.9004830917874396, "grad_norm": 1.8678629133015117, "learning_rate": 1.6410102296525184e-05, "loss": 0.9955, "step": 12116 }, { "epoch": 0.9005574136008918, "grad_norm": 1.8163436607473198, "learning_rate": 1.640948643231385e-05, "loss": 0.7323, "step": 12117 }, { "epoch": 0.9006317354143442, "grad_norm": 1.7823049467190288, "learning_rate": 1.6408870526838575e-05, "loss": 0.6875, "step": 12118 }, { "epoch": 0.9007060572277964, "grad_norm": 1.930406513453897, "learning_rate": 1.6408254580103325e-05, "loss": 0.8225, "step": 12119 }, { "epoch": 0.9007803790412486, "grad_norm": 1.8889841781681416, "learning_rate": 1.640763859211207e-05, "loss": 0.9185, "step": 12120 }, { "epoch": 0.9008547008547009, "grad_norm": 2.342588728122295, "learning_rate": 1.6407022562868763e-05, "loss": 0.8567, "step": 12121 }, { "epoch": 0.9009290226681531, "grad_norm": 2.5382702524119214, "learning_rate": 1.6406406492377378e-05, "loss": 0.9073, "step": 12122 }, { "epoch": 0.9010033444816054, "grad_norm": 1.9413771545120415, "learning_rate": 1.6405790380641882e-05, "loss": 0.8989, "step": 12123 }, { "epoch": 0.9010776662950576, "grad_norm": 1.9089316496431212, "learning_rate": 1.640517422766624e-05, "loss": 0.5115, "step": 12124 }, { "epoch": 0.9011519881085098, "grad_norm": 1.9765182186990984, "learning_rate": 1.6404558033454417e-05, "loss": 0.9627, "step": 12125 }, { "epoch": 0.9012263099219621, "grad_norm": 1.890088871333833, "learning_rate": 1.6403941798010383e-05, "loss": 0.7463, "step": 12126 }, { "epoch": 0.9013006317354143, "grad_norm": 1.7343344355477945, "learning_rate": 1.6403325521338107e-05, "loss": 0.8791, "step": 12127 }, { "epoch": 0.9013749535488665, "grad_norm": 2.2189365927231837, "learning_rate": 1.6402709203441548e-05, "loss": 1.0478, "step": 12128 }, { "epoch": 0.9014492753623189, "grad_norm": 1.8811959980867, "learning_rate": 1.6402092844324683e-05, "loss": 0.833, "step": 12129 }, { "epoch": 0.9015235971757711, "grad_norm": 1.7503153004399992, "learning_rate": 1.6401476443991476e-05, "loss": 0.6666, "step": 12130 }, { "epoch": 0.9015979189892234, "grad_norm": 1.549686090243786, "learning_rate": 1.6400860002445894e-05, "loss": 0.7594, "step": 12131 }, { "epoch": 0.9016722408026756, "grad_norm": 1.7513743418907053, "learning_rate": 1.640024351969191e-05, "loss": 0.8083, "step": 12132 }, { "epoch": 0.9017465626161278, "grad_norm": 1.7493153980839302, "learning_rate": 1.639962699573349e-05, "loss": 0.6945, "step": 12133 }, { "epoch": 0.9018208844295801, "grad_norm": 2.1387558792419292, "learning_rate": 1.6399010430574603e-05, "loss": 0.717, "step": 12134 }, { "epoch": 0.9018952062430323, "grad_norm": 4.85735267626693, "learning_rate": 1.6398393824219216e-05, "loss": 0.7801, "step": 12135 }, { "epoch": 0.9019695280564846, "grad_norm": 1.4858055273003474, "learning_rate": 1.6397777176671304e-05, "loss": 0.7622, "step": 12136 }, { "epoch": 0.9020438498699368, "grad_norm": 1.495879190468121, "learning_rate": 1.6397160487934837e-05, "loss": 0.591, "step": 12137 }, { "epoch": 0.902118171683389, "grad_norm": 1.9267364614604174, "learning_rate": 1.6396543758013778e-05, "loss": 0.8925, "step": 12138 }, { "epoch": 0.9021924934968413, "grad_norm": 1.3142989548318498, "learning_rate": 1.6395926986912105e-05, "loss": 0.6085, "step": 12139 }, { "epoch": 0.9022668153102936, "grad_norm": 2.0228588820641256, "learning_rate": 1.639531017463378e-05, "loss": 0.9833, "step": 12140 }, { "epoch": 0.9023411371237459, "grad_norm": 1.5214671430018976, "learning_rate": 1.639469332118279e-05, "loss": 0.9006, "step": 12141 }, { "epoch": 0.9024154589371981, "grad_norm": 1.6743905265980419, "learning_rate": 1.639407642656309e-05, "loss": 0.8243, "step": 12142 }, { "epoch": 0.9024897807506503, "grad_norm": 1.8044778938809956, "learning_rate": 1.6393459490778658e-05, "loss": 0.7334, "step": 12143 }, { "epoch": 0.9025641025641026, "grad_norm": 2.136520985020394, "learning_rate": 1.6392842513833467e-05, "loss": 0.8843, "step": 12144 }, { "epoch": 0.9026384243775548, "grad_norm": 1.7730615026632026, "learning_rate": 1.6392225495731488e-05, "loss": 0.777, "step": 12145 }, { "epoch": 0.902712746191007, "grad_norm": 2.1336303256571845, "learning_rate": 1.639160843647669e-05, "loss": 0.9816, "step": 12146 }, { "epoch": 0.9027870680044593, "grad_norm": 1.465693050366001, "learning_rate": 1.6390991336073053e-05, "loss": 0.7118, "step": 12147 }, { "epoch": 0.9028613898179115, "grad_norm": 2.06083535753838, "learning_rate": 1.6390374194524542e-05, "loss": 0.9302, "step": 12148 }, { "epoch": 0.9029357116313638, "grad_norm": 2.098210890407504, "learning_rate": 1.6389757011835133e-05, "loss": 0.7095, "step": 12149 }, { "epoch": 0.903010033444816, "grad_norm": 1.8828567754518724, "learning_rate": 1.6389139788008807e-05, "loss": 0.8536, "step": 12150 }, { "epoch": 0.9030843552582684, "grad_norm": 4.685120922214638, "learning_rate": 1.638852252304952e-05, "loss": 1.0099, "step": 12151 }, { "epoch": 0.9031586770717206, "grad_norm": 1.5968773397172398, "learning_rate": 1.6387905216961262e-05, "loss": 0.6938, "step": 12152 }, { "epoch": 0.9032329988851728, "grad_norm": 1.6569349622989924, "learning_rate": 1.6387287869748002e-05, "loss": 0.719, "step": 12153 }, { "epoch": 0.9033073206986251, "grad_norm": 1.9286243554155753, "learning_rate": 1.6386670481413715e-05, "loss": 0.8542, "step": 12154 }, { "epoch": 0.9033816425120773, "grad_norm": 1.5562244223422146, "learning_rate": 1.6386053051962368e-05, "loss": 0.8284, "step": 12155 }, { "epoch": 0.9034559643255295, "grad_norm": 1.357182477724641, "learning_rate": 1.6385435581397953e-05, "loss": 0.6585, "step": 12156 }, { "epoch": 0.9035302861389818, "grad_norm": 2.2940842649695807, "learning_rate": 1.6384818069724427e-05, "loss": 0.8953, "step": 12157 }, { "epoch": 0.903604607952434, "grad_norm": 2.090415453496497, "learning_rate": 1.6384200516945778e-05, "loss": 0.9921, "step": 12158 }, { "epoch": 0.9036789297658863, "grad_norm": 2.488374364240609, "learning_rate": 1.6383582923065976e-05, "loss": 0.9106, "step": 12159 }, { "epoch": 0.9037532515793385, "grad_norm": 1.5511503953995445, "learning_rate": 1.6382965288088997e-05, "loss": 0.7924, "step": 12160 }, { "epoch": 0.9038275733927907, "grad_norm": 1.9769686203763899, "learning_rate": 1.6382347612018817e-05, "loss": 0.6207, "step": 12161 }, { "epoch": 0.903901895206243, "grad_norm": 1.834412014032547, "learning_rate": 1.6381729894859418e-05, "loss": 0.7093, "step": 12162 }, { "epoch": 0.9039762170196953, "grad_norm": 2.0420725499462957, "learning_rate": 1.6381112136614774e-05, "loss": 0.6752, "step": 12163 }, { "epoch": 0.9040505388331476, "grad_norm": 2.1936491083298253, "learning_rate": 1.638049433728886e-05, "loss": 0.6899, "step": 12164 }, { "epoch": 0.9041248606465998, "grad_norm": 1.87641455573624, "learning_rate": 1.637987649688565e-05, "loss": 0.7941, "step": 12165 }, { "epoch": 0.904199182460052, "grad_norm": 1.7854647156387364, "learning_rate": 1.6379258615409134e-05, "loss": 0.8251, "step": 12166 }, { "epoch": 0.9042735042735043, "grad_norm": 3.0821260437860425, "learning_rate": 1.6378640692863276e-05, "loss": 0.7828, "step": 12167 }, { "epoch": 0.9043478260869565, "grad_norm": 1.9197817085242983, "learning_rate": 1.6378022729252062e-05, "loss": 1.1122, "step": 12168 }, { "epoch": 0.9044221479004088, "grad_norm": 1.8460835910487898, "learning_rate": 1.637740472457947e-05, "loss": 0.8809, "step": 12169 }, { "epoch": 0.904496469713861, "grad_norm": 3.1912602537888177, "learning_rate": 1.6376786678849474e-05, "loss": 0.6415, "step": 12170 }, { "epoch": 0.9045707915273132, "grad_norm": 1.6242878006255308, "learning_rate": 1.637616859206606e-05, "loss": 0.8842, "step": 12171 }, { "epoch": 0.9046451133407655, "grad_norm": 2.2102711004616338, "learning_rate": 1.63755504642332e-05, "loss": 0.9324, "step": 12172 }, { "epoch": 0.9047194351542177, "grad_norm": 1.5487643882773003, "learning_rate": 1.637493229535488e-05, "loss": 0.7485, "step": 12173 }, { "epoch": 0.9047937569676701, "grad_norm": 1.4102993965378696, "learning_rate": 1.6374314085435075e-05, "loss": 0.8005, "step": 12174 }, { "epoch": 0.9048680787811223, "grad_norm": 1.6310276414268183, "learning_rate": 1.6373695834477766e-05, "loss": 0.9825, "step": 12175 }, { "epoch": 0.9049424005945745, "grad_norm": 2.4108303073484887, "learning_rate": 1.637307754248693e-05, "loss": 0.6772, "step": 12176 }, { "epoch": 0.9050167224080268, "grad_norm": 1.7259985202366328, "learning_rate": 1.637245920946656e-05, "loss": 0.6829, "step": 12177 }, { "epoch": 0.905091044221479, "grad_norm": 1.8047227903053324, "learning_rate": 1.6371840835420628e-05, "loss": 0.712, "step": 12178 }, { "epoch": 0.9051653660349313, "grad_norm": 1.5401648352898514, "learning_rate": 1.6371222420353107e-05, "loss": 0.7518, "step": 12179 }, { "epoch": 0.9052396878483835, "grad_norm": 1.6380977589535102, "learning_rate": 1.6370603964267996e-05, "loss": 0.8031, "step": 12180 }, { "epoch": 0.9053140096618357, "grad_norm": 1.7239512579638037, "learning_rate": 1.6369985467169258e-05, "loss": 0.901, "step": 12181 }, { "epoch": 0.905388331475288, "grad_norm": 2.5420426311282056, "learning_rate": 1.636936692906089e-05, "loss": 1.0417, "step": 12182 }, { "epoch": 0.9054626532887402, "grad_norm": 1.8944649888280762, "learning_rate": 1.636874834994687e-05, "loss": 0.7977, "step": 12183 }, { "epoch": 0.9055369751021924, "grad_norm": 2.2595743112817552, "learning_rate": 1.6368129729831173e-05, "loss": 0.8481, "step": 12184 }, { "epoch": 0.9056112969156448, "grad_norm": 2.62627865386709, "learning_rate": 1.636751106871779e-05, "loss": 0.8794, "step": 12185 }, { "epoch": 0.905685618729097, "grad_norm": 1.8113384577693472, "learning_rate": 1.6366892366610702e-05, "loss": 0.7926, "step": 12186 }, { "epoch": 0.9057599405425493, "grad_norm": 1.9985941427781904, "learning_rate": 1.6366273623513887e-05, "loss": 0.7339, "step": 12187 }, { "epoch": 0.9058342623560015, "grad_norm": 1.9491086516508878, "learning_rate": 1.6365654839431337e-05, "loss": 0.9287, "step": 12188 }, { "epoch": 0.9059085841694537, "grad_norm": 1.8157221710117475, "learning_rate": 1.636503601436703e-05, "loss": 0.8037, "step": 12189 }, { "epoch": 0.905982905982906, "grad_norm": 1.8178515115840574, "learning_rate": 1.636441714832495e-05, "loss": 0.8038, "step": 12190 }, { "epoch": 0.9060572277963582, "grad_norm": 1.8732785785609474, "learning_rate": 1.6363798241309087e-05, "loss": 0.8095, "step": 12191 }, { "epoch": 0.9061315496098105, "grad_norm": 1.990669209939966, "learning_rate": 1.636317929332342e-05, "loss": 0.8443, "step": 12192 }, { "epoch": 0.9062058714232627, "grad_norm": 1.6860630677580504, "learning_rate": 1.6362560304371934e-05, "loss": 0.8096, "step": 12193 }, { "epoch": 0.9062801932367149, "grad_norm": 1.9207052895489847, "learning_rate": 1.6361941274458613e-05, "loss": 0.8996, "step": 12194 }, { "epoch": 0.9063545150501672, "grad_norm": 1.7371408784518252, "learning_rate": 1.6361322203587448e-05, "loss": 0.7431, "step": 12195 }, { "epoch": 0.9064288368636195, "grad_norm": 1.3533201427408108, "learning_rate": 1.6360703091762415e-05, "loss": 0.6159, "step": 12196 }, { "epoch": 0.9065031586770718, "grad_norm": 2.0376448476613636, "learning_rate": 1.636008393898751e-05, "loss": 0.667, "step": 12197 }, { "epoch": 0.906577480490524, "grad_norm": 1.5862590370105392, "learning_rate": 1.6359464745266712e-05, "loss": 0.8538, "step": 12198 }, { "epoch": 0.9066518023039762, "grad_norm": 1.5942208754464902, "learning_rate": 1.635884551060401e-05, "loss": 0.7157, "step": 12199 }, { "epoch": 0.9067261241174285, "grad_norm": 1.8270280220695572, "learning_rate": 1.6358226235003392e-05, "loss": 0.8106, "step": 12200 }, { "epoch": 0.9068004459308807, "grad_norm": 1.8152474562569663, "learning_rate": 1.6357606918468844e-05, "loss": 0.9262, "step": 12201 }, { "epoch": 0.906874767744333, "grad_norm": 1.6980504407773542, "learning_rate": 1.635698756100435e-05, "loss": 0.8048, "step": 12202 }, { "epoch": 0.9069490895577852, "grad_norm": 1.6636979291973495, "learning_rate": 1.6356368162613904e-05, "loss": 0.8435, "step": 12203 }, { "epoch": 0.9070234113712374, "grad_norm": 2.450746670547471, "learning_rate": 1.6355748723301486e-05, "loss": 0.951, "step": 12204 }, { "epoch": 0.9070977331846897, "grad_norm": 1.765800166734359, "learning_rate": 1.6355129243071088e-05, "loss": 0.8428, "step": 12205 }, { "epoch": 0.9071720549981419, "grad_norm": 2.1801454005301193, "learning_rate": 1.6354509721926698e-05, "loss": 0.7868, "step": 12206 }, { "epoch": 0.9072463768115943, "grad_norm": 2.0299198053582006, "learning_rate": 1.6353890159872307e-05, "loss": 0.7945, "step": 12207 }, { "epoch": 0.9073206986250465, "grad_norm": 2.017480275383346, "learning_rate": 1.6353270556911897e-05, "loss": 0.8539, "step": 12208 }, { "epoch": 0.9073950204384987, "grad_norm": 1.9752055127792334, "learning_rate": 1.6352650913049465e-05, "loss": 1.0161, "step": 12209 }, { "epoch": 0.907469342251951, "grad_norm": 1.6804746035611917, "learning_rate": 1.635203122828899e-05, "loss": 0.7911, "step": 12210 }, { "epoch": 0.9075436640654032, "grad_norm": 1.395621386323476, "learning_rate": 1.635141150263447e-05, "loss": 0.7317, "step": 12211 }, { "epoch": 0.9076179858788554, "grad_norm": 1.6575794688248424, "learning_rate": 1.635079173608989e-05, "loss": 0.8464, "step": 12212 }, { "epoch": 0.9076923076923077, "grad_norm": 1.6451875355657533, "learning_rate": 1.635017192865925e-05, "loss": 0.8666, "step": 12213 }, { "epoch": 0.9077666295057599, "grad_norm": 2.5095687146699728, "learning_rate": 1.6349552080346525e-05, "loss": 0.8498, "step": 12214 }, { "epoch": 0.9078409513192122, "grad_norm": 2.0082916246454157, "learning_rate": 1.634893219115572e-05, "loss": 0.8699, "step": 12215 }, { "epoch": 0.9079152731326644, "grad_norm": 2.0197233884305916, "learning_rate": 1.634831226109081e-05, "loss": 0.6023, "step": 12216 }, { "epoch": 0.9079895949461166, "grad_norm": 2.119271838450818, "learning_rate": 1.6347692290155802e-05, "loss": 0.8623, "step": 12217 }, { "epoch": 0.9080639167595689, "grad_norm": 2.167726985577196, "learning_rate": 1.6347072278354683e-05, "loss": 0.9284, "step": 12218 }, { "epoch": 0.9081382385730212, "grad_norm": 2.3275415500169854, "learning_rate": 1.6346452225691436e-05, "loss": 0.8784, "step": 12219 }, { "epoch": 0.9082125603864735, "grad_norm": 1.9738316712898525, "learning_rate": 1.634583213217006e-05, "loss": 0.8606, "step": 12220 }, { "epoch": 0.9082868821999257, "grad_norm": 1.9038633239297034, "learning_rate": 1.6345211997794543e-05, "loss": 0.8657, "step": 12221 }, { "epoch": 0.9083612040133779, "grad_norm": 2.3889140871595442, "learning_rate": 1.6344591822568885e-05, "loss": 0.8988, "step": 12222 }, { "epoch": 0.9084355258268302, "grad_norm": 1.9296102182868449, "learning_rate": 1.6343971606497073e-05, "loss": 1.0597, "step": 12223 }, { "epoch": 0.9085098476402824, "grad_norm": 2.268928779541037, "learning_rate": 1.63433513495831e-05, "loss": 1.0216, "step": 12224 }, { "epoch": 0.9085841694537347, "grad_norm": 2.2616975034932683, "learning_rate": 1.6342731051830963e-05, "loss": 0.8988, "step": 12225 }, { "epoch": 0.9086584912671869, "grad_norm": 2.57111589158917, "learning_rate": 1.634211071324465e-05, "loss": 0.9612, "step": 12226 }, { "epoch": 0.9087328130806391, "grad_norm": 1.6762122741508156, "learning_rate": 1.6341490333828155e-05, "loss": 0.8653, "step": 12227 }, { "epoch": 0.9088071348940914, "grad_norm": 2.166044731897229, "learning_rate": 1.634086991358548e-05, "loss": 0.8705, "step": 12228 }, { "epoch": 0.9088814567075436, "grad_norm": 2.537680161840528, "learning_rate": 1.634024945252061e-05, "loss": 0.8858, "step": 12229 }, { "epoch": 0.908955778520996, "grad_norm": 1.5772943147517904, "learning_rate": 1.6339628950637543e-05, "loss": 0.6276, "step": 12230 }, { "epoch": 0.9090301003344482, "grad_norm": 1.854351545849315, "learning_rate": 1.6339008407940275e-05, "loss": 0.7156, "step": 12231 }, { "epoch": 0.9091044221479004, "grad_norm": 2.0350969909320793, "learning_rate": 1.63383878244328e-05, "loss": 0.9156, "step": 12232 }, { "epoch": 0.9091787439613527, "grad_norm": 1.7743973317535902, "learning_rate": 1.633776720011911e-05, "loss": 0.7376, "step": 12233 }, { "epoch": 0.9092530657748049, "grad_norm": 1.8052010141931, "learning_rate": 1.6337146535003207e-05, "loss": 0.8821, "step": 12234 }, { "epoch": 0.9093273875882572, "grad_norm": 1.4656622652037798, "learning_rate": 1.6336525829089083e-05, "loss": 0.7773, "step": 12235 }, { "epoch": 0.9094017094017094, "grad_norm": 1.6095385335271888, "learning_rate": 1.6335905082380735e-05, "loss": 0.638, "step": 12236 }, { "epoch": 0.9094760312151616, "grad_norm": 4.066193086493201, "learning_rate": 1.6335284294882153e-05, "loss": 0.924, "step": 12237 }, { "epoch": 0.9095503530286139, "grad_norm": 1.7817686519957983, "learning_rate": 1.6334663466597345e-05, "loss": 0.7668, "step": 12238 }, { "epoch": 0.9096246748420661, "grad_norm": 2.338517711983383, "learning_rate": 1.63340425975303e-05, "loss": 0.8346, "step": 12239 }, { "epoch": 0.9096989966555183, "grad_norm": 1.6206239018890025, "learning_rate": 1.6333421687685022e-05, "loss": 0.6883, "step": 12240 }, { "epoch": 0.9097733184689707, "grad_norm": 1.782223853150733, "learning_rate": 1.63328007370655e-05, "loss": 0.7227, "step": 12241 }, { "epoch": 0.9098476402824229, "grad_norm": 2.214389601718817, "learning_rate": 1.6332179745675736e-05, "loss": 0.7787, "step": 12242 }, { "epoch": 0.9099219620958752, "grad_norm": 1.7908386907217848, "learning_rate": 1.6331558713519728e-05, "loss": 0.6844, "step": 12243 }, { "epoch": 0.9099962839093274, "grad_norm": 3.740318080492885, "learning_rate": 1.633093764060147e-05, "loss": 1.0406, "step": 12244 }, { "epoch": 0.9100706057227796, "grad_norm": 2.180334340085959, "learning_rate": 1.633031652692497e-05, "loss": 0.8321, "step": 12245 }, { "epoch": 0.9101449275362319, "grad_norm": 1.9612922394231473, "learning_rate": 1.6329695372494218e-05, "loss": 0.897, "step": 12246 }, { "epoch": 0.9102192493496841, "grad_norm": 1.7208636249137246, "learning_rate": 1.632907417731321e-05, "loss": 0.777, "step": 12247 }, { "epoch": 0.9102935711631364, "grad_norm": 2.0989585841734244, "learning_rate": 1.632845294138596e-05, "loss": 1.0855, "step": 12248 }, { "epoch": 0.9103678929765886, "grad_norm": 2.3000949938870994, "learning_rate": 1.6327831664716452e-05, "loss": 1.0437, "step": 12249 }, { "epoch": 0.9104422147900408, "grad_norm": 2.5505818779234684, "learning_rate": 1.6327210347308695e-05, "loss": 0.9368, "step": 12250 }, { "epoch": 0.9105165366034931, "grad_norm": 1.7056023584879143, "learning_rate": 1.6326588989166686e-05, "loss": 0.7367, "step": 12251 }, { "epoch": 0.9105908584169454, "grad_norm": 2.1646755541130065, "learning_rate": 1.6325967590294424e-05, "loss": 0.8409, "step": 12252 }, { "epoch": 0.9106651802303977, "grad_norm": 1.9696266134255151, "learning_rate": 1.632534615069591e-05, "loss": 0.8313, "step": 12253 }, { "epoch": 0.9107395020438499, "grad_norm": 1.6540590364243075, "learning_rate": 1.6324724670375148e-05, "loss": 0.8022, "step": 12254 }, { "epoch": 0.9108138238573021, "grad_norm": 2.3353198214075213, "learning_rate": 1.6324103149336137e-05, "loss": 0.6661, "step": 12255 }, { "epoch": 0.9108881456707544, "grad_norm": 1.6629112563828743, "learning_rate": 1.6323481587582875e-05, "loss": 0.822, "step": 12256 }, { "epoch": 0.9109624674842066, "grad_norm": 2.021127220349836, "learning_rate": 1.632285998511937e-05, "loss": 0.7903, "step": 12257 }, { "epoch": 0.9110367892976589, "grad_norm": 2.2272587691590355, "learning_rate": 1.6322238341949618e-05, "loss": 0.8835, "step": 12258 }, { "epoch": 0.9111111111111111, "grad_norm": 1.4928159964931471, "learning_rate": 1.6321616658077627e-05, "loss": 0.6864, "step": 12259 }, { "epoch": 0.9111854329245633, "grad_norm": 2.087286323607545, "learning_rate": 1.6320994933507393e-05, "loss": 0.7942, "step": 12260 }, { "epoch": 0.9112597547380156, "grad_norm": 2.131504385800834, "learning_rate": 1.6320373168242922e-05, "loss": 0.8939, "step": 12261 }, { "epoch": 0.9113340765514678, "grad_norm": 6.671346345427741, "learning_rate": 1.6319751362288218e-05, "loss": 0.8454, "step": 12262 }, { "epoch": 0.9114083983649202, "grad_norm": 2.056662116400678, "learning_rate": 1.6319129515647282e-05, "loss": 0.7913, "step": 12263 }, { "epoch": 0.9114827201783724, "grad_norm": 1.8800226791543337, "learning_rate": 1.6318507628324116e-05, "loss": 0.984, "step": 12264 }, { "epoch": 0.9115570419918246, "grad_norm": 1.8894460917029092, "learning_rate": 1.6317885700322725e-05, "loss": 0.8467, "step": 12265 }, { "epoch": 0.9116313638052769, "grad_norm": 2.6178028791432495, "learning_rate": 1.6317263731647117e-05, "loss": 0.9019, "step": 12266 }, { "epoch": 0.9117056856187291, "grad_norm": 1.9918023064015316, "learning_rate": 1.6316641722301292e-05, "loss": 0.8282, "step": 12267 }, { "epoch": 0.9117800074321813, "grad_norm": 1.8164576979636735, "learning_rate": 1.6316019672289257e-05, "loss": 0.8343, "step": 12268 }, { "epoch": 0.9118543292456336, "grad_norm": 3.4505298835905003, "learning_rate": 1.6315397581615012e-05, "loss": 0.7235, "step": 12269 }, { "epoch": 0.9119286510590858, "grad_norm": 1.9301354003173525, "learning_rate": 1.6314775450282566e-05, "loss": 0.9794, "step": 12270 }, { "epoch": 0.9120029728725381, "grad_norm": 2.1693812061272086, "learning_rate": 1.6314153278295923e-05, "loss": 0.712, "step": 12271 }, { "epoch": 0.9120772946859903, "grad_norm": 2.358222345402891, "learning_rate": 1.631353106565909e-05, "loss": 0.9507, "step": 12272 }, { "epoch": 0.9121516164994425, "grad_norm": 1.92131617294095, "learning_rate": 1.631290881237607e-05, "loss": 0.7574, "step": 12273 }, { "epoch": 0.9122259383128948, "grad_norm": 1.8606645377532445, "learning_rate": 1.6312286518450874e-05, "loss": 0.835, "step": 12274 }, { "epoch": 0.9123002601263471, "grad_norm": 1.8668456989489344, "learning_rate": 1.6311664183887502e-05, "loss": 0.7583, "step": 12275 }, { "epoch": 0.9123745819397994, "grad_norm": 1.8212307253130413, "learning_rate": 1.6311041808689964e-05, "loss": 0.7002, "step": 12276 }, { "epoch": 0.9124489037532516, "grad_norm": 1.8155208011763446, "learning_rate": 1.6310419392862264e-05, "loss": 0.9509, "step": 12277 }, { "epoch": 0.9125232255667038, "grad_norm": 1.8581575127479024, "learning_rate": 1.630979693640841e-05, "loss": 0.776, "step": 12278 }, { "epoch": 0.9125975473801561, "grad_norm": 3.857580386650012, "learning_rate": 1.630917443933242e-05, "loss": 0.9327, "step": 12279 }, { "epoch": 0.9126718691936083, "grad_norm": 2.0851646244285145, "learning_rate": 1.6308551901638286e-05, "loss": 0.8566, "step": 12280 }, { "epoch": 0.9127461910070606, "grad_norm": 1.8553100234971933, "learning_rate": 1.630792932333002e-05, "loss": 0.7065, "step": 12281 }, { "epoch": 0.9128205128205128, "grad_norm": 2.5079456969431724, "learning_rate": 1.630730670441164e-05, "loss": 0.8418, "step": 12282 }, { "epoch": 0.912894834633965, "grad_norm": 3.3196530871475485, "learning_rate": 1.630668404488714e-05, "loss": 0.6368, "step": 12283 }, { "epoch": 0.9129691564474173, "grad_norm": 2.0462488003336565, "learning_rate": 1.6306061344760536e-05, "loss": 0.793, "step": 12284 }, { "epoch": 0.9130434782608695, "grad_norm": 2.021471209150307, "learning_rate": 1.630543860403584e-05, "loss": 0.9303, "step": 12285 }, { "epoch": 0.9131178000743219, "grad_norm": 1.893134021145519, "learning_rate": 1.630481582271705e-05, "loss": 0.8839, "step": 12286 }, { "epoch": 0.9131921218877741, "grad_norm": 3.2084597423316743, "learning_rate": 1.630419300080819e-05, "loss": 0.7919, "step": 12287 }, { "epoch": 0.9132664437012263, "grad_norm": 1.7909177954214213, "learning_rate": 1.630357013831326e-05, "loss": 0.7599, "step": 12288 }, { "epoch": 0.9133407655146786, "grad_norm": 2.1427565707511977, "learning_rate": 1.6302947235236274e-05, "loss": 0.8092, "step": 12289 }, { "epoch": 0.9134150873281308, "grad_norm": 3.5051167586574232, "learning_rate": 1.6302324291581236e-05, "loss": 0.6857, "step": 12290 }, { "epoch": 0.913489409141583, "grad_norm": 2.52109712710202, "learning_rate": 1.6301701307352166e-05, "loss": 0.8137, "step": 12291 }, { "epoch": 0.9135637309550353, "grad_norm": 2.0566320430851737, "learning_rate": 1.630107828255307e-05, "loss": 0.8359, "step": 12292 }, { "epoch": 0.9136380527684875, "grad_norm": 2.24199148977096, "learning_rate": 1.630045521718796e-05, "loss": 0.9427, "step": 12293 }, { "epoch": 0.9137123745819398, "grad_norm": 1.7158018408129554, "learning_rate": 1.629983211126084e-05, "loss": 0.7865, "step": 12294 }, { "epoch": 0.913786696395392, "grad_norm": 1.657765826868323, "learning_rate": 1.6299208964775734e-05, "loss": 0.6702, "step": 12295 }, { "epoch": 0.9138610182088442, "grad_norm": 1.8313068074275078, "learning_rate": 1.6298585777736646e-05, "loss": 0.8051, "step": 12296 }, { "epoch": 0.9139353400222966, "grad_norm": 2.5972020499911177, "learning_rate": 1.629796255014759e-05, "loss": 0.8171, "step": 12297 }, { "epoch": 0.9140096618357488, "grad_norm": 2.6204112406655407, "learning_rate": 1.6297339282012573e-05, "loss": 0.8231, "step": 12298 }, { "epoch": 0.9140839836492011, "grad_norm": 1.9215825161019415, "learning_rate": 1.6296715973335616e-05, "loss": 0.7651, "step": 12299 }, { "epoch": 0.9141583054626533, "grad_norm": 1.412766579171603, "learning_rate": 1.629609262412073e-05, "loss": 0.6005, "step": 12300 }, { "epoch": 0.9142326272761055, "grad_norm": 1.5007390343125397, "learning_rate": 1.6295469234371925e-05, "loss": 0.7587, "step": 12301 }, { "epoch": 0.9143069490895578, "grad_norm": 2.069795045687707, "learning_rate": 1.6294845804093213e-05, "loss": 0.8375, "step": 12302 }, { "epoch": 0.91438127090301, "grad_norm": 1.704301645052901, "learning_rate": 1.6294222333288617e-05, "loss": 0.8424, "step": 12303 }, { "epoch": 0.9144555927164623, "grad_norm": 1.7374353974426482, "learning_rate": 1.6293598821962138e-05, "loss": 0.8785, "step": 12304 }, { "epoch": 0.9145299145299145, "grad_norm": 1.7741070715781546, "learning_rate": 1.62929752701178e-05, "loss": 0.7273, "step": 12305 }, { "epoch": 0.9146042363433667, "grad_norm": 2.4706717346979525, "learning_rate": 1.629235167775961e-05, "loss": 1.0542, "step": 12306 }, { "epoch": 0.914678558156819, "grad_norm": 1.8347509305578822, "learning_rate": 1.629172804489159e-05, "loss": 0.7842, "step": 12307 }, { "epoch": 0.9147528799702713, "grad_norm": 1.5975215978149158, "learning_rate": 1.6291104371517746e-05, "loss": 0.8299, "step": 12308 }, { "epoch": 0.9148272017837236, "grad_norm": 1.9972237154398396, "learning_rate": 1.6290480657642104e-05, "loss": 0.8703, "step": 12309 }, { "epoch": 0.9149015235971758, "grad_norm": 1.7205118850264791, "learning_rate": 1.6289856903268672e-05, "loss": 0.9375, "step": 12310 }, { "epoch": 0.914975845410628, "grad_norm": 2.614001262856192, "learning_rate": 1.6289233108401467e-05, "loss": 0.8827, "step": 12311 }, { "epoch": 0.9150501672240803, "grad_norm": 1.7743942018516223, "learning_rate": 1.6288609273044506e-05, "loss": 0.9097, "step": 12312 }, { "epoch": 0.9151244890375325, "grad_norm": 1.8575410555527712, "learning_rate": 1.6287985397201806e-05, "loss": 0.8298, "step": 12313 }, { "epoch": 0.9151988108509848, "grad_norm": 2.1821720930633184, "learning_rate": 1.628736148087738e-05, "loss": 0.9422, "step": 12314 }, { "epoch": 0.915273132664437, "grad_norm": 2.2243479093563696, "learning_rate": 1.628673752407525e-05, "loss": 0.7647, "step": 12315 }, { "epoch": 0.9153474544778892, "grad_norm": 2.267083341187897, "learning_rate": 1.628611352679943e-05, "loss": 0.7926, "step": 12316 }, { "epoch": 0.9154217762913415, "grad_norm": 1.9040330017112812, "learning_rate": 1.6285489489053935e-05, "loss": 0.893, "step": 12317 }, { "epoch": 0.9154960981047937, "grad_norm": 1.7414400631072553, "learning_rate": 1.6284865410842785e-05, "loss": 0.6398, "step": 12318 }, { "epoch": 0.9155704199182461, "grad_norm": 1.7958190842262187, "learning_rate": 1.6284241292169996e-05, "loss": 0.7163, "step": 12319 }, { "epoch": 0.9156447417316983, "grad_norm": 2.0580170671481683, "learning_rate": 1.628361713303959e-05, "loss": 0.8663, "step": 12320 }, { "epoch": 0.9157190635451505, "grad_norm": 1.5648560779140104, "learning_rate": 1.6282992933455585e-05, "loss": 0.704, "step": 12321 }, { "epoch": 0.9157933853586028, "grad_norm": 2.540034022474759, "learning_rate": 1.6282368693421995e-05, "loss": 0.7421, "step": 12322 }, { "epoch": 0.915867707172055, "grad_norm": 2.526287133479441, "learning_rate": 1.628174441294284e-05, "loss": 0.9307, "step": 12323 }, { "epoch": 0.9159420289855073, "grad_norm": 2.2958327462344363, "learning_rate": 1.628112009202214e-05, "loss": 0.6467, "step": 12324 }, { "epoch": 0.9160163507989595, "grad_norm": 2.1196563736382026, "learning_rate": 1.628049573066392e-05, "loss": 0.8348, "step": 12325 }, { "epoch": 0.9160906726124117, "grad_norm": 1.9872949787065846, "learning_rate": 1.627987132887219e-05, "loss": 0.8342, "step": 12326 }, { "epoch": 0.916164994425864, "grad_norm": 1.5520667132621722, "learning_rate": 1.6279246886650976e-05, "loss": 0.7848, "step": 12327 }, { "epoch": 0.9162393162393162, "grad_norm": 1.625129229868169, "learning_rate": 1.6278622404004294e-05, "loss": 0.679, "step": 12328 }, { "epoch": 0.9163136380527684, "grad_norm": 1.8502044101289674, "learning_rate": 1.627799788093617e-05, "loss": 0.9679, "step": 12329 }, { "epoch": 0.9163879598662207, "grad_norm": 1.6760754081602356, "learning_rate": 1.627737331745062e-05, "loss": 0.7752, "step": 12330 }, { "epoch": 0.916462281679673, "grad_norm": 2.0783792988035503, "learning_rate": 1.6276748713551665e-05, "loss": 0.7571, "step": 12331 }, { "epoch": 0.9165366034931253, "grad_norm": 1.6598662146443757, "learning_rate": 1.627612406924333e-05, "loss": 0.6608, "step": 12332 }, { "epoch": 0.9166109253065775, "grad_norm": 1.817889571760633, "learning_rate": 1.627549938452963e-05, "loss": 0.6517, "step": 12333 }, { "epoch": 0.9166852471200297, "grad_norm": 1.614297898168978, "learning_rate": 1.6274874659414595e-05, "loss": 0.8837, "step": 12334 }, { "epoch": 0.916759568933482, "grad_norm": 1.6210611299353648, "learning_rate": 1.627424989390224e-05, "loss": 0.7774, "step": 12335 }, { "epoch": 0.9168338907469342, "grad_norm": 1.8113993735101013, "learning_rate": 1.627362508799659e-05, "loss": 0.9493, "step": 12336 }, { "epoch": 0.9169082125603865, "grad_norm": 1.6646921885290191, "learning_rate": 1.627300024170167e-05, "loss": 0.8585, "step": 12337 }, { "epoch": 0.9169825343738387, "grad_norm": 1.7339230149014915, "learning_rate": 1.6272375355021495e-05, "loss": 0.8144, "step": 12338 }, { "epoch": 0.9170568561872909, "grad_norm": 1.914971522716692, "learning_rate": 1.6271750427960092e-05, "loss": 1.1285, "step": 12339 }, { "epoch": 0.9171311780007432, "grad_norm": 1.8807269648605185, "learning_rate": 1.6271125460521492e-05, "loss": 0.9273, "step": 12340 }, { "epoch": 0.9172054998141954, "grad_norm": 1.975380330466038, "learning_rate": 1.6270500452709708e-05, "loss": 1.1499, "step": 12341 }, { "epoch": 0.9172798216276478, "grad_norm": 2.25669632522736, "learning_rate": 1.6269875404528766e-05, "loss": 0.7535, "step": 12342 }, { "epoch": 0.9173541434411, "grad_norm": 3.3947952655646585, "learning_rate": 1.6269250315982693e-05, "loss": 0.792, "step": 12343 }, { "epoch": 0.9174284652545522, "grad_norm": 2.1012335057492906, "learning_rate": 1.6268625187075514e-05, "loss": 0.98, "step": 12344 }, { "epoch": 0.9175027870680045, "grad_norm": 1.8857206724681492, "learning_rate": 1.6268000017811247e-05, "loss": 1.0398, "step": 12345 }, { "epoch": 0.9175771088814567, "grad_norm": 1.847206170791232, "learning_rate": 1.6267374808193924e-05, "loss": 0.8731, "step": 12346 }, { "epoch": 0.917651430694909, "grad_norm": 1.876740567145633, "learning_rate": 1.6266749558227566e-05, "loss": 0.9314, "step": 12347 }, { "epoch": 0.9177257525083612, "grad_norm": 1.8281299210473094, "learning_rate": 1.62661242679162e-05, "loss": 0.8703, "step": 12348 }, { "epoch": 0.9178000743218134, "grad_norm": 1.7033042307727875, "learning_rate": 1.626549893726385e-05, "loss": 0.8025, "step": 12349 }, { "epoch": 0.9178743961352657, "grad_norm": 1.7082656453361569, "learning_rate": 1.6264873566274543e-05, "loss": 0.7895, "step": 12350 }, { "epoch": 0.9179487179487179, "grad_norm": 2.129346953852903, "learning_rate": 1.6264248154952308e-05, "loss": 0.9939, "step": 12351 }, { "epoch": 0.9180230397621701, "grad_norm": 2.185886108877033, "learning_rate": 1.6263622703301165e-05, "loss": 0.8883, "step": 12352 }, { "epoch": 0.9180973615756225, "grad_norm": 2.2104227805646928, "learning_rate": 1.6262997211325147e-05, "loss": 0.8478, "step": 12353 }, { "epoch": 0.9181716833890747, "grad_norm": 1.2736712375427437, "learning_rate": 1.6262371679028275e-05, "loss": 0.402, "step": 12354 }, { "epoch": 0.918246005202527, "grad_norm": 1.8827117589125488, "learning_rate": 1.626174610641458e-05, "loss": 0.8978, "step": 12355 }, { "epoch": 0.9183203270159792, "grad_norm": 1.8125094272807978, "learning_rate": 1.626112049348809e-05, "loss": 0.8575, "step": 12356 }, { "epoch": 0.9183946488294314, "grad_norm": 1.7692083449167493, "learning_rate": 1.6260494840252826e-05, "loss": 0.7612, "step": 12357 }, { "epoch": 0.9184689706428837, "grad_norm": 1.71957194300042, "learning_rate": 1.625986914671283e-05, "loss": 0.6094, "step": 12358 }, { "epoch": 0.9185432924563359, "grad_norm": 1.4343664877513946, "learning_rate": 1.6259243412872115e-05, "loss": 0.7315, "step": 12359 }, { "epoch": 0.9186176142697882, "grad_norm": 1.8622141078997947, "learning_rate": 1.625861763873472e-05, "loss": 0.7376, "step": 12360 }, { "epoch": 0.9186919360832404, "grad_norm": 4.021395998334734, "learning_rate": 1.6257991824304663e-05, "loss": 0.806, "step": 12361 }, { "epoch": 0.9187662578966926, "grad_norm": 2.602858926975531, "learning_rate": 1.6257365969585983e-05, "loss": 0.9696, "step": 12362 }, { "epoch": 0.9188405797101449, "grad_norm": 1.7629598977033731, "learning_rate": 1.6256740074582704e-05, "loss": 0.813, "step": 12363 }, { "epoch": 0.9189149015235972, "grad_norm": 2.146863300858341, "learning_rate": 1.6256114139298858e-05, "loss": 0.9407, "step": 12364 }, { "epoch": 0.9189892233370495, "grad_norm": 1.6744648058770684, "learning_rate": 1.6255488163738476e-05, "loss": 0.7738, "step": 12365 }, { "epoch": 0.9190635451505017, "grad_norm": 6.6745075180031455, "learning_rate": 1.625486214790558e-05, "loss": 1.0259, "step": 12366 }, { "epoch": 0.9191378669639539, "grad_norm": 2.1795641336328355, "learning_rate": 1.6254236091804212e-05, "loss": 0.8958, "step": 12367 }, { "epoch": 0.9192121887774062, "grad_norm": 1.7039719990374096, "learning_rate": 1.6253609995438393e-05, "loss": 0.9422, "step": 12368 }, { "epoch": 0.9192865105908584, "grad_norm": 1.5653335460319069, "learning_rate": 1.625298385881216e-05, "loss": 0.7561, "step": 12369 }, { "epoch": 0.9193608324043107, "grad_norm": 3.3900134039948635, "learning_rate": 1.625235768192954e-05, "loss": 0.9937, "step": 12370 }, { "epoch": 0.9194351542177629, "grad_norm": 2.264926290449169, "learning_rate": 1.6251731464794565e-05, "loss": 0.689, "step": 12371 }, { "epoch": 0.9195094760312151, "grad_norm": 1.8471212369186718, "learning_rate": 1.6251105207411265e-05, "loss": 0.6248, "step": 12372 }, { "epoch": 0.9195837978446674, "grad_norm": 1.7218688289086328, "learning_rate": 1.6250478909783682e-05, "loss": 0.8713, "step": 12373 }, { "epoch": 0.9196581196581196, "grad_norm": 2.1700882175421503, "learning_rate": 1.6249852571915833e-05, "loss": 0.9785, "step": 12374 }, { "epoch": 0.919732441471572, "grad_norm": 2.471445581896378, "learning_rate": 1.624922619381176e-05, "loss": 0.9028, "step": 12375 }, { "epoch": 0.9198067632850242, "grad_norm": 2.5427015551955865, "learning_rate": 1.624859977547549e-05, "loss": 0.8989, "step": 12376 }, { "epoch": 0.9198810850984764, "grad_norm": 11.973299321276832, "learning_rate": 1.6247973316911062e-05, "loss": 1.0067, "step": 12377 }, { "epoch": 0.9199554069119287, "grad_norm": 1.6389210653962567, "learning_rate": 1.624734681812251e-05, "loss": 0.6838, "step": 12378 }, { "epoch": 0.9200297287253809, "grad_norm": 1.895846341568689, "learning_rate": 1.6246720279113856e-05, "loss": 0.8178, "step": 12379 }, { "epoch": 0.9201040505388332, "grad_norm": 1.952052150295093, "learning_rate": 1.6246093699889143e-05, "loss": 1.0007, "step": 12380 }, { "epoch": 0.9201783723522854, "grad_norm": 1.913386315528149, "learning_rate": 1.6245467080452404e-05, "loss": 0.6047, "step": 12381 }, { "epoch": 0.9202526941657376, "grad_norm": 2.7583696619879654, "learning_rate": 1.624484042080767e-05, "loss": 0.6698, "step": 12382 }, { "epoch": 0.9203270159791899, "grad_norm": 1.7182394935934173, "learning_rate": 1.624421372095898e-05, "loss": 0.7783, "step": 12383 }, { "epoch": 0.9204013377926421, "grad_norm": 1.940706960850549, "learning_rate": 1.6243586980910363e-05, "loss": 0.7535, "step": 12384 }, { "epoch": 0.9204756596060943, "grad_norm": 2.728911322341332, "learning_rate": 1.624296020066586e-05, "loss": 1.0352, "step": 12385 }, { "epoch": 0.9205499814195467, "grad_norm": 1.9784114021192492, "learning_rate": 1.62423333802295e-05, "loss": 0.883, "step": 12386 }, { "epoch": 0.9206243032329989, "grad_norm": 1.8346265241771245, "learning_rate": 1.6241706519605328e-05, "loss": 0.7097, "step": 12387 }, { "epoch": 0.9206986250464512, "grad_norm": 2.3608326024501953, "learning_rate": 1.6241079618797367e-05, "loss": 0.9147, "step": 12388 }, { "epoch": 0.9207729468599034, "grad_norm": 1.8633350765400176, "learning_rate": 1.6240452677809663e-05, "loss": 0.886, "step": 12389 }, { "epoch": 0.9208472686733556, "grad_norm": 7.348969190587144, "learning_rate": 1.6239825696646244e-05, "loss": 0.9937, "step": 12390 }, { "epoch": 0.9209215904868079, "grad_norm": 1.744703703374939, "learning_rate": 1.6239198675311152e-05, "loss": 0.7355, "step": 12391 }, { "epoch": 0.9209959123002601, "grad_norm": 1.773082189389729, "learning_rate": 1.6238571613808427e-05, "loss": 0.9858, "step": 12392 }, { "epoch": 0.9210702341137124, "grad_norm": 1.951896247121957, "learning_rate": 1.62379445121421e-05, "loss": 0.7551, "step": 12393 }, { "epoch": 0.9211445559271646, "grad_norm": 1.5758929312440209, "learning_rate": 1.6237317370316206e-05, "loss": 0.6817, "step": 12394 }, { "epoch": 0.9212188777406168, "grad_norm": 1.6324786419129833, "learning_rate": 1.623669018833479e-05, "loss": 0.8078, "step": 12395 }, { "epoch": 0.9212931995540691, "grad_norm": 4.598062722229754, "learning_rate": 1.6236062966201882e-05, "loss": 0.8407, "step": 12396 }, { "epoch": 0.9213675213675213, "grad_norm": 1.6898495717910678, "learning_rate": 1.623543570392153e-05, "loss": 0.8899, "step": 12397 }, { "epoch": 0.9214418431809737, "grad_norm": 2.32915519801259, "learning_rate": 1.6234808401497765e-05, "loss": 1.022, "step": 12398 }, { "epoch": 0.9215161649944259, "grad_norm": 2.0879572809364864, "learning_rate": 1.6234181058934627e-05, "loss": 1.0313, "step": 12399 }, { "epoch": 0.9215904868078781, "grad_norm": 1.6291332183435416, "learning_rate": 1.623355367623615e-05, "loss": 0.6606, "step": 12400 }, { "epoch": 0.9216648086213304, "grad_norm": 2.6423944524808163, "learning_rate": 1.6232926253406382e-05, "loss": 0.8709, "step": 12401 }, { "epoch": 0.9217391304347826, "grad_norm": 10.929763438586953, "learning_rate": 1.623229879044936e-05, "loss": 1.0773, "step": 12402 }, { "epoch": 0.9218134522482349, "grad_norm": 2.138165090912707, "learning_rate": 1.6231671287369117e-05, "loss": 0.8497, "step": 12403 }, { "epoch": 0.9218877740616871, "grad_norm": 2.4530898404869537, "learning_rate": 1.6231043744169702e-05, "loss": 0.704, "step": 12404 }, { "epoch": 0.9219620958751393, "grad_norm": 2.165863591191199, "learning_rate": 1.6230416160855148e-05, "loss": 0.797, "step": 12405 }, { "epoch": 0.9220364176885916, "grad_norm": 2.204885065434393, "learning_rate": 1.6229788537429502e-05, "loss": 0.9811, "step": 12406 }, { "epoch": 0.9221107395020438, "grad_norm": 1.819964449117304, "learning_rate": 1.6229160873896795e-05, "loss": 0.96, "step": 12407 }, { "epoch": 0.922185061315496, "grad_norm": 2.2301742680877386, "learning_rate": 1.6228533170261075e-05, "loss": 0.9771, "step": 12408 }, { "epoch": 0.9222593831289484, "grad_norm": 2.268199051895139, "learning_rate": 1.6227905426526383e-05, "loss": 0.6679, "step": 12409 }, { "epoch": 0.9223337049424006, "grad_norm": 1.912370029143477, "learning_rate": 1.622727764269676e-05, "loss": 0.7504, "step": 12410 }, { "epoch": 0.9224080267558529, "grad_norm": 2.0872523526886013, "learning_rate": 1.6226649818776245e-05, "loss": 0.8308, "step": 12411 }, { "epoch": 0.9224823485693051, "grad_norm": 2.381799271636867, "learning_rate": 1.622602195476888e-05, "loss": 0.9232, "step": 12412 }, { "epoch": 0.9225566703827573, "grad_norm": 2.101849086701554, "learning_rate": 1.6225394050678712e-05, "loss": 0.799, "step": 12413 }, { "epoch": 0.9226309921962096, "grad_norm": 2.1249773089920714, "learning_rate": 1.6224766106509776e-05, "loss": 0.8166, "step": 12414 }, { "epoch": 0.9227053140096618, "grad_norm": 1.6177897645485595, "learning_rate": 1.6224138122266123e-05, "loss": 0.7537, "step": 12415 }, { "epoch": 0.9227796358231141, "grad_norm": 2.3433295545638724, "learning_rate": 1.622351009795179e-05, "loss": 1.0339, "step": 12416 }, { "epoch": 0.9228539576365663, "grad_norm": 2.1044092251323243, "learning_rate": 1.622288203357082e-05, "loss": 1.0272, "step": 12417 }, { "epoch": 0.9229282794500185, "grad_norm": 1.706864421904711, "learning_rate": 1.6222253929127262e-05, "loss": 0.8857, "step": 12418 }, { "epoch": 0.9230026012634708, "grad_norm": 1.892620919722207, "learning_rate": 1.6221625784625158e-05, "loss": 0.9985, "step": 12419 }, { "epoch": 0.9230769230769231, "grad_norm": 1.9677821786683434, "learning_rate": 1.6220997600068543e-05, "loss": 0.907, "step": 12420 }, { "epoch": 0.9231512448903754, "grad_norm": 1.6668812518514378, "learning_rate": 1.6220369375461472e-05, "loss": 0.8279, "step": 12421 }, { "epoch": 0.9232255667038276, "grad_norm": 2.5125650696402038, "learning_rate": 1.6219741110807988e-05, "loss": 0.7964, "step": 12422 }, { "epoch": 0.9232998885172798, "grad_norm": 1.8143137544298946, "learning_rate": 1.621911280611213e-05, "loss": 0.7755, "step": 12423 }, { "epoch": 0.9233742103307321, "grad_norm": 1.6020977792205877, "learning_rate": 1.621848446137795e-05, "loss": 0.5565, "step": 12424 }, { "epoch": 0.9234485321441843, "grad_norm": 1.4562338909115704, "learning_rate": 1.6217856076609484e-05, "loss": 0.6417, "step": 12425 }, { "epoch": 0.9235228539576366, "grad_norm": 2.459981140116991, "learning_rate": 1.6217227651810788e-05, "loss": 0.7561, "step": 12426 }, { "epoch": 0.9235971757710888, "grad_norm": 1.564860269176605, "learning_rate": 1.6216599186985898e-05, "loss": 0.7545, "step": 12427 }, { "epoch": 0.923671497584541, "grad_norm": 2.110519910129634, "learning_rate": 1.621597068213887e-05, "loss": 1.0661, "step": 12428 }, { "epoch": 0.9237458193979933, "grad_norm": 1.8399290379211855, "learning_rate": 1.6215342137273743e-05, "loss": 0.7374, "step": 12429 }, { "epoch": 0.9238201412114455, "grad_norm": 2.126584155678444, "learning_rate": 1.6214713552394564e-05, "loss": 0.6736, "step": 12430 }, { "epoch": 0.9238944630248979, "grad_norm": 1.8924015542812032, "learning_rate": 1.6214084927505384e-05, "loss": 0.9335, "step": 12431 }, { "epoch": 0.9239687848383501, "grad_norm": 1.6835244952509143, "learning_rate": 1.621345626261025e-05, "loss": 0.6469, "step": 12432 }, { "epoch": 0.9240431066518023, "grad_norm": 2.1610127740339453, "learning_rate": 1.6212827557713203e-05, "loss": 0.9281, "step": 12433 }, { "epoch": 0.9241174284652546, "grad_norm": 4.025972898635698, "learning_rate": 1.6212198812818296e-05, "loss": 0.9598, "step": 12434 }, { "epoch": 0.9241917502787068, "grad_norm": 1.9801660288836656, "learning_rate": 1.6211570027929577e-05, "loss": 0.9413, "step": 12435 }, { "epoch": 0.924266072092159, "grad_norm": 2.0068365097276994, "learning_rate": 1.621094120305109e-05, "loss": 0.9776, "step": 12436 }, { "epoch": 0.9243403939056113, "grad_norm": 1.615509311114353, "learning_rate": 1.621031233818689e-05, "loss": 0.6897, "step": 12437 }, { "epoch": 0.9244147157190635, "grad_norm": 1.7852317543179674, "learning_rate": 1.6209683433341014e-05, "loss": 0.7935, "step": 12438 }, { "epoch": 0.9244890375325158, "grad_norm": 6.506265889765504, "learning_rate": 1.6209054488517522e-05, "loss": 0.7699, "step": 12439 }, { "epoch": 0.924563359345968, "grad_norm": 2.5650789867753074, "learning_rate": 1.620842550372046e-05, "loss": 0.9034, "step": 12440 }, { "epoch": 0.9246376811594202, "grad_norm": 1.953428733928634, "learning_rate": 1.6207796478953875e-05, "loss": 0.8866, "step": 12441 }, { "epoch": 0.9247120029728726, "grad_norm": 1.7831647619614064, "learning_rate": 1.620716741422182e-05, "loss": 0.7764, "step": 12442 }, { "epoch": 0.9247863247863248, "grad_norm": 1.838403223781853, "learning_rate": 1.6206538309528345e-05, "loss": 0.9529, "step": 12443 }, { "epoch": 0.9248606465997771, "grad_norm": 1.7337929507623955, "learning_rate": 1.62059091648775e-05, "loss": 0.8881, "step": 12444 }, { "epoch": 0.9249349684132293, "grad_norm": 2.009179109425526, "learning_rate": 1.6205279980273325e-05, "loss": 0.9066, "step": 12445 }, { "epoch": 0.9250092902266815, "grad_norm": 2.2193243882539675, "learning_rate": 1.6204650755719886e-05, "loss": 0.9053, "step": 12446 }, { "epoch": 0.9250836120401338, "grad_norm": 2.2377541622348196, "learning_rate": 1.6204021491221227e-05, "loss": 0.804, "step": 12447 }, { "epoch": 0.925157933853586, "grad_norm": 1.8883903862490374, "learning_rate": 1.6203392186781396e-05, "loss": 0.7135, "step": 12448 }, { "epoch": 0.9252322556670383, "grad_norm": 1.802371415174771, "learning_rate": 1.620276284240445e-05, "loss": 0.9895, "step": 12449 }, { "epoch": 0.9253065774804905, "grad_norm": 1.400401237050854, "learning_rate": 1.620213345809444e-05, "loss": 0.7328, "step": 12450 }, { "epoch": 0.9253808992939427, "grad_norm": 2.16986047304296, "learning_rate": 1.6201504033855417e-05, "loss": 0.7283, "step": 12451 }, { "epoch": 0.925455221107395, "grad_norm": 1.9107626374472386, "learning_rate": 1.6200874569691434e-05, "loss": 0.8799, "step": 12452 }, { "epoch": 0.9255295429208472, "grad_norm": 1.5501412328060207, "learning_rate": 1.6200245065606538e-05, "loss": 0.5897, "step": 12453 }, { "epoch": 0.9256038647342996, "grad_norm": 2.057174895072081, "learning_rate": 1.6199615521604787e-05, "loss": 0.9812, "step": 12454 }, { "epoch": 0.9256781865477518, "grad_norm": 1.6199221565957564, "learning_rate": 1.6198985937690234e-05, "loss": 1.029, "step": 12455 }, { "epoch": 0.925752508361204, "grad_norm": 1.9622651365959727, "learning_rate": 1.619835631386693e-05, "loss": 0.8652, "step": 12456 }, { "epoch": 0.9258268301746563, "grad_norm": 2.1961624843540317, "learning_rate": 1.6197726650138933e-05, "loss": 0.9385, "step": 12457 }, { "epoch": 0.9259011519881085, "grad_norm": 1.4645209277647937, "learning_rate": 1.6197096946510292e-05, "loss": 0.6591, "step": 12458 }, { "epoch": 0.9259754738015608, "grad_norm": 1.775867166064493, "learning_rate": 1.619646720298506e-05, "loss": 0.755, "step": 12459 }, { "epoch": 0.926049795615013, "grad_norm": 2.520350130077111, "learning_rate": 1.6195837419567297e-05, "loss": 0.8599, "step": 12460 }, { "epoch": 0.9261241174284652, "grad_norm": 1.8682940427407546, "learning_rate": 1.619520759626105e-05, "loss": 0.7265, "step": 12461 }, { "epoch": 0.9261984392419175, "grad_norm": 2.251691007311756, "learning_rate": 1.6194577733070383e-05, "loss": 0.8148, "step": 12462 }, { "epoch": 0.9262727610553697, "grad_norm": 1.9182573899189976, "learning_rate": 1.619394782999934e-05, "loss": 0.832, "step": 12463 }, { "epoch": 0.926347082868822, "grad_norm": 1.6727532627061261, "learning_rate": 1.6193317887051987e-05, "loss": 0.8512, "step": 12464 }, { "epoch": 0.9264214046822743, "grad_norm": 1.7393648009266045, "learning_rate": 1.6192687904232373e-05, "loss": 0.794, "step": 12465 }, { "epoch": 0.9264957264957265, "grad_norm": 1.7019119962983678, "learning_rate": 1.6192057881544555e-05, "loss": 0.7758, "step": 12466 }, { "epoch": 0.9265700483091788, "grad_norm": 1.7871828585238534, "learning_rate": 1.6191427818992592e-05, "loss": 0.8195, "step": 12467 }, { "epoch": 0.926644370122631, "grad_norm": 1.7833184912516744, "learning_rate": 1.619079771658054e-05, "loss": 0.6342, "step": 12468 }, { "epoch": 0.9267186919360832, "grad_norm": 2.1044496487834428, "learning_rate": 1.6190167574312447e-05, "loss": 0.5691, "step": 12469 }, { "epoch": 0.9267930137495355, "grad_norm": 1.6293962972417448, "learning_rate": 1.6189537392192378e-05, "loss": 0.585, "step": 12470 }, { "epoch": 0.9268673355629877, "grad_norm": 1.7314164319616965, "learning_rate": 1.6188907170224392e-05, "loss": 1.0274, "step": 12471 }, { "epoch": 0.92694165737644, "grad_norm": 2.1793877910372585, "learning_rate": 1.6188276908412542e-05, "loss": 1.011, "step": 12472 }, { "epoch": 0.9270159791898922, "grad_norm": 1.5004960293047271, "learning_rate": 1.6187646606760884e-05, "loss": 0.7555, "step": 12473 }, { "epoch": 0.9270903010033444, "grad_norm": 1.8498912337392266, "learning_rate": 1.6187016265273477e-05, "loss": 0.9389, "step": 12474 }, { "epoch": 0.9271646228167967, "grad_norm": 2.1155344397224454, "learning_rate": 1.6186385883954382e-05, "loss": 0.8364, "step": 12475 }, { "epoch": 0.927238944630249, "grad_norm": 2.1081065613184413, "learning_rate": 1.6185755462807656e-05, "loss": 1.0563, "step": 12476 }, { "epoch": 0.9273132664437013, "grad_norm": 1.7848580916530987, "learning_rate": 1.6185125001837357e-05, "loss": 0.9085, "step": 12477 }, { "epoch": 0.9273875882571535, "grad_norm": 1.6944275993750793, "learning_rate": 1.6184494501047545e-05, "loss": 0.7287, "step": 12478 }, { "epoch": 0.9274619100706057, "grad_norm": 2.064055702609349, "learning_rate": 1.6183863960442278e-05, "loss": 1.0529, "step": 12479 }, { "epoch": 0.927536231884058, "grad_norm": 2.0950733403007193, "learning_rate": 1.6183233380025616e-05, "loss": 0.7968, "step": 12480 }, { "epoch": 0.9276105536975102, "grad_norm": 1.8860285604636582, "learning_rate": 1.6182602759801617e-05, "loss": 1.0352, "step": 12481 }, { "epoch": 0.9276848755109625, "grad_norm": 2.234885067684675, "learning_rate": 1.6181972099774344e-05, "loss": 0.8806, "step": 12482 }, { "epoch": 0.9277591973244147, "grad_norm": 2.146426502182542, "learning_rate": 1.6181341399947856e-05, "loss": 0.779, "step": 12483 }, { "epoch": 0.9278335191378669, "grad_norm": 2.3437463439436415, "learning_rate": 1.6180710660326208e-05, "loss": 0.8023, "step": 12484 }, { "epoch": 0.9279078409513192, "grad_norm": 1.9431524126326214, "learning_rate": 1.618007988091347e-05, "loss": 1.0008, "step": 12485 }, { "epoch": 0.9279821627647714, "grad_norm": 2.4842929077662106, "learning_rate": 1.6179449061713696e-05, "loss": 0.7878, "step": 12486 }, { "epoch": 0.9280564845782238, "grad_norm": 1.8863636488494158, "learning_rate": 1.617881820273095e-05, "loss": 0.8685, "step": 12487 }, { "epoch": 0.928130806391676, "grad_norm": 1.6609224414010435, "learning_rate": 1.6178187303969294e-05, "loss": 0.8596, "step": 12488 }, { "epoch": 0.9282051282051282, "grad_norm": 1.665890859587287, "learning_rate": 1.617755636543279e-05, "loss": 0.7138, "step": 12489 }, { "epoch": 0.9282794500185805, "grad_norm": 2.1674515666121006, "learning_rate": 1.6176925387125498e-05, "loss": 0.8955, "step": 12490 }, { "epoch": 0.9283537718320327, "grad_norm": 4.612929504531316, "learning_rate": 1.6176294369051476e-05, "loss": 0.8911, "step": 12491 }, { "epoch": 0.928428093645485, "grad_norm": 1.6000281721783007, "learning_rate": 1.6175663311214796e-05, "loss": 0.9261, "step": 12492 }, { "epoch": 0.9285024154589372, "grad_norm": 1.7083551968788142, "learning_rate": 1.6175032213619518e-05, "loss": 0.6926, "step": 12493 }, { "epoch": 0.9285767372723894, "grad_norm": 1.8832487530938657, "learning_rate": 1.6174401076269698e-05, "loss": 0.759, "step": 12494 }, { "epoch": 0.9286510590858417, "grad_norm": 2.2836642381046697, "learning_rate": 1.6173769899169407e-05, "loss": 0.7974, "step": 12495 }, { "epoch": 0.9287253808992939, "grad_norm": 2.2380628196880767, "learning_rate": 1.6173138682322705e-05, "loss": 0.8579, "step": 12496 }, { "epoch": 0.9287997027127461, "grad_norm": 2.042931091978204, "learning_rate": 1.6172507425733657e-05, "loss": 0.9089, "step": 12497 }, { "epoch": 0.9288740245261985, "grad_norm": 1.8882629624924012, "learning_rate": 1.6171876129406327e-05, "loss": 0.9087, "step": 12498 }, { "epoch": 0.9289483463396507, "grad_norm": 1.95113375544932, "learning_rate": 1.6171244793344776e-05, "loss": 0.8968, "step": 12499 }, { "epoch": 0.929022668153103, "grad_norm": 1.6311184905310263, "learning_rate": 1.6170613417553073e-05, "loss": 0.5507, "step": 12500 }, { "epoch": 0.9290969899665552, "grad_norm": 1.6910597066901876, "learning_rate": 1.616998200203528e-05, "loss": 0.6143, "step": 12501 }, { "epoch": 0.9291713117800074, "grad_norm": 2.0499750418328255, "learning_rate": 1.6169350546795466e-05, "loss": 0.8825, "step": 12502 }, { "epoch": 0.9292456335934597, "grad_norm": 1.663569100119879, "learning_rate": 1.616871905183769e-05, "loss": 0.629, "step": 12503 }, { "epoch": 0.9293199554069119, "grad_norm": 2.380620684112885, "learning_rate": 1.616808751716602e-05, "loss": 0.9654, "step": 12504 }, { "epoch": 0.9293942772203642, "grad_norm": 2.279300491993322, "learning_rate": 1.616745594278453e-05, "loss": 0.978, "step": 12505 }, { "epoch": 0.9294685990338164, "grad_norm": 2.2098934137475257, "learning_rate": 1.6166824328697273e-05, "loss": 0.9138, "step": 12506 }, { "epoch": 0.9295429208472686, "grad_norm": 1.912038984650389, "learning_rate": 1.616619267490832e-05, "loss": 0.786, "step": 12507 }, { "epoch": 0.9296172426607209, "grad_norm": 1.6658021297214252, "learning_rate": 1.6165560981421738e-05, "loss": 0.8985, "step": 12508 }, { "epoch": 0.9296915644741731, "grad_norm": 2.094239421848056, "learning_rate": 1.61649292482416e-05, "loss": 0.8682, "step": 12509 }, { "epoch": 0.9297658862876255, "grad_norm": 1.8475348458294987, "learning_rate": 1.6164297475371958e-05, "loss": 0.7416, "step": 12510 }, { "epoch": 0.9298402081010777, "grad_norm": 2.827850454666068, "learning_rate": 1.6163665662816898e-05, "loss": 1.0525, "step": 12511 }, { "epoch": 0.9299145299145299, "grad_norm": 1.751864181101065, "learning_rate": 1.6163033810580476e-05, "loss": 0.7797, "step": 12512 }, { "epoch": 0.9299888517279822, "grad_norm": 2.0134093983424033, "learning_rate": 1.616240191866676e-05, "loss": 1.0113, "step": 12513 }, { "epoch": 0.9300631735414344, "grad_norm": 1.7937321515287852, "learning_rate": 1.616176998707982e-05, "loss": 0.7219, "step": 12514 }, { "epoch": 0.9301374953548867, "grad_norm": 1.808070593115546, "learning_rate": 1.6161138015823725e-05, "loss": 0.8921, "step": 12515 }, { "epoch": 0.9302118171683389, "grad_norm": 1.7071242138806775, "learning_rate": 1.6160506004902544e-05, "loss": 0.8651, "step": 12516 }, { "epoch": 0.9302861389817911, "grad_norm": 1.7904414282171583, "learning_rate": 1.6159873954320343e-05, "loss": 0.9762, "step": 12517 }, { "epoch": 0.9303604607952434, "grad_norm": 1.5348295484170589, "learning_rate": 1.6159241864081198e-05, "loss": 0.6021, "step": 12518 }, { "epoch": 0.9304347826086956, "grad_norm": 1.9981132454334574, "learning_rate": 1.6158609734189168e-05, "loss": 0.8679, "step": 12519 }, { "epoch": 0.9305091044221478, "grad_norm": 1.7934732442640011, "learning_rate": 1.6157977564648328e-05, "loss": 0.8508, "step": 12520 }, { "epoch": 0.9305834262356002, "grad_norm": 1.8373662824108434, "learning_rate": 1.615734535546275e-05, "loss": 1.0712, "step": 12521 }, { "epoch": 0.9306577480490524, "grad_norm": 2.198286474760822, "learning_rate": 1.6156713106636504e-05, "loss": 1.041, "step": 12522 }, { "epoch": 0.9307320698625047, "grad_norm": 1.5796890255266003, "learning_rate": 1.615608081817366e-05, "loss": 0.7221, "step": 12523 }, { "epoch": 0.9308063916759569, "grad_norm": 2.137949472062028, "learning_rate": 1.615544849007828e-05, "loss": 0.8649, "step": 12524 }, { "epoch": 0.9308807134894092, "grad_norm": 2.6816782222926023, "learning_rate": 1.6154816122354445e-05, "loss": 0.9671, "step": 12525 }, { "epoch": 0.9309550353028614, "grad_norm": 2.593175178219675, "learning_rate": 1.6154183715006225e-05, "loss": 0.8664, "step": 12526 }, { "epoch": 0.9310293571163136, "grad_norm": 1.935903506705896, "learning_rate": 1.6153551268037688e-05, "loss": 0.8326, "step": 12527 }, { "epoch": 0.9311036789297659, "grad_norm": 3.265959979913167, "learning_rate": 1.6152918781452907e-05, "loss": 0.9841, "step": 12528 }, { "epoch": 0.9311780007432181, "grad_norm": 1.8262981190706957, "learning_rate": 1.6152286255255957e-05, "loss": 0.8229, "step": 12529 }, { "epoch": 0.9312523225566703, "grad_norm": 1.747870880969788, "learning_rate": 1.6151653689450905e-05, "loss": 0.9251, "step": 12530 }, { "epoch": 0.9313266443701226, "grad_norm": 1.933598125972594, "learning_rate": 1.6151021084041825e-05, "loss": 0.9767, "step": 12531 }, { "epoch": 0.9314009661835749, "grad_norm": 1.833966879877513, "learning_rate": 1.615038843903279e-05, "loss": 0.8498, "step": 12532 }, { "epoch": 0.9314752879970272, "grad_norm": 7.094047032227297, "learning_rate": 1.6149755754427875e-05, "loss": 0.6896, "step": 12533 }, { "epoch": 0.9315496098104794, "grad_norm": 1.6640799534052082, "learning_rate": 1.6149123030231152e-05, "loss": 0.8613, "step": 12534 }, { "epoch": 0.9316239316239316, "grad_norm": 1.8584817525026427, "learning_rate": 1.614849026644669e-05, "loss": 0.8491, "step": 12535 }, { "epoch": 0.9316982534373839, "grad_norm": 2.0960459175411446, "learning_rate": 1.6147857463078575e-05, "loss": 0.8471, "step": 12536 }, { "epoch": 0.9317725752508361, "grad_norm": 2.052687389394865, "learning_rate": 1.6147224620130865e-05, "loss": 0.99, "step": 12537 }, { "epoch": 0.9318468970642884, "grad_norm": 2.946233848732342, "learning_rate": 1.6146591737607645e-05, "loss": 0.8533, "step": 12538 }, { "epoch": 0.9319212188777406, "grad_norm": 1.7085875705676117, "learning_rate": 1.6145958815512987e-05, "loss": 0.8174, "step": 12539 }, { "epoch": 0.9319955406911928, "grad_norm": 2.0194703286082785, "learning_rate": 1.614532585385096e-05, "loss": 1.034, "step": 12540 }, { "epoch": 0.9320698625046451, "grad_norm": 1.5751566972502988, "learning_rate": 1.614469285262565e-05, "loss": 0.6904, "step": 12541 }, { "epoch": 0.9321441843180973, "grad_norm": 2.556185932149662, "learning_rate": 1.6144059811841125e-05, "loss": 0.8239, "step": 12542 }, { "epoch": 0.9322185061315497, "grad_norm": 2.0877959269599122, "learning_rate": 1.6143426731501458e-05, "loss": 0.7131, "step": 12543 }, { "epoch": 0.9322928279450019, "grad_norm": 1.9475331756986718, "learning_rate": 1.6142793611610732e-05, "loss": 0.7252, "step": 12544 }, { "epoch": 0.9323671497584541, "grad_norm": 1.9662867820611332, "learning_rate": 1.6142160452173018e-05, "loss": 1.0832, "step": 12545 }, { "epoch": 0.9324414715719064, "grad_norm": 2.4925587519647174, "learning_rate": 1.6141527253192397e-05, "loss": 0.7853, "step": 12546 }, { "epoch": 0.9325157933853586, "grad_norm": 2.310381869205392, "learning_rate": 1.614089401467294e-05, "loss": 0.8273, "step": 12547 }, { "epoch": 0.9325901151988109, "grad_norm": 1.8208055830863856, "learning_rate": 1.6140260736618722e-05, "loss": 0.8845, "step": 12548 }, { "epoch": 0.9326644370122631, "grad_norm": 2.0293458414567325, "learning_rate": 1.6139627419033827e-05, "loss": 0.8652, "step": 12549 }, { "epoch": 0.9327387588257153, "grad_norm": 1.9736465451123761, "learning_rate": 1.6138994061922333e-05, "loss": 0.7682, "step": 12550 }, { "epoch": 0.9328130806391676, "grad_norm": 1.6671048642654995, "learning_rate": 1.613836066528831e-05, "loss": 0.8027, "step": 12551 }, { "epoch": 0.9328874024526198, "grad_norm": 1.7715384297946113, "learning_rate": 1.6137727229135836e-05, "loss": 0.7595, "step": 12552 }, { "epoch": 0.932961724266072, "grad_norm": 1.9191004355464552, "learning_rate": 1.6137093753469e-05, "loss": 0.9746, "step": 12553 }, { "epoch": 0.9330360460795244, "grad_norm": 1.8836149489646152, "learning_rate": 1.613646023829187e-05, "loss": 0.8336, "step": 12554 }, { "epoch": 0.9331103678929766, "grad_norm": 2.077551669617694, "learning_rate": 1.6135826683608528e-05, "loss": 0.9039, "step": 12555 }, { "epoch": 0.9331846897064289, "grad_norm": 1.4611888945017595, "learning_rate": 1.613519308942305e-05, "loss": 0.7554, "step": 12556 }, { "epoch": 0.9332590115198811, "grad_norm": 2.030951448205405, "learning_rate": 1.613455945573952e-05, "loss": 0.9121, "step": 12557 }, { "epoch": 0.9333333333333333, "grad_norm": 2.843279830376776, "learning_rate": 1.613392578256201e-05, "loss": 0.7743, "step": 12558 }, { "epoch": 0.9334076551467856, "grad_norm": 1.9642132003057184, "learning_rate": 1.6133292069894606e-05, "loss": 0.8816, "step": 12559 }, { "epoch": 0.9334819769602378, "grad_norm": 1.9716038261317048, "learning_rate": 1.613265831774139e-05, "loss": 0.5313, "step": 12560 }, { "epoch": 0.9335562987736901, "grad_norm": 1.9737637376577704, "learning_rate": 1.613202452610643e-05, "loss": 0.868, "step": 12561 }, { "epoch": 0.9336306205871423, "grad_norm": 3.306613037883373, "learning_rate": 1.6131390694993822e-05, "loss": 0.8559, "step": 12562 }, { "epoch": 0.9337049424005945, "grad_norm": 2.270707547438646, "learning_rate": 1.6130756824407634e-05, "loss": 0.8961, "step": 12563 }, { "epoch": 0.9337792642140468, "grad_norm": 2.7648587943042817, "learning_rate": 1.6130122914351955e-05, "loss": 0.952, "step": 12564 }, { "epoch": 0.933853586027499, "grad_norm": 3.640639642957821, "learning_rate": 1.612948896483086e-05, "loss": 0.8394, "step": 12565 }, { "epoch": 0.9339279078409514, "grad_norm": 1.5668537527369044, "learning_rate": 1.6128854975848435e-05, "loss": 0.6934, "step": 12566 }, { "epoch": 0.9340022296544036, "grad_norm": 2.686111775368765, "learning_rate": 1.6128220947408758e-05, "loss": 0.872, "step": 12567 }, { "epoch": 0.9340765514678558, "grad_norm": 2.0454162571557304, "learning_rate": 1.612758687951591e-05, "loss": 1.0146, "step": 12568 }, { "epoch": 0.9341508732813081, "grad_norm": 2.5838228938365275, "learning_rate": 1.6126952772173983e-05, "loss": 0.7048, "step": 12569 }, { "epoch": 0.9342251950947603, "grad_norm": 2.087503680716886, "learning_rate": 1.6126318625387045e-05, "loss": 0.8546, "step": 12570 }, { "epoch": 0.9342995169082126, "grad_norm": 1.539721756928031, "learning_rate": 1.612568443915919e-05, "loss": 0.7591, "step": 12571 }, { "epoch": 0.9343738387216648, "grad_norm": 1.875281073457247, "learning_rate": 1.6125050213494494e-05, "loss": 0.7366, "step": 12572 }, { "epoch": 0.934448160535117, "grad_norm": 2.063236827995044, "learning_rate": 1.6124415948397047e-05, "loss": 0.8799, "step": 12573 }, { "epoch": 0.9345224823485693, "grad_norm": 2.280042228750142, "learning_rate": 1.612378164387092e-05, "loss": 0.7517, "step": 12574 }, { "epoch": 0.9345968041620215, "grad_norm": 2.6883249436469097, "learning_rate": 1.612314729992021e-05, "loss": 0.8575, "step": 12575 }, { "epoch": 0.9346711259754737, "grad_norm": 1.8142955545258082, "learning_rate": 1.6122512916548995e-05, "loss": 0.8462, "step": 12576 }, { "epoch": 0.9347454477889261, "grad_norm": 2.325581197729178, "learning_rate": 1.612187849376136e-05, "loss": 0.9243, "step": 12577 }, { "epoch": 0.9348197696023783, "grad_norm": 1.9088556847519804, "learning_rate": 1.6121244031561384e-05, "loss": 0.8578, "step": 12578 }, { "epoch": 0.9348940914158306, "grad_norm": 1.7254593652789059, "learning_rate": 1.6120609529953164e-05, "loss": 0.8679, "step": 12579 }, { "epoch": 0.9349684132292828, "grad_norm": 4.503916457497404, "learning_rate": 1.6119974988940775e-05, "loss": 0.9266, "step": 12580 }, { "epoch": 0.935042735042735, "grad_norm": 2.2204400627250616, "learning_rate": 1.61193404085283e-05, "loss": 1.0005, "step": 12581 }, { "epoch": 0.9351170568561873, "grad_norm": 2.2728501145673463, "learning_rate": 1.6118705788719834e-05, "loss": 0.9005, "step": 12582 }, { "epoch": 0.9351913786696395, "grad_norm": 1.911418914748734, "learning_rate": 1.6118071129519454e-05, "loss": 0.8702, "step": 12583 }, { "epoch": 0.9352657004830918, "grad_norm": 2.201606728946784, "learning_rate": 1.6117436430931252e-05, "loss": 0.8806, "step": 12584 }, { "epoch": 0.935340022296544, "grad_norm": 2.0432053561245254, "learning_rate": 1.611680169295931e-05, "loss": 0.9706, "step": 12585 }, { "epoch": 0.9354143441099962, "grad_norm": 1.7474685189802983, "learning_rate": 1.611616691560772e-05, "loss": 0.7749, "step": 12586 }, { "epoch": 0.9354886659234485, "grad_norm": 1.622282061128344, "learning_rate": 1.611553209888056e-05, "loss": 0.8819, "step": 12587 }, { "epoch": 0.9355629877369008, "grad_norm": 1.9230319737317287, "learning_rate": 1.6114897242781925e-05, "loss": 0.762, "step": 12588 }, { "epoch": 0.9356373095503531, "grad_norm": 2.350687997328815, "learning_rate": 1.6114262347315897e-05, "loss": 1.0343, "step": 12589 }, { "epoch": 0.9357116313638053, "grad_norm": 2.17283223304669, "learning_rate": 1.6113627412486565e-05, "loss": 0.9013, "step": 12590 }, { "epoch": 0.9357859531772575, "grad_norm": 2.3648652039312936, "learning_rate": 1.6112992438298016e-05, "loss": 0.9288, "step": 12591 }, { "epoch": 0.9358602749907098, "grad_norm": 2.066861065828551, "learning_rate": 1.6112357424754342e-05, "loss": 0.6556, "step": 12592 }, { "epoch": 0.935934596804162, "grad_norm": 1.7723928729422547, "learning_rate": 1.6111722371859628e-05, "loss": 0.8115, "step": 12593 }, { "epoch": 0.9360089186176143, "grad_norm": 2.328910898884035, "learning_rate": 1.611108727961796e-05, "loss": 0.9033, "step": 12594 }, { "epoch": 0.9360832404310665, "grad_norm": 2.097122845871938, "learning_rate": 1.6110452148033427e-05, "loss": 0.6826, "step": 12595 }, { "epoch": 0.9361575622445187, "grad_norm": 1.8586208901752643, "learning_rate": 1.6109816977110127e-05, "loss": 0.7644, "step": 12596 }, { "epoch": 0.936231884057971, "grad_norm": 1.965884513718765, "learning_rate": 1.6109181766852137e-05, "loss": 0.7418, "step": 12597 }, { "epoch": 0.9363062058714232, "grad_norm": 1.9837924805314484, "learning_rate": 1.6108546517263552e-05, "loss": 0.7721, "step": 12598 }, { "epoch": 0.9363805276848756, "grad_norm": 1.8881707208414713, "learning_rate": 1.6107911228348463e-05, "loss": 0.7233, "step": 12599 }, { "epoch": 0.9364548494983278, "grad_norm": 1.8569627716190966, "learning_rate": 1.6107275900110958e-05, "loss": 0.8277, "step": 12600 }, { "epoch": 0.93652917131178, "grad_norm": 1.8921350825990153, "learning_rate": 1.610664053255513e-05, "loss": 0.8849, "step": 12601 }, { "epoch": 0.9366034931252323, "grad_norm": 1.9933902955571963, "learning_rate": 1.610600512568506e-05, "loss": 0.777, "step": 12602 }, { "epoch": 0.9366778149386845, "grad_norm": 1.7889964436486752, "learning_rate": 1.6105369679504855e-05, "loss": 0.772, "step": 12603 }, { "epoch": 0.9367521367521368, "grad_norm": 2.191689650595363, "learning_rate": 1.6104734194018594e-05, "loss": 0.8372, "step": 12604 }, { "epoch": 0.936826458565589, "grad_norm": 2.4166406485152496, "learning_rate": 1.610409866923037e-05, "loss": 1.0044, "step": 12605 }, { "epoch": 0.9369007803790412, "grad_norm": 1.540031405974367, "learning_rate": 1.610346310514427e-05, "loss": 0.6936, "step": 12606 }, { "epoch": 0.9369751021924935, "grad_norm": 1.8934818691729254, "learning_rate": 1.61028275017644e-05, "loss": 0.8641, "step": 12607 }, { "epoch": 0.9370494240059457, "grad_norm": 1.960287910456754, "learning_rate": 1.6102191859094837e-05, "loss": 0.8965, "step": 12608 }, { "epoch": 0.937123745819398, "grad_norm": 1.9236593559144275, "learning_rate": 1.6101556177139683e-05, "loss": 0.7698, "step": 12609 }, { "epoch": 0.9371980676328503, "grad_norm": 1.9325661194808945, "learning_rate": 1.6100920455903027e-05, "loss": 0.9383, "step": 12610 }, { "epoch": 0.9372723894463025, "grad_norm": 1.987816041810956, "learning_rate": 1.6100284695388956e-05, "loss": 0.9537, "step": 12611 }, { "epoch": 0.9373467112597548, "grad_norm": 1.9807230476593416, "learning_rate": 1.6099648895601575e-05, "loss": 0.8551, "step": 12612 }, { "epoch": 0.937421033073207, "grad_norm": 2.1903265654478825, "learning_rate": 1.6099013056544974e-05, "loss": 0.868, "step": 12613 }, { "epoch": 0.9374953548866592, "grad_norm": 1.733550076100973, "learning_rate": 1.6098377178223235e-05, "loss": 0.8554, "step": 12614 }, { "epoch": 0.9375696767001115, "grad_norm": 2.1712626491363203, "learning_rate": 1.6097741260640463e-05, "loss": 1.0571, "step": 12615 }, { "epoch": 0.9376439985135637, "grad_norm": 1.9064622212571969, "learning_rate": 1.609710530380075e-05, "loss": 0.9383, "step": 12616 }, { "epoch": 0.937718320327016, "grad_norm": 1.87746625312472, "learning_rate": 1.609646930770819e-05, "loss": 0.9173, "step": 12617 }, { "epoch": 0.9377926421404682, "grad_norm": 2.22216398291022, "learning_rate": 1.6095833272366876e-05, "loss": 0.8603, "step": 12618 }, { "epoch": 0.9378669639539204, "grad_norm": 2.2391340306781897, "learning_rate": 1.6095197197780902e-05, "loss": 0.9041, "step": 12619 }, { "epoch": 0.9379412857673727, "grad_norm": 1.694919482024123, "learning_rate": 1.6094561083954365e-05, "loss": 0.6611, "step": 12620 }, { "epoch": 0.9380156075808249, "grad_norm": 1.9382078039996309, "learning_rate": 1.6093924930891365e-05, "loss": 0.8381, "step": 12621 }, { "epoch": 0.9380899293942773, "grad_norm": 2.2870482979753866, "learning_rate": 1.6093288738595987e-05, "loss": 0.6009, "step": 12622 }, { "epoch": 0.9381642512077295, "grad_norm": 2.001513992836168, "learning_rate": 1.6092652507072335e-05, "loss": 0.8683, "step": 12623 }, { "epoch": 0.9382385730211817, "grad_norm": 1.8658243719098278, "learning_rate": 1.60920162363245e-05, "loss": 0.849, "step": 12624 }, { "epoch": 0.938312894834634, "grad_norm": 1.8373152672600999, "learning_rate": 1.6091379926356582e-05, "loss": 0.9328, "step": 12625 }, { "epoch": 0.9383872166480862, "grad_norm": 1.809491173745662, "learning_rate": 1.6090743577172678e-05, "loss": 0.6317, "step": 12626 }, { "epoch": 0.9384615384615385, "grad_norm": 2.2177494647341227, "learning_rate": 1.609010718877688e-05, "loss": 0.905, "step": 12627 }, { "epoch": 0.9385358602749907, "grad_norm": 2.3035738321218444, "learning_rate": 1.608947076117329e-05, "loss": 0.7629, "step": 12628 }, { "epoch": 0.9386101820884429, "grad_norm": 2.076782541075609, "learning_rate": 1.6088834294366e-05, "loss": 0.7592, "step": 12629 }, { "epoch": 0.9386845039018952, "grad_norm": 2.329172241722447, "learning_rate": 1.6088197788359115e-05, "loss": 0.8669, "step": 12630 }, { "epoch": 0.9387588257153474, "grad_norm": 2.094645151641075, "learning_rate": 1.6087561243156727e-05, "loss": 0.7362, "step": 12631 }, { "epoch": 0.9388331475287996, "grad_norm": 1.6309062213335899, "learning_rate": 1.6086924658762933e-05, "loss": 0.6957, "step": 12632 }, { "epoch": 0.938907469342252, "grad_norm": 1.8388724288923934, "learning_rate": 1.6086288035181835e-05, "loss": 0.9215, "step": 12633 }, { "epoch": 0.9389817911557042, "grad_norm": 1.734793396106144, "learning_rate": 1.608565137241753e-05, "loss": 0.7257, "step": 12634 }, { "epoch": 0.9390561129691565, "grad_norm": 1.832593347189321, "learning_rate": 1.608501467047412e-05, "loss": 0.5858, "step": 12635 }, { "epoch": 0.9391304347826087, "grad_norm": 1.6823127159078366, "learning_rate": 1.6084377929355702e-05, "loss": 0.8853, "step": 12636 }, { "epoch": 0.939204756596061, "grad_norm": 2.3009213178530414, "learning_rate": 1.6083741149066373e-05, "loss": 0.8621, "step": 12637 }, { "epoch": 0.9392790784095132, "grad_norm": 2.167125124088578, "learning_rate": 1.608310432961023e-05, "loss": 0.79, "step": 12638 }, { "epoch": 0.9393534002229654, "grad_norm": 1.7215644789491207, "learning_rate": 1.608246747099138e-05, "loss": 0.7371, "step": 12639 }, { "epoch": 0.9394277220364177, "grad_norm": 1.7916864008470879, "learning_rate": 1.6081830573213922e-05, "loss": 0.7651, "step": 12640 }, { "epoch": 0.9395020438498699, "grad_norm": 1.5542674457845658, "learning_rate": 1.6081193636281953e-05, "loss": 0.7852, "step": 12641 }, { "epoch": 0.9395763656633221, "grad_norm": 2.3773577480625647, "learning_rate": 1.6080556660199573e-05, "loss": 0.9688, "step": 12642 }, { "epoch": 0.9396506874767744, "grad_norm": 2.2300718740348064, "learning_rate": 1.6079919644970887e-05, "loss": 0.9132, "step": 12643 }, { "epoch": 0.9397250092902267, "grad_norm": 1.7702153811536459, "learning_rate": 1.6079282590599995e-05, "loss": 0.9031, "step": 12644 }, { "epoch": 0.939799331103679, "grad_norm": 2.5101596555503076, "learning_rate": 1.6078645497090994e-05, "loss": 1.004, "step": 12645 }, { "epoch": 0.9398736529171312, "grad_norm": 4.770949381268362, "learning_rate": 1.607800836444799e-05, "loss": 0.7609, "step": 12646 }, { "epoch": 0.9399479747305834, "grad_norm": 1.4242720713708308, "learning_rate": 1.6077371192675083e-05, "loss": 0.7155, "step": 12647 }, { "epoch": 0.9400222965440357, "grad_norm": 1.6651582238557459, "learning_rate": 1.607673398177638e-05, "loss": 1.0496, "step": 12648 }, { "epoch": 0.9400966183574879, "grad_norm": 5.571740962171486, "learning_rate": 1.6076096731755974e-05, "loss": 0.7443, "step": 12649 }, { "epoch": 0.9401709401709402, "grad_norm": 8.13869406249501, "learning_rate": 1.607545944261797e-05, "loss": 0.7749, "step": 12650 }, { "epoch": 0.9402452619843924, "grad_norm": 2.1077833984476504, "learning_rate": 1.607482211436648e-05, "loss": 1.0338, "step": 12651 }, { "epoch": 0.9403195837978446, "grad_norm": 1.781410710712559, "learning_rate": 1.6074184747005596e-05, "loss": 0.8202, "step": 12652 }, { "epoch": 0.9403939056112969, "grad_norm": 1.5687802904083112, "learning_rate": 1.6073547340539425e-05, "loss": 0.8817, "step": 12653 }, { "epoch": 0.9404682274247491, "grad_norm": 2.2824228296773934, "learning_rate": 1.6072909894972076e-05, "loss": 0.6716, "step": 12654 }, { "epoch": 0.9405425492382015, "grad_norm": 1.994333044443078, "learning_rate": 1.6072272410307644e-05, "loss": 0.8978, "step": 12655 }, { "epoch": 0.9406168710516537, "grad_norm": 2.3555991980322935, "learning_rate": 1.607163488655024e-05, "loss": 0.9409, "step": 12656 }, { "epoch": 0.9406911928651059, "grad_norm": 1.8575689933201174, "learning_rate": 1.6070997323703962e-05, "loss": 0.6776, "step": 12657 }, { "epoch": 0.9407655146785582, "grad_norm": 2.618323478790295, "learning_rate": 1.6070359721772925e-05, "loss": 0.9487, "step": 12658 }, { "epoch": 0.9408398364920104, "grad_norm": 1.9745105136672276, "learning_rate": 1.606972208076122e-05, "loss": 0.7828, "step": 12659 }, { "epoch": 0.9409141583054627, "grad_norm": 2.172302618589384, "learning_rate": 1.6069084400672962e-05, "loss": 0.9863, "step": 12660 }, { "epoch": 0.9409884801189149, "grad_norm": 2.0329752828531453, "learning_rate": 1.6068446681512256e-05, "loss": 0.7031, "step": 12661 }, { "epoch": 0.9410628019323671, "grad_norm": 3.1448879175579805, "learning_rate": 1.6067808923283203e-05, "loss": 0.7818, "step": 12662 }, { "epoch": 0.9411371237458194, "grad_norm": 2.0564769029645653, "learning_rate": 1.6067171125989908e-05, "loss": 1.0227, "step": 12663 }, { "epoch": 0.9412114455592716, "grad_norm": 2.9676470098104195, "learning_rate": 1.6066533289636485e-05, "loss": 0.9294, "step": 12664 }, { "epoch": 0.9412857673727238, "grad_norm": 2.4137507822743407, "learning_rate": 1.6065895414227034e-05, "loss": 0.7815, "step": 12665 }, { "epoch": 0.9413600891861762, "grad_norm": 1.806071597084568, "learning_rate": 1.6065257499765657e-05, "loss": 0.77, "step": 12666 }, { "epoch": 0.9414344109996284, "grad_norm": 2.3502225024240326, "learning_rate": 1.6064619546256475e-05, "loss": 0.7523, "step": 12667 }, { "epoch": 0.9415087328130807, "grad_norm": 6.85105389245982, "learning_rate": 1.6063981553703586e-05, "loss": 0.9622, "step": 12668 }, { "epoch": 0.9415830546265329, "grad_norm": 2.220789920474782, "learning_rate": 1.6063343522111095e-05, "loss": 0.8951, "step": 12669 }, { "epoch": 0.9416573764399851, "grad_norm": 2.72331584706184, "learning_rate": 1.6062705451483114e-05, "loss": 1.1011, "step": 12670 }, { "epoch": 0.9417316982534374, "grad_norm": 1.8320560888919224, "learning_rate": 1.6062067341823754e-05, "loss": 1.0004, "step": 12671 }, { "epoch": 0.9418060200668896, "grad_norm": 1.9412585713203208, "learning_rate": 1.6061429193137113e-05, "loss": 0.9349, "step": 12672 }, { "epoch": 0.9418803418803419, "grad_norm": 2.3329876488706462, "learning_rate": 1.606079100542731e-05, "loss": 0.8917, "step": 12673 }, { "epoch": 0.9419546636937941, "grad_norm": 2.3891212738881884, "learning_rate": 1.6060152778698443e-05, "loss": 0.769, "step": 12674 }, { "epoch": 0.9420289855072463, "grad_norm": 2.3083301857252834, "learning_rate": 1.6059514512954634e-05, "loss": 0.8957, "step": 12675 }, { "epoch": 0.9421033073206986, "grad_norm": 1.940434187740796, "learning_rate": 1.6058876208199984e-05, "loss": 0.9485, "step": 12676 }, { "epoch": 0.9421776291341508, "grad_norm": 2.093526460352392, "learning_rate": 1.6058237864438596e-05, "loss": 0.7907, "step": 12677 }, { "epoch": 0.9422519509476032, "grad_norm": 1.9172219631277445, "learning_rate": 1.6057599481674593e-05, "loss": 1.02, "step": 12678 }, { "epoch": 0.9423262727610554, "grad_norm": 1.6836046786154037, "learning_rate": 1.605696105991208e-05, "loss": 0.847, "step": 12679 }, { "epoch": 0.9424005945745076, "grad_norm": 2.3096710616557417, "learning_rate": 1.6056322599155163e-05, "loss": 0.892, "step": 12680 }, { "epoch": 0.9424749163879599, "grad_norm": 2.0479858964426207, "learning_rate": 1.6055684099407955e-05, "loss": 1.0184, "step": 12681 }, { "epoch": 0.9425492382014121, "grad_norm": 1.749807750667731, "learning_rate": 1.6055045560674568e-05, "loss": 0.8449, "step": 12682 }, { "epoch": 0.9426235600148644, "grad_norm": 1.8633560355912075, "learning_rate": 1.6054406982959112e-05, "loss": 0.6336, "step": 12683 }, { "epoch": 0.9426978818283166, "grad_norm": 2.7006232719221295, "learning_rate": 1.6053768366265697e-05, "loss": 0.8174, "step": 12684 }, { "epoch": 0.9427722036417688, "grad_norm": 1.8594345762187676, "learning_rate": 1.605312971059844e-05, "loss": 0.9588, "step": 12685 }, { "epoch": 0.9428465254552211, "grad_norm": 1.708931133519487, "learning_rate": 1.6052491015961443e-05, "loss": 0.8309, "step": 12686 }, { "epoch": 0.9429208472686733, "grad_norm": 1.796297329882358, "learning_rate": 1.6051852282358823e-05, "loss": 0.7524, "step": 12687 }, { "epoch": 0.9429951690821256, "grad_norm": 2.004176971132842, "learning_rate": 1.6051213509794693e-05, "loss": 1.0034, "step": 12688 }, { "epoch": 0.9430694908955779, "grad_norm": 2.3185131158455032, "learning_rate": 1.6050574698273166e-05, "loss": 0.879, "step": 12689 }, { "epoch": 0.9431438127090301, "grad_norm": 3.5143837709580477, "learning_rate": 1.6049935847798353e-05, "loss": 0.9945, "step": 12690 }, { "epoch": 0.9432181345224824, "grad_norm": 1.5991861105673273, "learning_rate": 1.6049296958374366e-05, "loss": 0.6995, "step": 12691 }, { "epoch": 0.9432924563359346, "grad_norm": 1.792240420007772, "learning_rate": 1.6048658030005318e-05, "loss": 0.8673, "step": 12692 }, { "epoch": 0.9433667781493869, "grad_norm": 2.9020240246837283, "learning_rate": 1.6048019062695324e-05, "loss": 0.933, "step": 12693 }, { "epoch": 0.9434410999628391, "grad_norm": 1.8998147445511333, "learning_rate": 1.6047380056448495e-05, "loss": 0.898, "step": 12694 }, { "epoch": 0.9435154217762913, "grad_norm": 2.735002951861123, "learning_rate": 1.6046741011268952e-05, "loss": 0.7643, "step": 12695 }, { "epoch": 0.9435897435897436, "grad_norm": 2.0762634630171095, "learning_rate": 1.60461019271608e-05, "loss": 0.8906, "step": 12696 }, { "epoch": 0.9436640654031958, "grad_norm": 1.9647887967622717, "learning_rate": 1.604546280412816e-05, "loss": 0.8669, "step": 12697 }, { "epoch": 0.943738387216648, "grad_norm": 1.5011141032112474, "learning_rate": 1.604482364217514e-05, "loss": 0.6203, "step": 12698 }, { "epoch": 0.9438127090301003, "grad_norm": 2.0043154844614275, "learning_rate": 1.6044184441305864e-05, "loss": 0.8449, "step": 12699 }, { "epoch": 0.9438870308435526, "grad_norm": 1.662117643650881, "learning_rate": 1.604354520152444e-05, "loss": 0.8546, "step": 12700 }, { "epoch": 0.9439613526570049, "grad_norm": 2.0432599266683087, "learning_rate": 1.6042905922834985e-05, "loss": 0.9136, "step": 12701 }, { "epoch": 0.9440356744704571, "grad_norm": 2.993298020484582, "learning_rate": 1.6042266605241617e-05, "loss": 0.8247, "step": 12702 }, { "epoch": 0.9441099962839093, "grad_norm": 1.7282271247469783, "learning_rate": 1.604162724874845e-05, "loss": 0.7455, "step": 12703 }, { "epoch": 0.9441843180973616, "grad_norm": 2.14672718449875, "learning_rate": 1.6040987853359598e-05, "loss": 1.1057, "step": 12704 }, { "epoch": 0.9442586399108138, "grad_norm": 1.8644833493117419, "learning_rate": 1.604034841907918e-05, "loss": 0.8645, "step": 12705 }, { "epoch": 0.9443329617242661, "grad_norm": 2.360575701335968, "learning_rate": 1.6039708945911315e-05, "loss": 0.626, "step": 12706 }, { "epoch": 0.9444072835377183, "grad_norm": 1.824235262219272, "learning_rate": 1.6039069433860114e-05, "loss": 0.9751, "step": 12707 }, { "epoch": 0.9444816053511705, "grad_norm": 2.4975724848074594, "learning_rate": 1.6038429882929697e-05, "loss": 0.8289, "step": 12708 }, { "epoch": 0.9445559271646228, "grad_norm": 1.6731674470382814, "learning_rate": 1.6037790293124188e-05, "loss": 0.8563, "step": 12709 }, { "epoch": 0.944630248978075, "grad_norm": 1.9186159363537152, "learning_rate": 1.603715066444769e-05, "loss": 0.708, "step": 12710 }, { "epoch": 0.9447045707915274, "grad_norm": 1.9560849593767897, "learning_rate": 1.6036510996904337e-05, "loss": 0.6845, "step": 12711 }, { "epoch": 0.9447788926049796, "grad_norm": 1.8791814155867772, "learning_rate": 1.6035871290498234e-05, "loss": 0.8842, "step": 12712 }, { "epoch": 0.9448532144184318, "grad_norm": 1.5698078351856861, "learning_rate": 1.6035231545233506e-05, "loss": 0.8234, "step": 12713 }, { "epoch": 0.9449275362318841, "grad_norm": 1.5483777098180116, "learning_rate": 1.603459176111427e-05, "loss": 0.7909, "step": 12714 }, { "epoch": 0.9450018580453363, "grad_norm": 1.7544412648552952, "learning_rate": 1.6033951938144644e-05, "loss": 0.9867, "step": 12715 }, { "epoch": 0.9450761798587886, "grad_norm": 1.8051222410090568, "learning_rate": 1.6033312076328752e-05, "loss": 0.7611, "step": 12716 }, { "epoch": 0.9451505016722408, "grad_norm": 1.8281913655167998, "learning_rate": 1.603267217567071e-05, "loss": 0.7071, "step": 12717 }, { "epoch": 0.945224823485693, "grad_norm": 2.877744932068444, "learning_rate": 1.6032032236174635e-05, "loss": 0.8886, "step": 12718 }, { "epoch": 0.9452991452991453, "grad_norm": 1.7367452812839974, "learning_rate": 1.603139225784465e-05, "loss": 0.8377, "step": 12719 }, { "epoch": 0.9453734671125975, "grad_norm": 1.949713631757861, "learning_rate": 1.6030752240684877e-05, "loss": 0.9213, "step": 12720 }, { "epoch": 0.9454477889260497, "grad_norm": 1.5584956024632557, "learning_rate": 1.603011218469943e-05, "loss": 0.7671, "step": 12721 }, { "epoch": 0.9455221107395021, "grad_norm": 2.2264020072955195, "learning_rate": 1.602947208989244e-05, "loss": 0.8216, "step": 12722 }, { "epoch": 0.9455964325529543, "grad_norm": 1.7691660777832667, "learning_rate": 1.6028831956268013e-05, "loss": 0.9892, "step": 12723 }, { "epoch": 0.9456707543664066, "grad_norm": 1.843473148776844, "learning_rate": 1.6028191783830285e-05, "loss": 0.932, "step": 12724 }, { "epoch": 0.9457450761798588, "grad_norm": 2.111912594501671, "learning_rate": 1.6027551572583367e-05, "loss": 0.7848, "step": 12725 }, { "epoch": 0.945819397993311, "grad_norm": 2.3518898423542143, "learning_rate": 1.602691132253139e-05, "loss": 0.7718, "step": 12726 }, { "epoch": 0.9458937198067633, "grad_norm": 1.8216888491309553, "learning_rate": 1.6026271033678467e-05, "loss": 0.7528, "step": 12727 }, { "epoch": 0.9459680416202155, "grad_norm": 1.5870508302689934, "learning_rate": 1.6025630706028724e-05, "loss": 0.6465, "step": 12728 }, { "epoch": 0.9460423634336678, "grad_norm": 1.7031183097189018, "learning_rate": 1.6024990339586285e-05, "loss": 0.7393, "step": 12729 }, { "epoch": 0.94611668524712, "grad_norm": 1.7780114209259106, "learning_rate": 1.6024349934355267e-05, "loss": 0.8249, "step": 12730 }, { "epoch": 0.9461910070605722, "grad_norm": 2.112684055876279, "learning_rate": 1.60237094903398e-05, "loss": 0.7594, "step": 12731 }, { "epoch": 0.9462653288740245, "grad_norm": 1.6120893140119634, "learning_rate": 1.6023069007544e-05, "loss": 0.8281, "step": 12732 }, { "epoch": 0.9463396506874768, "grad_norm": 5.610804141710755, "learning_rate": 1.6022428485971998e-05, "loss": 0.9589, "step": 12733 }, { "epoch": 0.9464139725009291, "grad_norm": 1.667565820718809, "learning_rate": 1.6021787925627915e-05, "loss": 0.7333, "step": 12734 }, { "epoch": 0.9464882943143813, "grad_norm": 2.296517043888676, "learning_rate": 1.602114732651587e-05, "loss": 0.9975, "step": 12735 }, { "epoch": 0.9465626161278335, "grad_norm": 1.7427325377199805, "learning_rate": 1.6020506688639992e-05, "loss": 0.9648, "step": 12736 }, { "epoch": 0.9466369379412858, "grad_norm": 2.1633652816114117, "learning_rate": 1.6019866012004404e-05, "loss": 1.0201, "step": 12737 }, { "epoch": 0.946711259754738, "grad_norm": 2.165170161468378, "learning_rate": 1.6019225296613234e-05, "loss": 1.113, "step": 12738 }, { "epoch": 0.9467855815681903, "grad_norm": 1.8345572307735398, "learning_rate": 1.60185845424706e-05, "loss": 1.0058, "step": 12739 }, { "epoch": 0.9468599033816425, "grad_norm": 2.0416027882653522, "learning_rate": 1.6017943749580633e-05, "loss": 0.8175, "step": 12740 }, { "epoch": 0.9469342251950947, "grad_norm": 1.8559020655427636, "learning_rate": 1.6017302917947456e-05, "loss": 0.796, "step": 12741 }, { "epoch": 0.947008547008547, "grad_norm": 1.843519291243869, "learning_rate": 1.60166620475752e-05, "loss": 0.7628, "step": 12742 }, { "epoch": 0.9470828688219992, "grad_norm": 1.855071145719582, "learning_rate": 1.6016021138467977e-05, "loss": 0.7171, "step": 12743 }, { "epoch": 0.9471571906354515, "grad_norm": 1.558179570276276, "learning_rate": 1.601538019062993e-05, "loss": 0.8378, "step": 12744 }, { "epoch": 0.9472315124489038, "grad_norm": 1.54543180013254, "learning_rate": 1.601473920406517e-05, "loss": 0.6478, "step": 12745 }, { "epoch": 0.947305834262356, "grad_norm": 1.9337084072443604, "learning_rate": 1.6014098178777834e-05, "loss": 0.8084, "step": 12746 }, { "epoch": 0.9473801560758083, "grad_norm": 2.6358025357784642, "learning_rate": 1.6013457114772047e-05, "loss": 0.7881, "step": 12747 }, { "epoch": 0.9474544778892605, "grad_norm": 1.98958599875951, "learning_rate": 1.6012816012051934e-05, "loss": 0.9277, "step": 12748 }, { "epoch": 0.9475287997027128, "grad_norm": 1.662378427561456, "learning_rate": 1.6012174870621623e-05, "loss": 0.5965, "step": 12749 }, { "epoch": 0.947603121516165, "grad_norm": 1.8354492839129053, "learning_rate": 1.6011533690485244e-05, "loss": 0.6831, "step": 12750 }, { "epoch": 0.9476774433296172, "grad_norm": 1.6527036011831664, "learning_rate": 1.6010892471646923e-05, "loss": 0.6825, "step": 12751 }, { "epoch": 0.9477517651430695, "grad_norm": 1.9915123360303975, "learning_rate": 1.6010251214110787e-05, "loss": 0.7138, "step": 12752 }, { "epoch": 0.9478260869565217, "grad_norm": 2.608040479086508, "learning_rate": 1.600960991788097e-05, "loss": 0.8133, "step": 12753 }, { "epoch": 0.947900408769974, "grad_norm": 2.021907616449202, "learning_rate": 1.600896858296159e-05, "loss": 0.9628, "step": 12754 }, { "epoch": 0.9479747305834262, "grad_norm": 1.496399418971394, "learning_rate": 1.6008327209356784e-05, "loss": 0.5949, "step": 12755 }, { "epoch": 0.9480490523968785, "grad_norm": 1.959597624294591, "learning_rate": 1.6007685797070677e-05, "loss": 0.8522, "step": 12756 }, { "epoch": 0.9481233742103308, "grad_norm": 1.792571733220139, "learning_rate": 1.6007044346107405e-05, "loss": 0.8671, "step": 12757 }, { "epoch": 0.948197696023783, "grad_norm": 4.487915210225523, "learning_rate": 1.6006402856471092e-05, "loss": 0.7718, "step": 12758 }, { "epoch": 0.9482720178372352, "grad_norm": 2.114242753443089, "learning_rate": 1.6005761328165868e-05, "loss": 0.9074, "step": 12759 }, { "epoch": 0.9483463396506875, "grad_norm": 1.7996745220245283, "learning_rate": 1.6005119761195864e-05, "loss": 0.6995, "step": 12760 }, { "epoch": 0.9484206614641397, "grad_norm": 1.9035955930005326, "learning_rate": 1.600447815556521e-05, "loss": 0.9076, "step": 12761 }, { "epoch": 0.948494983277592, "grad_norm": 1.9686557840646917, "learning_rate": 1.6003836511278038e-05, "loss": 0.9957, "step": 12762 }, { "epoch": 0.9485693050910442, "grad_norm": 1.9401097936520226, "learning_rate": 1.600319482833848e-05, "loss": 0.8203, "step": 12763 }, { "epoch": 0.9486436269044964, "grad_norm": 1.8706148843304788, "learning_rate": 1.600255310675066e-05, "loss": 0.8374, "step": 12764 }, { "epoch": 0.9487179487179487, "grad_norm": 2.439533264846744, "learning_rate": 1.6001911346518722e-05, "loss": 0.9234, "step": 12765 }, { "epoch": 0.9487922705314009, "grad_norm": 1.6920226052935792, "learning_rate": 1.6001269547646784e-05, "loss": 0.7667, "step": 12766 }, { "epoch": 0.9488665923448533, "grad_norm": 1.951085844061484, "learning_rate": 1.6000627710138987e-05, "loss": 0.7984, "step": 12767 }, { "epoch": 0.9489409141583055, "grad_norm": 1.8203811302069344, "learning_rate": 1.599998583399946e-05, "loss": 0.613, "step": 12768 }, { "epoch": 0.9490152359717577, "grad_norm": 1.6362550264508064, "learning_rate": 1.5999343919232334e-05, "loss": 0.7226, "step": 12769 }, { "epoch": 0.94908955778521, "grad_norm": 2.4404729052936434, "learning_rate": 1.5998701965841745e-05, "loss": 0.9193, "step": 12770 }, { "epoch": 0.9491638795986622, "grad_norm": 1.992574798639489, "learning_rate": 1.5998059973831824e-05, "loss": 1.0017, "step": 12771 }, { "epoch": 0.9492382014121145, "grad_norm": 1.8950725685473686, "learning_rate": 1.59974179432067e-05, "loss": 0.7842, "step": 12772 }, { "epoch": 0.9493125232255667, "grad_norm": 5.145675246430745, "learning_rate": 1.5996775873970513e-05, "loss": 0.8883, "step": 12773 }, { "epoch": 0.9493868450390189, "grad_norm": 1.790161348449811, "learning_rate": 1.5996133766127394e-05, "loss": 0.8553, "step": 12774 }, { "epoch": 0.9494611668524712, "grad_norm": 3.4452952415272824, "learning_rate": 1.599549161968148e-05, "loss": 0.9543, "step": 12775 }, { "epoch": 0.9495354886659234, "grad_norm": 1.914927010387934, "learning_rate": 1.59948494346369e-05, "loss": 0.8844, "step": 12776 }, { "epoch": 0.9496098104793756, "grad_norm": 1.6595268777943883, "learning_rate": 1.599420721099779e-05, "loss": 0.7966, "step": 12777 }, { "epoch": 0.949684132292828, "grad_norm": 1.6957938382141677, "learning_rate": 1.5993564948768283e-05, "loss": 0.9147, "step": 12778 }, { "epoch": 0.9497584541062802, "grad_norm": 1.5973214432826324, "learning_rate": 1.5992922647952518e-05, "loss": 0.7227, "step": 12779 }, { "epoch": 0.9498327759197325, "grad_norm": 1.3395414816630027, "learning_rate": 1.599228030855463e-05, "loss": 0.7433, "step": 12780 }, { "epoch": 0.9499070977331847, "grad_norm": 1.8525320946077597, "learning_rate": 1.599163793057875e-05, "loss": 0.7701, "step": 12781 }, { "epoch": 0.949981419546637, "grad_norm": 8.353531909132863, "learning_rate": 1.5990995514029015e-05, "loss": 1.0289, "step": 12782 }, { "epoch": 0.9500557413600892, "grad_norm": 1.6432076856216133, "learning_rate": 1.5990353058909566e-05, "loss": 0.9091, "step": 12783 }, { "epoch": 0.9501300631735414, "grad_norm": 1.559337976191067, "learning_rate": 1.5989710565224528e-05, "loss": 0.7713, "step": 12784 }, { "epoch": 0.9502043849869937, "grad_norm": 1.7000152461179419, "learning_rate": 1.5989068032978047e-05, "loss": 0.8688, "step": 12785 }, { "epoch": 0.9502787068004459, "grad_norm": 1.8761175151069922, "learning_rate": 1.598842546217426e-05, "loss": 0.8639, "step": 12786 }, { "epoch": 0.9503530286138981, "grad_norm": 2.156039301847604, "learning_rate": 1.59877828528173e-05, "loss": 0.7755, "step": 12787 }, { "epoch": 0.9504273504273504, "grad_norm": 2.1566011994234566, "learning_rate": 1.59871402049113e-05, "loss": 0.9942, "step": 12788 }, { "epoch": 0.9505016722408027, "grad_norm": 1.4858513240500677, "learning_rate": 1.5986497518460405e-05, "loss": 0.6349, "step": 12789 }, { "epoch": 0.950575994054255, "grad_norm": 1.8027075507635804, "learning_rate": 1.598585479346875e-05, "loss": 0.7518, "step": 12790 }, { "epoch": 0.9506503158677072, "grad_norm": 1.7311292462177479, "learning_rate": 1.598521202994047e-05, "loss": 0.7069, "step": 12791 }, { "epoch": 0.9507246376811594, "grad_norm": 1.9184500773050792, "learning_rate": 1.5984569227879708e-05, "loss": 0.8763, "step": 12792 }, { "epoch": 0.9507989594946117, "grad_norm": 13.759564460680632, "learning_rate": 1.59839263872906e-05, "loss": 1.05, "step": 12793 }, { "epoch": 0.9508732813080639, "grad_norm": 1.7628542888090621, "learning_rate": 1.5983283508177285e-05, "loss": 0.8141, "step": 12794 }, { "epoch": 0.9509476031215162, "grad_norm": 2.1086169280489493, "learning_rate": 1.5982640590543898e-05, "loss": 0.9782, "step": 12795 }, { "epoch": 0.9510219249349684, "grad_norm": 1.517771244636299, "learning_rate": 1.598199763439458e-05, "loss": 0.743, "step": 12796 }, { "epoch": 0.9510962467484206, "grad_norm": 1.5361510565036067, "learning_rate": 1.5981354639733475e-05, "loss": 0.6691, "step": 12797 }, { "epoch": 0.9511705685618729, "grad_norm": 2.5114378331624705, "learning_rate": 1.598071160656472e-05, "loss": 0.7889, "step": 12798 }, { "epoch": 0.9512448903753251, "grad_norm": 2.316348749115167, "learning_rate": 1.5980068534892455e-05, "loss": 0.7909, "step": 12799 }, { "epoch": 0.9513192121887774, "grad_norm": 1.9557691215287667, "learning_rate": 1.5979425424720815e-05, "loss": 0.8686, "step": 12800 }, { "epoch": 0.9513935340022297, "grad_norm": 2.6336722142297186, "learning_rate": 1.5978782276053947e-05, "loss": 0.9901, "step": 12801 }, { "epoch": 0.9514678558156819, "grad_norm": 2.628912404538753, "learning_rate": 1.5978139088895988e-05, "loss": 0.7641, "step": 12802 }, { "epoch": 0.9515421776291342, "grad_norm": 1.7940959028921726, "learning_rate": 1.597749586325108e-05, "loss": 0.8541, "step": 12803 }, { "epoch": 0.9516164994425864, "grad_norm": 1.8366087632103438, "learning_rate": 1.597685259912336e-05, "loss": 0.7452, "step": 12804 }, { "epoch": 0.9516908212560387, "grad_norm": 1.9571747273942497, "learning_rate": 1.597620929651698e-05, "loss": 0.9777, "step": 12805 }, { "epoch": 0.9517651430694909, "grad_norm": 1.7813270816682794, "learning_rate": 1.597556595543607e-05, "loss": 0.8395, "step": 12806 }, { "epoch": 0.9518394648829431, "grad_norm": 2.1342868423276053, "learning_rate": 1.5974922575884777e-05, "loss": 0.8773, "step": 12807 }, { "epoch": 0.9519137866963954, "grad_norm": 1.765455145307237, "learning_rate": 1.5974279157867246e-05, "loss": 0.9117, "step": 12808 }, { "epoch": 0.9519881085098476, "grad_norm": 1.6716652139074077, "learning_rate": 1.5973635701387612e-05, "loss": 0.6908, "step": 12809 }, { "epoch": 0.9520624303232998, "grad_norm": 1.5332443698323404, "learning_rate": 1.5972992206450023e-05, "loss": 0.8351, "step": 12810 }, { "epoch": 0.9521367521367521, "grad_norm": 1.8398897624646844, "learning_rate": 1.5972348673058618e-05, "loss": 0.8448, "step": 12811 }, { "epoch": 0.9522110739502044, "grad_norm": 1.8500639899646008, "learning_rate": 1.5971705101217546e-05, "loss": 0.936, "step": 12812 }, { "epoch": 0.9522853957636567, "grad_norm": 1.7780765400087766, "learning_rate": 1.5971061490930942e-05, "loss": 0.81, "step": 12813 }, { "epoch": 0.9523597175771089, "grad_norm": 1.6529176382405921, "learning_rate": 1.5970417842202954e-05, "loss": 0.6972, "step": 12814 }, { "epoch": 0.9524340393905611, "grad_norm": 2.1859889919870494, "learning_rate": 1.596977415503773e-05, "loss": 0.9572, "step": 12815 }, { "epoch": 0.9525083612040134, "grad_norm": 2.582222851020535, "learning_rate": 1.5969130429439407e-05, "loss": 0.8905, "step": 12816 }, { "epoch": 0.9525826830174656, "grad_norm": 1.7989309265377968, "learning_rate": 1.5968486665412132e-05, "loss": 0.787, "step": 12817 }, { "epoch": 0.9526570048309179, "grad_norm": 2.069683913952507, "learning_rate": 1.596784286296005e-05, "loss": 0.8194, "step": 12818 }, { "epoch": 0.9527313266443701, "grad_norm": 1.8081293065138495, "learning_rate": 1.5967199022087305e-05, "loss": 0.9716, "step": 12819 }, { "epoch": 0.9528056484578223, "grad_norm": 1.9796931774341822, "learning_rate": 1.5966555142798043e-05, "loss": 0.9376, "step": 12820 }, { "epoch": 0.9528799702712746, "grad_norm": 2.029727581303059, "learning_rate": 1.596591122509641e-05, "loss": 0.898, "step": 12821 }, { "epoch": 0.9529542920847268, "grad_norm": 1.674798206873378, "learning_rate": 1.5965267268986545e-05, "loss": 0.7456, "step": 12822 }, { "epoch": 0.9530286138981792, "grad_norm": 1.682866696692005, "learning_rate": 1.5964623274472603e-05, "loss": 0.9808, "step": 12823 }, { "epoch": 0.9531029357116314, "grad_norm": 1.8854670004982965, "learning_rate": 1.5963979241558724e-05, "loss": 0.8922, "step": 12824 }, { "epoch": 0.9531772575250836, "grad_norm": 2.0222545068078244, "learning_rate": 1.5963335170249058e-05, "loss": 0.9006, "step": 12825 }, { "epoch": 0.9532515793385359, "grad_norm": 1.9335617071496596, "learning_rate": 1.596269106054775e-05, "loss": 0.7992, "step": 12826 }, { "epoch": 0.9533259011519881, "grad_norm": 1.9805415637330905, "learning_rate": 1.5962046912458944e-05, "loss": 0.7896, "step": 12827 }, { "epoch": 0.9534002229654404, "grad_norm": 1.9559878846164567, "learning_rate": 1.596140272598679e-05, "loss": 0.9772, "step": 12828 }, { "epoch": 0.9534745447788926, "grad_norm": 2.1037977870514246, "learning_rate": 1.5960758501135435e-05, "loss": 1.0743, "step": 12829 }, { "epoch": 0.9535488665923448, "grad_norm": 1.9372738904359497, "learning_rate": 1.5960114237909025e-05, "loss": 0.9316, "step": 12830 }, { "epoch": 0.9536231884057971, "grad_norm": 1.819366610439004, "learning_rate": 1.5959469936311708e-05, "loss": 0.7332, "step": 12831 }, { "epoch": 0.9536975102192493, "grad_norm": 2.2162769683971417, "learning_rate": 1.5958825596347636e-05, "loss": 0.8384, "step": 12832 }, { "epoch": 0.9537718320327015, "grad_norm": 1.5846712688239288, "learning_rate": 1.5958181218020955e-05, "loss": 0.7383, "step": 12833 }, { "epoch": 0.9538461538461539, "grad_norm": 2.1493956857525767, "learning_rate": 1.595753680133581e-05, "loss": 0.9949, "step": 12834 }, { "epoch": 0.9539204756596061, "grad_norm": 1.9232187240749878, "learning_rate": 1.595689234629635e-05, "loss": 0.9315, "step": 12835 }, { "epoch": 0.9539947974730584, "grad_norm": 5.888553935757692, "learning_rate": 1.5956247852906728e-05, "loss": 1.0165, "step": 12836 }, { "epoch": 0.9540691192865106, "grad_norm": 1.9967723905203925, "learning_rate": 1.595560332117109e-05, "loss": 1.0552, "step": 12837 }, { "epoch": 0.9541434410999629, "grad_norm": 2.5353078409769165, "learning_rate": 1.595495875109359e-05, "loss": 0.7865, "step": 12838 }, { "epoch": 0.9542177629134151, "grad_norm": 2.0730531848462257, "learning_rate": 1.5954314142678372e-05, "loss": 0.9906, "step": 12839 }, { "epoch": 0.9542920847268673, "grad_norm": 2.414072812472365, "learning_rate": 1.595366949592959e-05, "loss": 0.9906, "step": 12840 }, { "epoch": 0.9543664065403196, "grad_norm": 2.3431523582113787, "learning_rate": 1.5953024810851393e-05, "loss": 1.0626, "step": 12841 }, { "epoch": 0.9544407283537718, "grad_norm": 2.1551118150475204, "learning_rate": 1.5952380087447927e-05, "loss": 0.9368, "step": 12842 }, { "epoch": 0.954515050167224, "grad_norm": 1.8111015913424178, "learning_rate": 1.5951735325723353e-05, "loss": 0.8424, "step": 12843 }, { "epoch": 0.9545893719806763, "grad_norm": 2.073660907651326, "learning_rate": 1.5951090525681813e-05, "loss": 0.9489, "step": 12844 }, { "epoch": 0.9546636937941286, "grad_norm": 2.0753848779232267, "learning_rate": 1.5950445687327462e-05, "loss": 1.0822, "step": 12845 }, { "epoch": 0.9547380156075809, "grad_norm": 1.5844141334229498, "learning_rate": 1.594980081066445e-05, "loss": 0.7559, "step": 12846 }, { "epoch": 0.9548123374210331, "grad_norm": 2.0037907686661613, "learning_rate": 1.5949155895696925e-05, "loss": 0.9426, "step": 12847 }, { "epoch": 0.9548866592344853, "grad_norm": 2.0055877037623633, "learning_rate": 1.5948510942429046e-05, "loss": 0.8433, "step": 12848 }, { "epoch": 0.9549609810479376, "grad_norm": 1.6945984740825517, "learning_rate": 1.594786595086496e-05, "loss": 0.6671, "step": 12849 }, { "epoch": 0.9550353028613898, "grad_norm": 1.5837866510238858, "learning_rate": 1.594722092100883e-05, "loss": 0.7016, "step": 12850 }, { "epoch": 0.9551096246748421, "grad_norm": 1.8457939972020354, "learning_rate": 1.5946575852864792e-05, "loss": 0.878, "step": 12851 }, { "epoch": 0.9551839464882943, "grad_norm": 2.3862730227879387, "learning_rate": 1.5945930746437012e-05, "loss": 0.9158, "step": 12852 }, { "epoch": 0.9552582683017465, "grad_norm": 1.5705522069247917, "learning_rate": 1.5945285601729634e-05, "loss": 0.7559, "step": 12853 }, { "epoch": 0.9553325901151988, "grad_norm": 1.7546266381335816, "learning_rate": 1.594464041874682e-05, "loss": 0.7954, "step": 12854 }, { "epoch": 0.955406911928651, "grad_norm": 1.5522874442257835, "learning_rate": 1.5943995197492716e-05, "loss": 0.6048, "step": 12855 }, { "epoch": 0.9554812337421033, "grad_norm": 2.128426833415959, "learning_rate": 1.5943349937971483e-05, "loss": 0.8855, "step": 12856 }, { "epoch": 0.9555555555555556, "grad_norm": 2.2469487758001585, "learning_rate": 1.5942704640187268e-05, "loss": 0.6934, "step": 12857 }, { "epoch": 0.9556298773690078, "grad_norm": 2.6114637516463857, "learning_rate": 1.5942059304144228e-05, "loss": 0.7833, "step": 12858 }, { "epoch": 0.9557041991824601, "grad_norm": 2.6802002610097975, "learning_rate": 1.594141392984652e-05, "loss": 1.0046, "step": 12859 }, { "epoch": 0.9557785209959123, "grad_norm": 1.9916744374040019, "learning_rate": 1.59407685172983e-05, "loss": 0.8116, "step": 12860 }, { "epoch": 0.9558528428093646, "grad_norm": 1.6834606345080003, "learning_rate": 1.5940123066503717e-05, "loss": 0.8369, "step": 12861 }, { "epoch": 0.9559271646228168, "grad_norm": 1.88631967231087, "learning_rate": 1.593947757746693e-05, "loss": 0.8418, "step": 12862 }, { "epoch": 0.956001486436269, "grad_norm": 2.102313542505147, "learning_rate": 1.5938832050192098e-05, "loss": 0.8309, "step": 12863 }, { "epoch": 0.9560758082497213, "grad_norm": 6.111106768932156, "learning_rate": 1.593818648468337e-05, "loss": 0.8426, "step": 12864 }, { "epoch": 0.9561501300631735, "grad_norm": 1.7732730000744574, "learning_rate": 1.5937540880944905e-05, "loss": 0.7876, "step": 12865 }, { "epoch": 0.9562244518766257, "grad_norm": 1.8285174634116295, "learning_rate": 1.5936895238980862e-05, "loss": 0.7647, "step": 12866 }, { "epoch": 0.956298773690078, "grad_norm": 1.526475303234361, "learning_rate": 1.5936249558795397e-05, "loss": 0.7804, "step": 12867 }, { "epoch": 0.9563730955035303, "grad_norm": 1.7369643004933342, "learning_rate": 1.5935603840392665e-05, "loss": 0.8915, "step": 12868 }, { "epoch": 0.9564474173169826, "grad_norm": 1.9088705785028164, "learning_rate": 1.593495808377682e-05, "loss": 0.8292, "step": 12869 }, { "epoch": 0.9565217391304348, "grad_norm": 1.9994762207372523, "learning_rate": 1.5934312288952026e-05, "loss": 0.8491, "step": 12870 }, { "epoch": 0.956596060943887, "grad_norm": 2.0407440610228114, "learning_rate": 1.5933666455922433e-05, "loss": 0.8677, "step": 12871 }, { "epoch": 0.9566703827573393, "grad_norm": 2.0086146115691252, "learning_rate": 1.5933020584692213e-05, "loss": 1.0401, "step": 12872 }, { "epoch": 0.9567447045707915, "grad_norm": 2.0008962623503224, "learning_rate": 1.5932374675265503e-05, "loss": 0.9518, "step": 12873 }, { "epoch": 0.9568190263842438, "grad_norm": 1.4512546424909911, "learning_rate": 1.5931728727646478e-05, "loss": 0.712, "step": 12874 }, { "epoch": 0.956893348197696, "grad_norm": 1.7250520709438797, "learning_rate": 1.593108274183929e-05, "loss": 0.6807, "step": 12875 }, { "epoch": 0.9569676700111482, "grad_norm": 1.6177751174778827, "learning_rate": 1.59304367178481e-05, "loss": 0.7192, "step": 12876 }, { "epoch": 0.9570419918246005, "grad_norm": 1.8580550790771746, "learning_rate": 1.5929790655677068e-05, "loss": 0.7744, "step": 12877 }, { "epoch": 0.9571163136380527, "grad_norm": 1.7710146146116272, "learning_rate": 1.592914455533035e-05, "loss": 0.8324, "step": 12878 }, { "epoch": 0.9571906354515051, "grad_norm": 2.177703860106993, "learning_rate": 1.5928498416812102e-05, "loss": 0.8969, "step": 12879 }, { "epoch": 0.9572649572649573, "grad_norm": 1.947882274762614, "learning_rate": 1.5927852240126494e-05, "loss": 0.8748, "step": 12880 }, { "epoch": 0.9573392790784095, "grad_norm": 1.7267765917235522, "learning_rate": 1.592720602527768e-05, "loss": 0.9134, "step": 12881 }, { "epoch": 0.9574136008918618, "grad_norm": 1.8475308442937564, "learning_rate": 1.5926559772269823e-05, "loss": 0.8939, "step": 12882 }, { "epoch": 0.957487922705314, "grad_norm": 1.5452274658429508, "learning_rate": 1.592591348110708e-05, "loss": 0.6343, "step": 12883 }, { "epoch": 0.9575622445187663, "grad_norm": 2.095483704904943, "learning_rate": 1.592526715179361e-05, "loss": 0.849, "step": 12884 }, { "epoch": 0.9576365663322185, "grad_norm": 2.507177457397483, "learning_rate": 1.5924620784333584e-05, "loss": 0.76, "step": 12885 }, { "epoch": 0.9577108881456707, "grad_norm": 2.4830917678676863, "learning_rate": 1.5923974378731152e-05, "loss": 1.0112, "step": 12886 }, { "epoch": 0.957785209959123, "grad_norm": 1.9628938740682933, "learning_rate": 1.5923327934990483e-05, "loss": 0.8486, "step": 12887 }, { "epoch": 0.9578595317725752, "grad_norm": 1.4535278071935207, "learning_rate": 1.5922681453115732e-05, "loss": 0.7052, "step": 12888 }, { "epoch": 0.9579338535860275, "grad_norm": 1.535845947351157, "learning_rate": 1.592203493311107e-05, "loss": 0.6448, "step": 12889 }, { "epoch": 0.9580081753994798, "grad_norm": 1.8411664763749283, "learning_rate": 1.5921388374980653e-05, "loss": 0.8784, "step": 12890 }, { "epoch": 0.958082497212932, "grad_norm": 1.77673993272068, "learning_rate": 1.5920741778728644e-05, "loss": 0.8928, "step": 12891 }, { "epoch": 0.9581568190263843, "grad_norm": 1.8791984385445413, "learning_rate": 1.5920095144359205e-05, "loss": 0.9069, "step": 12892 }, { "epoch": 0.9582311408398365, "grad_norm": 2.258732097413108, "learning_rate": 1.5919448471876505e-05, "loss": 0.9115, "step": 12893 }, { "epoch": 0.9583054626532888, "grad_norm": 2.1896679157771737, "learning_rate": 1.59188017612847e-05, "loss": 0.926, "step": 12894 }, { "epoch": 0.958379784466741, "grad_norm": 2.434520047809123, "learning_rate": 1.5918155012587956e-05, "loss": 0.9, "step": 12895 }, { "epoch": 0.9584541062801932, "grad_norm": 1.8960660732573689, "learning_rate": 1.591750822579044e-05, "loss": 0.8875, "step": 12896 }, { "epoch": 0.9585284280936455, "grad_norm": 1.8352948347201228, "learning_rate": 1.591686140089631e-05, "loss": 0.7806, "step": 12897 }, { "epoch": 0.9586027499070977, "grad_norm": 2.424759665365206, "learning_rate": 1.5916214537909734e-05, "loss": 1.071, "step": 12898 }, { "epoch": 0.95867707172055, "grad_norm": 1.515670392234148, "learning_rate": 1.5915567636834874e-05, "loss": 0.7089, "step": 12899 }, { "epoch": 0.9587513935340022, "grad_norm": 1.9436222219595716, "learning_rate": 1.59149206976759e-05, "loss": 0.9026, "step": 12900 }, { "epoch": 0.9588257153474545, "grad_norm": 2.0470625984298847, "learning_rate": 1.5914273720436972e-05, "loss": 0.9619, "step": 12901 }, { "epoch": 0.9589000371609068, "grad_norm": 1.8943821044062747, "learning_rate": 1.5913626705122258e-05, "loss": 0.7765, "step": 12902 }, { "epoch": 0.958974358974359, "grad_norm": 1.9500826631872281, "learning_rate": 1.591297965173592e-05, "loss": 0.8305, "step": 12903 }, { "epoch": 0.9590486807878112, "grad_norm": 1.7988822338971389, "learning_rate": 1.5912332560282128e-05, "loss": 0.8911, "step": 12904 }, { "epoch": 0.9591230026012635, "grad_norm": 1.8748895400358567, "learning_rate": 1.5911685430765044e-05, "loss": 0.8524, "step": 12905 }, { "epoch": 0.9591973244147157, "grad_norm": 3.1707506048271807, "learning_rate": 1.5911038263188835e-05, "loss": 0.9346, "step": 12906 }, { "epoch": 0.959271646228168, "grad_norm": 2.27310548912346, "learning_rate": 1.591039105755767e-05, "loss": 0.9573, "step": 12907 }, { "epoch": 0.9593459680416202, "grad_norm": 1.8009924694190091, "learning_rate": 1.5909743813875713e-05, "loss": 0.8404, "step": 12908 }, { "epoch": 0.9594202898550724, "grad_norm": 1.9316899486987764, "learning_rate": 1.5909096532147133e-05, "loss": 0.907, "step": 12909 }, { "epoch": 0.9594946116685247, "grad_norm": 1.4752402064467822, "learning_rate": 1.5908449212376096e-05, "loss": 0.6832, "step": 12910 }, { "epoch": 0.9595689334819769, "grad_norm": 1.517728854632163, "learning_rate": 1.590780185456677e-05, "loss": 0.7294, "step": 12911 }, { "epoch": 0.9596432552954292, "grad_norm": 1.7871077142754155, "learning_rate": 1.590715445872332e-05, "loss": 0.7386, "step": 12912 }, { "epoch": 0.9597175771088815, "grad_norm": 1.864961301723185, "learning_rate": 1.5906507024849917e-05, "loss": 0.847, "step": 12913 }, { "epoch": 0.9597918989223337, "grad_norm": 2.04820254884854, "learning_rate": 1.5905859552950732e-05, "loss": 0.8394, "step": 12914 }, { "epoch": 0.959866220735786, "grad_norm": 2.100318967794265, "learning_rate": 1.5905212043029925e-05, "loss": 0.9387, "step": 12915 }, { "epoch": 0.9599405425492382, "grad_norm": 2.025828184416707, "learning_rate": 1.590456449509167e-05, "loss": 0.8872, "step": 12916 }, { "epoch": 0.9600148643626905, "grad_norm": 1.9176269095295206, "learning_rate": 1.5903916909140136e-05, "loss": 0.912, "step": 12917 }, { "epoch": 0.9600891861761427, "grad_norm": 1.795605769788913, "learning_rate": 1.5903269285179492e-05, "loss": 0.8438, "step": 12918 }, { "epoch": 0.9601635079895949, "grad_norm": 2.26988460558081, "learning_rate": 1.5902621623213907e-05, "loss": 0.7631, "step": 12919 }, { "epoch": 0.9602378298030472, "grad_norm": 3.920415713556687, "learning_rate": 1.590197392324755e-05, "loss": 1.1835, "step": 12920 }, { "epoch": 0.9603121516164994, "grad_norm": 2.560207820393461, "learning_rate": 1.5901326185284585e-05, "loss": 0.9097, "step": 12921 }, { "epoch": 0.9603864734299516, "grad_norm": 2.039450929982026, "learning_rate": 1.5900678409329195e-05, "loss": 0.8933, "step": 12922 }, { "epoch": 0.9604607952434039, "grad_norm": 2.006623209517341, "learning_rate": 1.5900030595385546e-05, "loss": 0.9145, "step": 12923 }, { "epoch": 0.9605351170568562, "grad_norm": 1.857273783601542, "learning_rate": 1.5899382743457802e-05, "loss": 0.8976, "step": 12924 }, { "epoch": 0.9606094388703085, "grad_norm": 1.7493771551694917, "learning_rate": 1.5898734853550134e-05, "loss": 0.809, "step": 12925 }, { "epoch": 0.9606837606837607, "grad_norm": 1.7875330468841413, "learning_rate": 1.5898086925666724e-05, "loss": 0.8782, "step": 12926 }, { "epoch": 0.960758082497213, "grad_norm": 2.196235761417498, "learning_rate": 1.5897438959811734e-05, "loss": 0.8354, "step": 12927 }, { "epoch": 0.9608324043106652, "grad_norm": 3.172754639515235, "learning_rate": 1.5896790955989336e-05, "loss": 0.8607, "step": 12928 }, { "epoch": 0.9609067261241174, "grad_norm": 3.1346920771253437, "learning_rate": 1.5896142914203707e-05, "loss": 0.9238, "step": 12929 }, { "epoch": 0.9609810479375697, "grad_norm": 1.9763593824278265, "learning_rate": 1.589549483445901e-05, "loss": 0.6834, "step": 12930 }, { "epoch": 0.9610553697510219, "grad_norm": 2.097536072545584, "learning_rate": 1.589484671675943e-05, "loss": 0.8551, "step": 12931 }, { "epoch": 0.9611296915644741, "grad_norm": 2.006881656376115, "learning_rate": 1.5894198561109128e-05, "loss": 0.936, "step": 12932 }, { "epoch": 0.9612040133779264, "grad_norm": 2.0025647481246702, "learning_rate": 1.5893550367512284e-05, "loss": 0.865, "step": 12933 }, { "epoch": 0.9612783351913786, "grad_norm": 2.1609789096435157, "learning_rate": 1.5892902135973068e-05, "loss": 0.9902, "step": 12934 }, { "epoch": 0.961352657004831, "grad_norm": 1.7856874867706563, "learning_rate": 1.589225386649566e-05, "loss": 0.8128, "step": 12935 }, { "epoch": 0.9614269788182832, "grad_norm": 1.7361035319050677, "learning_rate": 1.5891605559084218e-05, "loss": 0.9699, "step": 12936 }, { "epoch": 0.9615013006317354, "grad_norm": 2.113296623461996, "learning_rate": 1.5890957213742926e-05, "loss": 0.8656, "step": 12937 }, { "epoch": 0.9615756224451877, "grad_norm": 1.684650973818896, "learning_rate": 1.589030883047596e-05, "loss": 0.8357, "step": 12938 }, { "epoch": 0.9616499442586399, "grad_norm": 1.8558303579561721, "learning_rate": 1.588966040928749e-05, "loss": 0.6034, "step": 12939 }, { "epoch": 0.9617242660720922, "grad_norm": 2.094332414185796, "learning_rate": 1.5889011950181696e-05, "loss": 0.8015, "step": 12940 }, { "epoch": 0.9617985878855444, "grad_norm": 3.281322722109246, "learning_rate": 1.5888363453162743e-05, "loss": 0.8657, "step": 12941 }, { "epoch": 0.9618729096989966, "grad_norm": 1.5983605822280476, "learning_rate": 1.5887714918234817e-05, "loss": 0.6455, "step": 12942 }, { "epoch": 0.9619472315124489, "grad_norm": 2.0073516403630487, "learning_rate": 1.588706634540208e-05, "loss": 0.8737, "step": 12943 }, { "epoch": 0.9620215533259011, "grad_norm": 2.010051673083658, "learning_rate": 1.5886417734668722e-05, "loss": 0.8422, "step": 12944 }, { "epoch": 0.9620958751393534, "grad_norm": 1.6581722458482697, "learning_rate": 1.5885769086038908e-05, "loss": 0.7802, "step": 12945 }, { "epoch": 0.9621701969528057, "grad_norm": 1.7825851878047836, "learning_rate": 1.588512039951682e-05, "loss": 0.8674, "step": 12946 }, { "epoch": 0.9622445187662579, "grad_norm": 1.622324066548459, "learning_rate": 1.5884471675106635e-05, "loss": 0.7052, "step": 12947 }, { "epoch": 0.9623188405797102, "grad_norm": 1.6235599113369477, "learning_rate": 1.5883822912812522e-05, "loss": 0.8513, "step": 12948 }, { "epoch": 0.9623931623931624, "grad_norm": 2.141903299219896, "learning_rate": 1.5883174112638668e-05, "loss": 0.9783, "step": 12949 }, { "epoch": 0.9624674842066147, "grad_norm": 1.76439253663313, "learning_rate": 1.588252527458924e-05, "loss": 0.742, "step": 12950 }, { "epoch": 0.9625418060200669, "grad_norm": 1.7238994235435976, "learning_rate": 1.588187639866842e-05, "loss": 0.842, "step": 12951 }, { "epoch": 0.9626161278335191, "grad_norm": 2.4043904422465725, "learning_rate": 1.5881227484880386e-05, "loss": 0.86, "step": 12952 }, { "epoch": 0.9626904496469714, "grad_norm": 2.3404726470812767, "learning_rate": 1.5880578533229312e-05, "loss": 0.866, "step": 12953 }, { "epoch": 0.9627647714604236, "grad_norm": 1.912863624704632, "learning_rate": 1.5879929543719383e-05, "loss": 0.7724, "step": 12954 }, { "epoch": 0.9628390932738758, "grad_norm": 2.0421005399770973, "learning_rate": 1.587928051635477e-05, "loss": 0.8372, "step": 12955 }, { "epoch": 0.9629134150873281, "grad_norm": 2.0969799915001084, "learning_rate": 1.587863145113965e-05, "loss": 0.964, "step": 12956 }, { "epoch": 0.9629877369007804, "grad_norm": 2.3034660511668146, "learning_rate": 1.5877982348078212e-05, "loss": 0.8982, "step": 12957 }, { "epoch": 0.9630620587142327, "grad_norm": 1.7068173115741663, "learning_rate": 1.5877333207174628e-05, "loss": 0.5856, "step": 12958 }, { "epoch": 0.9631363805276849, "grad_norm": 1.4836749517385133, "learning_rate": 1.5876684028433076e-05, "loss": 0.7544, "step": 12959 }, { "epoch": 0.9632107023411371, "grad_norm": 1.8191211952627464, "learning_rate": 1.5876034811857737e-05, "loss": 0.8596, "step": 12960 }, { "epoch": 0.9632850241545894, "grad_norm": 1.9999799046867193, "learning_rate": 1.5875385557452792e-05, "loss": 0.8499, "step": 12961 }, { "epoch": 0.9633593459680416, "grad_norm": 1.7647752330868676, "learning_rate": 1.5874736265222415e-05, "loss": 0.6614, "step": 12962 }, { "epoch": 0.9634336677814939, "grad_norm": 2.423205212633414, "learning_rate": 1.5874086935170797e-05, "loss": 0.8914, "step": 12963 }, { "epoch": 0.9635079895949461, "grad_norm": 1.5813823373319253, "learning_rate": 1.587343756730211e-05, "loss": 0.642, "step": 12964 }, { "epoch": 0.9635823114083983, "grad_norm": 2.708112779597971, "learning_rate": 1.5872788161620536e-05, "loss": 0.651, "step": 12965 }, { "epoch": 0.9636566332218506, "grad_norm": 2.335031655520349, "learning_rate": 1.5872138718130258e-05, "loss": 0.7642, "step": 12966 }, { "epoch": 0.9637309550353028, "grad_norm": 1.457400792324652, "learning_rate": 1.5871489236835452e-05, "loss": 0.5362, "step": 12967 }, { "epoch": 0.963805276848755, "grad_norm": 1.8564552933756553, "learning_rate": 1.5870839717740307e-05, "loss": 0.872, "step": 12968 }, { "epoch": 0.9638795986622074, "grad_norm": 1.7913878391604172, "learning_rate": 1.5870190160849e-05, "loss": 0.7804, "step": 12969 }, { "epoch": 0.9639539204756596, "grad_norm": 1.555112186326816, "learning_rate": 1.586954056616571e-05, "loss": 0.7225, "step": 12970 }, { "epoch": 0.9640282422891119, "grad_norm": 1.5178848784468668, "learning_rate": 1.5868890933694625e-05, "loss": 0.5715, "step": 12971 }, { "epoch": 0.9641025641025641, "grad_norm": 1.7033917182790024, "learning_rate": 1.5868241263439925e-05, "loss": 0.8103, "step": 12972 }, { "epoch": 0.9641768859160164, "grad_norm": 1.8123995883322142, "learning_rate": 1.586759155540579e-05, "loss": 0.8882, "step": 12973 }, { "epoch": 0.9642512077294686, "grad_norm": 1.7885046850571336, "learning_rate": 1.586694180959641e-05, "loss": 0.936, "step": 12974 }, { "epoch": 0.9643255295429208, "grad_norm": 1.8711350031150187, "learning_rate": 1.586629202601596e-05, "loss": 1.0439, "step": 12975 }, { "epoch": 0.9643998513563731, "grad_norm": 1.5716396064790221, "learning_rate": 1.5865642204668623e-05, "loss": 0.7846, "step": 12976 }, { "epoch": 0.9644741731698253, "grad_norm": 2.6540432571342194, "learning_rate": 1.586499234555859e-05, "loss": 0.676, "step": 12977 }, { "epoch": 0.9645484949832775, "grad_norm": 1.9007856439472226, "learning_rate": 1.586434244869004e-05, "loss": 0.8507, "step": 12978 }, { "epoch": 0.9646228167967298, "grad_norm": 1.694963105734476, "learning_rate": 1.5863692514067155e-05, "loss": 0.824, "step": 12979 }, { "epoch": 0.9646971386101821, "grad_norm": 1.9052984391691026, "learning_rate": 1.5863042541694126e-05, "loss": 0.8154, "step": 12980 }, { "epoch": 0.9647714604236344, "grad_norm": 1.9018260584026503, "learning_rate": 1.586239253157513e-05, "loss": 0.7349, "step": 12981 }, { "epoch": 0.9648457822370866, "grad_norm": 1.6068167335258003, "learning_rate": 1.586174248371436e-05, "loss": 0.7867, "step": 12982 }, { "epoch": 0.9649201040505389, "grad_norm": 1.6422338054001802, "learning_rate": 1.586109239811599e-05, "loss": 0.8332, "step": 12983 }, { "epoch": 0.9649944258639911, "grad_norm": 1.6128632568484744, "learning_rate": 1.586044227478421e-05, "loss": 0.6329, "step": 12984 }, { "epoch": 0.9650687476774433, "grad_norm": 1.8475570299857842, "learning_rate": 1.585979211372321e-05, "loss": 1.0714, "step": 12985 }, { "epoch": 0.9651430694908956, "grad_norm": 2.0335736481320534, "learning_rate": 1.5859141914937177e-05, "loss": 0.9676, "step": 12986 }, { "epoch": 0.9652173913043478, "grad_norm": 2.0509381168129113, "learning_rate": 1.5858491678430286e-05, "loss": 0.6354, "step": 12987 }, { "epoch": 0.9652917131178, "grad_norm": 2.0067488323206732, "learning_rate": 1.5857841404206733e-05, "loss": 0.6947, "step": 12988 }, { "epoch": 0.9653660349312523, "grad_norm": 1.7192469584361783, "learning_rate": 1.5857191092270697e-05, "loss": 0.7777, "step": 12989 }, { "epoch": 0.9654403567447045, "grad_norm": 2.2323944188769422, "learning_rate": 1.5856540742626374e-05, "loss": 0.7782, "step": 12990 }, { "epoch": 0.9655146785581569, "grad_norm": 1.7750251180406817, "learning_rate": 1.5855890355277942e-05, "loss": 0.8342, "step": 12991 }, { "epoch": 0.9655890003716091, "grad_norm": 1.8788435850779321, "learning_rate": 1.5855239930229594e-05, "loss": 0.8796, "step": 12992 }, { "epoch": 0.9656633221850613, "grad_norm": 1.6413609380524523, "learning_rate": 1.5854589467485512e-05, "loss": 0.7376, "step": 12993 }, { "epoch": 0.9657376439985136, "grad_norm": 1.6387691252701544, "learning_rate": 1.5853938967049887e-05, "loss": 0.8261, "step": 12994 }, { "epoch": 0.9658119658119658, "grad_norm": 1.8127634815012366, "learning_rate": 1.585328842892691e-05, "loss": 0.8436, "step": 12995 }, { "epoch": 0.9658862876254181, "grad_norm": 1.5028686837040364, "learning_rate": 1.5852637853120762e-05, "loss": 0.7435, "step": 12996 }, { "epoch": 0.9659606094388703, "grad_norm": 1.7600051562101224, "learning_rate": 1.585198723963564e-05, "loss": 0.6923, "step": 12997 }, { "epoch": 0.9660349312523225, "grad_norm": 1.5763635393900683, "learning_rate": 1.5851336588475724e-05, "loss": 0.6849, "step": 12998 }, { "epoch": 0.9661092530657748, "grad_norm": 1.9513866668737547, "learning_rate": 1.5850685899645206e-05, "loss": 0.84, "step": 12999 }, { "epoch": 0.966183574879227, "grad_norm": 1.8379685677434625, "learning_rate": 1.5850035173148278e-05, "loss": 0.7397, "step": 13000 }, { "epoch": 0.9662578966926793, "grad_norm": 1.6782210747701252, "learning_rate": 1.5849384408989127e-05, "loss": 0.6137, "step": 13001 }, { "epoch": 0.9663322185061316, "grad_norm": 3.4853892920266256, "learning_rate": 1.5848733607171943e-05, "loss": 0.7687, "step": 13002 }, { "epoch": 0.9664065403195838, "grad_norm": 7.777823259897843, "learning_rate": 1.584808276770091e-05, "loss": 1.0648, "step": 13003 }, { "epoch": 0.9664808621330361, "grad_norm": 1.9627514803450392, "learning_rate": 1.584743189058023e-05, "loss": 0.8864, "step": 13004 }, { "epoch": 0.9665551839464883, "grad_norm": 2.0871641327759054, "learning_rate": 1.5846780975814086e-05, "loss": 0.8445, "step": 13005 }, { "epoch": 0.9666295057599406, "grad_norm": 1.8780165020299266, "learning_rate": 1.5846130023406667e-05, "loss": 0.8729, "step": 13006 }, { "epoch": 0.9667038275733928, "grad_norm": 1.787574502458398, "learning_rate": 1.5845479033362167e-05, "loss": 0.7347, "step": 13007 }, { "epoch": 0.966778149386845, "grad_norm": 2.0354308311127847, "learning_rate": 1.5844828005684778e-05, "loss": 1.0877, "step": 13008 }, { "epoch": 0.9668524712002973, "grad_norm": 1.8225506683874544, "learning_rate": 1.584417694037869e-05, "loss": 0.9184, "step": 13009 }, { "epoch": 0.9669267930137495, "grad_norm": 1.8332357984000067, "learning_rate": 1.5843525837448092e-05, "loss": 0.7827, "step": 13010 }, { "epoch": 0.9670011148272017, "grad_norm": 1.8528421515287625, "learning_rate": 1.5842874696897177e-05, "loss": 1.0739, "step": 13011 }, { "epoch": 0.967075436640654, "grad_norm": 5.639264514303954, "learning_rate": 1.584222351873014e-05, "loss": 0.9457, "step": 13012 }, { "epoch": 0.9671497584541063, "grad_norm": 1.8290036805800853, "learning_rate": 1.584157230295117e-05, "loss": 0.7245, "step": 13013 }, { "epoch": 0.9672240802675586, "grad_norm": 1.9314113126933141, "learning_rate": 1.5840921049564465e-05, "loss": 0.9416, "step": 13014 }, { "epoch": 0.9672984020810108, "grad_norm": 1.729544046671601, "learning_rate": 1.584026975857421e-05, "loss": 0.7973, "step": 13015 }, { "epoch": 0.967372723894463, "grad_norm": 1.9833352904282138, "learning_rate": 1.58396184299846e-05, "loss": 0.938, "step": 13016 }, { "epoch": 0.9674470457079153, "grad_norm": 1.4747939156926957, "learning_rate": 1.5838967063799828e-05, "loss": 0.8686, "step": 13017 }, { "epoch": 0.9675213675213675, "grad_norm": 1.7044489774234184, "learning_rate": 1.583831566002409e-05, "loss": 0.7781, "step": 13018 }, { "epoch": 0.9675956893348198, "grad_norm": 1.6101701023696, "learning_rate": 1.5837664218661583e-05, "loss": 0.8406, "step": 13019 }, { "epoch": 0.967670011148272, "grad_norm": 1.769236647432088, "learning_rate": 1.5837012739716492e-05, "loss": 0.9436, "step": 13020 }, { "epoch": 0.9677443329617242, "grad_norm": 1.9959138241116705, "learning_rate": 1.583636122319302e-05, "loss": 0.8941, "step": 13021 }, { "epoch": 0.9678186547751765, "grad_norm": 1.5268899681210304, "learning_rate": 1.5835709669095353e-05, "loss": 0.8472, "step": 13022 }, { "epoch": 0.9678929765886287, "grad_norm": 2.14729514567773, "learning_rate": 1.583505807742769e-05, "loss": 1.0066, "step": 13023 }, { "epoch": 0.967967298402081, "grad_norm": 2.7231700102410294, "learning_rate": 1.5834406448194228e-05, "loss": 0.9971, "step": 13024 }, { "epoch": 0.9680416202155333, "grad_norm": 7.14520597922066, "learning_rate": 1.583375478139916e-05, "loss": 0.8941, "step": 13025 }, { "epoch": 0.9681159420289855, "grad_norm": 2.0901303651065675, "learning_rate": 1.583310307704668e-05, "loss": 1.1102, "step": 13026 }, { "epoch": 0.9681902638424378, "grad_norm": 2.15334672483026, "learning_rate": 1.5832451335140987e-05, "loss": 0.8926, "step": 13027 }, { "epoch": 0.96826458565589, "grad_norm": 1.5574489260966704, "learning_rate": 1.5831799555686272e-05, "loss": 0.6558, "step": 13028 }, { "epoch": 0.9683389074693423, "grad_norm": 1.8426678042688167, "learning_rate": 1.5831147738686737e-05, "loss": 0.7496, "step": 13029 }, { "epoch": 0.9684132292827945, "grad_norm": 2.0666602515286434, "learning_rate": 1.5830495884146573e-05, "loss": 0.752, "step": 13030 }, { "epoch": 0.9684875510962467, "grad_norm": 2.482908112706816, "learning_rate": 1.5829843992069985e-05, "loss": 0.8007, "step": 13031 }, { "epoch": 0.968561872909699, "grad_norm": 2.0882812215863424, "learning_rate": 1.5829192062461157e-05, "loss": 0.8578, "step": 13032 }, { "epoch": 0.9686361947231512, "grad_norm": 1.9150998916381763, "learning_rate": 1.5828540095324297e-05, "loss": 0.8247, "step": 13033 }, { "epoch": 0.9687105165366034, "grad_norm": 1.674275281561286, "learning_rate": 1.5827888090663596e-05, "loss": 0.7053, "step": 13034 }, { "epoch": 0.9687848383500557, "grad_norm": 2.844276648925874, "learning_rate": 1.5827236048483253e-05, "loss": 0.8221, "step": 13035 }, { "epoch": 0.968859160163508, "grad_norm": 1.5941699307123371, "learning_rate": 1.582658396878747e-05, "loss": 0.8411, "step": 13036 }, { "epoch": 0.9689334819769603, "grad_norm": 1.9662481720557055, "learning_rate": 1.5825931851580442e-05, "loss": 0.9196, "step": 13037 }, { "epoch": 0.9690078037904125, "grad_norm": 1.7948577544557194, "learning_rate": 1.5825279696866364e-05, "loss": 0.8816, "step": 13038 }, { "epoch": 0.9690821256038648, "grad_norm": 2.13497653037345, "learning_rate": 1.5824627504649438e-05, "loss": 0.9207, "step": 13039 }, { "epoch": 0.969156447417317, "grad_norm": 2.0336866299080425, "learning_rate": 1.5823975274933862e-05, "loss": 0.8228, "step": 13040 }, { "epoch": 0.9692307692307692, "grad_norm": 2.233093486646075, "learning_rate": 1.5823323007723837e-05, "loss": 0.8586, "step": 13041 }, { "epoch": 0.9693050910442215, "grad_norm": 2.121106989523579, "learning_rate": 1.5822670703023558e-05, "loss": 1.0464, "step": 13042 }, { "epoch": 0.9693794128576737, "grad_norm": 2.169950035295927, "learning_rate": 1.582201836083723e-05, "loss": 0.7782, "step": 13043 }, { "epoch": 0.9694537346711259, "grad_norm": 2.0092203688530805, "learning_rate": 1.582136598116905e-05, "loss": 0.8108, "step": 13044 }, { "epoch": 0.9695280564845782, "grad_norm": 2.2327345673022556, "learning_rate": 1.5820713564023215e-05, "loss": 0.8432, "step": 13045 }, { "epoch": 0.9696023782980304, "grad_norm": 1.826897995917351, "learning_rate": 1.582006110940393e-05, "loss": 0.9502, "step": 13046 }, { "epoch": 0.9696767001114828, "grad_norm": 1.5805504834358266, "learning_rate": 1.5819408617315394e-05, "loss": 0.7052, "step": 13047 }, { "epoch": 0.969751021924935, "grad_norm": 1.5252438117550065, "learning_rate": 1.581875608776181e-05, "loss": 0.5494, "step": 13048 }, { "epoch": 0.9698253437383872, "grad_norm": 1.6250302905143617, "learning_rate": 1.581810352074737e-05, "loss": 0.7154, "step": 13049 }, { "epoch": 0.9698996655518395, "grad_norm": 1.9392052327454872, "learning_rate": 1.5817450916276284e-05, "loss": 0.9265, "step": 13050 }, { "epoch": 0.9699739873652917, "grad_norm": 1.7086098018677751, "learning_rate": 1.581679827435275e-05, "loss": 0.7644, "step": 13051 }, { "epoch": 0.970048309178744, "grad_norm": 3.200470564174461, "learning_rate": 1.581614559498097e-05, "loss": 0.8966, "step": 13052 }, { "epoch": 0.9701226309921962, "grad_norm": 1.928624272061409, "learning_rate": 1.581549287816515e-05, "loss": 0.8153, "step": 13053 }, { "epoch": 0.9701969528056484, "grad_norm": 2.511788070304034, "learning_rate": 1.5814840123909488e-05, "loss": 0.7629, "step": 13054 }, { "epoch": 0.9702712746191007, "grad_norm": 2.3931929369821896, "learning_rate": 1.5814187332218183e-05, "loss": 0.7048, "step": 13055 }, { "epoch": 0.9703455964325529, "grad_norm": 1.8882697617774042, "learning_rate": 1.5813534503095446e-05, "loss": 0.7832, "step": 13056 }, { "epoch": 0.9704199182460052, "grad_norm": 1.900423186358663, "learning_rate": 1.581288163654547e-05, "loss": 0.8163, "step": 13057 }, { "epoch": 0.9704942400594575, "grad_norm": 1.9620504413818545, "learning_rate": 1.581222873257247e-05, "loss": 0.9764, "step": 13058 }, { "epoch": 0.9705685618729097, "grad_norm": 2.3529631690452515, "learning_rate": 1.5811575791180643e-05, "loss": 1.0028, "step": 13059 }, { "epoch": 0.970642883686362, "grad_norm": 1.8815059170274304, "learning_rate": 1.5810922812374187e-05, "loss": 0.7537, "step": 13060 }, { "epoch": 0.9707172054998142, "grad_norm": 2.1516530156044564, "learning_rate": 1.5810269796157315e-05, "loss": 1.0044, "step": 13061 }, { "epoch": 0.9707915273132665, "grad_norm": 1.9777196148165583, "learning_rate": 1.5809616742534227e-05, "loss": 0.7075, "step": 13062 }, { "epoch": 0.9708658491267187, "grad_norm": 1.5009140495099778, "learning_rate": 1.580896365150913e-05, "loss": 0.67, "step": 13063 }, { "epoch": 0.9709401709401709, "grad_norm": 1.8828415198814812, "learning_rate": 1.5808310523086223e-05, "loss": 0.9647, "step": 13064 }, { "epoch": 0.9710144927536232, "grad_norm": 1.70464014360146, "learning_rate": 1.5807657357269715e-05, "loss": 0.6853, "step": 13065 }, { "epoch": 0.9710888145670754, "grad_norm": 1.9487653528112163, "learning_rate": 1.580700415406381e-05, "loss": 0.7012, "step": 13066 }, { "epoch": 0.9711631363805276, "grad_norm": 1.6544339941727975, "learning_rate": 1.5806350913472716e-05, "loss": 0.7457, "step": 13067 }, { "epoch": 0.9712374581939799, "grad_norm": 5.4354563339008415, "learning_rate": 1.5805697635500633e-05, "loss": 0.9032, "step": 13068 }, { "epoch": 0.9713117800074322, "grad_norm": 3.6421506034167597, "learning_rate": 1.580504432015177e-05, "loss": 0.946, "step": 13069 }, { "epoch": 0.9713861018208845, "grad_norm": 1.9698546099621217, "learning_rate": 1.580439096743034e-05, "loss": 0.9489, "step": 13070 }, { "epoch": 0.9714604236343367, "grad_norm": 1.8181104654582307, "learning_rate": 1.5803737577340534e-05, "loss": 0.9857, "step": 13071 }, { "epoch": 0.971534745447789, "grad_norm": 1.8320129223659567, "learning_rate": 1.580308414988657e-05, "loss": 0.793, "step": 13072 }, { "epoch": 0.9716090672612412, "grad_norm": 2.0938724547336887, "learning_rate": 1.5802430685072647e-05, "loss": 0.818, "step": 13073 }, { "epoch": 0.9716833890746934, "grad_norm": 1.8439269223920434, "learning_rate": 1.580177718290298e-05, "loss": 0.8965, "step": 13074 }, { "epoch": 0.9717577108881457, "grad_norm": 1.4756536179839783, "learning_rate": 1.5801123643381774e-05, "loss": 0.7538, "step": 13075 }, { "epoch": 0.9718320327015979, "grad_norm": 2.1432249283213523, "learning_rate": 1.5800470066513232e-05, "loss": 0.9837, "step": 13076 }, { "epoch": 0.9719063545150501, "grad_norm": 2.5906958009159022, "learning_rate": 1.5799816452301565e-05, "loss": 0.6751, "step": 13077 }, { "epoch": 0.9719806763285024, "grad_norm": 1.65599159405504, "learning_rate": 1.579916280075098e-05, "loss": 0.8682, "step": 13078 }, { "epoch": 0.9720549981419546, "grad_norm": 2.2442832786949456, "learning_rate": 1.579850911186569e-05, "loss": 0.7889, "step": 13079 }, { "epoch": 0.9721293199554069, "grad_norm": 2.0871211081692187, "learning_rate": 1.5797855385649893e-05, "loss": 0.9799, "step": 13080 }, { "epoch": 0.9722036417688592, "grad_norm": 2.2379951591489937, "learning_rate": 1.5797201622107808e-05, "loss": 0.9352, "step": 13081 }, { "epoch": 0.9722779635823114, "grad_norm": 1.7525818760609095, "learning_rate": 1.5796547821243642e-05, "loss": 0.7971, "step": 13082 }, { "epoch": 0.9723522853957637, "grad_norm": 1.840798853386113, "learning_rate": 1.5795893983061597e-05, "loss": 0.8456, "step": 13083 }, { "epoch": 0.9724266072092159, "grad_norm": 1.993676510675728, "learning_rate": 1.5795240107565887e-05, "loss": 0.877, "step": 13084 }, { "epoch": 0.9725009290226682, "grad_norm": 2.282492384440813, "learning_rate": 1.5794586194760726e-05, "loss": 0.8711, "step": 13085 }, { "epoch": 0.9725752508361204, "grad_norm": 1.783470880905514, "learning_rate": 1.5793932244650317e-05, "loss": 0.7737, "step": 13086 }, { "epoch": 0.9726495726495726, "grad_norm": 1.681607778168015, "learning_rate": 1.5793278257238875e-05, "loss": 0.7188, "step": 13087 }, { "epoch": 0.9727238944630249, "grad_norm": 2.058096846002815, "learning_rate": 1.5792624232530606e-05, "loss": 1.0011, "step": 13088 }, { "epoch": 0.9727982162764771, "grad_norm": 2.2505792323812814, "learning_rate": 1.5791970170529723e-05, "loss": 1.0101, "step": 13089 }, { "epoch": 0.9728725380899294, "grad_norm": 1.993844132085146, "learning_rate": 1.5791316071240437e-05, "loss": 0.7305, "step": 13090 }, { "epoch": 0.9729468599033816, "grad_norm": 1.8512831260426152, "learning_rate": 1.5790661934666957e-05, "loss": 0.8585, "step": 13091 }, { "epoch": 0.9730211817168339, "grad_norm": 2.323821538991435, "learning_rate": 1.5790007760813496e-05, "loss": 0.9775, "step": 13092 }, { "epoch": 0.9730955035302862, "grad_norm": 1.8396876647315172, "learning_rate": 1.578935354968427e-05, "loss": 0.9225, "step": 13093 }, { "epoch": 0.9731698253437384, "grad_norm": 1.9668729277405, "learning_rate": 1.5788699301283484e-05, "loss": 0.8589, "step": 13094 }, { "epoch": 0.9732441471571907, "grad_norm": 1.6098482461577295, "learning_rate": 1.578804501561535e-05, "loss": 0.7202, "step": 13095 }, { "epoch": 0.9733184689706429, "grad_norm": 1.9599212257220837, "learning_rate": 1.5787390692684086e-05, "loss": 0.9503, "step": 13096 }, { "epoch": 0.9733927907840951, "grad_norm": 1.8068940952167274, "learning_rate": 1.5786736332493897e-05, "loss": 0.7696, "step": 13097 }, { "epoch": 0.9734671125975474, "grad_norm": 1.9429752295113971, "learning_rate": 1.5786081935049004e-05, "loss": 0.8662, "step": 13098 }, { "epoch": 0.9735414344109996, "grad_norm": 1.878046102116707, "learning_rate": 1.578542750035361e-05, "loss": 0.7372, "step": 13099 }, { "epoch": 0.9736157562244518, "grad_norm": 1.5342433863527525, "learning_rate": 1.578477302841194e-05, "loss": 0.7068, "step": 13100 }, { "epoch": 0.9736900780379041, "grad_norm": 2.0857850585297117, "learning_rate": 1.57841185192282e-05, "loss": 1.0288, "step": 13101 }, { "epoch": 0.9737643998513563, "grad_norm": 1.9575619768401715, "learning_rate": 1.5783463972806604e-05, "loss": 0.534, "step": 13102 }, { "epoch": 0.9738387216648087, "grad_norm": 1.9352602398604433, "learning_rate": 1.5782809389151366e-05, "loss": 0.8414, "step": 13103 }, { "epoch": 0.9739130434782609, "grad_norm": 1.5819029866111818, "learning_rate": 1.5782154768266703e-05, "loss": 0.7945, "step": 13104 }, { "epoch": 0.9739873652917131, "grad_norm": 2.1715862557511882, "learning_rate": 1.5781500110156826e-05, "loss": 0.8766, "step": 13105 }, { "epoch": 0.9740616871051654, "grad_norm": 2.0598352140057306, "learning_rate": 1.5780845414825952e-05, "loss": 0.8342, "step": 13106 }, { "epoch": 0.9741360089186176, "grad_norm": 1.6258824247133978, "learning_rate": 1.5780190682278294e-05, "loss": 0.8762, "step": 13107 }, { "epoch": 0.9742103307320699, "grad_norm": 7.384099383993501, "learning_rate": 1.577953591251807e-05, "loss": 1.0385, "step": 13108 }, { "epoch": 0.9742846525455221, "grad_norm": 1.7292811240086752, "learning_rate": 1.5778881105549494e-05, "loss": 0.8127, "step": 13109 }, { "epoch": 0.9743589743589743, "grad_norm": 15.906278267971603, "learning_rate": 1.5778226261376782e-05, "loss": 0.6882, "step": 13110 }, { "epoch": 0.9744332961724266, "grad_norm": 2.1305989372558964, "learning_rate": 1.5777571380004147e-05, "loss": 0.8733, "step": 13111 }, { "epoch": 0.9745076179858788, "grad_norm": 2.466741295562105, "learning_rate": 1.5776916461435806e-05, "loss": 1.0486, "step": 13112 }, { "epoch": 0.974581939799331, "grad_norm": 1.8179417538450011, "learning_rate": 1.577626150567598e-05, "loss": 0.843, "step": 13113 }, { "epoch": 0.9746562616127834, "grad_norm": 1.593966464000755, "learning_rate": 1.577560651272888e-05, "loss": 0.7527, "step": 13114 }, { "epoch": 0.9747305834262356, "grad_norm": 3.517799220229524, "learning_rate": 1.5774951482598726e-05, "loss": 0.802, "step": 13115 }, { "epoch": 0.9748049052396879, "grad_norm": 1.950043514549362, "learning_rate": 1.5774296415289735e-05, "loss": 0.7737, "step": 13116 }, { "epoch": 0.9748792270531401, "grad_norm": 1.8514440169500317, "learning_rate": 1.577364131080612e-05, "loss": 1.0302, "step": 13117 }, { "epoch": 0.9749535488665924, "grad_norm": 1.924403315511632, "learning_rate": 1.5772986169152105e-05, "loss": 0.8091, "step": 13118 }, { "epoch": 0.9750278706800446, "grad_norm": 1.7290184004591838, "learning_rate": 1.57723309903319e-05, "loss": 0.9452, "step": 13119 }, { "epoch": 0.9751021924934968, "grad_norm": 2.0698212705435353, "learning_rate": 1.577167577434973e-05, "loss": 1.0043, "step": 13120 }, { "epoch": 0.9751765143069491, "grad_norm": 2.5205849643386844, "learning_rate": 1.5771020521209815e-05, "loss": 0.8769, "step": 13121 }, { "epoch": 0.9752508361204013, "grad_norm": 3.1967673070511284, "learning_rate": 1.5770365230916365e-05, "loss": 0.8897, "step": 13122 }, { "epoch": 0.9753251579338535, "grad_norm": 2.446810430194128, "learning_rate": 1.5769709903473606e-05, "loss": 0.8681, "step": 13123 }, { "epoch": 0.9753994797473058, "grad_norm": 1.529744349165761, "learning_rate": 1.5769054538885748e-05, "loss": 0.5576, "step": 13124 }, { "epoch": 0.9754738015607581, "grad_norm": 1.5573146652706356, "learning_rate": 1.576839913715702e-05, "loss": 0.8351, "step": 13125 }, { "epoch": 0.9755481233742104, "grad_norm": 2.0637773359595624, "learning_rate": 1.576774369829164e-05, "loss": 0.8892, "step": 13126 }, { "epoch": 0.9756224451876626, "grad_norm": 1.9488901344982907, "learning_rate": 1.5767088222293826e-05, "loss": 0.9016, "step": 13127 }, { "epoch": 0.9756967670011149, "grad_norm": 1.9992329824094375, "learning_rate": 1.576643270916779e-05, "loss": 0.896, "step": 13128 }, { "epoch": 0.9757710888145671, "grad_norm": 1.8684534186819555, "learning_rate": 1.5765777158917765e-05, "loss": 0.7843, "step": 13129 }, { "epoch": 0.9758454106280193, "grad_norm": 2.192973540168774, "learning_rate": 1.5765121571547966e-05, "loss": 0.9433, "step": 13130 }, { "epoch": 0.9759197324414716, "grad_norm": 1.9244900040828097, "learning_rate": 1.576446594706261e-05, "loss": 0.5475, "step": 13131 }, { "epoch": 0.9759940542549238, "grad_norm": 2.0117692908266047, "learning_rate": 1.5763810285465923e-05, "loss": 0.9335, "step": 13132 }, { "epoch": 0.976068376068376, "grad_norm": 1.903028408384122, "learning_rate": 1.5763154586762128e-05, "loss": 0.8183, "step": 13133 }, { "epoch": 0.9761426978818283, "grad_norm": 2.124242875263339, "learning_rate": 1.576249885095544e-05, "loss": 0.7181, "step": 13134 }, { "epoch": 0.9762170196952805, "grad_norm": 2.0605602546327613, "learning_rate": 1.5761843078050084e-05, "loss": 0.7585, "step": 13135 }, { "epoch": 0.9762913415087329, "grad_norm": 2.65781208582699, "learning_rate": 1.5761187268050276e-05, "loss": 0.7368, "step": 13136 }, { "epoch": 0.9763656633221851, "grad_norm": 1.7120265033050939, "learning_rate": 1.576053142096025e-05, "loss": 0.7797, "step": 13137 }, { "epoch": 0.9764399851356373, "grad_norm": 1.953903627995326, "learning_rate": 1.575987553678422e-05, "loss": 0.747, "step": 13138 }, { "epoch": 0.9765143069490896, "grad_norm": 2.2180820390788223, "learning_rate": 1.575921961552641e-05, "loss": 0.7949, "step": 13139 }, { "epoch": 0.9765886287625418, "grad_norm": 1.7895028551409147, "learning_rate": 1.575856365719104e-05, "loss": 0.8599, "step": 13140 }, { "epoch": 0.9766629505759941, "grad_norm": 1.775684776551833, "learning_rate": 1.575790766178234e-05, "loss": 0.8821, "step": 13141 }, { "epoch": 0.9767372723894463, "grad_norm": 1.8659287484716656, "learning_rate": 1.5757251629304526e-05, "loss": 0.9605, "step": 13142 }, { "epoch": 0.9768115942028985, "grad_norm": 1.9101787916896966, "learning_rate": 1.5756595559761825e-05, "loss": 0.8726, "step": 13143 }, { "epoch": 0.9768859160163508, "grad_norm": 1.9450161163947117, "learning_rate": 1.5755939453158466e-05, "loss": 0.8651, "step": 13144 }, { "epoch": 0.976960237829803, "grad_norm": 1.6360804951938144, "learning_rate": 1.575528330949866e-05, "loss": 0.7957, "step": 13145 }, { "epoch": 0.9770345596432553, "grad_norm": 2.0225955844933523, "learning_rate": 1.575462712878664e-05, "loss": 0.7599, "step": 13146 }, { "epoch": 0.9771088814567075, "grad_norm": 2.1400948883788695, "learning_rate": 1.5753970911026635e-05, "loss": 1.0434, "step": 13147 }, { "epoch": 0.9771832032701598, "grad_norm": 1.765517597406711, "learning_rate": 1.575331465622286e-05, "loss": 0.7356, "step": 13148 }, { "epoch": 0.9772575250836121, "grad_norm": 2.3746556133798253, "learning_rate": 1.575265836437954e-05, "loss": 0.9225, "step": 13149 }, { "epoch": 0.9773318468970643, "grad_norm": 2.7596916159013176, "learning_rate": 1.575200203550091e-05, "loss": 1.0252, "step": 13150 }, { "epoch": 0.9774061687105166, "grad_norm": 2.0321817743438175, "learning_rate": 1.5751345669591186e-05, "loss": 0.8241, "step": 13151 }, { "epoch": 0.9774804905239688, "grad_norm": 1.8769055746777945, "learning_rate": 1.57506892666546e-05, "loss": 0.882, "step": 13152 }, { "epoch": 0.977554812337421, "grad_norm": 2.837507486497156, "learning_rate": 1.575003282669537e-05, "loss": 0.8568, "step": 13153 }, { "epoch": 0.9776291341508733, "grad_norm": 1.955955527867738, "learning_rate": 1.574937634971773e-05, "loss": 0.8171, "step": 13154 }, { "epoch": 0.9777034559643255, "grad_norm": 1.5595201412245145, "learning_rate": 1.5748719835725904e-05, "loss": 0.8114, "step": 13155 }, { "epoch": 0.9777777777777777, "grad_norm": 2.24950307942087, "learning_rate": 1.5748063284724116e-05, "loss": 0.9845, "step": 13156 }, { "epoch": 0.97785209959123, "grad_norm": 2.3801916221446615, "learning_rate": 1.5747406696716597e-05, "loss": 0.8633, "step": 13157 }, { "epoch": 0.9779264214046822, "grad_norm": 1.535465861165471, "learning_rate": 1.574675007170757e-05, "loss": 0.7838, "step": 13158 }, { "epoch": 0.9780007432181346, "grad_norm": 2.4604618163019976, "learning_rate": 1.5746093409701267e-05, "loss": 0.7693, "step": 13159 }, { "epoch": 0.9780750650315868, "grad_norm": 1.8036914669751862, "learning_rate": 1.5745436710701912e-05, "loss": 1.0603, "step": 13160 }, { "epoch": 0.978149386845039, "grad_norm": 1.9359551909816317, "learning_rate": 1.574477997471373e-05, "loss": 0.8245, "step": 13161 }, { "epoch": 0.9782237086584913, "grad_norm": 1.6760493124842941, "learning_rate": 1.5744123201740954e-05, "loss": 0.7861, "step": 13162 }, { "epoch": 0.9782980304719435, "grad_norm": 1.8670201323065247, "learning_rate": 1.5743466391787815e-05, "loss": 0.7672, "step": 13163 }, { "epoch": 0.9783723522853958, "grad_norm": 2.1323966652404067, "learning_rate": 1.574280954485853e-05, "loss": 1.0499, "step": 13164 }, { "epoch": 0.978446674098848, "grad_norm": 2.70361871714763, "learning_rate": 1.574215266095734e-05, "loss": 0.7192, "step": 13165 }, { "epoch": 0.9785209959123002, "grad_norm": 1.6679526532688982, "learning_rate": 1.574149574008847e-05, "loss": 0.8161, "step": 13166 }, { "epoch": 0.9785953177257525, "grad_norm": 1.848776131703361, "learning_rate": 1.5740838782256147e-05, "loss": 0.8099, "step": 13167 }, { "epoch": 0.9786696395392047, "grad_norm": 1.965870230776204, "learning_rate": 1.57401817874646e-05, "loss": 0.8243, "step": 13168 }, { "epoch": 0.978743961352657, "grad_norm": 1.6524720521436955, "learning_rate": 1.5739524755718062e-05, "loss": 0.5822, "step": 13169 }, { "epoch": 0.9788182831661093, "grad_norm": 1.819788018253274, "learning_rate": 1.5738867687020762e-05, "loss": 0.6809, "step": 13170 }, { "epoch": 0.9788926049795615, "grad_norm": 1.9621545445908462, "learning_rate": 1.5738210581376932e-05, "loss": 0.8457, "step": 13171 }, { "epoch": 0.9789669267930138, "grad_norm": 2.0630388705574663, "learning_rate": 1.5737553438790796e-05, "loss": 0.7867, "step": 13172 }, { "epoch": 0.979041248606466, "grad_norm": 1.7027947520098683, "learning_rate": 1.573689625926659e-05, "loss": 0.8695, "step": 13173 }, { "epoch": 0.9791155704199183, "grad_norm": 1.9089144887929566, "learning_rate": 1.5736239042808544e-05, "loss": 0.783, "step": 13174 }, { "epoch": 0.9791898922333705, "grad_norm": 2.0533419357125733, "learning_rate": 1.5735581789420887e-05, "loss": 0.7121, "step": 13175 }, { "epoch": 0.9792642140468227, "grad_norm": 1.6332449644321112, "learning_rate": 1.5734924499107852e-05, "loss": 0.6584, "step": 13176 }, { "epoch": 0.979338535860275, "grad_norm": 2.4043401572860126, "learning_rate": 1.5734267171873674e-05, "loss": 0.9258, "step": 13177 }, { "epoch": 0.9794128576737272, "grad_norm": 2.091557342195341, "learning_rate": 1.5733609807722578e-05, "loss": 0.8433, "step": 13178 }, { "epoch": 0.9794871794871794, "grad_norm": 2.5187247157302943, "learning_rate": 1.5732952406658802e-05, "loss": 1.1395, "step": 13179 }, { "epoch": 0.9795615013006317, "grad_norm": 2.3072240707088905, "learning_rate": 1.5732294968686576e-05, "loss": 1.0235, "step": 13180 }, { "epoch": 0.979635823114084, "grad_norm": 2.724102308385144, "learning_rate": 1.573163749381013e-05, "loss": 0.9273, "step": 13181 }, { "epoch": 0.9797101449275363, "grad_norm": 1.7320932181808464, "learning_rate": 1.5730979982033697e-05, "loss": 0.7318, "step": 13182 }, { "epoch": 0.9797844667409885, "grad_norm": 1.4038316681114227, "learning_rate": 1.5730322433361518e-05, "loss": 0.706, "step": 13183 }, { "epoch": 0.9798587885544408, "grad_norm": 1.4109836904862876, "learning_rate": 1.572966484779782e-05, "loss": 0.542, "step": 13184 }, { "epoch": 0.979933110367893, "grad_norm": 1.8754168599384105, "learning_rate": 1.5729007225346835e-05, "loss": 0.8207, "step": 13185 }, { "epoch": 0.9800074321813452, "grad_norm": 1.810918358415137, "learning_rate": 1.5728349566012796e-05, "loss": 0.9119, "step": 13186 }, { "epoch": 0.9800817539947975, "grad_norm": 1.9626480779517401, "learning_rate": 1.5727691869799943e-05, "loss": 0.8904, "step": 13187 }, { "epoch": 0.9801560758082497, "grad_norm": 1.9290944637881953, "learning_rate": 1.5727034136712503e-05, "loss": 0.9536, "step": 13188 }, { "epoch": 0.9802303976217019, "grad_norm": 1.848891625931108, "learning_rate": 1.5726376366754722e-05, "loss": 0.9332, "step": 13189 }, { "epoch": 0.9803047194351542, "grad_norm": 2.4776817848865655, "learning_rate": 1.572571855993082e-05, "loss": 0.9701, "step": 13190 }, { "epoch": 0.9803790412486064, "grad_norm": 1.796607324589603, "learning_rate": 1.5725060716245043e-05, "loss": 0.9521, "step": 13191 }, { "epoch": 0.9804533630620588, "grad_norm": 1.8244624774870097, "learning_rate": 1.572440283570162e-05, "loss": 0.6242, "step": 13192 }, { "epoch": 0.980527684875511, "grad_norm": 1.6684642699021826, "learning_rate": 1.572374491830479e-05, "loss": 0.9553, "step": 13193 }, { "epoch": 0.9806020066889632, "grad_norm": 1.7517121152343023, "learning_rate": 1.572308696405879e-05, "loss": 0.7617, "step": 13194 }, { "epoch": 0.9806763285024155, "grad_norm": 1.9066674397707228, "learning_rate": 1.5722428972967848e-05, "loss": 0.6618, "step": 13195 }, { "epoch": 0.9807506503158677, "grad_norm": 1.7443651567225975, "learning_rate": 1.5721770945036205e-05, "loss": 0.7659, "step": 13196 }, { "epoch": 0.98082497212932, "grad_norm": 1.9405191971828422, "learning_rate": 1.57211128802681e-05, "loss": 0.7949, "step": 13197 }, { "epoch": 0.9808992939427722, "grad_norm": 2.0920172974696594, "learning_rate": 1.5720454778667767e-05, "loss": 0.8386, "step": 13198 }, { "epoch": 0.9809736157562244, "grad_norm": 2.117615311802839, "learning_rate": 1.5719796640239442e-05, "loss": 0.9625, "step": 13199 }, { "epoch": 0.9810479375696767, "grad_norm": 2.0829430517838494, "learning_rate": 1.5719138464987363e-05, "loss": 0.7803, "step": 13200 }, { "epoch": 0.9811222593831289, "grad_norm": 1.7587760358438038, "learning_rate": 1.571848025291577e-05, "loss": 0.6875, "step": 13201 }, { "epoch": 0.9811965811965812, "grad_norm": 2.0900229728585007, "learning_rate": 1.5717822004028896e-05, "loss": 0.9811, "step": 13202 }, { "epoch": 0.9812709030100334, "grad_norm": 2.148787723120726, "learning_rate": 1.571716371833098e-05, "loss": 0.8968, "step": 13203 }, { "epoch": 0.9813452248234857, "grad_norm": 1.8111593135199433, "learning_rate": 1.571650539582626e-05, "loss": 0.8322, "step": 13204 }, { "epoch": 0.981419546636938, "grad_norm": 2.2831127444745682, "learning_rate": 1.5715847036518977e-05, "loss": 0.764, "step": 13205 }, { "epoch": 0.9814938684503902, "grad_norm": 2.0624728653907174, "learning_rate": 1.5715188640413367e-05, "loss": 0.706, "step": 13206 }, { "epoch": 0.9815681902638425, "grad_norm": 1.8149666691939914, "learning_rate": 1.571453020751367e-05, "loss": 0.9238, "step": 13207 }, { "epoch": 0.9816425120772947, "grad_norm": 1.8009762669663276, "learning_rate": 1.571387173782412e-05, "loss": 0.7562, "step": 13208 }, { "epoch": 0.9817168338907469, "grad_norm": 1.768366367415524, "learning_rate": 1.5713213231348963e-05, "loss": 0.5734, "step": 13209 }, { "epoch": 0.9817911557041992, "grad_norm": 1.596174598580638, "learning_rate": 1.5712554688092433e-05, "loss": 0.714, "step": 13210 }, { "epoch": 0.9818654775176514, "grad_norm": 1.5032271019434083, "learning_rate": 1.5711896108058775e-05, "loss": 0.7061, "step": 13211 }, { "epoch": 0.9819397993311036, "grad_norm": 1.6632283481536878, "learning_rate": 1.571123749125223e-05, "loss": 0.6393, "step": 13212 }, { "epoch": 0.9820141211445559, "grad_norm": 1.4234449150149324, "learning_rate": 1.571057883767703e-05, "loss": 0.6832, "step": 13213 }, { "epoch": 0.9820884429580081, "grad_norm": 1.8135164272938138, "learning_rate": 1.570992014733742e-05, "loss": 0.7259, "step": 13214 }, { "epoch": 0.9821627647714605, "grad_norm": 2.1731580415326244, "learning_rate": 1.5709261420237635e-05, "loss": 0.8895, "step": 13215 }, { "epoch": 0.9822370865849127, "grad_norm": 2.048627307927757, "learning_rate": 1.5708602656381927e-05, "loss": 0.7123, "step": 13216 }, { "epoch": 0.982311408398365, "grad_norm": 1.488438815096859, "learning_rate": 1.5707943855774534e-05, "loss": 0.8136, "step": 13217 }, { "epoch": 0.9823857302118172, "grad_norm": 1.665388324324738, "learning_rate": 1.570728501841969e-05, "loss": 0.5754, "step": 13218 }, { "epoch": 0.9824600520252694, "grad_norm": 1.7557204965005897, "learning_rate": 1.5706626144321645e-05, "loss": 0.8514, "step": 13219 }, { "epoch": 0.9825343738387217, "grad_norm": 1.5094404339738527, "learning_rate": 1.570596723348463e-05, "loss": 0.6254, "step": 13220 }, { "epoch": 0.9826086956521739, "grad_norm": 2.27948072330046, "learning_rate": 1.57053082859129e-05, "loss": 0.9533, "step": 13221 }, { "epoch": 0.9826830174656261, "grad_norm": 1.7152421094594255, "learning_rate": 1.570464930161069e-05, "loss": 0.8009, "step": 13222 }, { "epoch": 0.9827573392790784, "grad_norm": 1.978550923580913, "learning_rate": 1.5703990280582246e-05, "loss": 0.9184, "step": 13223 }, { "epoch": 0.9828316610925306, "grad_norm": 1.740032359243941, "learning_rate": 1.5703331222831804e-05, "loss": 0.7872, "step": 13224 }, { "epoch": 0.9829059829059829, "grad_norm": 1.5667179482452274, "learning_rate": 1.5702672128363613e-05, "loss": 0.7976, "step": 13225 }, { "epoch": 0.9829803047194352, "grad_norm": 2.439521224309534, "learning_rate": 1.5702012997181917e-05, "loss": 0.8001, "step": 13226 }, { "epoch": 0.9830546265328874, "grad_norm": 1.91063715021034, "learning_rate": 1.5701353829290954e-05, "loss": 0.9069, "step": 13227 }, { "epoch": 0.9831289483463397, "grad_norm": 1.7950264822464463, "learning_rate": 1.5700694624694975e-05, "loss": 0.7146, "step": 13228 }, { "epoch": 0.9832032701597919, "grad_norm": 1.9325007486863113, "learning_rate": 1.5700035383398216e-05, "loss": 0.8629, "step": 13229 }, { "epoch": 0.9832775919732442, "grad_norm": 1.8532175763991836, "learning_rate": 1.5699376105404928e-05, "loss": 0.8267, "step": 13230 }, { "epoch": 0.9833519137866964, "grad_norm": 1.710675070047752, "learning_rate": 1.5698716790719347e-05, "loss": 0.805, "step": 13231 }, { "epoch": 0.9834262356001486, "grad_norm": 1.9500328364008181, "learning_rate": 1.5698057439345727e-05, "loss": 0.7496, "step": 13232 }, { "epoch": 0.9835005574136009, "grad_norm": 2.0390786205364844, "learning_rate": 1.569739805128831e-05, "loss": 0.8858, "step": 13233 }, { "epoch": 0.9835748792270531, "grad_norm": 1.6479230756934762, "learning_rate": 1.569673862655134e-05, "loss": 0.7642, "step": 13234 }, { "epoch": 0.9836492010405053, "grad_norm": 1.5139506463605923, "learning_rate": 1.5696079165139057e-05, "loss": 0.6368, "step": 13235 }, { "epoch": 0.9837235228539576, "grad_norm": 1.889470934079413, "learning_rate": 1.569541966705572e-05, "loss": 0.908, "step": 13236 }, { "epoch": 0.9837978446674099, "grad_norm": 2.263103902926577, "learning_rate": 1.569476013230556e-05, "loss": 1.0402, "step": 13237 }, { "epoch": 0.9838721664808622, "grad_norm": 1.8583503291117347, "learning_rate": 1.569410056089283e-05, "loss": 1.0156, "step": 13238 }, { "epoch": 0.9839464882943144, "grad_norm": 1.8897229017883472, "learning_rate": 1.569344095282178e-05, "loss": 0.7229, "step": 13239 }, { "epoch": 0.9840208101077667, "grad_norm": 1.7367910989896878, "learning_rate": 1.5692781308096652e-05, "loss": 0.8113, "step": 13240 }, { "epoch": 0.9840951319212189, "grad_norm": 2.16935095113393, "learning_rate": 1.569212162672169e-05, "loss": 1.1022, "step": 13241 }, { "epoch": 0.9841694537346711, "grad_norm": 2.072526365356097, "learning_rate": 1.5691461908701148e-05, "loss": 0.8726, "step": 13242 }, { "epoch": 0.9842437755481234, "grad_norm": 2.069363655827002, "learning_rate": 1.5690802154039265e-05, "loss": 0.8428, "step": 13243 }, { "epoch": 0.9843180973615756, "grad_norm": 3.9201548384542675, "learning_rate": 1.5690142362740297e-05, "loss": 0.8199, "step": 13244 }, { "epoch": 0.9843924191750278, "grad_norm": 1.395050935012926, "learning_rate": 1.5689482534808485e-05, "loss": 0.6114, "step": 13245 }, { "epoch": 0.9844667409884801, "grad_norm": 1.5917130276859293, "learning_rate": 1.5688822670248082e-05, "loss": 0.787, "step": 13246 }, { "epoch": 0.9845410628019323, "grad_norm": 1.8483447030710134, "learning_rate": 1.5688162769063337e-05, "loss": 0.7597, "step": 13247 }, { "epoch": 0.9846153846153847, "grad_norm": 2.0171300514793837, "learning_rate": 1.5687502831258487e-05, "loss": 1.0634, "step": 13248 }, { "epoch": 0.9846897064288369, "grad_norm": 1.6379941589888494, "learning_rate": 1.5686842856837793e-05, "loss": 0.7783, "step": 13249 }, { "epoch": 0.9847640282422891, "grad_norm": 2.0179204615943487, "learning_rate": 1.56861828458055e-05, "loss": 0.8708, "step": 13250 }, { "epoch": 0.9848383500557414, "grad_norm": 2.0889019067427537, "learning_rate": 1.568552279816586e-05, "loss": 0.8815, "step": 13251 }, { "epoch": 0.9849126718691936, "grad_norm": 1.682899200185078, "learning_rate": 1.5684862713923114e-05, "loss": 0.7553, "step": 13252 }, { "epoch": 0.9849869936826459, "grad_norm": 2.9183711555651155, "learning_rate": 1.5684202593081516e-05, "loss": 0.8966, "step": 13253 }, { "epoch": 0.9850613154960981, "grad_norm": 2.0966457169198347, "learning_rate": 1.568354243564532e-05, "loss": 0.9372, "step": 13254 }, { "epoch": 0.9851356373095503, "grad_norm": 1.607080826595485, "learning_rate": 1.5682882241618773e-05, "loss": 0.7551, "step": 13255 }, { "epoch": 0.9852099591230026, "grad_norm": 2.030430690508482, "learning_rate": 1.5682222011006126e-05, "loss": 0.7003, "step": 13256 }, { "epoch": 0.9852842809364548, "grad_norm": 1.9598210573365973, "learning_rate": 1.5681561743811628e-05, "loss": 0.7604, "step": 13257 }, { "epoch": 0.985358602749907, "grad_norm": 1.6840489692466065, "learning_rate": 1.5680901440039525e-05, "loss": 0.9135, "step": 13258 }, { "epoch": 0.9854329245633593, "grad_norm": 2.030242424571572, "learning_rate": 1.568024109969408e-05, "loss": 0.9623, "step": 13259 }, { "epoch": 0.9855072463768116, "grad_norm": 2.0056789521445224, "learning_rate": 1.5679580722779533e-05, "loss": 0.9067, "step": 13260 }, { "epoch": 0.9855815681902639, "grad_norm": 2.2543428794474463, "learning_rate": 1.567892030930014e-05, "loss": 0.9262, "step": 13261 }, { "epoch": 0.9856558900037161, "grad_norm": 1.575388530516094, "learning_rate": 1.5678259859260156e-05, "loss": 0.62, "step": 13262 }, { "epoch": 0.9857302118171684, "grad_norm": 1.907824240632817, "learning_rate": 1.567759937266383e-05, "loss": 0.8685, "step": 13263 }, { "epoch": 0.9858045336306206, "grad_norm": 2.309681638800111, "learning_rate": 1.567693884951541e-05, "loss": 0.7998, "step": 13264 }, { "epoch": 0.9858788554440728, "grad_norm": 1.9366423127818087, "learning_rate": 1.5676278289819154e-05, "loss": 0.8022, "step": 13265 }, { "epoch": 0.9859531772575251, "grad_norm": 1.7378479378109868, "learning_rate": 1.567561769357931e-05, "loss": 0.8033, "step": 13266 }, { "epoch": 0.9860274990709773, "grad_norm": 2.3431143626162196, "learning_rate": 1.5674957060800137e-05, "loss": 1.083, "step": 13267 }, { "epoch": 0.9861018208844295, "grad_norm": 1.8282831506649355, "learning_rate": 1.5674296391485888e-05, "loss": 0.7799, "step": 13268 }, { "epoch": 0.9861761426978818, "grad_norm": 2.426510363951782, "learning_rate": 1.567363568564081e-05, "loss": 0.9817, "step": 13269 }, { "epoch": 0.986250464511334, "grad_norm": 1.876680389548558, "learning_rate": 1.567297494326916e-05, "loss": 0.968, "step": 13270 }, { "epoch": 0.9863247863247864, "grad_norm": 1.84246263843223, "learning_rate": 1.567231416437519e-05, "loss": 0.7441, "step": 13271 }, { "epoch": 0.9863991081382386, "grad_norm": 1.810043758731064, "learning_rate": 1.5671653348963157e-05, "loss": 0.8015, "step": 13272 }, { "epoch": 0.9864734299516909, "grad_norm": 2.459029158806604, "learning_rate": 1.5670992497037315e-05, "loss": 0.9429, "step": 13273 }, { "epoch": 0.9865477517651431, "grad_norm": 1.566418884535701, "learning_rate": 1.567033160860192e-05, "loss": 0.8727, "step": 13274 }, { "epoch": 0.9866220735785953, "grad_norm": 1.5183766137597787, "learning_rate": 1.5669670683661218e-05, "loss": 0.8068, "step": 13275 }, { "epoch": 0.9866963953920476, "grad_norm": 10.07097945367218, "learning_rate": 1.5669009722219474e-05, "loss": 1.1471, "step": 13276 }, { "epoch": 0.9867707172054998, "grad_norm": 1.5384064492120681, "learning_rate": 1.566834872428094e-05, "loss": 0.6232, "step": 13277 }, { "epoch": 0.986845039018952, "grad_norm": 1.8703097833611317, "learning_rate": 1.5667687689849867e-05, "loss": 0.8928, "step": 13278 }, { "epoch": 0.9869193608324043, "grad_norm": 1.75438883656944, "learning_rate": 1.5667026618930517e-05, "loss": 0.9136, "step": 13279 }, { "epoch": 0.9869936826458565, "grad_norm": 1.8104386235067222, "learning_rate": 1.5666365511527145e-05, "loss": 0.6402, "step": 13280 }, { "epoch": 0.9870680044593088, "grad_norm": 2.965919390018879, "learning_rate": 1.5665704367644008e-05, "loss": 0.8157, "step": 13281 }, { "epoch": 0.9871423262727611, "grad_norm": 2.1179910318791046, "learning_rate": 1.566504318728536e-05, "loss": 0.938, "step": 13282 }, { "epoch": 0.9872166480862133, "grad_norm": 1.8167843697186545, "learning_rate": 1.566438197045545e-05, "loss": 0.8622, "step": 13283 }, { "epoch": 0.9872909698996656, "grad_norm": 1.6427973168977807, "learning_rate": 1.566372071715855e-05, "loss": 0.8498, "step": 13284 }, { "epoch": 0.9873652917131178, "grad_norm": 1.7016353427965745, "learning_rate": 1.566305942739891e-05, "loss": 0.7462, "step": 13285 }, { "epoch": 0.9874396135265701, "grad_norm": 1.798185156963998, "learning_rate": 1.5662398101180785e-05, "loss": 0.9727, "step": 13286 }, { "epoch": 0.9875139353400223, "grad_norm": 1.631452659493863, "learning_rate": 1.5661736738508434e-05, "loss": 0.9202, "step": 13287 }, { "epoch": 0.9875882571534745, "grad_norm": 2.4215446672473173, "learning_rate": 1.5661075339386115e-05, "loss": 1.0534, "step": 13288 }, { "epoch": 0.9876625789669268, "grad_norm": 1.8924227240273532, "learning_rate": 1.566041390381809e-05, "loss": 0.7514, "step": 13289 }, { "epoch": 0.987736900780379, "grad_norm": 3.443492658786031, "learning_rate": 1.5659752431808612e-05, "loss": 0.8941, "step": 13290 }, { "epoch": 0.9878112225938313, "grad_norm": 1.8233284528751497, "learning_rate": 1.5659090923361942e-05, "loss": 0.8635, "step": 13291 }, { "epoch": 0.9878855444072835, "grad_norm": 2.2114065909061433, "learning_rate": 1.565842937848234e-05, "loss": 0.9581, "step": 13292 }, { "epoch": 0.9879598662207358, "grad_norm": 3.114440326151456, "learning_rate": 1.5657767797174058e-05, "loss": 0.7924, "step": 13293 }, { "epoch": 0.9880341880341881, "grad_norm": 1.6343744474288355, "learning_rate": 1.5657106179441364e-05, "loss": 0.7767, "step": 13294 }, { "epoch": 0.9881085098476403, "grad_norm": 1.736855659638549, "learning_rate": 1.5656444525288517e-05, "loss": 0.9059, "step": 13295 }, { "epoch": 0.9881828316610926, "grad_norm": 1.7572003804325074, "learning_rate": 1.565578283471977e-05, "loss": 0.7486, "step": 13296 }, { "epoch": 0.9882571534745448, "grad_norm": 1.6577897408936835, "learning_rate": 1.5655121107739385e-05, "loss": 0.6031, "step": 13297 }, { "epoch": 0.988331475287997, "grad_norm": 1.8200578441408526, "learning_rate": 1.5654459344351628e-05, "loss": 0.846, "step": 13298 }, { "epoch": 0.9884057971014493, "grad_norm": 2.1380294466807985, "learning_rate": 1.5653797544560752e-05, "loss": 0.8028, "step": 13299 }, { "epoch": 0.9884801189149015, "grad_norm": 1.678234476772491, "learning_rate": 1.565313570837102e-05, "loss": 0.7909, "step": 13300 }, { "epoch": 0.9885544407283537, "grad_norm": 2.2115751738142264, "learning_rate": 1.5652473835786697e-05, "loss": 0.7901, "step": 13301 }, { "epoch": 0.988628762541806, "grad_norm": 1.8311413023714727, "learning_rate": 1.5651811926812038e-05, "loss": 0.7304, "step": 13302 }, { "epoch": 0.9887030843552582, "grad_norm": 2.8938879021685637, "learning_rate": 1.565114998145131e-05, "loss": 1.0176, "step": 13303 }, { "epoch": 0.9887774061687106, "grad_norm": 2.0239161650133286, "learning_rate": 1.565048799970877e-05, "loss": 0.9442, "step": 13304 }, { "epoch": 0.9888517279821628, "grad_norm": 1.8446085561903707, "learning_rate": 1.564982598158868e-05, "loss": 0.7209, "step": 13305 }, { "epoch": 0.988926049795615, "grad_norm": 1.6187985056979977, "learning_rate": 1.5649163927095306e-05, "loss": 0.9363, "step": 13306 }, { "epoch": 0.9890003716090673, "grad_norm": 2.0291961866454096, "learning_rate": 1.5648501836232908e-05, "loss": 0.9846, "step": 13307 }, { "epoch": 0.9890746934225195, "grad_norm": 2.0715743488980256, "learning_rate": 1.5647839709005747e-05, "loss": 1.0624, "step": 13308 }, { "epoch": 0.9891490152359718, "grad_norm": 1.5979443251436436, "learning_rate": 1.5647177545418086e-05, "loss": 0.6209, "step": 13309 }, { "epoch": 0.989223337049424, "grad_norm": 1.5500371198556566, "learning_rate": 1.564651534547419e-05, "loss": 0.8306, "step": 13310 }, { "epoch": 0.9892976588628762, "grad_norm": 2.208188457246087, "learning_rate": 1.564585310917832e-05, "loss": 1.0218, "step": 13311 }, { "epoch": 0.9893719806763285, "grad_norm": 1.4307598770067207, "learning_rate": 1.564519083653474e-05, "loss": 0.712, "step": 13312 }, { "epoch": 0.9894463024897807, "grad_norm": 1.9235133938524778, "learning_rate": 1.5644528527547717e-05, "loss": 0.9067, "step": 13313 }, { "epoch": 0.989520624303233, "grad_norm": 1.9881363671173744, "learning_rate": 1.564386618222151e-05, "loss": 0.7686, "step": 13314 }, { "epoch": 0.9895949461166852, "grad_norm": 2.468247673118701, "learning_rate": 1.5643203800560387e-05, "loss": 0.8915, "step": 13315 }, { "epoch": 0.9896692679301375, "grad_norm": 2.2718493500600316, "learning_rate": 1.564254138256861e-05, "loss": 0.7685, "step": 13316 }, { "epoch": 0.9897435897435898, "grad_norm": 1.766412169699054, "learning_rate": 1.5641878928250444e-05, "loss": 0.8492, "step": 13317 }, { "epoch": 0.989817911557042, "grad_norm": 1.8641207396846937, "learning_rate": 1.5641216437610152e-05, "loss": 0.8461, "step": 13318 }, { "epoch": 0.9898922333704943, "grad_norm": 1.3216301678643603, "learning_rate": 1.564055391065201e-05, "loss": 0.4987, "step": 13319 }, { "epoch": 0.9899665551839465, "grad_norm": 2.4786442559393698, "learning_rate": 1.5639891347380268e-05, "loss": 0.9423, "step": 13320 }, { "epoch": 0.9900408769973987, "grad_norm": 2.0772775208262853, "learning_rate": 1.5639228747799196e-05, "loss": 0.9232, "step": 13321 }, { "epoch": 0.990115198810851, "grad_norm": 1.729406791642611, "learning_rate": 1.5638566111913066e-05, "loss": 0.6639, "step": 13322 }, { "epoch": 0.9901895206243032, "grad_norm": 1.612879362849542, "learning_rate": 1.5637903439726136e-05, "loss": 0.7431, "step": 13323 }, { "epoch": 0.9902638424377554, "grad_norm": 1.8248239460901314, "learning_rate": 1.5637240731242682e-05, "loss": 0.9057, "step": 13324 }, { "epoch": 0.9903381642512077, "grad_norm": 2.194013845879924, "learning_rate": 1.563657798646696e-05, "loss": 0.7119, "step": 13325 }, { "epoch": 0.9904124860646599, "grad_norm": 1.676171325786903, "learning_rate": 1.5635915205403245e-05, "loss": 0.5679, "step": 13326 }, { "epoch": 0.9904868078781123, "grad_norm": 1.7052073491747866, "learning_rate": 1.5635252388055796e-05, "loss": 0.7805, "step": 13327 }, { "epoch": 0.9905611296915645, "grad_norm": 1.8775822991136815, "learning_rate": 1.5634589534428886e-05, "loss": 0.9688, "step": 13328 }, { "epoch": 0.9906354515050168, "grad_norm": 2.156420244390306, "learning_rate": 1.563392664452678e-05, "loss": 0.8594, "step": 13329 }, { "epoch": 0.990709773318469, "grad_norm": 1.7797306359321892, "learning_rate": 1.5633263718353752e-05, "loss": 0.8073, "step": 13330 }, { "epoch": 0.9907840951319212, "grad_norm": 1.7341237984038016, "learning_rate": 1.563260075591406e-05, "loss": 0.9032, "step": 13331 }, { "epoch": 0.9908584169453735, "grad_norm": 2.6564092645367925, "learning_rate": 1.5631937757211977e-05, "loss": 0.5508, "step": 13332 }, { "epoch": 0.9909327387588257, "grad_norm": 2.100174011580439, "learning_rate": 1.563127472225177e-05, "loss": 0.6719, "step": 13333 }, { "epoch": 0.9910070605722779, "grad_norm": 1.787235047374795, "learning_rate": 1.563061165103771e-05, "loss": 0.7067, "step": 13334 }, { "epoch": 0.9910813823857302, "grad_norm": 2.2548999816960515, "learning_rate": 1.5629948543574062e-05, "loss": 0.9959, "step": 13335 }, { "epoch": 0.9911557041991824, "grad_norm": 1.8290547817876135, "learning_rate": 1.5629285399865103e-05, "loss": 0.7215, "step": 13336 }, { "epoch": 0.9912300260126347, "grad_norm": 1.7538775038595502, "learning_rate": 1.562862221991509e-05, "loss": 0.7857, "step": 13337 }, { "epoch": 0.991304347826087, "grad_norm": 1.7745284167507263, "learning_rate": 1.5627959003728305e-05, "loss": 0.7702, "step": 13338 }, { "epoch": 0.9913786696395392, "grad_norm": 2.1325664897457233, "learning_rate": 1.562729575130901e-05, "loss": 1.0536, "step": 13339 }, { "epoch": 0.9914529914529915, "grad_norm": 1.647684662151408, "learning_rate": 1.5626632462661472e-05, "loss": 0.8827, "step": 13340 }, { "epoch": 0.9915273132664437, "grad_norm": 1.8362857906154382, "learning_rate": 1.5625969137789974e-05, "loss": 1.0996, "step": 13341 }, { "epoch": 0.991601635079896, "grad_norm": 1.8147708296857439, "learning_rate": 1.5625305776698775e-05, "loss": 0.7848, "step": 13342 }, { "epoch": 0.9916759568933482, "grad_norm": 2.0975770360424706, "learning_rate": 1.562464237939215e-05, "loss": 0.9345, "step": 13343 }, { "epoch": 0.9917502787068004, "grad_norm": 1.7323824248078026, "learning_rate": 1.562397894587437e-05, "loss": 0.7546, "step": 13344 }, { "epoch": 0.9918246005202527, "grad_norm": 1.9834225078035526, "learning_rate": 1.56233154761497e-05, "loss": 0.7403, "step": 13345 }, { "epoch": 0.9918989223337049, "grad_norm": 2.036849353645646, "learning_rate": 1.562265197022242e-05, "loss": 0.6134, "step": 13346 }, { "epoch": 0.9919732441471572, "grad_norm": 2.111124963595631, "learning_rate": 1.56219884280968e-05, "loss": 0.8152, "step": 13347 }, { "epoch": 0.9920475659606094, "grad_norm": 2.2755355612831396, "learning_rate": 1.5621324849777112e-05, "loss": 0.9015, "step": 13348 }, { "epoch": 0.9921218877740617, "grad_norm": 1.9745733362611813, "learning_rate": 1.562066123526762e-05, "loss": 0.8285, "step": 13349 }, { "epoch": 0.992196209587514, "grad_norm": 2.4735294180160654, "learning_rate": 1.5619997584572607e-05, "loss": 0.8828, "step": 13350 }, { "epoch": 0.9922705314009662, "grad_norm": 1.4610851789437402, "learning_rate": 1.5619333897696337e-05, "loss": 0.5993, "step": 13351 }, { "epoch": 0.9923448532144185, "grad_norm": 2.298695935270771, "learning_rate": 1.561867017464309e-05, "loss": 0.9598, "step": 13352 }, { "epoch": 0.9924191750278707, "grad_norm": 1.4995217757658386, "learning_rate": 1.561800641541714e-05, "loss": 0.7702, "step": 13353 }, { "epoch": 0.9924934968413229, "grad_norm": 1.882183339172134, "learning_rate": 1.561734262002275e-05, "loss": 0.8815, "step": 13354 }, { "epoch": 0.9925678186547752, "grad_norm": 1.8361774788365446, "learning_rate": 1.5616678788464202e-05, "loss": 0.8107, "step": 13355 }, { "epoch": 0.9926421404682274, "grad_norm": 1.6218091448157455, "learning_rate": 1.561601492074577e-05, "loss": 0.7625, "step": 13356 }, { "epoch": 0.9927164622816796, "grad_norm": 2.736331502182009, "learning_rate": 1.561535101687172e-05, "loss": 0.824, "step": 13357 }, { "epoch": 0.9927907840951319, "grad_norm": 2.0946792104429393, "learning_rate": 1.5614687076846335e-05, "loss": 0.7033, "step": 13358 }, { "epoch": 0.9928651059085841, "grad_norm": 2.1398193902442824, "learning_rate": 1.5614023100673884e-05, "loss": 0.8931, "step": 13359 }, { "epoch": 0.9929394277220365, "grad_norm": 2.103786696845462, "learning_rate": 1.5613359088358645e-05, "loss": 1.055, "step": 13360 }, { "epoch": 0.9930137495354887, "grad_norm": 1.913394953650142, "learning_rate": 1.561269503990489e-05, "loss": 0.7, "step": 13361 }, { "epoch": 0.993088071348941, "grad_norm": 1.597023001931972, "learning_rate": 1.5612030955316898e-05, "loss": 0.6876, "step": 13362 }, { "epoch": 0.9931623931623932, "grad_norm": 2.0196739153864165, "learning_rate": 1.561136683459894e-05, "loss": 0.853, "step": 13363 }, { "epoch": 0.9932367149758454, "grad_norm": 4.5281557801077525, "learning_rate": 1.5610702677755295e-05, "loss": 0.7549, "step": 13364 }, { "epoch": 0.9933110367892977, "grad_norm": 1.7752197727289538, "learning_rate": 1.5610038484790236e-05, "loss": 0.8075, "step": 13365 }, { "epoch": 0.9933853586027499, "grad_norm": 1.4963405689616753, "learning_rate": 1.560937425570804e-05, "loss": 0.6232, "step": 13366 }, { "epoch": 0.9934596804162021, "grad_norm": 2.2166694998067094, "learning_rate": 1.5608709990512986e-05, "loss": 0.8986, "step": 13367 }, { "epoch": 0.9935340022296544, "grad_norm": 2.1578117671268116, "learning_rate": 1.5608045689209347e-05, "loss": 0.9458, "step": 13368 }, { "epoch": 0.9936083240431066, "grad_norm": 1.9368801466091652, "learning_rate": 1.56073813518014e-05, "loss": 0.7225, "step": 13369 }, { "epoch": 0.9936826458565589, "grad_norm": 1.6257298443214445, "learning_rate": 1.5606716978293423e-05, "loss": 0.7635, "step": 13370 }, { "epoch": 0.9937569676700111, "grad_norm": 2.1706727776198464, "learning_rate": 1.5606052568689693e-05, "loss": 0.6882, "step": 13371 }, { "epoch": 0.9938312894834634, "grad_norm": 1.6463901749813423, "learning_rate": 1.5605388122994485e-05, "loss": 0.7351, "step": 13372 }, { "epoch": 0.9939056112969157, "grad_norm": 2.0116859311077784, "learning_rate": 1.5604723641212082e-05, "loss": 0.7044, "step": 13373 }, { "epoch": 0.9939799331103679, "grad_norm": 2.211159717068077, "learning_rate": 1.5604059123346757e-05, "loss": 0.9793, "step": 13374 }, { "epoch": 0.9940542549238202, "grad_norm": 2.063271111821627, "learning_rate": 1.5603394569402795e-05, "loss": 1.1039, "step": 13375 }, { "epoch": 0.9941285767372724, "grad_norm": 1.9613385192311203, "learning_rate": 1.5602729979384466e-05, "loss": 0.9282, "step": 13376 }, { "epoch": 0.9942028985507246, "grad_norm": 1.7302000168207685, "learning_rate": 1.560206535329605e-05, "loss": 0.8202, "step": 13377 }, { "epoch": 0.9942772203641769, "grad_norm": 2.03120491745174, "learning_rate": 1.5601400691141832e-05, "loss": 1.0172, "step": 13378 }, { "epoch": 0.9943515421776291, "grad_norm": 2.1626408383366407, "learning_rate": 1.5600735992926085e-05, "loss": 0.7622, "step": 13379 }, { "epoch": 0.9944258639910813, "grad_norm": 1.861293450776638, "learning_rate": 1.560007125865309e-05, "loss": 0.8692, "step": 13380 }, { "epoch": 0.9945001858045336, "grad_norm": 1.3336910831614825, "learning_rate": 1.5599406488327127e-05, "loss": 0.5899, "step": 13381 }, { "epoch": 0.9945745076179858, "grad_norm": 1.9159067829893555, "learning_rate": 1.5598741681952473e-05, "loss": 0.7868, "step": 13382 }, { "epoch": 0.9946488294314382, "grad_norm": 2.1521148182710697, "learning_rate": 1.5598076839533416e-05, "loss": 0.7885, "step": 13383 }, { "epoch": 0.9947231512448904, "grad_norm": 1.7621351904167626, "learning_rate": 1.559741196107423e-05, "loss": 0.9122, "step": 13384 }, { "epoch": 0.9947974730583427, "grad_norm": 1.6203983274314733, "learning_rate": 1.559674704657919e-05, "loss": 0.7189, "step": 13385 }, { "epoch": 0.9948717948717949, "grad_norm": 1.7799313932204448, "learning_rate": 1.559608209605259e-05, "loss": 0.5729, "step": 13386 }, { "epoch": 0.9949461166852471, "grad_norm": 1.8394851792055578, "learning_rate": 1.55954171094987e-05, "loss": 0.9535, "step": 13387 }, { "epoch": 0.9950204384986994, "grad_norm": 1.5150087731370343, "learning_rate": 1.5594752086921807e-05, "loss": 0.7918, "step": 13388 }, { "epoch": 0.9950947603121516, "grad_norm": 2.0175689475454432, "learning_rate": 1.559408702832619e-05, "loss": 0.6918, "step": 13389 }, { "epoch": 0.9951690821256038, "grad_norm": 1.8440019784165989, "learning_rate": 1.559342193371613e-05, "loss": 0.8611, "step": 13390 }, { "epoch": 0.9952434039390561, "grad_norm": 2.134709395418894, "learning_rate": 1.559275680309591e-05, "loss": 0.8476, "step": 13391 }, { "epoch": 0.9953177257525083, "grad_norm": 1.9591568638079686, "learning_rate": 1.5592091636469814e-05, "loss": 0.7904, "step": 13392 }, { "epoch": 0.9953920475659606, "grad_norm": 1.8703507379710487, "learning_rate": 1.559142643384212e-05, "loss": 0.7726, "step": 13393 }, { "epoch": 0.9954663693794129, "grad_norm": 2.910285272679881, "learning_rate": 1.559076119521711e-05, "loss": 0.9471, "step": 13394 }, { "epoch": 0.9955406911928651, "grad_norm": 1.7740398889004627, "learning_rate": 1.5590095920599075e-05, "loss": 0.83, "step": 13395 }, { "epoch": 0.9956150130063174, "grad_norm": 1.6437357599187687, "learning_rate": 1.558943060999229e-05, "loss": 0.8936, "step": 13396 }, { "epoch": 0.9956893348197696, "grad_norm": 1.716945375547904, "learning_rate": 1.558876526340104e-05, "loss": 0.9166, "step": 13397 }, { "epoch": 0.9957636566332219, "grad_norm": 1.8199109206307733, "learning_rate": 1.558809988082961e-05, "loss": 1.0074, "step": 13398 }, { "epoch": 0.9958379784466741, "grad_norm": 1.5601387213627225, "learning_rate": 1.5587434462282284e-05, "loss": 0.7638, "step": 13399 }, { "epoch": 0.9959123002601263, "grad_norm": 2.2917654776041556, "learning_rate": 1.558676900776334e-05, "loss": 0.5416, "step": 13400 }, { "epoch": 0.9959866220735786, "grad_norm": 1.9668590536952921, "learning_rate": 1.5586103517277073e-05, "loss": 1.0759, "step": 13401 }, { "epoch": 0.9960609438870308, "grad_norm": 2.127719572478572, "learning_rate": 1.558543799082776e-05, "loss": 0.9535, "step": 13402 }, { "epoch": 0.996135265700483, "grad_norm": 2.5142214795726514, "learning_rate": 1.5584772428419687e-05, "loss": 0.8205, "step": 13403 }, { "epoch": 0.9962095875139353, "grad_norm": 2.006823321345028, "learning_rate": 1.558410683005714e-05, "loss": 0.8308, "step": 13404 }, { "epoch": 0.9962839093273876, "grad_norm": 1.7617946584063424, "learning_rate": 1.5583441195744397e-05, "loss": 0.8398, "step": 13405 }, { "epoch": 0.9963582311408399, "grad_norm": 1.8725972459836495, "learning_rate": 1.5582775525485753e-05, "loss": 1.0281, "step": 13406 }, { "epoch": 0.9964325529542921, "grad_norm": 2.314643977921405, "learning_rate": 1.5582109819285492e-05, "loss": 0.8119, "step": 13407 }, { "epoch": 0.9965068747677444, "grad_norm": 2.0049328343680948, "learning_rate": 1.5581444077147893e-05, "loss": 0.7753, "step": 13408 }, { "epoch": 0.9965811965811966, "grad_norm": 2.253363622099521, "learning_rate": 1.5580778299077252e-05, "loss": 0.9376, "step": 13409 }, { "epoch": 0.9966555183946488, "grad_norm": 1.392185118233869, "learning_rate": 1.558011248507785e-05, "loss": 0.6742, "step": 13410 }, { "epoch": 0.9967298402081011, "grad_norm": 1.9816699096352757, "learning_rate": 1.557944663515397e-05, "loss": 0.9742, "step": 13411 }, { "epoch": 0.9968041620215533, "grad_norm": 2.014119630277186, "learning_rate": 1.55787807493099e-05, "loss": 0.8533, "step": 13412 }, { "epoch": 0.9968784838350055, "grad_norm": 2.092017355627412, "learning_rate": 1.557811482754993e-05, "loss": 0.7409, "step": 13413 }, { "epoch": 0.9969528056484578, "grad_norm": 2.2008577084012533, "learning_rate": 1.557744886987835e-05, "loss": 0.7803, "step": 13414 }, { "epoch": 0.99702712746191, "grad_norm": 1.5286271994566236, "learning_rate": 1.557678287629944e-05, "loss": 0.7569, "step": 13415 }, { "epoch": 0.9971014492753624, "grad_norm": 1.687755801300829, "learning_rate": 1.557611684681749e-05, "loss": 0.8157, "step": 13416 }, { "epoch": 0.9971757710888146, "grad_norm": 1.6809090851302777, "learning_rate": 1.5575450781436795e-05, "loss": 0.7993, "step": 13417 }, { "epoch": 0.9972500929022668, "grad_norm": 2.1938914363439186, "learning_rate": 1.5574784680161634e-05, "loss": 0.9863, "step": 13418 }, { "epoch": 0.9973244147157191, "grad_norm": 1.8196214420853931, "learning_rate": 1.5574118542996298e-05, "loss": 0.8559, "step": 13419 }, { "epoch": 0.9973987365291713, "grad_norm": 1.92691226953824, "learning_rate": 1.5573452369945077e-05, "loss": 0.7149, "step": 13420 }, { "epoch": 0.9974730583426236, "grad_norm": 2.3275338371825596, "learning_rate": 1.557278616101226e-05, "loss": 0.8916, "step": 13421 }, { "epoch": 0.9975473801560758, "grad_norm": 1.663387930259137, "learning_rate": 1.557211991620213e-05, "loss": 0.8288, "step": 13422 }, { "epoch": 0.997621701969528, "grad_norm": 1.6949437241237935, "learning_rate": 1.5571453635518988e-05, "loss": 0.8941, "step": 13423 }, { "epoch": 0.9976960237829803, "grad_norm": 1.2293395649799395, "learning_rate": 1.557078731896711e-05, "loss": 0.5753, "step": 13424 }, { "epoch": 0.9977703455964325, "grad_norm": 2.2309529716286414, "learning_rate": 1.5570120966550794e-05, "loss": 0.9406, "step": 13425 }, { "epoch": 0.9978446674098848, "grad_norm": 1.4373516258307075, "learning_rate": 1.556945457827433e-05, "loss": 0.7942, "step": 13426 }, { "epoch": 0.997918989223337, "grad_norm": 1.8907923235883388, "learning_rate": 1.5568788154142005e-05, "loss": 0.8501, "step": 13427 }, { "epoch": 0.9979933110367893, "grad_norm": 1.6070402890557456, "learning_rate": 1.556812169415811e-05, "loss": 0.7891, "step": 13428 }, { "epoch": 0.9980676328502416, "grad_norm": 1.8007142114281223, "learning_rate": 1.556745519832694e-05, "loss": 0.8037, "step": 13429 }, { "epoch": 0.9981419546636938, "grad_norm": 2.2605562316280996, "learning_rate": 1.556678866665278e-05, "loss": 0.9529, "step": 13430 }, { "epoch": 0.9982162764771461, "grad_norm": 2.1176416048610864, "learning_rate": 1.5566122099139924e-05, "loss": 0.8928, "step": 13431 }, { "epoch": 0.9982905982905983, "grad_norm": 1.8429681354203733, "learning_rate": 1.5565455495792666e-05, "loss": 0.8483, "step": 13432 }, { "epoch": 0.9983649201040505, "grad_norm": 2.0551019057276223, "learning_rate": 1.5564788856615286e-05, "loss": 0.741, "step": 13433 }, { "epoch": 0.9984392419175028, "grad_norm": 1.9876387853948403, "learning_rate": 1.5564122181612093e-05, "loss": 0.9944, "step": 13434 }, { "epoch": 0.998513563730955, "grad_norm": 2.4377733078183135, "learning_rate": 1.5563455470787364e-05, "loss": 0.581, "step": 13435 }, { "epoch": 0.9985878855444072, "grad_norm": 2.028986803077102, "learning_rate": 1.5562788724145397e-05, "loss": 0.7678, "step": 13436 }, { "epoch": 0.9986622073578595, "grad_norm": 1.541256185575554, "learning_rate": 1.556212194169049e-05, "loss": 0.7332, "step": 13437 }, { "epoch": 0.9987365291713117, "grad_norm": 2.559288043036182, "learning_rate": 1.5561455123426927e-05, "loss": 1.0776, "step": 13438 }, { "epoch": 0.9988108509847641, "grad_norm": 2.4203622984706112, "learning_rate": 1.5560788269359004e-05, "loss": 0.8205, "step": 13439 }, { "epoch": 0.9988851727982163, "grad_norm": 2.243027788160806, "learning_rate": 1.5560121379491016e-05, "loss": 0.9003, "step": 13440 }, { "epoch": 0.9989594946116686, "grad_norm": 2.3868615297368714, "learning_rate": 1.5559454453827253e-05, "loss": 0.8301, "step": 13441 }, { "epoch": 0.9990338164251208, "grad_norm": 2.031315235222934, "learning_rate": 1.5558787492372014e-05, "loss": 0.8672, "step": 13442 }, { "epoch": 0.999108138238573, "grad_norm": 1.9829428510359361, "learning_rate": 1.5558120495129586e-05, "loss": 0.9763, "step": 13443 }, { "epoch": 0.9991824600520253, "grad_norm": 1.880058371408332, "learning_rate": 1.5557453462104272e-05, "loss": 0.8968, "step": 13444 }, { "epoch": 0.9992567818654775, "grad_norm": 1.8509042493494565, "learning_rate": 1.5556786393300357e-05, "loss": 0.8948, "step": 13445 }, { "epoch": 0.9993311036789297, "grad_norm": 2.5744644569372173, "learning_rate": 1.5556119288722138e-05, "loss": 0.8703, "step": 13446 }, { "epoch": 0.999405425492382, "grad_norm": 2.0083323355918488, "learning_rate": 1.5555452148373917e-05, "loss": 0.9545, "step": 13447 }, { "epoch": 0.9994797473058342, "grad_norm": 1.8222090044935702, "learning_rate": 1.5554784972259977e-05, "loss": 0.7876, "step": 13448 }, { "epoch": 0.9995540691192865, "grad_norm": 1.9389516481340812, "learning_rate": 1.5554117760384625e-05, "loss": 0.6891, "step": 13449 }, { "epoch": 0.9996283909327388, "grad_norm": 1.95729231896824, "learning_rate": 1.5553450512752146e-05, "loss": 0.6453, "step": 13450 }, { "epoch": 0.999702712746191, "grad_norm": 4.4987879040151215, "learning_rate": 1.5552783229366844e-05, "loss": 0.9479, "step": 13451 }, { "epoch": 0.9997770345596433, "grad_norm": 2.7620575234766935, "learning_rate": 1.555211591023301e-05, "loss": 0.8319, "step": 13452 }, { "epoch": 0.9998513563730955, "grad_norm": 1.8084701380062023, "learning_rate": 1.5551448555354942e-05, "loss": 0.8106, "step": 13453 }, { "epoch": 0.9999256781865478, "grad_norm": 1.907100794650181, "learning_rate": 1.555078116473694e-05, "loss": 0.6033, "step": 13454 }, { "epoch": 1.0, "grad_norm": 2.8130049275434876, "learning_rate": 1.5550113738383293e-05, "loss": 0.7676, "step": 13455 }, { "epoch": 1.0000743218134522, "grad_norm": 1.8308843113553719, "learning_rate": 1.55494462762983e-05, "loss": 0.6521, "step": 13456 }, { "epoch": 1.0001486436269045, "grad_norm": 1.5402356176020335, "learning_rate": 1.5548778778486263e-05, "loss": 0.712, "step": 13457 }, { "epoch": 1.0002229654403567, "grad_norm": 1.4132230965904407, "learning_rate": 1.5548111244951477e-05, "loss": 0.525, "step": 13458 }, { "epoch": 1.000297287253809, "grad_norm": 1.7569006545084132, "learning_rate": 1.5547443675698234e-05, "loss": 0.7278, "step": 13459 }, { "epoch": 1.0003716090672612, "grad_norm": 1.729559514631598, "learning_rate": 1.5546776070730842e-05, "loss": 0.5634, "step": 13460 }, { "epoch": 1.0004459308807134, "grad_norm": 1.665772961066078, "learning_rate": 1.554610843005359e-05, "loss": 0.5838, "step": 13461 }, { "epoch": 1.0005202526941657, "grad_norm": 1.4726253020899724, "learning_rate": 1.5545440753670778e-05, "loss": 0.5912, "step": 13462 }, { "epoch": 1.000594574507618, "grad_norm": 1.6800239225984108, "learning_rate": 1.5544773041586712e-05, "loss": 0.4888, "step": 13463 }, { "epoch": 1.0006688963210701, "grad_norm": 1.8399522291246855, "learning_rate": 1.5544105293805683e-05, "loss": 0.6452, "step": 13464 }, { "epoch": 1.0007432181345224, "grad_norm": 1.7944774273598003, "learning_rate": 1.5543437510331993e-05, "loss": 0.5435, "step": 13465 }, { "epoch": 1.0008175399479746, "grad_norm": 2.30438555391316, "learning_rate": 1.554276969116993e-05, "loss": 0.7318, "step": 13466 }, { "epoch": 1.000891861761427, "grad_norm": 1.6524391136481509, "learning_rate": 1.5542101836323812e-05, "loss": 0.5689, "step": 13467 }, { "epoch": 1.0009661835748793, "grad_norm": 1.9643501072541116, "learning_rate": 1.554143394579793e-05, "loss": 0.7233, "step": 13468 }, { "epoch": 1.0010405053883316, "grad_norm": 1.9891630811017804, "learning_rate": 1.5540766019596584e-05, "loss": 0.7661, "step": 13469 }, { "epoch": 1.0011148272017838, "grad_norm": 2.0662390896380662, "learning_rate": 1.554009805772407e-05, "loss": 0.7442, "step": 13470 }, { "epoch": 1.001189149015236, "grad_norm": 2.1761235205705804, "learning_rate": 1.5539430060184697e-05, "loss": 0.6218, "step": 13471 }, { "epoch": 1.0012634708286883, "grad_norm": 1.8581674624565019, "learning_rate": 1.5538762026982757e-05, "loss": 0.7122, "step": 13472 }, { "epoch": 1.0013377926421405, "grad_norm": 1.9191432952465073, "learning_rate": 1.553809395812256e-05, "loss": 0.5981, "step": 13473 }, { "epoch": 1.0014121144555928, "grad_norm": 2.177049499879993, "learning_rate": 1.5537425853608393e-05, "loss": 0.6142, "step": 13474 }, { "epoch": 1.001486436269045, "grad_norm": 2.38649243521675, "learning_rate": 1.553675771344457e-05, "loss": 0.6495, "step": 13475 }, { "epoch": 1.0015607580824972, "grad_norm": 1.6799972514614465, "learning_rate": 1.5536089537635385e-05, "loss": 0.5869, "step": 13476 }, { "epoch": 1.0016350798959495, "grad_norm": 1.7592721874141222, "learning_rate": 1.553542132618515e-05, "loss": 0.6091, "step": 13477 }, { "epoch": 1.0017094017094017, "grad_norm": 2.2772006721361953, "learning_rate": 1.5534753079098153e-05, "loss": 0.6047, "step": 13478 }, { "epoch": 1.001783723522854, "grad_norm": 1.5269704270604125, "learning_rate": 1.5534084796378707e-05, "loss": 0.4418, "step": 13479 }, { "epoch": 1.0018580453363062, "grad_norm": 1.5995662683422363, "learning_rate": 1.553341647803111e-05, "loss": 0.5537, "step": 13480 }, { "epoch": 1.0019323671497584, "grad_norm": 1.927116180765617, "learning_rate": 1.553274812405966e-05, "loss": 0.7319, "step": 13481 }, { "epoch": 1.0020066889632107, "grad_norm": 1.5637003875357438, "learning_rate": 1.5532079734468668e-05, "loss": 0.5287, "step": 13482 }, { "epoch": 1.002081010776663, "grad_norm": 1.9953361589751586, "learning_rate": 1.5531411309262434e-05, "loss": 0.7667, "step": 13483 }, { "epoch": 1.0021553325901151, "grad_norm": 1.6778480517869914, "learning_rate": 1.553074284844526e-05, "loss": 0.6507, "step": 13484 }, { "epoch": 1.0022296544035674, "grad_norm": 1.7546863659122724, "learning_rate": 1.5530074352021453e-05, "loss": 0.6155, "step": 13485 }, { "epoch": 1.0023039762170196, "grad_norm": 1.8024742743495374, "learning_rate": 1.552940581999531e-05, "loss": 0.5536, "step": 13486 }, { "epoch": 1.0023782980304718, "grad_norm": 1.6728715388135427, "learning_rate": 1.5528737252371143e-05, "loss": 0.5396, "step": 13487 }, { "epoch": 1.002452619843924, "grad_norm": 1.7049736030198526, "learning_rate": 1.552806864915325e-05, "loss": 0.6155, "step": 13488 }, { "epoch": 1.0025269416573765, "grad_norm": 1.7165949098372868, "learning_rate": 1.552740001034594e-05, "loss": 0.6102, "step": 13489 }, { "epoch": 1.0026012634708288, "grad_norm": 1.981482718743362, "learning_rate": 1.5526731335953514e-05, "loss": 0.6527, "step": 13490 }, { "epoch": 1.002675585284281, "grad_norm": 1.6749302722375372, "learning_rate": 1.5526062625980282e-05, "loss": 0.5176, "step": 13491 }, { "epoch": 1.0027499070977333, "grad_norm": 2.161926777076083, "learning_rate": 1.552539388043054e-05, "loss": 0.5974, "step": 13492 }, { "epoch": 1.0028242289111855, "grad_norm": 1.6822380893318205, "learning_rate": 1.55247250993086e-05, "loss": 0.5415, "step": 13493 }, { "epoch": 1.0028985507246377, "grad_norm": 1.681946364928882, "learning_rate": 1.5524056282618773e-05, "loss": 0.5463, "step": 13494 }, { "epoch": 1.00297287253809, "grad_norm": 1.990862466957363, "learning_rate": 1.5523387430365347e-05, "loss": 0.6217, "step": 13495 }, { "epoch": 1.0030471943515422, "grad_norm": 1.5965725617913222, "learning_rate": 1.552271854255265e-05, "loss": 0.487, "step": 13496 }, { "epoch": 1.0031215161649945, "grad_norm": 1.9026649650221488, "learning_rate": 1.5522049619184973e-05, "loss": 0.7098, "step": 13497 }, { "epoch": 1.0031958379784467, "grad_norm": 1.980303412411412, "learning_rate": 1.5521380660266626e-05, "loss": 0.6742, "step": 13498 }, { "epoch": 1.003270159791899, "grad_norm": 1.8413554213358927, "learning_rate": 1.552071166580192e-05, "loss": 0.6475, "step": 13499 }, { "epoch": 1.0033444816053512, "grad_norm": 2.146998877796512, "learning_rate": 1.5520042635795153e-05, "loss": 0.7507, "step": 13500 }, { "epoch": 1.0034188034188034, "grad_norm": 1.723273416264872, "learning_rate": 1.5519373570250643e-05, "loss": 0.717, "step": 13501 }, { "epoch": 1.0034931252322556, "grad_norm": 1.9149598521122153, "learning_rate": 1.5518704469172693e-05, "loss": 0.6393, "step": 13502 }, { "epoch": 1.0035674470457079, "grad_norm": 2.045441385326121, "learning_rate": 1.5518035332565605e-05, "loss": 0.6523, "step": 13503 }, { "epoch": 1.0036417688591601, "grad_norm": 1.732622098999081, "learning_rate": 1.5517366160433694e-05, "loss": 0.577, "step": 13504 }, { "epoch": 1.0037160906726124, "grad_norm": 1.9555759943480266, "learning_rate": 1.5516696952781266e-05, "loss": 0.6555, "step": 13505 }, { "epoch": 1.0037904124860646, "grad_norm": 1.8405125705477665, "learning_rate": 1.551602770961263e-05, "loss": 0.5663, "step": 13506 }, { "epoch": 1.0038647342995168, "grad_norm": 1.7178393317352023, "learning_rate": 1.5515358430932094e-05, "loss": 0.5496, "step": 13507 }, { "epoch": 1.003939056112969, "grad_norm": 2.34465712560577, "learning_rate": 1.5514689116743968e-05, "loss": 0.4965, "step": 13508 }, { "epoch": 1.0040133779264213, "grad_norm": 2.84900326153868, "learning_rate": 1.551401976705256e-05, "loss": 0.6448, "step": 13509 }, { "epoch": 1.0040876997398736, "grad_norm": 1.475730879802848, "learning_rate": 1.5513350381862174e-05, "loss": 0.4254, "step": 13510 }, { "epoch": 1.0041620215533258, "grad_norm": 2.4194413872271867, "learning_rate": 1.551268096117713e-05, "loss": 0.8161, "step": 13511 }, { "epoch": 1.0042363433667783, "grad_norm": 2.111293211546425, "learning_rate": 1.5512011505001728e-05, "loss": 0.763, "step": 13512 }, { "epoch": 1.0043106651802305, "grad_norm": 1.7491171675798265, "learning_rate": 1.551134201334028e-05, "loss": 0.6614, "step": 13513 }, { "epoch": 1.0043849869936827, "grad_norm": 1.8003930442684362, "learning_rate": 1.5510672486197102e-05, "loss": 0.7114, "step": 13514 }, { "epoch": 1.004459308807135, "grad_norm": 2.054687154177034, "learning_rate": 1.55100029235765e-05, "loss": 0.6266, "step": 13515 }, { "epoch": 1.0045336306205872, "grad_norm": 1.8603737503695854, "learning_rate": 1.5509333325482785e-05, "loss": 0.6738, "step": 13516 }, { "epoch": 1.0046079524340394, "grad_norm": 2.187098475134415, "learning_rate": 1.5508663691920267e-05, "loss": 0.6305, "step": 13517 }, { "epoch": 1.0046822742474917, "grad_norm": 1.792829162893508, "learning_rate": 1.550799402289326e-05, "loss": 0.7025, "step": 13518 }, { "epoch": 1.004756596060944, "grad_norm": 2.151890763638955, "learning_rate": 1.550732431840607e-05, "loss": 0.6764, "step": 13519 }, { "epoch": 1.0048309178743962, "grad_norm": 1.8582012534162742, "learning_rate": 1.5506654578463014e-05, "loss": 0.7504, "step": 13520 }, { "epoch": 1.0049052396878484, "grad_norm": 1.5851913276187588, "learning_rate": 1.55059848030684e-05, "loss": 0.6082, "step": 13521 }, { "epoch": 1.0049795615013006, "grad_norm": 2.0254262367918128, "learning_rate": 1.5505314992226545e-05, "loss": 0.6483, "step": 13522 }, { "epoch": 1.0050538833147529, "grad_norm": 1.6573765499359188, "learning_rate": 1.5504645145941754e-05, "loss": 0.604, "step": 13523 }, { "epoch": 1.005128205128205, "grad_norm": 1.6473961790714244, "learning_rate": 1.5503975264218346e-05, "loss": 0.5907, "step": 13524 }, { "epoch": 1.0052025269416573, "grad_norm": 1.770221491715595, "learning_rate": 1.550330534706063e-05, "loss": 0.6237, "step": 13525 }, { "epoch": 1.0052768487551096, "grad_norm": 2.6740946697457737, "learning_rate": 1.5502635394472915e-05, "loss": 0.7727, "step": 13526 }, { "epoch": 1.0053511705685618, "grad_norm": 1.6594043329327812, "learning_rate": 1.5501965406459523e-05, "loss": 0.5602, "step": 13527 }, { "epoch": 1.005425492382014, "grad_norm": 2.432083507475906, "learning_rate": 1.5501295383024765e-05, "loss": 0.7813, "step": 13528 }, { "epoch": 1.0054998141954663, "grad_norm": 2.1179876986008686, "learning_rate": 1.550062532417295e-05, "loss": 0.6664, "step": 13529 }, { "epoch": 1.0055741360089185, "grad_norm": 2.2392625387838017, "learning_rate": 1.5499955229908395e-05, "loss": 0.6845, "step": 13530 }, { "epoch": 1.0056484578223708, "grad_norm": 2.0486422088737966, "learning_rate": 1.5499285100235414e-05, "loss": 0.6693, "step": 13531 }, { "epoch": 1.005722779635823, "grad_norm": 2.0004039537481817, "learning_rate": 1.5498614935158322e-05, "loss": 0.7099, "step": 13532 }, { "epoch": 1.0057971014492753, "grad_norm": 1.745458017583282, "learning_rate": 1.5497944734681433e-05, "loss": 0.4891, "step": 13533 }, { "epoch": 1.0058714232627277, "grad_norm": 2.184634969617937, "learning_rate": 1.549727449880906e-05, "loss": 0.5955, "step": 13534 }, { "epoch": 1.00594574507618, "grad_norm": 2.6138919039133315, "learning_rate": 1.549660422754552e-05, "loss": 0.6624, "step": 13535 }, { "epoch": 1.0060200668896322, "grad_norm": 1.5797530824222799, "learning_rate": 1.5495933920895124e-05, "loss": 0.4543, "step": 13536 }, { "epoch": 1.0060943887030844, "grad_norm": 1.9182822471065226, "learning_rate": 1.549526357886219e-05, "loss": 0.6198, "step": 13537 }, { "epoch": 1.0061687105165367, "grad_norm": 2.352120777005091, "learning_rate": 1.5494593201451035e-05, "loss": 0.7446, "step": 13538 }, { "epoch": 1.006243032329989, "grad_norm": 10.94623845335688, "learning_rate": 1.5493922788665975e-05, "loss": 0.675, "step": 13539 }, { "epoch": 1.0063173541434411, "grad_norm": 3.4813386839335525, "learning_rate": 1.5493252340511326e-05, "loss": 0.7035, "step": 13540 }, { "epoch": 1.0063916759568934, "grad_norm": 2.0154114766346543, "learning_rate": 1.5492581856991405e-05, "loss": 0.7003, "step": 13541 }, { "epoch": 1.0064659977703456, "grad_norm": 2.405859713308024, "learning_rate": 1.5491911338110524e-05, "loss": 0.6546, "step": 13542 }, { "epoch": 1.0065403195837979, "grad_norm": 1.8148121218362347, "learning_rate": 1.5491240783873e-05, "loss": 0.5909, "step": 13543 }, { "epoch": 1.00661464139725, "grad_norm": 1.8997811838152459, "learning_rate": 1.549057019428316e-05, "loss": 0.5999, "step": 13544 }, { "epoch": 1.0066889632107023, "grad_norm": 2.023048673833878, "learning_rate": 1.548989956934531e-05, "loss": 0.7898, "step": 13545 }, { "epoch": 1.0067632850241546, "grad_norm": 1.8082677529793547, "learning_rate": 1.5489228909063773e-05, "loss": 0.5691, "step": 13546 }, { "epoch": 1.0068376068376068, "grad_norm": 1.7927341419440583, "learning_rate": 1.5488558213442865e-05, "loss": 0.5697, "step": 13547 }, { "epoch": 1.006911928651059, "grad_norm": 1.802261317675809, "learning_rate": 1.5487887482486903e-05, "loss": 0.6608, "step": 13548 }, { "epoch": 1.0069862504645113, "grad_norm": 1.7881831716472796, "learning_rate": 1.5487216716200204e-05, "loss": 0.4417, "step": 13549 }, { "epoch": 1.0070605722779635, "grad_norm": 1.6863503938296904, "learning_rate": 1.5486545914587092e-05, "loss": 0.4876, "step": 13550 }, { "epoch": 1.0071348940914158, "grad_norm": 1.6746327338469464, "learning_rate": 1.5485875077651883e-05, "loss": 0.5786, "step": 13551 }, { "epoch": 1.007209215904868, "grad_norm": 1.9250731882503855, "learning_rate": 1.5485204205398893e-05, "loss": 0.5223, "step": 13552 }, { "epoch": 1.0072835377183202, "grad_norm": 1.7306354433368938, "learning_rate": 1.5484533297832438e-05, "loss": 0.6392, "step": 13553 }, { "epoch": 1.0073578595317725, "grad_norm": 2.1192086386603974, "learning_rate": 1.5483862354956846e-05, "loss": 0.7459, "step": 13554 }, { "epoch": 1.0074321813452247, "grad_norm": 1.8221487334110513, "learning_rate": 1.5483191376776433e-05, "loss": 0.7126, "step": 13555 }, { "epoch": 1.0075065031586772, "grad_norm": 2.0378994350050266, "learning_rate": 1.5482520363295522e-05, "loss": 0.6591, "step": 13556 }, { "epoch": 1.0075808249721294, "grad_norm": 2.341458024888913, "learning_rate": 1.5481849314518426e-05, "loss": 0.6594, "step": 13557 }, { "epoch": 1.0076551467855817, "grad_norm": 1.6258203449812543, "learning_rate": 1.548117823044947e-05, "loss": 0.6922, "step": 13558 }, { "epoch": 1.007729468599034, "grad_norm": 1.9338170618651238, "learning_rate": 1.548050711109297e-05, "loss": 0.7382, "step": 13559 }, { "epoch": 1.0078037904124861, "grad_norm": 2.406216616568737, "learning_rate": 1.5479835956453252e-05, "loss": 0.8425, "step": 13560 }, { "epoch": 1.0078781122259384, "grad_norm": 1.7148137966923902, "learning_rate": 1.5479164766534633e-05, "loss": 0.6298, "step": 13561 }, { "epoch": 1.0079524340393906, "grad_norm": 1.9532187999685027, "learning_rate": 1.547849354134144e-05, "loss": 0.6254, "step": 13562 }, { "epoch": 1.0080267558528428, "grad_norm": 1.9658165954224263, "learning_rate": 1.547782228087798e-05, "loss": 0.5456, "step": 13563 }, { "epoch": 1.008101077666295, "grad_norm": 1.8457409155684157, "learning_rate": 1.5477150985148593e-05, "loss": 0.6847, "step": 13564 }, { "epoch": 1.0081753994797473, "grad_norm": 1.7906225395317574, "learning_rate": 1.547647965415759e-05, "loss": 0.6502, "step": 13565 }, { "epoch": 1.0082497212931996, "grad_norm": 2.556046012047266, "learning_rate": 1.5475808287909294e-05, "loss": 0.6236, "step": 13566 }, { "epoch": 1.0083240431066518, "grad_norm": 1.8074971304224037, "learning_rate": 1.547513688640803e-05, "loss": 0.5062, "step": 13567 }, { "epoch": 1.008398364920104, "grad_norm": 1.6237052094078863, "learning_rate": 1.547446544965812e-05, "loss": 0.5581, "step": 13568 }, { "epoch": 1.0084726867335563, "grad_norm": 1.5533174484900663, "learning_rate": 1.547379397766388e-05, "loss": 0.4551, "step": 13569 }, { "epoch": 1.0085470085470085, "grad_norm": 2.3400294235715844, "learning_rate": 1.5473122470429644e-05, "loss": 0.6334, "step": 13570 }, { "epoch": 1.0086213303604608, "grad_norm": 2.2508812695354408, "learning_rate": 1.5472450927959727e-05, "loss": 0.7194, "step": 13571 }, { "epoch": 1.008695652173913, "grad_norm": 1.9376546553554113, "learning_rate": 1.5471779350258453e-05, "loss": 0.6315, "step": 13572 }, { "epoch": 1.0087699739873652, "grad_norm": 1.8640644979126137, "learning_rate": 1.547110773733015e-05, "loss": 0.5423, "step": 13573 }, { "epoch": 1.0088442958008175, "grad_norm": 1.987318232257676, "learning_rate": 1.547043608917914e-05, "loss": 0.6535, "step": 13574 }, { "epoch": 1.0089186176142697, "grad_norm": 2.0018423910511944, "learning_rate": 1.5469764405809745e-05, "loss": 0.7168, "step": 13575 }, { "epoch": 1.008992939427722, "grad_norm": 1.7691069578841705, "learning_rate": 1.546909268722629e-05, "loss": 0.4254, "step": 13576 }, { "epoch": 1.0090672612411742, "grad_norm": 1.677912226538056, "learning_rate": 1.54684209334331e-05, "loss": 0.5487, "step": 13577 }, { "epoch": 1.0091415830546264, "grad_norm": 2.1078146786721423, "learning_rate": 1.54677491444345e-05, "loss": 0.6968, "step": 13578 }, { "epoch": 1.0092159048680789, "grad_norm": 1.9235187595163783, "learning_rate": 1.546707732023482e-05, "loss": 0.6194, "step": 13579 }, { "epoch": 1.0092902266815311, "grad_norm": 1.8977590629268766, "learning_rate": 1.5466405460838373e-05, "loss": 0.4681, "step": 13580 }, { "epoch": 1.0093645484949834, "grad_norm": 1.7584962702710745, "learning_rate": 1.5465733566249494e-05, "loss": 0.6529, "step": 13581 }, { "epoch": 1.0094388703084356, "grad_norm": 1.8355349488519626, "learning_rate": 1.5465061636472505e-05, "loss": 0.5124, "step": 13582 }, { "epoch": 1.0095131921218878, "grad_norm": 1.7602800649273098, "learning_rate": 1.5464389671511732e-05, "loss": 0.6469, "step": 13583 }, { "epoch": 1.00958751393534, "grad_norm": 1.6642514646548139, "learning_rate": 1.5463717671371506e-05, "loss": 0.6669, "step": 13584 }, { "epoch": 1.0096618357487923, "grad_norm": 2.2892576194855385, "learning_rate": 1.5463045636056143e-05, "loss": 0.7771, "step": 13585 }, { "epoch": 1.0097361575622446, "grad_norm": 1.9799925628836441, "learning_rate": 1.546237356556998e-05, "loss": 0.4289, "step": 13586 }, { "epoch": 1.0098104793756968, "grad_norm": 2.1094713373254783, "learning_rate": 1.546170145991734e-05, "loss": 0.6126, "step": 13587 }, { "epoch": 1.009884801189149, "grad_norm": 1.799335850118088, "learning_rate": 1.5461029319102545e-05, "loss": 0.481, "step": 13588 }, { "epoch": 1.0099591230026013, "grad_norm": 1.7325107943330238, "learning_rate": 1.546035714312993e-05, "loss": 0.5561, "step": 13589 }, { "epoch": 1.0100334448160535, "grad_norm": 1.8935466252147484, "learning_rate": 1.5459684932003816e-05, "loss": 0.6243, "step": 13590 }, { "epoch": 1.0101077666295057, "grad_norm": 1.7579326132085504, "learning_rate": 1.5459012685728538e-05, "loss": 0.7114, "step": 13591 }, { "epoch": 1.010182088442958, "grad_norm": 1.9647117653407498, "learning_rate": 1.5458340404308416e-05, "loss": 0.7142, "step": 13592 }, { "epoch": 1.0102564102564102, "grad_norm": 1.9930503990408401, "learning_rate": 1.5457668087747784e-05, "loss": 0.6885, "step": 13593 }, { "epoch": 1.0103307320698625, "grad_norm": 2.1323575711775273, "learning_rate": 1.5456995736050968e-05, "loss": 0.6048, "step": 13594 }, { "epoch": 1.0104050538833147, "grad_norm": 1.8957875105142707, "learning_rate": 1.5456323349222295e-05, "loss": 0.7077, "step": 13595 }, { "epoch": 1.010479375696767, "grad_norm": 1.730411188936241, "learning_rate": 1.54556509272661e-05, "loss": 0.58, "step": 13596 }, { "epoch": 1.0105536975102192, "grad_norm": 2.3118505345593086, "learning_rate": 1.54549784701867e-05, "loss": 0.7018, "step": 13597 }, { "epoch": 1.0106280193236714, "grad_norm": 1.823339618481132, "learning_rate": 1.5454305977988434e-05, "loss": 0.6541, "step": 13598 }, { "epoch": 1.0107023411371236, "grad_norm": 1.9718681801469005, "learning_rate": 1.545363345067563e-05, "loss": 0.6435, "step": 13599 }, { "epoch": 1.0107766629505759, "grad_norm": 2.3649162533328574, "learning_rate": 1.545296088825262e-05, "loss": 0.684, "step": 13600 }, { "epoch": 1.0108509847640283, "grad_norm": 2.0228200806473953, "learning_rate": 1.545228829072373e-05, "loss": 0.5701, "step": 13601 }, { "epoch": 1.0109253065774806, "grad_norm": 2.0681320448919, "learning_rate": 1.5451615658093287e-05, "loss": 0.6747, "step": 13602 }, { "epoch": 1.0109996283909328, "grad_norm": 2.1505706201042694, "learning_rate": 1.5450942990365626e-05, "loss": 0.6768, "step": 13603 }, { "epoch": 1.011073950204385, "grad_norm": 2.1844485333588843, "learning_rate": 1.5450270287545078e-05, "loss": 0.5537, "step": 13604 }, { "epoch": 1.0111482720178373, "grad_norm": 1.949798791973459, "learning_rate": 1.544959754963597e-05, "loss": 0.5667, "step": 13605 }, { "epoch": 1.0112225938312895, "grad_norm": 2.086053861180212, "learning_rate": 1.544892477664264e-05, "loss": 0.6514, "step": 13606 }, { "epoch": 1.0112969156447418, "grad_norm": 1.9083146355220937, "learning_rate": 1.5448251968569414e-05, "loss": 0.6673, "step": 13607 }, { "epoch": 1.011371237458194, "grad_norm": 1.9109977271240042, "learning_rate": 1.5447579125420623e-05, "loss": 0.6416, "step": 13608 }, { "epoch": 1.0114455592716463, "grad_norm": 1.9707294415515886, "learning_rate": 1.5446906247200602e-05, "loss": 0.6312, "step": 13609 }, { "epoch": 1.0115198810850985, "grad_norm": 2.1185275787152693, "learning_rate": 1.544623333391368e-05, "loss": 0.8696, "step": 13610 }, { "epoch": 1.0115942028985507, "grad_norm": 1.5807331654287753, "learning_rate": 1.544556038556419e-05, "loss": 0.6425, "step": 13611 }, { "epoch": 1.011668524712003, "grad_norm": 1.868323730617037, "learning_rate": 1.5444887402156464e-05, "loss": 0.7164, "step": 13612 }, { "epoch": 1.0117428465254552, "grad_norm": 1.8684983444447143, "learning_rate": 1.5444214383694838e-05, "loss": 0.5936, "step": 13613 }, { "epoch": 1.0118171683389074, "grad_norm": 2.0942231921934162, "learning_rate": 1.5443541330183642e-05, "loss": 0.559, "step": 13614 }, { "epoch": 1.0118914901523597, "grad_norm": 1.8333863048811503, "learning_rate": 1.544286824162721e-05, "loss": 0.5261, "step": 13615 }, { "epoch": 1.011965811965812, "grad_norm": 2.0345464286690764, "learning_rate": 1.5442195118029873e-05, "loss": 0.7027, "step": 13616 }, { "epoch": 1.0120401337792642, "grad_norm": 1.7506044755584866, "learning_rate": 1.5441521959395966e-05, "loss": 0.6142, "step": 13617 }, { "epoch": 1.0121144555927164, "grad_norm": 2.359573213052223, "learning_rate": 1.544084876572982e-05, "loss": 0.6426, "step": 13618 }, { "epoch": 1.0121887774061686, "grad_norm": 2.9965320914563502, "learning_rate": 1.544017553703578e-05, "loss": 0.5859, "step": 13619 }, { "epoch": 1.0122630992196209, "grad_norm": 2.018736311967806, "learning_rate": 1.5439502273318164e-05, "loss": 0.8032, "step": 13620 }, { "epoch": 1.0123374210330731, "grad_norm": 1.761059777725982, "learning_rate": 1.543882897458132e-05, "loss": 0.4939, "step": 13621 }, { "epoch": 1.0124117428465254, "grad_norm": 1.9169663528531309, "learning_rate": 1.5438155640829574e-05, "loss": 0.4727, "step": 13622 }, { "epoch": 1.0124860646599778, "grad_norm": 1.8599416984586068, "learning_rate": 1.5437482272067264e-05, "loss": 0.5954, "step": 13623 }, { "epoch": 1.01256038647343, "grad_norm": 2.2192061823713747, "learning_rate": 1.543680886829873e-05, "loss": 0.7252, "step": 13624 }, { "epoch": 1.0126347082868823, "grad_norm": 1.6407778315326147, "learning_rate": 1.54361354295283e-05, "loss": 0.5312, "step": 13625 }, { "epoch": 1.0127090301003345, "grad_norm": 1.8808051834957427, "learning_rate": 1.543546195576031e-05, "loss": 0.6078, "step": 13626 }, { "epoch": 1.0127833519137868, "grad_norm": 1.5714665834018888, "learning_rate": 1.5434788446999097e-05, "loss": 0.5283, "step": 13627 }, { "epoch": 1.012857673727239, "grad_norm": 2.0491194976759006, "learning_rate": 1.5434114903249003e-05, "loss": 0.7066, "step": 13628 }, { "epoch": 1.0129319955406912, "grad_norm": 2.2312067873275185, "learning_rate": 1.5433441324514354e-05, "loss": 0.684, "step": 13629 }, { "epoch": 1.0130063173541435, "grad_norm": 2.5201788550308946, "learning_rate": 1.5432767710799497e-05, "loss": 0.7082, "step": 13630 }, { "epoch": 1.0130806391675957, "grad_norm": 2.2553309861427446, "learning_rate": 1.543209406210876e-05, "loss": 0.6438, "step": 13631 }, { "epoch": 1.013154960981048, "grad_norm": 1.7804375336676943, "learning_rate": 1.5431420378446485e-05, "loss": 0.6379, "step": 13632 }, { "epoch": 1.0132292827945002, "grad_norm": 1.8627417019241281, "learning_rate": 1.5430746659817003e-05, "loss": 0.6207, "step": 13633 }, { "epoch": 1.0133036046079524, "grad_norm": 1.8680881336255872, "learning_rate": 1.543007290622466e-05, "loss": 0.6347, "step": 13634 }, { "epoch": 1.0133779264214047, "grad_norm": 2.0349331191699287, "learning_rate": 1.542939911767379e-05, "loss": 0.7274, "step": 13635 }, { "epoch": 1.013452248234857, "grad_norm": 1.8306613495881447, "learning_rate": 1.542872529416873e-05, "loss": 0.6426, "step": 13636 }, { "epoch": 1.0135265700483091, "grad_norm": 1.8442662229111364, "learning_rate": 1.5428051435713814e-05, "loss": 0.6618, "step": 13637 }, { "epoch": 1.0136008918617614, "grad_norm": 1.994415785241361, "learning_rate": 1.5427377542313388e-05, "loss": 0.7642, "step": 13638 }, { "epoch": 1.0136752136752136, "grad_norm": 1.4491990638241643, "learning_rate": 1.542670361397179e-05, "loss": 0.4917, "step": 13639 }, { "epoch": 1.0137495354886659, "grad_norm": 1.8823520579474429, "learning_rate": 1.542602965069335e-05, "loss": 0.6578, "step": 13640 }, { "epoch": 1.013823857302118, "grad_norm": 2.089475627521809, "learning_rate": 1.5425355652482415e-05, "loss": 0.7521, "step": 13641 }, { "epoch": 1.0138981791155703, "grad_norm": 1.7111463385879258, "learning_rate": 1.542468161934332e-05, "loss": 0.5119, "step": 13642 }, { "epoch": 1.0139725009290226, "grad_norm": 1.8489324467718018, "learning_rate": 1.542400755128041e-05, "loss": 0.6694, "step": 13643 }, { "epoch": 1.0140468227424748, "grad_norm": 1.8178933672067208, "learning_rate": 1.5423333448298017e-05, "loss": 0.6064, "step": 13644 }, { "epoch": 1.014121144555927, "grad_norm": 1.4715716042760816, "learning_rate": 1.5422659310400487e-05, "loss": 0.441, "step": 13645 }, { "epoch": 1.0141954663693795, "grad_norm": 1.9234718661808927, "learning_rate": 1.5421985137592155e-05, "loss": 0.6639, "step": 13646 }, { "epoch": 1.0142697881828318, "grad_norm": 2.610330591209113, "learning_rate": 1.5421310929877365e-05, "loss": 0.758, "step": 13647 }, { "epoch": 1.014344109996284, "grad_norm": 1.9003978146263316, "learning_rate": 1.542063668726046e-05, "loss": 0.5273, "step": 13648 }, { "epoch": 1.0144184318097362, "grad_norm": 1.7131813957953836, "learning_rate": 1.5419962409745775e-05, "loss": 0.5937, "step": 13649 }, { "epoch": 1.0144927536231885, "grad_norm": 2.3296072066645928, "learning_rate": 1.5419288097337652e-05, "loss": 0.6081, "step": 13650 }, { "epoch": 1.0145670754366407, "grad_norm": 2.1118500595786824, "learning_rate": 1.5418613750040435e-05, "loss": 0.5562, "step": 13651 }, { "epoch": 1.014641397250093, "grad_norm": 1.7513340309580387, "learning_rate": 1.5417939367858464e-05, "loss": 0.583, "step": 13652 }, { "epoch": 1.0147157190635452, "grad_norm": 1.7527129012324474, "learning_rate": 1.541726495079608e-05, "loss": 0.5851, "step": 13653 }, { "epoch": 1.0147900408769974, "grad_norm": 1.495101899187469, "learning_rate": 1.5416590498857625e-05, "loss": 0.4627, "step": 13654 }, { "epoch": 1.0148643626904497, "grad_norm": 1.7024276345149547, "learning_rate": 1.5415916012047444e-05, "loss": 0.4772, "step": 13655 }, { "epoch": 1.014938684503902, "grad_norm": 1.7135228314922037, "learning_rate": 1.5415241490369874e-05, "loss": 0.5897, "step": 13656 }, { "epoch": 1.0150130063173541, "grad_norm": 1.8020202271018435, "learning_rate": 1.5414566933829257e-05, "loss": 0.608, "step": 13657 }, { "epoch": 1.0150873281308064, "grad_norm": 1.9333049796566637, "learning_rate": 1.5413892342429945e-05, "loss": 0.5896, "step": 13658 }, { "epoch": 1.0151616499442586, "grad_norm": 2.1352626521312317, "learning_rate": 1.541321771617627e-05, "loss": 0.6305, "step": 13659 }, { "epoch": 1.0152359717577109, "grad_norm": 2.0796608702329578, "learning_rate": 1.5412543055072584e-05, "loss": 0.6891, "step": 13660 }, { "epoch": 1.015310293571163, "grad_norm": 1.5537260539797928, "learning_rate": 1.5411868359123223e-05, "loss": 0.5345, "step": 13661 }, { "epoch": 1.0153846153846153, "grad_norm": 1.9531290686335019, "learning_rate": 1.541119362833254e-05, "loss": 0.8044, "step": 13662 }, { "epoch": 1.0154589371980676, "grad_norm": 1.7898827893024742, "learning_rate": 1.541051886270487e-05, "loss": 0.6069, "step": 13663 }, { "epoch": 1.0155332590115198, "grad_norm": 2.1289118597665326, "learning_rate": 1.5409844062244558e-05, "loss": 0.6717, "step": 13664 }, { "epoch": 1.015607580824972, "grad_norm": 2.050309440939977, "learning_rate": 1.5409169226955953e-05, "loss": 0.6463, "step": 13665 }, { "epoch": 1.0156819026384243, "grad_norm": 1.852536389034572, "learning_rate": 1.5408494356843397e-05, "loss": 0.6142, "step": 13666 }, { "epoch": 1.0157562244518765, "grad_norm": 1.849748849698626, "learning_rate": 1.5407819451911234e-05, "loss": 0.5555, "step": 13667 }, { "epoch": 1.015830546265329, "grad_norm": 2.2552144350477517, "learning_rate": 1.5407144512163812e-05, "loss": 0.7979, "step": 13668 }, { "epoch": 1.0159048680787812, "grad_norm": 1.9136163776342856, "learning_rate": 1.5406469537605473e-05, "loss": 0.6026, "step": 13669 }, { "epoch": 1.0159791898922335, "grad_norm": 2.331326758467209, "learning_rate": 1.5405794528240562e-05, "loss": 0.7974, "step": 13670 }, { "epoch": 1.0160535117056857, "grad_norm": 1.7531723248178817, "learning_rate": 1.540511948407343e-05, "loss": 0.5456, "step": 13671 }, { "epoch": 1.016127833519138, "grad_norm": 2.0658069334256544, "learning_rate": 1.5404444405108416e-05, "loss": 0.5601, "step": 13672 }, { "epoch": 1.0162021553325902, "grad_norm": 1.7592620930932827, "learning_rate": 1.5403769291349872e-05, "loss": 0.6404, "step": 13673 }, { "epoch": 1.0162764771460424, "grad_norm": 2.16846313327839, "learning_rate": 1.540309414280214e-05, "loss": 0.6812, "step": 13674 }, { "epoch": 1.0163507989594947, "grad_norm": 1.8510861011370605, "learning_rate": 1.540241895946957e-05, "loss": 0.6839, "step": 13675 }, { "epoch": 1.0164251207729469, "grad_norm": 1.670152621857349, "learning_rate": 1.5401743741356506e-05, "loss": 0.5566, "step": 13676 }, { "epoch": 1.0164994425863991, "grad_norm": 2.048134433561356, "learning_rate": 1.5401068488467292e-05, "loss": 0.6812, "step": 13677 }, { "epoch": 1.0165737643998514, "grad_norm": 2.1627810653399795, "learning_rate": 1.5400393200806284e-05, "loss": 0.5974, "step": 13678 }, { "epoch": 1.0166480862133036, "grad_norm": 2.0094343109388473, "learning_rate": 1.5399717878377826e-05, "loss": 0.5716, "step": 13679 }, { "epoch": 1.0167224080267558, "grad_norm": 2.0429220072711876, "learning_rate": 1.539904252118626e-05, "loss": 0.6457, "step": 13680 }, { "epoch": 1.016796729840208, "grad_norm": 2.119772238526372, "learning_rate": 1.539836712923594e-05, "loss": 0.5572, "step": 13681 }, { "epoch": 1.0168710516536603, "grad_norm": 1.9543314745753084, "learning_rate": 1.5397691702531216e-05, "loss": 0.6581, "step": 13682 }, { "epoch": 1.0169453734671126, "grad_norm": 1.9784592080139636, "learning_rate": 1.539701624107643e-05, "loss": 0.72, "step": 13683 }, { "epoch": 1.0170196952805648, "grad_norm": 1.7164036049919393, "learning_rate": 1.5396340744875932e-05, "loss": 0.6873, "step": 13684 }, { "epoch": 1.017094017094017, "grad_norm": 3.762780350802008, "learning_rate": 1.5395665213934075e-05, "loss": 0.8207, "step": 13685 }, { "epoch": 1.0171683389074693, "grad_norm": 2.0658813701735155, "learning_rate": 1.5394989648255206e-05, "loss": 0.7666, "step": 13686 }, { "epoch": 1.0172426607209215, "grad_norm": 2.146347348853145, "learning_rate": 1.539431404784367e-05, "loss": 0.6417, "step": 13687 }, { "epoch": 1.0173169825343737, "grad_norm": 1.7729966553678262, "learning_rate": 1.539363841270382e-05, "loss": 0.5978, "step": 13688 }, { "epoch": 1.017391304347826, "grad_norm": 1.6689370264830852, "learning_rate": 1.5392962742840006e-05, "loss": 0.7057, "step": 13689 }, { "epoch": 1.0174656261612784, "grad_norm": 1.6361494840831148, "learning_rate": 1.539228703825658e-05, "loss": 0.4451, "step": 13690 }, { "epoch": 1.0175399479747307, "grad_norm": 1.773960781508908, "learning_rate": 1.539161129895789e-05, "loss": 0.6799, "step": 13691 }, { "epoch": 1.017614269788183, "grad_norm": 1.8067045824621628, "learning_rate": 1.5390935524948284e-05, "loss": 0.5602, "step": 13692 }, { "epoch": 1.0176885916016352, "grad_norm": 1.950661012637033, "learning_rate": 1.539025971623212e-05, "loss": 0.7565, "step": 13693 }, { "epoch": 1.0177629134150874, "grad_norm": 1.7344555795793757, "learning_rate": 1.538958387281374e-05, "loss": 0.4725, "step": 13694 }, { "epoch": 1.0178372352285396, "grad_norm": 1.4821367456710788, "learning_rate": 1.5388907994697495e-05, "loss": 0.5147, "step": 13695 }, { "epoch": 1.0179115570419919, "grad_norm": 2.043452348353032, "learning_rate": 1.5388232081887744e-05, "loss": 0.6471, "step": 13696 }, { "epoch": 1.0179858788554441, "grad_norm": 1.9756053318240347, "learning_rate": 1.5387556134388836e-05, "loss": 0.5099, "step": 13697 }, { "epoch": 1.0180602006688964, "grad_norm": 1.807739352301103, "learning_rate": 1.5386880152205118e-05, "loss": 0.6156, "step": 13698 }, { "epoch": 1.0181345224823486, "grad_norm": 1.7695529733274755, "learning_rate": 1.5386204135340946e-05, "loss": 0.5735, "step": 13699 }, { "epoch": 1.0182088442958008, "grad_norm": 1.988756995764686, "learning_rate": 1.5385528083800673e-05, "loss": 0.6526, "step": 13700 }, { "epoch": 1.018283166109253, "grad_norm": 2.249974975816083, "learning_rate": 1.5384851997588644e-05, "loss": 0.6754, "step": 13701 }, { "epoch": 1.0183574879227053, "grad_norm": 3.104238245756115, "learning_rate": 1.5384175876709223e-05, "loss": 0.7031, "step": 13702 }, { "epoch": 1.0184318097361575, "grad_norm": 1.7460440427218034, "learning_rate": 1.5383499721166755e-05, "loss": 0.5645, "step": 13703 }, { "epoch": 1.0185061315496098, "grad_norm": 1.8156141187166137, "learning_rate": 1.5382823530965595e-05, "loss": 0.6063, "step": 13704 }, { "epoch": 1.018580453363062, "grad_norm": 2.266736924576902, "learning_rate": 1.5382147306110096e-05, "loss": 0.446, "step": 13705 }, { "epoch": 1.0186547751765143, "grad_norm": 1.8788328868159887, "learning_rate": 1.5381471046604612e-05, "loss": 0.6214, "step": 13706 }, { "epoch": 1.0187290969899665, "grad_norm": 1.864842829450185, "learning_rate": 1.5380794752453496e-05, "loss": 0.5772, "step": 13707 }, { "epoch": 1.0188034188034187, "grad_norm": 1.8499210885447384, "learning_rate": 1.5380118423661104e-05, "loss": 0.6692, "step": 13708 }, { "epoch": 1.018877740616871, "grad_norm": 2.1636586204542296, "learning_rate": 1.537944206023179e-05, "loss": 0.8022, "step": 13709 }, { "epoch": 1.0189520624303232, "grad_norm": 2.249044622519709, "learning_rate": 1.5378765662169904e-05, "loss": 0.8087, "step": 13710 }, { "epoch": 1.0190263842437755, "grad_norm": 1.8948232985931917, "learning_rate": 1.5378089229479807e-05, "loss": 0.7053, "step": 13711 }, { "epoch": 1.0191007060572277, "grad_norm": 1.4740891531525864, "learning_rate": 1.537741276216585e-05, "loss": 0.4244, "step": 13712 }, { "epoch": 1.0191750278706802, "grad_norm": 1.8784294048347687, "learning_rate": 1.5376736260232387e-05, "loss": 0.5924, "step": 13713 }, { "epoch": 1.0192493496841324, "grad_norm": 1.7703748305260074, "learning_rate": 1.5376059723683776e-05, "loss": 0.7519, "step": 13714 }, { "epoch": 1.0193236714975846, "grad_norm": 1.735966736893736, "learning_rate": 1.5375383152524368e-05, "loss": 0.5608, "step": 13715 }, { "epoch": 1.0193979933110369, "grad_norm": 1.7987650227906402, "learning_rate": 1.5374706546758528e-05, "loss": 0.6351, "step": 13716 }, { "epoch": 1.019472315124489, "grad_norm": 1.9783363147550084, "learning_rate": 1.5374029906390605e-05, "loss": 0.8531, "step": 13717 }, { "epoch": 1.0195466369379413, "grad_norm": 2.952720998568027, "learning_rate": 1.537335323142495e-05, "loss": 0.7293, "step": 13718 }, { "epoch": 1.0196209587513936, "grad_norm": 1.8847986130518986, "learning_rate": 1.5372676521865934e-05, "loss": 0.6731, "step": 13719 }, { "epoch": 1.0196952805648458, "grad_norm": 2.045280442415055, "learning_rate": 1.5371999777717902e-05, "loss": 0.7277, "step": 13720 }, { "epoch": 1.019769602378298, "grad_norm": 1.7986606555714515, "learning_rate": 1.5371322998985213e-05, "loss": 0.5504, "step": 13721 }, { "epoch": 1.0198439241917503, "grad_norm": 2.13667764372431, "learning_rate": 1.5370646185672225e-05, "loss": 0.7523, "step": 13722 }, { "epoch": 1.0199182460052025, "grad_norm": 1.730482581491395, "learning_rate": 1.5369969337783297e-05, "loss": 0.6622, "step": 13723 }, { "epoch": 1.0199925678186548, "grad_norm": 1.8602053839403432, "learning_rate": 1.5369292455322785e-05, "loss": 0.6292, "step": 13724 }, { "epoch": 1.020066889632107, "grad_norm": 2.1212735129009834, "learning_rate": 1.5368615538295047e-05, "loss": 0.6153, "step": 13725 }, { "epoch": 1.0201412114455592, "grad_norm": 2.011988337179976, "learning_rate": 1.5367938586704435e-05, "loss": 0.5295, "step": 13726 }, { "epoch": 1.0202155332590115, "grad_norm": 5.817183635838668, "learning_rate": 1.5367261600555322e-05, "loss": 0.5556, "step": 13727 }, { "epoch": 1.0202898550724637, "grad_norm": 1.9368084431978065, "learning_rate": 1.5366584579852053e-05, "loss": 0.6946, "step": 13728 }, { "epoch": 1.020364176885916, "grad_norm": 2.1512200681446534, "learning_rate": 1.5365907524598987e-05, "loss": 0.6637, "step": 13729 }, { "epoch": 1.0204384986993682, "grad_norm": 2.0776113673951944, "learning_rate": 1.5365230434800493e-05, "loss": 0.7377, "step": 13730 }, { "epoch": 1.0205128205128204, "grad_norm": 2.0623315691222683, "learning_rate": 1.5364553310460923e-05, "loss": 0.7493, "step": 13731 }, { "epoch": 1.0205871423262727, "grad_norm": 1.5797051585319237, "learning_rate": 1.5363876151584638e-05, "loss": 0.6931, "step": 13732 }, { "epoch": 1.020661464139725, "grad_norm": 1.754828798906444, "learning_rate": 1.5363198958175993e-05, "loss": 0.65, "step": 13733 }, { "epoch": 1.0207357859531772, "grad_norm": 2.3216100405468314, "learning_rate": 1.5362521730239353e-05, "loss": 0.5592, "step": 13734 }, { "epoch": 1.0208101077666294, "grad_norm": 1.4945156744479409, "learning_rate": 1.5361844467779078e-05, "loss": 0.4449, "step": 13735 }, { "epoch": 1.0208844295800819, "grad_norm": 2.0712799840714062, "learning_rate": 1.5361167170799523e-05, "loss": 0.4789, "step": 13736 }, { "epoch": 1.020958751393534, "grad_norm": 1.898115275745634, "learning_rate": 1.5360489839305052e-05, "loss": 0.5222, "step": 13737 }, { "epoch": 1.0210330732069863, "grad_norm": 1.8484041732216827, "learning_rate": 1.535981247330003e-05, "loss": 0.6017, "step": 13738 }, { "epoch": 1.0211073950204386, "grad_norm": 1.9517597865843852, "learning_rate": 1.535913507278881e-05, "loss": 0.6131, "step": 13739 }, { "epoch": 1.0211817168338908, "grad_norm": 1.789333888038843, "learning_rate": 1.5358457637775756e-05, "loss": 0.586, "step": 13740 }, { "epoch": 1.021256038647343, "grad_norm": 1.6972079084162217, "learning_rate": 1.5357780168265232e-05, "loss": 0.6536, "step": 13741 }, { "epoch": 1.0213303604607953, "grad_norm": 2.224016200598676, "learning_rate": 1.5357102664261595e-05, "loss": 0.7121, "step": 13742 }, { "epoch": 1.0214046822742475, "grad_norm": 2.1665578716447342, "learning_rate": 1.5356425125769213e-05, "loss": 0.5563, "step": 13743 }, { "epoch": 1.0214790040876998, "grad_norm": 2.0927777143920157, "learning_rate": 1.5355747552792442e-05, "loss": 0.7323, "step": 13744 }, { "epoch": 1.021553325901152, "grad_norm": 1.789504640849713, "learning_rate": 1.5355069945335644e-05, "loss": 0.6105, "step": 13745 }, { "epoch": 1.0216276477146042, "grad_norm": 2.162221746733002, "learning_rate": 1.535439230340318e-05, "loss": 0.7282, "step": 13746 }, { "epoch": 1.0217019695280565, "grad_norm": 1.8498618075846884, "learning_rate": 1.5353714626999426e-05, "loss": 0.5217, "step": 13747 }, { "epoch": 1.0217762913415087, "grad_norm": 1.3649030736209073, "learning_rate": 1.5353036916128727e-05, "loss": 0.4466, "step": 13748 }, { "epoch": 1.021850613154961, "grad_norm": 2.078323481225547, "learning_rate": 1.535235917079546e-05, "loss": 0.6787, "step": 13749 }, { "epoch": 1.0219249349684132, "grad_norm": 2.3171441897849308, "learning_rate": 1.535168139100398e-05, "loss": 0.672, "step": 13750 }, { "epoch": 1.0219992567818654, "grad_norm": 1.9547524891741246, "learning_rate": 1.535100357675865e-05, "loss": 0.7447, "step": 13751 }, { "epoch": 1.0220735785953177, "grad_norm": 3.573433990196488, "learning_rate": 1.535032572806384e-05, "loss": 0.671, "step": 13752 }, { "epoch": 1.02214790040877, "grad_norm": 1.777494537364843, "learning_rate": 1.534964784492391e-05, "loss": 0.6145, "step": 13753 }, { "epoch": 1.0222222222222221, "grad_norm": 1.9092581762497258, "learning_rate": 1.5348969927343228e-05, "loss": 0.6813, "step": 13754 }, { "epoch": 1.0222965440356744, "grad_norm": 1.8788632957072071, "learning_rate": 1.534829197532615e-05, "loss": 0.5709, "step": 13755 }, { "epoch": 1.0223708658491266, "grad_norm": 2.1069579602949986, "learning_rate": 1.5347613988877045e-05, "loss": 0.6895, "step": 13756 }, { "epoch": 1.0224451876625789, "grad_norm": 1.972943649545696, "learning_rate": 1.534693596800028e-05, "loss": 0.5794, "step": 13757 }, { "epoch": 1.0225195094760313, "grad_norm": 2.506682866475086, "learning_rate": 1.5346257912700218e-05, "loss": 0.6149, "step": 13758 }, { "epoch": 1.0225938312894836, "grad_norm": 1.2653459568505434, "learning_rate": 1.534557982298123e-05, "loss": 0.2728, "step": 13759 }, { "epoch": 1.0226681531029358, "grad_norm": 2.0465235501733856, "learning_rate": 1.5344901698847673e-05, "loss": 0.7574, "step": 13760 }, { "epoch": 1.022742474916388, "grad_norm": 1.7468788888085462, "learning_rate": 1.5344223540303917e-05, "loss": 0.5647, "step": 13761 }, { "epoch": 1.0228167967298403, "grad_norm": 1.775335364845769, "learning_rate": 1.5343545347354327e-05, "loss": 0.5337, "step": 13762 }, { "epoch": 1.0228911185432925, "grad_norm": 1.8306945873874492, "learning_rate": 1.5342867120003266e-05, "loss": 0.5968, "step": 13763 }, { "epoch": 1.0229654403567447, "grad_norm": 1.9569601639409444, "learning_rate": 1.5342188858255108e-05, "loss": 0.8836, "step": 13764 }, { "epoch": 1.023039762170197, "grad_norm": 2.046555645593922, "learning_rate": 1.5341510562114214e-05, "loss": 0.612, "step": 13765 }, { "epoch": 1.0231140839836492, "grad_norm": 2.4906694954889237, "learning_rate": 1.5340832231584952e-05, "loss": 0.759, "step": 13766 }, { "epoch": 1.0231884057971015, "grad_norm": 2.029869647277516, "learning_rate": 1.5340153866671688e-05, "loss": 0.5829, "step": 13767 }, { "epoch": 1.0232627276105537, "grad_norm": 2.0811368569257276, "learning_rate": 1.533947546737879e-05, "loss": 0.8334, "step": 13768 }, { "epoch": 1.023337049424006, "grad_norm": 1.8784845153886203, "learning_rate": 1.5338797033710625e-05, "loss": 0.6459, "step": 13769 }, { "epoch": 1.0234113712374582, "grad_norm": 2.3084242864549664, "learning_rate": 1.5338118565671568e-05, "loss": 0.6977, "step": 13770 }, { "epoch": 1.0234856930509104, "grad_norm": 2.1336926770199676, "learning_rate": 1.5337440063265974e-05, "loss": 0.6501, "step": 13771 }, { "epoch": 1.0235600148643627, "grad_norm": 1.6593509776841118, "learning_rate": 1.5336761526498223e-05, "loss": 0.6507, "step": 13772 }, { "epoch": 1.023634336677815, "grad_norm": 1.6506613003404857, "learning_rate": 1.5336082955372674e-05, "loss": 0.5608, "step": 13773 }, { "epoch": 1.0237086584912671, "grad_norm": 1.8153282764763627, "learning_rate": 1.5335404349893705e-05, "loss": 0.56, "step": 13774 }, { "epoch": 1.0237829803047194, "grad_norm": 2.0190072981114273, "learning_rate": 1.5334725710065676e-05, "loss": 0.7022, "step": 13775 }, { "epoch": 1.0238573021181716, "grad_norm": 1.742945192351165, "learning_rate": 1.533404703589296e-05, "loss": 0.4816, "step": 13776 }, { "epoch": 1.0239316239316238, "grad_norm": 1.974464012510336, "learning_rate": 1.5333368327379925e-05, "loss": 0.6554, "step": 13777 }, { "epoch": 1.024005945745076, "grad_norm": 1.4803255152695718, "learning_rate": 1.533268958453094e-05, "loss": 0.5734, "step": 13778 }, { "epoch": 1.0240802675585283, "grad_norm": 2.2192825799978575, "learning_rate": 1.533201080735038e-05, "loss": 0.6714, "step": 13779 }, { "epoch": 1.0241545893719808, "grad_norm": 1.8389638522537017, "learning_rate": 1.5331331995842607e-05, "loss": 0.6023, "step": 13780 }, { "epoch": 1.024228911185433, "grad_norm": 2.086603649703598, "learning_rate": 1.5330653150012002e-05, "loss": 0.6537, "step": 13781 }, { "epoch": 1.0243032329988853, "grad_norm": 1.875615332959291, "learning_rate": 1.5329974269862924e-05, "loss": 0.6814, "step": 13782 }, { "epoch": 1.0243775548123375, "grad_norm": 1.527201173365567, "learning_rate": 1.532929535539975e-05, "loss": 0.4722, "step": 13783 }, { "epoch": 1.0244518766257897, "grad_norm": 1.7754915986904056, "learning_rate": 1.5328616406626845e-05, "loss": 0.5773, "step": 13784 }, { "epoch": 1.024526198439242, "grad_norm": 1.9591484691384804, "learning_rate": 1.5327937423548587e-05, "loss": 0.6263, "step": 13785 }, { "epoch": 1.0246005202526942, "grad_norm": 2.224822544613138, "learning_rate": 1.5327258406169344e-05, "loss": 0.6664, "step": 13786 }, { "epoch": 1.0246748420661465, "grad_norm": 1.7422604532909909, "learning_rate": 1.532657935449349e-05, "loss": 0.6335, "step": 13787 }, { "epoch": 1.0247491638795987, "grad_norm": 1.7907934835953587, "learning_rate": 1.532590026852539e-05, "loss": 0.6318, "step": 13788 }, { "epoch": 1.024823485693051, "grad_norm": 1.435172737688781, "learning_rate": 1.5325221148269425e-05, "loss": 0.5036, "step": 13789 }, { "epoch": 1.0248978075065032, "grad_norm": 2.335653385298164, "learning_rate": 1.5324541993729957e-05, "loss": 0.5865, "step": 13790 }, { "epoch": 1.0249721293199554, "grad_norm": 2.5376340190716693, "learning_rate": 1.5323862804911368e-05, "loss": 0.7532, "step": 13791 }, { "epoch": 1.0250464511334076, "grad_norm": 1.7108649800161688, "learning_rate": 1.5323183581818027e-05, "loss": 0.5012, "step": 13792 }, { "epoch": 1.0251207729468599, "grad_norm": 2.2099393072624043, "learning_rate": 1.5322504324454306e-05, "loss": 0.5945, "step": 13793 }, { "epoch": 1.0251950947603121, "grad_norm": 1.4216151788930678, "learning_rate": 1.5321825032824578e-05, "loss": 0.4479, "step": 13794 }, { "epoch": 1.0252694165737644, "grad_norm": 2.1111068427158513, "learning_rate": 1.5321145706933214e-05, "loss": 0.7298, "step": 13795 }, { "epoch": 1.0253437383872166, "grad_norm": 1.8419358458813175, "learning_rate": 1.532046634678459e-05, "loss": 0.492, "step": 13796 }, { "epoch": 1.0254180602006688, "grad_norm": 1.823454086187106, "learning_rate": 1.5319786952383084e-05, "loss": 0.6257, "step": 13797 }, { "epoch": 1.025492382014121, "grad_norm": 1.9060686453062414, "learning_rate": 1.5319107523733065e-05, "loss": 0.5711, "step": 13798 }, { "epoch": 1.0255667038275733, "grad_norm": 2.403369286741326, "learning_rate": 1.5318428060838906e-05, "loss": 0.8516, "step": 13799 }, { "epoch": 1.0256410256410255, "grad_norm": 2.513386802205582, "learning_rate": 1.5317748563704982e-05, "loss": 0.6777, "step": 13800 }, { "epoch": 1.0257153474544778, "grad_norm": 1.6480225292838635, "learning_rate": 1.5317069032335667e-05, "loss": 0.5669, "step": 13801 }, { "epoch": 1.02578966926793, "grad_norm": 2.0957600139291923, "learning_rate": 1.5316389466735338e-05, "loss": 0.7127, "step": 13802 }, { "epoch": 1.0258639910813825, "grad_norm": 1.7509276368962738, "learning_rate": 1.5315709866908373e-05, "loss": 0.5564, "step": 13803 }, { "epoch": 1.0259383128948347, "grad_norm": 1.8516555316849377, "learning_rate": 1.5315030232859146e-05, "loss": 0.6785, "step": 13804 }, { "epoch": 1.026012634708287, "grad_norm": 2.2668091508288244, "learning_rate": 1.5314350564592024e-05, "loss": 0.7071, "step": 13805 }, { "epoch": 1.0260869565217392, "grad_norm": 2.140505601731203, "learning_rate": 1.531367086211139e-05, "loss": 0.6733, "step": 13806 }, { "epoch": 1.0261612783351914, "grad_norm": 1.868837344641425, "learning_rate": 1.531299112542162e-05, "loss": 0.5562, "step": 13807 }, { "epoch": 1.0262356001486437, "grad_norm": 2.203162060428603, "learning_rate": 1.5312311354527093e-05, "loss": 0.612, "step": 13808 }, { "epoch": 1.026309921962096, "grad_norm": 2.052727125222548, "learning_rate": 1.5311631549432176e-05, "loss": 0.6323, "step": 13809 }, { "epoch": 1.0263842437755482, "grad_norm": 2.991284396101387, "learning_rate": 1.5310951710141255e-05, "loss": 0.6639, "step": 13810 }, { "epoch": 1.0264585655890004, "grad_norm": 1.734170109492794, "learning_rate": 1.5310271836658697e-05, "loss": 0.6288, "step": 13811 }, { "epoch": 1.0265328874024526, "grad_norm": 1.783212359134372, "learning_rate": 1.5309591928988882e-05, "loss": 0.6416, "step": 13812 }, { "epoch": 1.0266072092159049, "grad_norm": 1.7151930159448652, "learning_rate": 1.5308911987136193e-05, "loss": 0.591, "step": 13813 }, { "epoch": 1.026681531029357, "grad_norm": 2.240648834102868, "learning_rate": 1.530823201110501e-05, "loss": 0.7193, "step": 13814 }, { "epoch": 1.0267558528428093, "grad_norm": 2.9900703455634594, "learning_rate": 1.53075520008997e-05, "loss": 0.5069, "step": 13815 }, { "epoch": 1.0268301746562616, "grad_norm": 1.9642424161361316, "learning_rate": 1.5306871956524646e-05, "loss": 0.7115, "step": 13816 }, { "epoch": 1.0269044964697138, "grad_norm": 1.924323435572404, "learning_rate": 1.5306191877984224e-05, "loss": 0.6579, "step": 13817 }, { "epoch": 1.026978818283166, "grad_norm": 2.158605746391082, "learning_rate": 1.5305511765282814e-05, "loss": 0.6376, "step": 13818 }, { "epoch": 1.0270531400966183, "grad_norm": 2.060118800444463, "learning_rate": 1.5304831618424797e-05, "loss": 0.7313, "step": 13819 }, { "epoch": 1.0271274619100705, "grad_norm": 1.881541452889261, "learning_rate": 1.530415143741455e-05, "loss": 0.4665, "step": 13820 }, { "epoch": 1.0272017837235228, "grad_norm": 2.4612271878213683, "learning_rate": 1.530347122225645e-05, "loss": 0.5914, "step": 13821 }, { "epoch": 1.027276105536975, "grad_norm": 1.924305698349893, "learning_rate": 1.5302790972954876e-05, "loss": 0.6464, "step": 13822 }, { "epoch": 1.0273504273504273, "grad_norm": 2.1969332675349103, "learning_rate": 1.530211068951421e-05, "loss": 0.5601, "step": 13823 }, { "epoch": 1.0274247491638795, "grad_norm": 1.8332534108998217, "learning_rate": 1.530143037193883e-05, "loss": 0.6675, "step": 13824 }, { "epoch": 1.027499070977332, "grad_norm": 2.1187241241830943, "learning_rate": 1.5300750020233114e-05, "loss": 0.6957, "step": 13825 }, { "epoch": 1.0275733927907842, "grad_norm": 2.4008782699205202, "learning_rate": 1.5300069634401448e-05, "loss": 0.7126, "step": 13826 }, { "epoch": 1.0276477146042364, "grad_norm": 2.0355300116545796, "learning_rate": 1.529938921444821e-05, "loss": 0.5797, "step": 13827 }, { "epoch": 1.0277220364176887, "grad_norm": 2.0300326645559275, "learning_rate": 1.5298708760377775e-05, "loss": 0.6227, "step": 13828 }, { "epoch": 1.027796358231141, "grad_norm": 1.7556995094361558, "learning_rate": 1.529802827219453e-05, "loss": 0.6887, "step": 13829 }, { "epoch": 1.0278706800445931, "grad_norm": 2.1034935247453013, "learning_rate": 1.5297347749902854e-05, "loss": 0.7832, "step": 13830 }, { "epoch": 1.0279450018580454, "grad_norm": 2.0083544901429184, "learning_rate": 1.5296667193507125e-05, "loss": 0.7273, "step": 13831 }, { "epoch": 1.0280193236714976, "grad_norm": 2.2561714941538216, "learning_rate": 1.5295986603011733e-05, "loss": 0.5129, "step": 13832 }, { "epoch": 1.0280936454849499, "grad_norm": 2.070811195781261, "learning_rate": 1.5295305978421052e-05, "loss": 0.6219, "step": 13833 }, { "epoch": 1.028167967298402, "grad_norm": 1.6555303832766215, "learning_rate": 1.5294625319739463e-05, "loss": 0.6228, "step": 13834 }, { "epoch": 1.0282422891118543, "grad_norm": 1.9876885898477625, "learning_rate": 1.5293944626971355e-05, "loss": 0.7078, "step": 13835 }, { "epoch": 1.0283166109253066, "grad_norm": 1.960779349445745, "learning_rate": 1.5293263900121103e-05, "loss": 0.6566, "step": 13836 }, { "epoch": 1.0283909327387588, "grad_norm": 1.7723513667045574, "learning_rate": 1.5292583139193094e-05, "loss": 0.729, "step": 13837 }, { "epoch": 1.028465254552211, "grad_norm": 1.9455285508801625, "learning_rate": 1.529190234419171e-05, "loss": 0.5758, "step": 13838 }, { "epoch": 1.0285395763656633, "grad_norm": 2.03252018843866, "learning_rate": 1.5291221515121333e-05, "loss": 0.7473, "step": 13839 }, { "epoch": 1.0286138981791155, "grad_norm": 1.77071706662834, "learning_rate": 1.5290540651986347e-05, "loss": 0.5565, "step": 13840 }, { "epoch": 1.0286882199925678, "grad_norm": 2.9630843479263986, "learning_rate": 1.5289859754791132e-05, "loss": 0.6263, "step": 13841 }, { "epoch": 1.02876254180602, "grad_norm": 1.7472787098955262, "learning_rate": 1.5289178823540078e-05, "loss": 0.561, "step": 13842 }, { "epoch": 1.0288368636194722, "grad_norm": 1.975625384482328, "learning_rate": 1.528849785823756e-05, "loss": 0.6137, "step": 13843 }, { "epoch": 1.0289111854329245, "grad_norm": 1.8603721294066744, "learning_rate": 1.5287816858887973e-05, "loss": 0.6807, "step": 13844 }, { "epoch": 1.0289855072463767, "grad_norm": 2.002669385414004, "learning_rate": 1.5287135825495692e-05, "loss": 0.6755, "step": 13845 }, { "epoch": 1.029059829059829, "grad_norm": 2.0016427085168003, "learning_rate": 1.5286454758065103e-05, "loss": 0.5819, "step": 13846 }, { "epoch": 1.0291341508732814, "grad_norm": 2.4347151403998115, "learning_rate": 1.52857736566006e-05, "loss": 0.6916, "step": 13847 }, { "epoch": 1.0292084726867337, "grad_norm": 2.2338579707335255, "learning_rate": 1.5285092521106553e-05, "loss": 0.645, "step": 13848 }, { "epoch": 1.029282794500186, "grad_norm": 1.7463440178387692, "learning_rate": 1.5284411351587356e-05, "loss": 0.5959, "step": 13849 }, { "epoch": 1.0293571163136381, "grad_norm": 1.9015551387997909, "learning_rate": 1.5283730148047396e-05, "loss": 0.6477, "step": 13850 }, { "epoch": 1.0294314381270904, "grad_norm": 1.9975810327245689, "learning_rate": 1.528304891049105e-05, "loss": 0.6271, "step": 13851 }, { "epoch": 1.0295057599405426, "grad_norm": 1.7884862826391976, "learning_rate": 1.5282367638922716e-05, "loss": 0.562, "step": 13852 }, { "epoch": 1.0295800817539948, "grad_norm": 1.9170711705456513, "learning_rate": 1.528168633334677e-05, "loss": 0.7101, "step": 13853 }, { "epoch": 1.029654403567447, "grad_norm": 1.707370726441532, "learning_rate": 1.5281004993767605e-05, "loss": 0.6921, "step": 13854 }, { "epoch": 1.0297287253808993, "grad_norm": 1.8323530914888213, "learning_rate": 1.52803236201896e-05, "loss": 0.6603, "step": 13855 }, { "epoch": 1.0298030471943516, "grad_norm": 1.1966814536196115, "learning_rate": 1.5279642212617142e-05, "loss": 0.4423, "step": 13856 }, { "epoch": 1.0298773690078038, "grad_norm": 2.097906630887171, "learning_rate": 1.527896077105463e-05, "loss": 0.5846, "step": 13857 }, { "epoch": 1.029951690821256, "grad_norm": 2.282314495007092, "learning_rate": 1.5278279295506436e-05, "loss": 0.6154, "step": 13858 }, { "epoch": 1.0300260126347083, "grad_norm": 2.308197347766063, "learning_rate": 1.5277597785976955e-05, "loss": 0.7548, "step": 13859 }, { "epoch": 1.0301003344481605, "grad_norm": 1.704785149855909, "learning_rate": 1.527691624247057e-05, "loss": 0.5906, "step": 13860 }, { "epoch": 1.0301746562616128, "grad_norm": 2.0496882654545567, "learning_rate": 1.527623466499168e-05, "loss": 0.5896, "step": 13861 }, { "epoch": 1.030248978075065, "grad_norm": 2.112125459096982, "learning_rate": 1.5275553053544657e-05, "loss": 0.6887, "step": 13862 }, { "epoch": 1.0303232998885172, "grad_norm": 2.450844333067741, "learning_rate": 1.5274871408133907e-05, "loss": 0.6064, "step": 13863 }, { "epoch": 1.0303976217019695, "grad_norm": 3.1241318817561883, "learning_rate": 1.52741897287638e-05, "loss": 0.8645, "step": 13864 }, { "epoch": 1.0304719435154217, "grad_norm": 1.9049563841000958, "learning_rate": 1.5273508015438735e-05, "loss": 0.5944, "step": 13865 }, { "epoch": 1.030546265328874, "grad_norm": 2.810858988112463, "learning_rate": 1.5272826268163098e-05, "loss": 0.7247, "step": 13866 }, { "epoch": 1.0306205871423262, "grad_norm": 2.1155121313682734, "learning_rate": 1.5272144486941284e-05, "loss": 0.549, "step": 13867 }, { "epoch": 1.0306949089557784, "grad_norm": 2.0401339554816755, "learning_rate": 1.5271462671777674e-05, "loss": 0.7532, "step": 13868 }, { "epoch": 1.0307692307692307, "grad_norm": 1.8721691209507063, "learning_rate": 1.5270780822676662e-05, "loss": 0.7481, "step": 13869 }, { "epoch": 1.0308435525826831, "grad_norm": 1.5731569960904876, "learning_rate": 1.5270098939642633e-05, "loss": 0.5588, "step": 13870 }, { "epoch": 1.0309178743961354, "grad_norm": 1.8742332693088202, "learning_rate": 1.5269417022679984e-05, "loss": 0.7237, "step": 13871 }, { "epoch": 1.0309921962095876, "grad_norm": 2.0935534237960978, "learning_rate": 1.52687350717931e-05, "loss": 0.704, "step": 13872 }, { "epoch": 1.0310665180230398, "grad_norm": 2.5026096720149087, "learning_rate": 1.5268053086986376e-05, "loss": 0.7198, "step": 13873 }, { "epoch": 1.031140839836492, "grad_norm": 1.6401968318379145, "learning_rate": 1.5267371068264197e-05, "loss": 0.4829, "step": 13874 }, { "epoch": 1.0312151616499443, "grad_norm": 1.8394594591668418, "learning_rate": 1.5266689015630957e-05, "loss": 0.6606, "step": 13875 }, { "epoch": 1.0312894834633966, "grad_norm": 3.5014417898307486, "learning_rate": 1.5266006929091045e-05, "loss": 0.5442, "step": 13876 }, { "epoch": 1.0313638052768488, "grad_norm": 1.8051691658296576, "learning_rate": 1.5265324808648853e-05, "loss": 0.6497, "step": 13877 }, { "epoch": 1.031438127090301, "grad_norm": 1.9205033126376108, "learning_rate": 1.5264642654308775e-05, "loss": 0.5702, "step": 13878 }, { "epoch": 1.0315124489037533, "grad_norm": 2.009928465479902, "learning_rate": 1.5263960466075202e-05, "loss": 0.7265, "step": 13879 }, { "epoch": 1.0315867707172055, "grad_norm": 1.689970281513005, "learning_rate": 1.5263278243952522e-05, "loss": 0.6488, "step": 13880 }, { "epoch": 1.0316610925306577, "grad_norm": 1.574367838220961, "learning_rate": 1.5262595987945133e-05, "loss": 0.516, "step": 13881 }, { "epoch": 1.03173541434411, "grad_norm": 1.75923751603009, "learning_rate": 1.526191369805742e-05, "loss": 0.558, "step": 13882 }, { "epoch": 1.0318097361575622, "grad_norm": 2.106617073939769, "learning_rate": 1.5261231374293783e-05, "loss": 0.6869, "step": 13883 }, { "epoch": 1.0318840579710145, "grad_norm": 2.3448804163980705, "learning_rate": 1.526054901665861e-05, "loss": 0.8068, "step": 13884 }, { "epoch": 1.0319583797844667, "grad_norm": 1.6668034216681258, "learning_rate": 1.5259866625156295e-05, "loss": 0.5016, "step": 13885 }, { "epoch": 1.032032701597919, "grad_norm": 1.7726526164610763, "learning_rate": 1.5259184199791234e-05, "loss": 0.6265, "step": 13886 }, { "epoch": 1.0321070234113712, "grad_norm": 1.9606336691145496, "learning_rate": 1.5258501740567813e-05, "loss": 0.7614, "step": 13887 }, { "epoch": 1.0321813452248234, "grad_norm": 1.9466543259556923, "learning_rate": 1.5257819247490435e-05, "loss": 0.6347, "step": 13888 }, { "epoch": 1.0322556670382756, "grad_norm": 2.0678543987142524, "learning_rate": 1.5257136720563488e-05, "loss": 0.6297, "step": 13889 }, { "epoch": 1.0323299888517279, "grad_norm": 2.032695861483973, "learning_rate": 1.5256454159791369e-05, "loss": 0.5049, "step": 13890 }, { "epoch": 1.0324043106651801, "grad_norm": 2.302827988213652, "learning_rate": 1.525577156517847e-05, "loss": 0.682, "step": 13891 }, { "epoch": 1.0324786324786326, "grad_norm": 1.9650993058153663, "learning_rate": 1.5255088936729184e-05, "loss": 0.6214, "step": 13892 }, { "epoch": 1.0325529542920848, "grad_norm": 1.8979910632057015, "learning_rate": 1.525440627444791e-05, "loss": 0.5943, "step": 13893 }, { "epoch": 1.032627276105537, "grad_norm": 1.9413176575885294, "learning_rate": 1.5253723578339038e-05, "loss": 0.634, "step": 13894 }, { "epoch": 1.0327015979189893, "grad_norm": 1.9690548010161695, "learning_rate": 1.5253040848406973e-05, "loss": 0.5264, "step": 13895 }, { "epoch": 1.0327759197324415, "grad_norm": 1.8569831222999056, "learning_rate": 1.52523580846561e-05, "loss": 0.5625, "step": 13896 }, { "epoch": 1.0328502415458938, "grad_norm": 1.9323649272988166, "learning_rate": 1.5251675287090816e-05, "loss": 0.5608, "step": 13897 }, { "epoch": 1.032924563359346, "grad_norm": 1.953060589663817, "learning_rate": 1.5250992455715521e-05, "loss": 0.5887, "step": 13898 }, { "epoch": 1.0329988851727983, "grad_norm": 2.0217514188942007, "learning_rate": 1.5250309590534608e-05, "loss": 0.665, "step": 13899 }, { "epoch": 1.0330732069862505, "grad_norm": 1.694008680471766, "learning_rate": 1.5249626691552477e-05, "loss": 0.6479, "step": 13900 }, { "epoch": 1.0331475287997027, "grad_norm": 2.160998172289444, "learning_rate": 1.5248943758773522e-05, "loss": 0.6859, "step": 13901 }, { "epoch": 1.033221850613155, "grad_norm": 1.830151340419482, "learning_rate": 1.5248260792202136e-05, "loss": 0.5062, "step": 13902 }, { "epoch": 1.0332961724266072, "grad_norm": 2.238206684329894, "learning_rate": 1.524757779184272e-05, "loss": 0.5971, "step": 13903 }, { "epoch": 1.0333704942400594, "grad_norm": 2.253075800434352, "learning_rate": 1.524689475769967e-05, "loss": 0.7839, "step": 13904 }, { "epoch": 1.0334448160535117, "grad_norm": 2.001878316688299, "learning_rate": 1.5246211689777386e-05, "loss": 0.5195, "step": 13905 }, { "epoch": 1.033519137866964, "grad_norm": 2.4748923476044786, "learning_rate": 1.5245528588080266e-05, "loss": 0.8723, "step": 13906 }, { "epoch": 1.0335934596804162, "grad_norm": 2.1171978351888265, "learning_rate": 1.5244845452612702e-05, "loss": 0.6109, "step": 13907 }, { "epoch": 1.0336677814938684, "grad_norm": 2.002978890856996, "learning_rate": 1.524416228337909e-05, "loss": 0.6804, "step": 13908 }, { "epoch": 1.0337421033073206, "grad_norm": 1.8753989035112908, "learning_rate": 1.524347908038384e-05, "loss": 0.709, "step": 13909 }, { "epoch": 1.0338164251207729, "grad_norm": 1.683727076452437, "learning_rate": 1.5242795843631341e-05, "loss": 0.5646, "step": 13910 }, { "epoch": 1.0338907469342251, "grad_norm": 1.395018694809452, "learning_rate": 1.5242112573125996e-05, "loss": 0.3602, "step": 13911 }, { "epoch": 1.0339650687476774, "grad_norm": 1.51483957786458, "learning_rate": 1.5241429268872204e-05, "loss": 0.4922, "step": 13912 }, { "epoch": 1.0340393905611296, "grad_norm": 1.8648583607988574, "learning_rate": 1.5240745930874358e-05, "loss": 0.4942, "step": 13913 }, { "epoch": 1.034113712374582, "grad_norm": 1.7006865859978715, "learning_rate": 1.5240062559136863e-05, "loss": 0.5699, "step": 13914 }, { "epoch": 1.0341880341880343, "grad_norm": 1.6013374692000726, "learning_rate": 1.5239379153664117e-05, "loss": 0.5597, "step": 13915 }, { "epoch": 1.0342623560014865, "grad_norm": 2.473243575015036, "learning_rate": 1.5238695714460519e-05, "loss": 0.5356, "step": 13916 }, { "epoch": 1.0343366778149388, "grad_norm": 2.465750100520355, "learning_rate": 1.523801224153047e-05, "loss": 0.6414, "step": 13917 }, { "epoch": 1.034410999628391, "grad_norm": 2.2155370119420583, "learning_rate": 1.5237328734878376e-05, "loss": 0.618, "step": 13918 }, { "epoch": 1.0344853214418432, "grad_norm": 1.7906263672569498, "learning_rate": 1.5236645194508627e-05, "loss": 0.7022, "step": 13919 }, { "epoch": 1.0345596432552955, "grad_norm": 2.383283101975211, "learning_rate": 1.5235961620425626e-05, "loss": 0.7229, "step": 13920 }, { "epoch": 1.0346339650687477, "grad_norm": 1.665656221441775, "learning_rate": 1.5235278012633777e-05, "loss": 0.4847, "step": 13921 }, { "epoch": 1.0347082868822, "grad_norm": 2.0144593131715984, "learning_rate": 1.523459437113748e-05, "loss": 0.7255, "step": 13922 }, { "epoch": 1.0347826086956522, "grad_norm": 2.6036808937525047, "learning_rate": 1.5233910695941137e-05, "loss": 0.4735, "step": 13923 }, { "epoch": 1.0348569305091044, "grad_norm": 2.0828802082305664, "learning_rate": 1.5233226987049146e-05, "loss": 0.6431, "step": 13924 }, { "epoch": 1.0349312523225567, "grad_norm": 1.5828162693907792, "learning_rate": 1.5232543244465912e-05, "loss": 0.5391, "step": 13925 }, { "epoch": 1.035005574136009, "grad_norm": 2.169173452031407, "learning_rate": 1.5231859468195835e-05, "loss": 0.6014, "step": 13926 }, { "epoch": 1.0350798959494611, "grad_norm": 2.0988502679697367, "learning_rate": 1.523117565824332e-05, "loss": 0.6171, "step": 13927 }, { "epoch": 1.0351542177629134, "grad_norm": 2.1986809459080274, "learning_rate": 1.523049181461277e-05, "loss": 0.6627, "step": 13928 }, { "epoch": 1.0352285395763656, "grad_norm": 1.8982347192773352, "learning_rate": 1.5229807937308582e-05, "loss": 0.6458, "step": 13929 }, { "epoch": 1.0353028613898179, "grad_norm": 2.1061972941657365, "learning_rate": 1.5229124026335163e-05, "loss": 0.5927, "step": 13930 }, { "epoch": 1.03537718320327, "grad_norm": 2.013742578259614, "learning_rate": 1.5228440081696913e-05, "loss": 0.6689, "step": 13931 }, { "epoch": 1.0354515050167223, "grad_norm": 2.149183872128814, "learning_rate": 1.5227756103398238e-05, "loss": 0.5655, "step": 13932 }, { "epoch": 1.0355258268301746, "grad_norm": 2.141935729996232, "learning_rate": 1.5227072091443543e-05, "loss": 0.6452, "step": 13933 }, { "epoch": 1.0356001486436268, "grad_norm": 2.0427733097793306, "learning_rate": 1.5226388045837226e-05, "loss": 0.6135, "step": 13934 }, { "epoch": 1.035674470457079, "grad_norm": 2.021767316736341, "learning_rate": 1.5225703966583693e-05, "loss": 0.5998, "step": 13935 }, { "epoch": 1.0357487922705313, "grad_norm": 1.6844699255503286, "learning_rate": 1.5225019853687353e-05, "loss": 0.5874, "step": 13936 }, { "epoch": 1.0358231140839838, "grad_norm": 2.6466880092029808, "learning_rate": 1.5224335707152605e-05, "loss": 0.6684, "step": 13937 }, { "epoch": 1.035897435897436, "grad_norm": 1.7800795590734886, "learning_rate": 1.5223651526983851e-05, "loss": 0.5748, "step": 13938 }, { "epoch": 1.0359717577108882, "grad_norm": 1.9315138358255513, "learning_rate": 1.5222967313185504e-05, "loss": 0.6714, "step": 13939 }, { "epoch": 1.0360460795243405, "grad_norm": 2.1438604460772517, "learning_rate": 1.5222283065761963e-05, "loss": 0.6263, "step": 13940 }, { "epoch": 1.0361204013377927, "grad_norm": 2.012251459415268, "learning_rate": 1.5221598784717632e-05, "loss": 0.6685, "step": 13941 }, { "epoch": 1.036194723151245, "grad_norm": 2.163001103001471, "learning_rate": 1.5220914470056923e-05, "loss": 0.6245, "step": 13942 }, { "epoch": 1.0362690449646972, "grad_norm": 2.0020497023646233, "learning_rate": 1.5220230121784236e-05, "loss": 0.7365, "step": 13943 }, { "epoch": 1.0363433667781494, "grad_norm": 1.8345875210807219, "learning_rate": 1.521954573990398e-05, "loss": 0.4997, "step": 13944 }, { "epoch": 1.0364176885916017, "grad_norm": 2.353913410609755, "learning_rate": 1.5218861324420555e-05, "loss": 0.6235, "step": 13945 }, { "epoch": 1.036492010405054, "grad_norm": 1.810016866383851, "learning_rate": 1.5218176875338375e-05, "loss": 0.5522, "step": 13946 }, { "epoch": 1.0365663322185061, "grad_norm": 1.6678958105971478, "learning_rate": 1.5217492392661841e-05, "loss": 0.567, "step": 13947 }, { "epoch": 1.0366406540319584, "grad_norm": 2.56735178837918, "learning_rate": 1.5216807876395364e-05, "loss": 0.5742, "step": 13948 }, { "epoch": 1.0367149758454106, "grad_norm": 2.185814177298938, "learning_rate": 1.5216123326543345e-05, "loss": 0.5966, "step": 13949 }, { "epoch": 1.0367892976588629, "grad_norm": 2.5219162846870034, "learning_rate": 1.52154387431102e-05, "loss": 0.7695, "step": 13950 }, { "epoch": 1.036863619472315, "grad_norm": 2.2543323570965392, "learning_rate": 1.521475412610033e-05, "loss": 0.7528, "step": 13951 }, { "epoch": 1.0369379412857673, "grad_norm": 1.8311730611427695, "learning_rate": 1.5214069475518141e-05, "loss": 0.6224, "step": 13952 }, { "epoch": 1.0370122630992196, "grad_norm": 2.2138140674237285, "learning_rate": 1.5213384791368043e-05, "loss": 0.7816, "step": 13953 }, { "epoch": 1.0370865849126718, "grad_norm": 2.885102163995753, "learning_rate": 1.5212700073654442e-05, "loss": 0.5514, "step": 13954 }, { "epoch": 1.037160906726124, "grad_norm": 1.8350509582978753, "learning_rate": 1.5212015322381755e-05, "loss": 0.5098, "step": 13955 }, { "epoch": 1.0372352285395763, "grad_norm": 2.1373249440091575, "learning_rate": 1.521133053755438e-05, "loss": 0.6067, "step": 13956 }, { "epoch": 1.0373095503530285, "grad_norm": 2.3285921325695043, "learning_rate": 1.5210645719176732e-05, "loss": 0.5028, "step": 13957 }, { "epoch": 1.0373838721664808, "grad_norm": 1.8837388209388417, "learning_rate": 1.5209960867253213e-05, "loss": 0.5924, "step": 13958 }, { "epoch": 1.037458193979933, "grad_norm": 1.8302976981386805, "learning_rate": 1.5209275981788239e-05, "loss": 0.6063, "step": 13959 }, { "epoch": 1.0375325157933855, "grad_norm": 2.0512892791050414, "learning_rate": 1.5208591062786213e-05, "loss": 0.5574, "step": 13960 }, { "epoch": 1.0376068376068377, "grad_norm": 2.3017359723632973, "learning_rate": 1.5207906110251554e-05, "loss": 0.6205, "step": 13961 }, { "epoch": 1.03768115942029, "grad_norm": 2.2583132225047304, "learning_rate": 1.520722112418866e-05, "loss": 0.6866, "step": 13962 }, { "epoch": 1.0377554812337422, "grad_norm": 3.4585071272218126, "learning_rate": 1.520653610460195e-05, "loss": 0.6359, "step": 13963 }, { "epoch": 1.0378298030471944, "grad_norm": 1.680406891123138, "learning_rate": 1.5205851051495831e-05, "loss": 0.4616, "step": 13964 }, { "epoch": 1.0379041248606466, "grad_norm": 1.9366306972533966, "learning_rate": 1.520516596487471e-05, "loss": 0.6059, "step": 13965 }, { "epoch": 1.0379784466740989, "grad_norm": 2.1920171194444893, "learning_rate": 1.5204480844743e-05, "loss": 0.6619, "step": 13966 }, { "epoch": 1.0380527684875511, "grad_norm": 1.9301939328945075, "learning_rate": 1.5203795691105117e-05, "loss": 0.6365, "step": 13967 }, { "epoch": 1.0381270903010034, "grad_norm": 1.8468358954001984, "learning_rate": 1.5203110503965467e-05, "loss": 0.6482, "step": 13968 }, { "epoch": 1.0382014121144556, "grad_norm": 1.7768506477489365, "learning_rate": 1.5202425283328457e-05, "loss": 0.5326, "step": 13969 }, { "epoch": 1.0382757339279078, "grad_norm": 2.1383691686543274, "learning_rate": 1.5201740029198506e-05, "loss": 0.6358, "step": 13970 }, { "epoch": 1.03835005574136, "grad_norm": 2.414384508143301, "learning_rate": 1.5201054741580022e-05, "loss": 0.7, "step": 13971 }, { "epoch": 1.0384243775548123, "grad_norm": 2.009320155114946, "learning_rate": 1.5200369420477419e-05, "loss": 0.6097, "step": 13972 }, { "epoch": 1.0384986993682646, "grad_norm": 2.4419988790023557, "learning_rate": 1.5199684065895107e-05, "loss": 0.711, "step": 13973 }, { "epoch": 1.0385730211817168, "grad_norm": 2.2902387901951626, "learning_rate": 1.5198998677837495e-05, "loss": 0.5925, "step": 13974 }, { "epoch": 1.038647342995169, "grad_norm": 1.8507357908628324, "learning_rate": 1.5198313256309002e-05, "loss": 0.5327, "step": 13975 }, { "epoch": 1.0387216648086213, "grad_norm": 2.220783233141079, "learning_rate": 1.519762780131404e-05, "loss": 0.7964, "step": 13976 }, { "epoch": 1.0387959866220735, "grad_norm": 1.8637746769302643, "learning_rate": 1.5196942312857016e-05, "loss": 0.6928, "step": 13977 }, { "epoch": 1.0388703084355257, "grad_norm": 1.8760184659238541, "learning_rate": 1.519625679094235e-05, "loss": 0.6126, "step": 13978 }, { "epoch": 1.038944630248978, "grad_norm": 1.795010878960495, "learning_rate": 1.5195571235574453e-05, "loss": 0.6156, "step": 13979 }, { "epoch": 1.0390189520624302, "grad_norm": 2.2206035297989715, "learning_rate": 1.5194885646757736e-05, "loss": 0.8023, "step": 13980 }, { "epoch": 1.0390932738758827, "grad_norm": 1.7293830622179347, "learning_rate": 1.5194200024496612e-05, "loss": 0.5743, "step": 13981 }, { "epoch": 1.039167595689335, "grad_norm": 1.7605204487336203, "learning_rate": 1.5193514368795504e-05, "loss": 0.5421, "step": 13982 }, { "epoch": 1.0392419175027872, "grad_norm": 1.8021243824648432, "learning_rate": 1.5192828679658815e-05, "loss": 0.6357, "step": 13983 }, { "epoch": 1.0393162393162394, "grad_norm": 2.5194043882044688, "learning_rate": 1.5192142957090967e-05, "loss": 0.7862, "step": 13984 }, { "epoch": 1.0393905611296916, "grad_norm": 1.7722299135511133, "learning_rate": 1.5191457201096373e-05, "loss": 0.5487, "step": 13985 }, { "epoch": 1.0394648829431439, "grad_norm": 1.9601269304123963, "learning_rate": 1.5190771411679443e-05, "loss": 0.577, "step": 13986 }, { "epoch": 1.0395392047565961, "grad_norm": 1.816927999790651, "learning_rate": 1.5190085588844599e-05, "loss": 0.5531, "step": 13987 }, { "epoch": 1.0396135265700484, "grad_norm": 1.705100159138409, "learning_rate": 1.5189399732596253e-05, "loss": 0.5797, "step": 13988 }, { "epoch": 1.0396878483835006, "grad_norm": 1.8444296241498632, "learning_rate": 1.518871384293882e-05, "loss": 0.6214, "step": 13989 }, { "epoch": 1.0397621701969528, "grad_norm": 2.2445805043276255, "learning_rate": 1.5188027919876715e-05, "loss": 0.5629, "step": 13990 }, { "epoch": 1.039836492010405, "grad_norm": 1.9745976397520009, "learning_rate": 1.518734196341436e-05, "loss": 0.6292, "step": 13991 }, { "epoch": 1.0399108138238573, "grad_norm": 1.7936744454639384, "learning_rate": 1.5186655973556161e-05, "loss": 0.5685, "step": 13992 }, { "epoch": 1.0399851356373095, "grad_norm": 2.149275195989076, "learning_rate": 1.5185969950306545e-05, "loss": 0.759, "step": 13993 }, { "epoch": 1.0400594574507618, "grad_norm": 1.756094801551107, "learning_rate": 1.518528389366992e-05, "loss": 0.5333, "step": 13994 }, { "epoch": 1.040133779264214, "grad_norm": 2.359333749980625, "learning_rate": 1.5184597803650709e-05, "loss": 0.7323, "step": 13995 }, { "epoch": 1.0402081010776663, "grad_norm": 2.02642017389977, "learning_rate": 1.5183911680253324e-05, "loss": 0.6763, "step": 13996 }, { "epoch": 1.0402824228911185, "grad_norm": 2.045104068921581, "learning_rate": 1.5183225523482186e-05, "loss": 0.6671, "step": 13997 }, { "epoch": 1.0403567447045707, "grad_norm": 1.6829555487090706, "learning_rate": 1.5182539333341711e-05, "loss": 0.5819, "step": 13998 }, { "epoch": 1.040431066518023, "grad_norm": 2.7931929849000263, "learning_rate": 1.5181853109836318e-05, "loss": 0.6872, "step": 13999 }, { "epoch": 1.0405053883314752, "grad_norm": 1.7779253953062595, "learning_rate": 1.518116685297042e-05, "loss": 0.6408, "step": 14000 }, { "epoch": 1.0405797101449274, "grad_norm": 1.922963316763489, "learning_rate": 1.518048056274844e-05, "loss": 0.5868, "step": 14001 }, { "epoch": 1.0406540319583797, "grad_norm": 1.8114106826096656, "learning_rate": 1.5179794239174794e-05, "loss": 0.4702, "step": 14002 }, { "epoch": 1.040728353771832, "grad_norm": 1.640814516521572, "learning_rate": 1.5179107882253905e-05, "loss": 0.5003, "step": 14003 }, { "epoch": 1.0408026755852844, "grad_norm": 1.958156123185645, "learning_rate": 1.5178421491990184e-05, "loss": 0.5752, "step": 14004 }, { "epoch": 1.0408769973987366, "grad_norm": 1.7473902108734478, "learning_rate": 1.5177735068388057e-05, "loss": 0.4771, "step": 14005 }, { "epoch": 1.0409513192121889, "grad_norm": 3.991611859026365, "learning_rate": 1.5177048611451941e-05, "loss": 0.8195, "step": 14006 }, { "epoch": 1.041025641025641, "grad_norm": 1.7829291865838748, "learning_rate": 1.517636212118625e-05, "loss": 0.5578, "step": 14007 }, { "epoch": 1.0410999628390933, "grad_norm": 2.104800970830566, "learning_rate": 1.5175675597595411e-05, "loss": 0.7203, "step": 14008 }, { "epoch": 1.0411742846525456, "grad_norm": 1.8462364635112174, "learning_rate": 1.5174989040683842e-05, "loss": 0.6977, "step": 14009 }, { "epoch": 1.0412486064659978, "grad_norm": 2.1081150443863548, "learning_rate": 1.5174302450455964e-05, "loss": 0.7712, "step": 14010 }, { "epoch": 1.04132292827945, "grad_norm": 2.383042175602547, "learning_rate": 1.5173615826916192e-05, "loss": 0.7285, "step": 14011 }, { "epoch": 1.0413972500929023, "grad_norm": 2.0726986694777727, "learning_rate": 1.5172929170068948e-05, "loss": 0.6721, "step": 14012 }, { "epoch": 1.0414715719063545, "grad_norm": 2.384042149004522, "learning_rate": 1.517224247991866e-05, "loss": 0.6104, "step": 14013 }, { "epoch": 1.0415458937198068, "grad_norm": 1.617539039613925, "learning_rate": 1.517155575646974e-05, "loss": 0.4319, "step": 14014 }, { "epoch": 1.041620215533259, "grad_norm": 1.9241483575986016, "learning_rate": 1.5170868999726613e-05, "loss": 0.5344, "step": 14015 }, { "epoch": 1.0416945373467112, "grad_norm": 2.37702109716296, "learning_rate": 1.5170182209693697e-05, "loss": 0.587, "step": 14016 }, { "epoch": 1.0417688591601635, "grad_norm": 2.0142832483677133, "learning_rate": 1.516949538637542e-05, "loss": 0.6136, "step": 14017 }, { "epoch": 1.0418431809736157, "grad_norm": 1.8454072282993716, "learning_rate": 1.5168808529776198e-05, "loss": 0.717, "step": 14018 }, { "epoch": 1.041917502787068, "grad_norm": 1.7173790822679056, "learning_rate": 1.5168121639900454e-05, "loss": 0.5432, "step": 14019 }, { "epoch": 1.0419918246005202, "grad_norm": 2.0611766934716136, "learning_rate": 1.5167434716752612e-05, "loss": 0.6779, "step": 14020 }, { "epoch": 1.0420661464139724, "grad_norm": 2.149599450932862, "learning_rate": 1.5166747760337094e-05, "loss": 0.7749, "step": 14021 }, { "epoch": 1.0421404682274247, "grad_norm": 2.7357200175697884, "learning_rate": 1.5166060770658323e-05, "loss": 0.8017, "step": 14022 }, { "epoch": 1.042214790040877, "grad_norm": 2.3741655068883873, "learning_rate": 1.5165373747720718e-05, "loss": 0.6865, "step": 14023 }, { "epoch": 1.0422891118543292, "grad_norm": 1.968105132111388, "learning_rate": 1.5164686691528707e-05, "loss": 0.6712, "step": 14024 }, { "epoch": 1.0423634336677814, "grad_norm": 1.7717731932182847, "learning_rate": 1.5163999602086712e-05, "loss": 0.605, "step": 14025 }, { "epoch": 1.0424377554812336, "grad_norm": 1.8791548672679603, "learning_rate": 1.5163312479399154e-05, "loss": 0.5637, "step": 14026 }, { "epoch": 1.042512077294686, "grad_norm": 2.4182781193391616, "learning_rate": 1.5162625323470457e-05, "loss": 0.7663, "step": 14027 }, { "epoch": 1.0425863991081383, "grad_norm": 2.1559150911478757, "learning_rate": 1.5161938134305048e-05, "loss": 0.6589, "step": 14028 }, { "epoch": 1.0426607209215906, "grad_norm": 2.4154473323732017, "learning_rate": 1.5161250911907346e-05, "loss": 0.5988, "step": 14029 }, { "epoch": 1.0427350427350428, "grad_norm": 1.8363772812838475, "learning_rate": 1.5160563656281784e-05, "loss": 0.6426, "step": 14030 }, { "epoch": 1.042809364548495, "grad_norm": 1.6863986952635153, "learning_rate": 1.5159876367432775e-05, "loss": 0.4968, "step": 14031 }, { "epoch": 1.0428836863619473, "grad_norm": 1.9026719665052643, "learning_rate": 1.5159189045364752e-05, "loss": 0.6236, "step": 14032 }, { "epoch": 1.0429580081753995, "grad_norm": 2.0818178790233843, "learning_rate": 1.515850169008214e-05, "loss": 0.5835, "step": 14033 }, { "epoch": 1.0430323299888518, "grad_norm": 1.8852358994774536, "learning_rate": 1.5157814301589358e-05, "loss": 0.6542, "step": 14034 }, { "epoch": 1.043106651802304, "grad_norm": 2.0729037644257655, "learning_rate": 1.5157126879890836e-05, "loss": 0.7269, "step": 14035 }, { "epoch": 1.0431809736157562, "grad_norm": 1.8051756614398404, "learning_rate": 1.5156439424991002e-05, "loss": 0.4129, "step": 14036 }, { "epoch": 1.0432552954292085, "grad_norm": 2.0088079468304163, "learning_rate": 1.5155751936894277e-05, "loss": 0.6227, "step": 14037 }, { "epoch": 1.0433296172426607, "grad_norm": 2.021887165368109, "learning_rate": 1.5155064415605086e-05, "loss": 0.69, "step": 14038 }, { "epoch": 1.043403939056113, "grad_norm": 1.790184958855225, "learning_rate": 1.5154376861127858e-05, "loss": 0.5899, "step": 14039 }, { "epoch": 1.0434782608695652, "grad_norm": 1.6947871737164553, "learning_rate": 1.515368927346702e-05, "loss": 0.5945, "step": 14040 }, { "epoch": 1.0435525826830174, "grad_norm": 2.0567104887414867, "learning_rate": 1.5153001652627e-05, "loss": 0.6195, "step": 14041 }, { "epoch": 1.0436269044964697, "grad_norm": 1.7679955521959811, "learning_rate": 1.515231399861222e-05, "loss": 0.5733, "step": 14042 }, { "epoch": 1.043701226309922, "grad_norm": 1.6584126896591884, "learning_rate": 1.5151626311427111e-05, "loss": 0.6032, "step": 14043 }, { "epoch": 1.0437755481233741, "grad_norm": 2.172766848844105, "learning_rate": 1.5150938591076099e-05, "loss": 0.6666, "step": 14044 }, { "epoch": 1.0438498699368264, "grad_norm": 1.8579448421352274, "learning_rate": 1.5150250837563611e-05, "loss": 0.6033, "step": 14045 }, { "epoch": 1.0439241917502786, "grad_norm": 1.7725743779058583, "learning_rate": 1.5149563050894077e-05, "loss": 0.4146, "step": 14046 }, { "epoch": 1.0439985135637309, "grad_norm": 2.202727472830366, "learning_rate": 1.5148875231071923e-05, "loss": 0.7558, "step": 14047 }, { "epoch": 1.0440728353771833, "grad_norm": 3.8004770364052445, "learning_rate": 1.5148187378101577e-05, "loss": 0.6599, "step": 14048 }, { "epoch": 1.0441471571906356, "grad_norm": 1.9537489818084581, "learning_rate": 1.5147499491987467e-05, "loss": 0.5496, "step": 14049 }, { "epoch": 1.0442214790040878, "grad_norm": 2.364221645274823, "learning_rate": 1.5146811572734023e-05, "loss": 0.5955, "step": 14050 }, { "epoch": 1.04429580081754, "grad_norm": 4.1800914405565415, "learning_rate": 1.5146123620345672e-05, "loss": 0.7227, "step": 14051 }, { "epoch": 1.0443701226309923, "grad_norm": 1.9355998540110535, "learning_rate": 1.5145435634826843e-05, "loss": 0.7596, "step": 14052 }, { "epoch": 1.0444444444444445, "grad_norm": 1.7627565658603914, "learning_rate": 1.5144747616181972e-05, "loss": 0.6821, "step": 14053 }, { "epoch": 1.0445187662578967, "grad_norm": 1.3574541114969145, "learning_rate": 1.5144059564415479e-05, "loss": 0.3943, "step": 14054 }, { "epoch": 1.044593088071349, "grad_norm": 2.0238801493877867, "learning_rate": 1.51433714795318e-05, "loss": 0.6072, "step": 14055 }, { "epoch": 1.0446674098848012, "grad_norm": 2.579276398685499, "learning_rate": 1.5142683361535358e-05, "loss": 0.4129, "step": 14056 }, { "epoch": 1.0447417316982535, "grad_norm": 2.1045124405706224, "learning_rate": 1.5141995210430587e-05, "loss": 0.6669, "step": 14057 }, { "epoch": 1.0448160535117057, "grad_norm": 1.6979586636040145, "learning_rate": 1.5141307026221924e-05, "loss": 0.5652, "step": 14058 }, { "epoch": 1.044890375325158, "grad_norm": 1.8129631751511557, "learning_rate": 1.514061880891379e-05, "loss": 0.58, "step": 14059 }, { "epoch": 1.0449646971386102, "grad_norm": 1.730142006837974, "learning_rate": 1.5139930558510616e-05, "loss": 0.6273, "step": 14060 }, { "epoch": 1.0450390189520624, "grad_norm": 2.163412640699821, "learning_rate": 1.5139242275016836e-05, "loss": 0.7515, "step": 14061 }, { "epoch": 1.0451133407655147, "grad_norm": 1.892363529467694, "learning_rate": 1.5138553958436883e-05, "loss": 0.765, "step": 14062 }, { "epoch": 1.045187662578967, "grad_norm": 2.256507164828934, "learning_rate": 1.5137865608775183e-05, "loss": 0.7817, "step": 14063 }, { "epoch": 1.0452619843924191, "grad_norm": 1.7829751979449542, "learning_rate": 1.5137177226036177e-05, "loss": 0.661, "step": 14064 }, { "epoch": 1.0453363062058714, "grad_norm": 2.421371935493503, "learning_rate": 1.5136488810224287e-05, "loss": 0.7282, "step": 14065 }, { "epoch": 1.0454106280193236, "grad_norm": 1.9057331979380288, "learning_rate": 1.5135800361343949e-05, "loss": 0.5749, "step": 14066 }, { "epoch": 1.0454849498327758, "grad_norm": 2.833614883556663, "learning_rate": 1.5135111879399596e-05, "loss": 0.5533, "step": 14067 }, { "epoch": 1.045559271646228, "grad_norm": 1.9456276475512229, "learning_rate": 1.5134423364395656e-05, "loss": 0.5416, "step": 14068 }, { "epoch": 1.0456335934596803, "grad_norm": 1.818166952046239, "learning_rate": 1.5133734816336567e-05, "loss": 0.5236, "step": 14069 }, { "epoch": 1.0457079152731326, "grad_norm": 1.9352570205460744, "learning_rate": 1.513304623522676e-05, "loss": 0.7109, "step": 14070 }, { "epoch": 1.045782237086585, "grad_norm": 2.2102839518308803, "learning_rate": 1.5132357621070668e-05, "loss": 0.7417, "step": 14071 }, { "epoch": 1.0458565589000373, "grad_norm": 2.0198251312349016, "learning_rate": 1.5131668973872721e-05, "loss": 0.6986, "step": 14072 }, { "epoch": 1.0459308807134895, "grad_norm": 2.3230319958075114, "learning_rate": 1.5130980293637356e-05, "loss": 0.5512, "step": 14073 }, { "epoch": 1.0460052025269417, "grad_norm": 2.919408018172231, "learning_rate": 1.5130291580369005e-05, "loss": 0.5475, "step": 14074 }, { "epoch": 1.046079524340394, "grad_norm": 2.2217014773237835, "learning_rate": 1.5129602834072109e-05, "loss": 0.7321, "step": 14075 }, { "epoch": 1.0461538461538462, "grad_norm": 1.8087181870252482, "learning_rate": 1.5128914054751093e-05, "loss": 0.6464, "step": 14076 }, { "epoch": 1.0462281679672985, "grad_norm": 2.077280581203898, "learning_rate": 1.5128225242410393e-05, "loss": 0.5237, "step": 14077 }, { "epoch": 1.0463024897807507, "grad_norm": 2.100247174756415, "learning_rate": 1.5127536397054445e-05, "loss": 0.6295, "step": 14078 }, { "epoch": 1.046376811594203, "grad_norm": 1.9378668023910666, "learning_rate": 1.5126847518687684e-05, "loss": 0.5039, "step": 14079 }, { "epoch": 1.0464511334076552, "grad_norm": 2.082144938153211, "learning_rate": 1.5126158607314545e-05, "loss": 0.7502, "step": 14080 }, { "epoch": 1.0465254552211074, "grad_norm": 1.870780550143383, "learning_rate": 1.5125469662939467e-05, "loss": 0.6206, "step": 14081 }, { "epoch": 1.0465997770345596, "grad_norm": 2.1579303959989837, "learning_rate": 1.5124780685566875e-05, "loss": 0.5764, "step": 14082 }, { "epoch": 1.0466740988480119, "grad_norm": 2.4844709010759582, "learning_rate": 1.5124091675201213e-05, "loss": 0.6903, "step": 14083 }, { "epoch": 1.0467484206614641, "grad_norm": 2.092480484612436, "learning_rate": 1.5123402631846916e-05, "loss": 0.523, "step": 14084 }, { "epoch": 1.0468227424749164, "grad_norm": 2.130793747285759, "learning_rate": 1.5122713555508414e-05, "loss": 0.6236, "step": 14085 }, { "epoch": 1.0468970642883686, "grad_norm": 1.9886062917757612, "learning_rate": 1.5122024446190154e-05, "loss": 0.7796, "step": 14086 }, { "epoch": 1.0469713861018208, "grad_norm": 1.9135808122334526, "learning_rate": 1.5121335303896563e-05, "loss": 0.6036, "step": 14087 }, { "epoch": 1.047045707915273, "grad_norm": 1.6524709875195096, "learning_rate": 1.5120646128632083e-05, "loss": 0.5125, "step": 14088 }, { "epoch": 1.0471200297287253, "grad_norm": 1.8559954707560042, "learning_rate": 1.5119956920401146e-05, "loss": 0.6224, "step": 14089 }, { "epoch": 1.0471943515421775, "grad_norm": 1.9273111416533273, "learning_rate": 1.5119267679208194e-05, "loss": 0.6628, "step": 14090 }, { "epoch": 1.0472686733556298, "grad_norm": 1.7907021078386005, "learning_rate": 1.5118578405057662e-05, "loss": 0.4471, "step": 14091 }, { "epoch": 1.047342995169082, "grad_norm": 2.0208698950276283, "learning_rate": 1.5117889097953988e-05, "loss": 0.6765, "step": 14092 }, { "epoch": 1.0474173169825343, "grad_norm": 2.4663189656276363, "learning_rate": 1.511719975790161e-05, "loss": 0.5424, "step": 14093 }, { "epoch": 1.0474916387959867, "grad_norm": 1.8027084015027923, "learning_rate": 1.5116510384904964e-05, "loss": 0.5577, "step": 14094 }, { "epoch": 1.047565960609439, "grad_norm": 2.10887344093411, "learning_rate": 1.5115820978968491e-05, "loss": 0.6875, "step": 14095 }, { "epoch": 1.0476402824228912, "grad_norm": 1.7754535860658625, "learning_rate": 1.5115131540096629e-05, "loss": 0.5648, "step": 14096 }, { "epoch": 1.0477146042363434, "grad_norm": 2.2380478400743002, "learning_rate": 1.5114442068293814e-05, "loss": 0.879, "step": 14097 }, { "epoch": 1.0477889260497957, "grad_norm": 2.250185356141082, "learning_rate": 1.5113752563564487e-05, "loss": 0.6235, "step": 14098 }, { "epoch": 1.047863247863248, "grad_norm": 2.7587527323112346, "learning_rate": 1.5113063025913084e-05, "loss": 0.6501, "step": 14099 }, { "epoch": 1.0479375696767002, "grad_norm": 1.9295178575709224, "learning_rate": 1.5112373455344047e-05, "loss": 0.5694, "step": 14100 }, { "epoch": 1.0480118914901524, "grad_norm": 1.701524632458391, "learning_rate": 1.5111683851861818e-05, "loss": 0.6098, "step": 14101 }, { "epoch": 1.0480862133036046, "grad_norm": 2.0998293493546116, "learning_rate": 1.5110994215470831e-05, "loss": 0.5235, "step": 14102 }, { "epoch": 1.0481605351170569, "grad_norm": 1.718507471809312, "learning_rate": 1.5110304546175529e-05, "loss": 0.632, "step": 14103 }, { "epoch": 1.048234856930509, "grad_norm": 1.7657057017015938, "learning_rate": 1.510961484398035e-05, "loss": 0.6027, "step": 14104 }, { "epoch": 1.0483091787439613, "grad_norm": 2.4605878555584377, "learning_rate": 1.510892510888974e-05, "loss": 0.7, "step": 14105 }, { "epoch": 1.0483835005574136, "grad_norm": 2.0404036326365134, "learning_rate": 1.5108235340908133e-05, "loss": 0.5551, "step": 14106 }, { "epoch": 1.0484578223708658, "grad_norm": 1.7733793039088368, "learning_rate": 1.510754554003997e-05, "loss": 0.5608, "step": 14107 }, { "epoch": 1.048532144184318, "grad_norm": 2.182521040471292, "learning_rate": 1.5106855706289696e-05, "loss": 0.7248, "step": 14108 }, { "epoch": 1.0486064659977703, "grad_norm": 1.962928709070154, "learning_rate": 1.510616583966175e-05, "loss": 0.6152, "step": 14109 }, { "epoch": 1.0486807878112225, "grad_norm": 1.8037970502334004, "learning_rate": 1.5105475940160572e-05, "loss": 0.5576, "step": 14110 }, { "epoch": 1.0487551096246748, "grad_norm": 1.8881192185309235, "learning_rate": 1.5104786007790606e-05, "loss": 0.7032, "step": 14111 }, { "epoch": 1.048829431438127, "grad_norm": 2.1118952693599855, "learning_rate": 1.5104096042556291e-05, "loss": 0.643, "step": 14112 }, { "epoch": 1.0489037532515793, "grad_norm": 1.9490155803653164, "learning_rate": 1.5103406044462072e-05, "loss": 0.6623, "step": 14113 }, { "epoch": 1.0489780750650315, "grad_norm": 1.94759577859587, "learning_rate": 1.510271601351239e-05, "loss": 0.6006, "step": 14114 }, { "epoch": 1.0490523968784837, "grad_norm": 2.441416846103723, "learning_rate": 1.5102025949711688e-05, "loss": 0.741, "step": 14115 }, { "epoch": 1.0491267186919362, "grad_norm": 2.2124222958688287, "learning_rate": 1.5101335853064404e-05, "loss": 0.6572, "step": 14116 }, { "epoch": 1.0492010405053884, "grad_norm": 2.069616273415306, "learning_rate": 1.5100645723574987e-05, "loss": 0.7564, "step": 14117 }, { "epoch": 1.0492753623188407, "grad_norm": 2.267711558350674, "learning_rate": 1.5099955561247876e-05, "loss": 0.7862, "step": 14118 }, { "epoch": 1.049349684132293, "grad_norm": 1.6762570157462293, "learning_rate": 1.5099265366087517e-05, "loss": 0.6142, "step": 14119 }, { "epoch": 1.0494240059457451, "grad_norm": 2.490225602907281, "learning_rate": 1.509857513809835e-05, "loss": 0.5907, "step": 14120 }, { "epoch": 1.0494983277591974, "grad_norm": 1.637417056448599, "learning_rate": 1.5097884877284825e-05, "loss": 0.5748, "step": 14121 }, { "epoch": 1.0495726495726496, "grad_norm": 2.245133041058061, "learning_rate": 1.5097194583651378e-05, "loss": 0.7038, "step": 14122 }, { "epoch": 1.0496469713861019, "grad_norm": 3.051564837406914, "learning_rate": 1.5096504257202459e-05, "loss": 0.5939, "step": 14123 }, { "epoch": 1.049721293199554, "grad_norm": 1.551894106824365, "learning_rate": 1.5095813897942508e-05, "loss": 0.4773, "step": 14124 }, { "epoch": 1.0497956150130063, "grad_norm": 2.0797724032144185, "learning_rate": 1.5095123505875972e-05, "loss": 0.7059, "step": 14125 }, { "epoch": 1.0498699368264586, "grad_norm": 2.6033571757918472, "learning_rate": 1.5094433081007296e-05, "loss": 0.5505, "step": 14126 }, { "epoch": 1.0499442586399108, "grad_norm": 2.042806310566793, "learning_rate": 1.5093742623340925e-05, "loss": 0.7372, "step": 14127 }, { "epoch": 1.050018580453363, "grad_norm": 2.1118425018892233, "learning_rate": 1.5093052132881302e-05, "loss": 0.7753, "step": 14128 }, { "epoch": 1.0500929022668153, "grad_norm": 1.6177183844638394, "learning_rate": 1.5092361609632873e-05, "loss": 0.4982, "step": 14129 }, { "epoch": 1.0501672240802675, "grad_norm": 1.7782250139097369, "learning_rate": 1.5091671053600086e-05, "loss": 0.4538, "step": 14130 }, { "epoch": 1.0502415458937198, "grad_norm": 2.0988502261750837, "learning_rate": 1.5090980464787382e-05, "loss": 0.5946, "step": 14131 }, { "epoch": 1.050315867707172, "grad_norm": 1.8042656144575624, "learning_rate": 1.5090289843199215e-05, "loss": 0.603, "step": 14132 }, { "epoch": 1.0503901895206242, "grad_norm": 2.17429347148501, "learning_rate": 1.5089599188840023e-05, "loss": 0.6466, "step": 14133 }, { "epoch": 1.0504645113340765, "grad_norm": 1.9103779447380167, "learning_rate": 1.5088908501714254e-05, "loss": 0.5021, "step": 14134 }, { "epoch": 1.0505388331475287, "grad_norm": 1.4185977246993209, "learning_rate": 1.508821778182636e-05, "loss": 0.4256, "step": 14135 }, { "epoch": 1.050613154960981, "grad_norm": 1.6833360090452318, "learning_rate": 1.508752702918078e-05, "loss": 0.5541, "step": 14136 }, { "epoch": 1.0506874767744332, "grad_norm": 1.7333887555478134, "learning_rate": 1.5086836243781963e-05, "loss": 0.529, "step": 14137 }, { "epoch": 1.0507617985878857, "grad_norm": 2.213992576368831, "learning_rate": 1.5086145425634364e-05, "loss": 0.7082, "step": 14138 }, { "epoch": 1.050836120401338, "grad_norm": 2.107761481519626, "learning_rate": 1.5085454574742421e-05, "loss": 0.6646, "step": 14139 }, { "epoch": 1.0509104422147901, "grad_norm": 1.8593922300100512, "learning_rate": 1.508476369111059e-05, "loss": 0.6501, "step": 14140 }, { "epoch": 1.0509847640282424, "grad_norm": 1.654017868187183, "learning_rate": 1.5084072774743309e-05, "loss": 0.5956, "step": 14141 }, { "epoch": 1.0510590858416946, "grad_norm": 1.9018484076557816, "learning_rate": 1.5083381825645033e-05, "loss": 0.5454, "step": 14142 }, { "epoch": 1.0511334076551468, "grad_norm": 1.574250724100415, "learning_rate": 1.5082690843820209e-05, "loss": 0.4961, "step": 14143 }, { "epoch": 1.051207729468599, "grad_norm": 2.0962124707219796, "learning_rate": 1.5081999829273283e-05, "loss": 0.6455, "step": 14144 }, { "epoch": 1.0512820512820513, "grad_norm": 1.985893225235037, "learning_rate": 1.5081308782008707e-05, "loss": 0.6074, "step": 14145 }, { "epoch": 1.0513563730955036, "grad_norm": 2.058697798151412, "learning_rate": 1.5080617702030931e-05, "loss": 0.5889, "step": 14146 }, { "epoch": 1.0514306949089558, "grad_norm": 2.0246605466061633, "learning_rate": 1.5079926589344402e-05, "loss": 0.6395, "step": 14147 }, { "epoch": 1.051505016722408, "grad_norm": 2.711716455504755, "learning_rate": 1.5079235443953566e-05, "loss": 0.7233, "step": 14148 }, { "epoch": 1.0515793385358603, "grad_norm": 1.8726987248623737, "learning_rate": 1.5078544265862878e-05, "loss": 0.688, "step": 14149 }, { "epoch": 1.0516536603493125, "grad_norm": 1.8595389824041022, "learning_rate": 1.5077853055076782e-05, "loss": 0.7538, "step": 14150 }, { "epoch": 1.0517279821627648, "grad_norm": 2.1215562035170406, "learning_rate": 1.5077161811599736e-05, "loss": 0.7196, "step": 14151 }, { "epoch": 1.051802303976217, "grad_norm": 1.8306317226678097, "learning_rate": 1.5076470535436185e-05, "loss": 0.7062, "step": 14152 }, { "epoch": 1.0518766257896692, "grad_norm": 1.9050833774350286, "learning_rate": 1.5075779226590576e-05, "loss": 0.6717, "step": 14153 }, { "epoch": 1.0519509476031215, "grad_norm": 1.9213460580464454, "learning_rate": 1.5075087885067368e-05, "loss": 0.6681, "step": 14154 }, { "epoch": 1.0520252694165737, "grad_norm": 1.6966685052004282, "learning_rate": 1.5074396510871005e-05, "loss": 0.6918, "step": 14155 }, { "epoch": 1.052099591230026, "grad_norm": 1.9636267999836485, "learning_rate": 1.5073705104005942e-05, "loss": 0.5833, "step": 14156 }, { "epoch": 1.0521739130434782, "grad_norm": 1.8004794813451188, "learning_rate": 1.507301366447663e-05, "loss": 0.5952, "step": 14157 }, { "epoch": 1.0522482348569304, "grad_norm": 1.9754602982163123, "learning_rate": 1.5072322192287519e-05, "loss": 0.6107, "step": 14158 }, { "epoch": 1.0523225566703827, "grad_norm": 1.7434643144610078, "learning_rate": 1.5071630687443057e-05, "loss": 0.5695, "step": 14159 }, { "epoch": 1.052396878483835, "grad_norm": 1.8419341523029162, "learning_rate": 1.5070939149947705e-05, "loss": 0.5557, "step": 14160 }, { "epoch": 1.0524712002972874, "grad_norm": 1.6306104601624067, "learning_rate": 1.5070247579805906e-05, "loss": 0.62, "step": 14161 }, { "epoch": 1.0525455221107396, "grad_norm": 1.6173015229512049, "learning_rate": 1.5069555977022119e-05, "loss": 0.5464, "step": 14162 }, { "epoch": 1.0526198439241918, "grad_norm": 1.6732255450336855, "learning_rate": 1.506886434160079e-05, "loss": 0.5036, "step": 14163 }, { "epoch": 1.052694165737644, "grad_norm": 1.92155331148633, "learning_rate": 1.5068172673546377e-05, "loss": 0.5473, "step": 14164 }, { "epoch": 1.0527684875510963, "grad_norm": 1.6738349819654523, "learning_rate": 1.5067480972863332e-05, "loss": 0.593, "step": 14165 }, { "epoch": 1.0528428093645485, "grad_norm": 2.02961995460578, "learning_rate": 1.5066789239556108e-05, "loss": 0.6902, "step": 14166 }, { "epoch": 1.0529171311780008, "grad_norm": 1.8250957430224393, "learning_rate": 1.5066097473629158e-05, "loss": 0.5705, "step": 14167 }, { "epoch": 1.052991452991453, "grad_norm": 1.797084869805426, "learning_rate": 1.5065405675086933e-05, "loss": 0.5828, "step": 14168 }, { "epoch": 1.0530657748049053, "grad_norm": 3.463467004719348, "learning_rate": 1.5064713843933891e-05, "loss": 0.7301, "step": 14169 }, { "epoch": 1.0531400966183575, "grad_norm": 1.8369538461476034, "learning_rate": 1.5064021980174484e-05, "loss": 0.5549, "step": 14170 }, { "epoch": 1.0532144184318097, "grad_norm": 1.7926003718181538, "learning_rate": 1.5063330083813164e-05, "loss": 0.5934, "step": 14171 }, { "epoch": 1.053288740245262, "grad_norm": 2.089739285430366, "learning_rate": 1.506263815485439e-05, "loss": 0.5566, "step": 14172 }, { "epoch": 1.0533630620587142, "grad_norm": 1.99736905334099, "learning_rate": 1.5061946193302615e-05, "loss": 0.5155, "step": 14173 }, { "epoch": 1.0534373838721665, "grad_norm": 2.0267950029844015, "learning_rate": 1.506125419916229e-05, "loss": 0.5709, "step": 14174 }, { "epoch": 1.0535117056856187, "grad_norm": 2.083388974549176, "learning_rate": 1.5060562172437875e-05, "loss": 0.5783, "step": 14175 }, { "epoch": 1.053586027499071, "grad_norm": 1.5778870727787937, "learning_rate": 1.5059870113133821e-05, "loss": 0.4259, "step": 14176 }, { "epoch": 1.0536603493125232, "grad_norm": 1.5815843081496939, "learning_rate": 1.505917802125459e-05, "loss": 0.674, "step": 14177 }, { "epoch": 1.0537346711259754, "grad_norm": 1.9307681905441034, "learning_rate": 1.5058485896804631e-05, "loss": 0.6453, "step": 14178 }, { "epoch": 1.0538089929394276, "grad_norm": 2.003606914867571, "learning_rate": 1.5057793739788404e-05, "loss": 0.4873, "step": 14179 }, { "epoch": 1.0538833147528799, "grad_norm": 1.9912652014322445, "learning_rate": 1.505710155021036e-05, "loss": 0.5996, "step": 14180 }, { "epoch": 1.0539576365663321, "grad_norm": 1.482613808339566, "learning_rate": 1.505640932807496e-05, "loss": 0.5368, "step": 14181 }, { "epoch": 1.0540319583797844, "grad_norm": 2.6784696224539144, "learning_rate": 1.5055717073386657e-05, "loss": 0.8706, "step": 14182 }, { "epoch": 1.0541062801932368, "grad_norm": 1.8747358483904506, "learning_rate": 1.5055024786149916e-05, "loss": 0.6116, "step": 14183 }, { "epoch": 1.054180602006689, "grad_norm": 1.8473431610270574, "learning_rate": 1.5054332466369183e-05, "loss": 0.7504, "step": 14184 }, { "epoch": 1.0542549238201413, "grad_norm": 2.116204604443755, "learning_rate": 1.505364011404892e-05, "loss": 0.6218, "step": 14185 }, { "epoch": 1.0543292456335935, "grad_norm": 1.9611220586952292, "learning_rate": 1.5052947729193586e-05, "loss": 0.6096, "step": 14186 }, { "epoch": 1.0544035674470458, "grad_norm": 1.9946071903580804, "learning_rate": 1.5052255311807632e-05, "loss": 0.8099, "step": 14187 }, { "epoch": 1.054477889260498, "grad_norm": 2.349895443208268, "learning_rate": 1.5051562861895527e-05, "loss": 0.5394, "step": 14188 }, { "epoch": 1.0545522110739503, "grad_norm": 1.5015454023852457, "learning_rate": 1.5050870379461719e-05, "loss": 0.4147, "step": 14189 }, { "epoch": 1.0546265328874025, "grad_norm": 2.2230319455999132, "learning_rate": 1.5050177864510672e-05, "loss": 0.6387, "step": 14190 }, { "epoch": 1.0547008547008547, "grad_norm": 2.203861662304489, "learning_rate": 1.5049485317046839e-05, "loss": 0.6151, "step": 14191 }, { "epoch": 1.054775176514307, "grad_norm": 1.6811560324516368, "learning_rate": 1.5048792737074683e-05, "loss": 0.5175, "step": 14192 }, { "epoch": 1.0548494983277592, "grad_norm": 1.8384542371846746, "learning_rate": 1.5048100124598661e-05, "loss": 0.5862, "step": 14193 }, { "epoch": 1.0549238201412114, "grad_norm": 1.8755174918817994, "learning_rate": 1.5047407479623233e-05, "loss": 0.5657, "step": 14194 }, { "epoch": 1.0549981419546637, "grad_norm": 2.6014404880749193, "learning_rate": 1.5046714802152857e-05, "loss": 0.7158, "step": 14195 }, { "epoch": 1.055072463768116, "grad_norm": 1.9378111348682587, "learning_rate": 1.5046022092191992e-05, "loss": 0.6193, "step": 14196 }, { "epoch": 1.0551467855815682, "grad_norm": 2.1312679890518083, "learning_rate": 1.5045329349745099e-05, "loss": 0.6418, "step": 14197 }, { "epoch": 1.0552211073950204, "grad_norm": 2.329227930262208, "learning_rate": 1.5044636574816638e-05, "loss": 0.7645, "step": 14198 }, { "epoch": 1.0552954292084726, "grad_norm": 1.8082156839768393, "learning_rate": 1.5043943767411067e-05, "loss": 0.6117, "step": 14199 }, { "epoch": 1.0553697510219249, "grad_norm": 1.8108969192819715, "learning_rate": 1.5043250927532848e-05, "loss": 0.5769, "step": 14200 }, { "epoch": 1.0554440728353771, "grad_norm": 2.0180822314307387, "learning_rate": 1.5042558055186445e-05, "loss": 0.6613, "step": 14201 }, { "epoch": 1.0555183946488294, "grad_norm": 1.9741209292179256, "learning_rate": 1.5041865150376312e-05, "loss": 0.5745, "step": 14202 }, { "epoch": 1.0555927164622816, "grad_norm": 2.4280150142680066, "learning_rate": 1.504117221310691e-05, "loss": 0.7078, "step": 14203 }, { "epoch": 1.0556670382757338, "grad_norm": 2.0385811684865063, "learning_rate": 1.5040479243382707e-05, "loss": 0.4818, "step": 14204 }, { "epoch": 1.0557413600891863, "grad_norm": 2.190667269789642, "learning_rate": 1.503978624120816e-05, "loss": 0.8236, "step": 14205 }, { "epoch": 1.0558156819026385, "grad_norm": 1.7980453807628116, "learning_rate": 1.5039093206587731e-05, "loss": 0.5176, "step": 14206 }, { "epoch": 1.0558900037160908, "grad_norm": 1.7809855348605685, "learning_rate": 1.503840013952588e-05, "loss": 0.6077, "step": 14207 }, { "epoch": 1.055964325529543, "grad_norm": 1.9039594581542896, "learning_rate": 1.5037707040027068e-05, "loss": 0.5659, "step": 14208 }, { "epoch": 1.0560386473429952, "grad_norm": 2.1192555003141944, "learning_rate": 1.5037013908095762e-05, "loss": 0.6202, "step": 14209 }, { "epoch": 1.0561129691564475, "grad_norm": 2.670858665745892, "learning_rate": 1.503632074373642e-05, "loss": 0.6958, "step": 14210 }, { "epoch": 1.0561872909698997, "grad_norm": 1.5268699903687128, "learning_rate": 1.5035627546953509e-05, "loss": 0.4708, "step": 14211 }, { "epoch": 1.056261612783352, "grad_norm": 2.1829214664180414, "learning_rate": 1.5034934317751488e-05, "loss": 0.6364, "step": 14212 }, { "epoch": 1.0563359345968042, "grad_norm": 2.2486807276467107, "learning_rate": 1.5034241056134821e-05, "loss": 0.8222, "step": 14213 }, { "epoch": 1.0564102564102564, "grad_norm": 1.9913199698366526, "learning_rate": 1.5033547762107972e-05, "loss": 0.6698, "step": 14214 }, { "epoch": 1.0564845782237087, "grad_norm": 2.2438460718165034, "learning_rate": 1.5032854435675402e-05, "loss": 0.6258, "step": 14215 }, { "epoch": 1.056558900037161, "grad_norm": 1.8894682582222293, "learning_rate": 1.5032161076841579e-05, "loss": 0.722, "step": 14216 }, { "epoch": 1.0566332218506131, "grad_norm": 2.4631340022124557, "learning_rate": 1.5031467685610963e-05, "loss": 0.659, "step": 14217 }, { "epoch": 1.0567075436640654, "grad_norm": 1.5194902652929676, "learning_rate": 1.503077426198802e-05, "loss": 0.2755, "step": 14218 }, { "epoch": 1.0567818654775176, "grad_norm": 1.9846694152315998, "learning_rate": 1.503008080597721e-05, "loss": 0.632, "step": 14219 }, { "epoch": 1.0568561872909699, "grad_norm": 1.7385563704835776, "learning_rate": 1.5029387317583001e-05, "loss": 0.5051, "step": 14220 }, { "epoch": 1.056930509104422, "grad_norm": 1.6734258605297163, "learning_rate": 1.502869379680986e-05, "loss": 0.5355, "step": 14221 }, { "epoch": 1.0570048309178743, "grad_norm": 1.8162294921406064, "learning_rate": 1.5028000243662249e-05, "loss": 0.6818, "step": 14222 }, { "epoch": 1.0570791527313266, "grad_norm": 2.192922946165054, "learning_rate": 1.5027306658144633e-05, "loss": 0.7383, "step": 14223 }, { "epoch": 1.0571534745447788, "grad_norm": 2.0469824032747845, "learning_rate": 1.5026613040261477e-05, "loss": 0.7272, "step": 14224 }, { "epoch": 1.057227796358231, "grad_norm": 2.120974185109628, "learning_rate": 1.5025919390017247e-05, "loss": 0.5824, "step": 14225 }, { "epoch": 1.0573021181716833, "grad_norm": 1.708391956033209, "learning_rate": 1.5025225707416406e-05, "loss": 0.561, "step": 14226 }, { "epoch": 1.0573764399851355, "grad_norm": 2.452239858790925, "learning_rate": 1.5024531992463429e-05, "loss": 0.754, "step": 14227 }, { "epoch": 1.057450761798588, "grad_norm": 1.858642988686781, "learning_rate": 1.5023838245162774e-05, "loss": 0.6833, "step": 14228 }, { "epoch": 1.0575250836120402, "grad_norm": 1.8612361296661986, "learning_rate": 1.5023144465518907e-05, "loss": 0.6486, "step": 14229 }, { "epoch": 1.0575994054254925, "grad_norm": 2.0129795002805344, "learning_rate": 1.5022450653536296e-05, "loss": 0.5813, "step": 14230 }, { "epoch": 1.0576737272389447, "grad_norm": 2.0125025886331214, "learning_rate": 1.502175680921941e-05, "loss": 0.5732, "step": 14231 }, { "epoch": 1.057748049052397, "grad_norm": 1.6405497664124953, "learning_rate": 1.502106293257271e-05, "loss": 0.4981, "step": 14232 }, { "epoch": 1.0578223708658492, "grad_norm": 3.4442888913758902, "learning_rate": 1.5020369023600674e-05, "loss": 0.6649, "step": 14233 }, { "epoch": 1.0578966926793014, "grad_norm": 2.0899875733179143, "learning_rate": 1.5019675082307758e-05, "loss": 0.703, "step": 14234 }, { "epoch": 1.0579710144927537, "grad_norm": 2.196620086887189, "learning_rate": 1.5018981108698438e-05, "loss": 0.7161, "step": 14235 }, { "epoch": 1.058045336306206, "grad_norm": 1.6010027539644538, "learning_rate": 1.5018287102777173e-05, "loss": 0.6471, "step": 14236 }, { "epoch": 1.0581196581196581, "grad_norm": 2.16264705914983, "learning_rate": 1.5017593064548441e-05, "loss": 0.6619, "step": 14237 }, { "epoch": 1.0581939799331104, "grad_norm": 1.9912549672170592, "learning_rate": 1.5016898994016704e-05, "loss": 0.5816, "step": 14238 }, { "epoch": 1.0582683017465626, "grad_norm": 1.943789322334332, "learning_rate": 1.501620489118643e-05, "loss": 0.6157, "step": 14239 }, { "epoch": 1.0583426235600149, "grad_norm": 2.2289076510116557, "learning_rate": 1.5015510756062091e-05, "loss": 0.5231, "step": 14240 }, { "epoch": 1.058416945373467, "grad_norm": 2.1586112778858393, "learning_rate": 1.5014816588648152e-05, "loss": 0.6744, "step": 14241 }, { "epoch": 1.0584912671869193, "grad_norm": 2.067834426221793, "learning_rate": 1.5014122388949084e-05, "loss": 0.5626, "step": 14242 }, { "epoch": 1.0585655890003716, "grad_norm": 1.7709201526051321, "learning_rate": 1.5013428156969358e-05, "loss": 0.5462, "step": 14243 }, { "epoch": 1.0586399108138238, "grad_norm": 1.6201371863656095, "learning_rate": 1.501273389271344e-05, "loss": 0.509, "step": 14244 }, { "epoch": 1.058714232627276, "grad_norm": 2.362971607947864, "learning_rate": 1.5012039596185803e-05, "loss": 0.8894, "step": 14245 }, { "epoch": 1.0587885544407283, "grad_norm": 1.7213100102424654, "learning_rate": 1.5011345267390915e-05, "loss": 0.4438, "step": 14246 }, { "epoch": 1.0588628762541805, "grad_norm": 1.8852774579272054, "learning_rate": 1.5010650906333244e-05, "loss": 0.7404, "step": 14247 }, { "epoch": 1.0589371980676328, "grad_norm": 2.354132597713607, "learning_rate": 1.5009956513017263e-05, "loss": 0.6214, "step": 14248 }, { "epoch": 1.059011519881085, "grad_norm": 1.7336304693258557, "learning_rate": 1.5009262087447444e-05, "loss": 0.558, "step": 14249 }, { "epoch": 1.0590858416945372, "grad_norm": 1.8116375711351123, "learning_rate": 1.5008567629628252e-05, "loss": 0.5219, "step": 14250 }, { "epoch": 1.0591601635079897, "grad_norm": 2.132206946953636, "learning_rate": 1.5007873139564167e-05, "loss": 0.6067, "step": 14251 }, { "epoch": 1.059234485321442, "grad_norm": 1.6515721646396828, "learning_rate": 1.500717861725965e-05, "loss": 0.5965, "step": 14252 }, { "epoch": 1.0593088071348942, "grad_norm": 1.697221428556304, "learning_rate": 1.5006484062719177e-05, "loss": 0.5598, "step": 14253 }, { "epoch": 1.0593831289483464, "grad_norm": 1.63413157520999, "learning_rate": 1.5005789475947223e-05, "loss": 0.6005, "step": 14254 }, { "epoch": 1.0594574507617986, "grad_norm": 2.3150001633546937, "learning_rate": 1.5005094856948254e-05, "loss": 0.7573, "step": 14255 }, { "epoch": 1.0595317725752509, "grad_norm": 1.986521219019026, "learning_rate": 1.5004400205726745e-05, "loss": 0.7402, "step": 14256 }, { "epoch": 1.0596060943887031, "grad_norm": 1.9170654880276998, "learning_rate": 1.5003705522287165e-05, "loss": 0.7561, "step": 14257 }, { "epoch": 1.0596804162021554, "grad_norm": 1.7493581078674083, "learning_rate": 1.5003010806633986e-05, "loss": 0.6305, "step": 14258 }, { "epoch": 1.0597547380156076, "grad_norm": 2.4294777199061675, "learning_rate": 1.5002316058771688e-05, "loss": 0.689, "step": 14259 }, { "epoch": 1.0598290598290598, "grad_norm": 2.076491017389727, "learning_rate": 1.5001621278704739e-05, "loss": 0.6215, "step": 14260 }, { "epoch": 1.059903381642512, "grad_norm": 1.9418946197411184, "learning_rate": 1.5000926466437608e-05, "loss": 0.6833, "step": 14261 }, { "epoch": 1.0599777034559643, "grad_norm": 2.806290601025138, "learning_rate": 1.5000231621974774e-05, "loss": 0.675, "step": 14262 }, { "epoch": 1.0600520252694166, "grad_norm": 1.9175681873748296, "learning_rate": 1.4999536745320709e-05, "loss": 0.6017, "step": 14263 }, { "epoch": 1.0601263470828688, "grad_norm": 1.7844611826455454, "learning_rate": 1.4998841836479882e-05, "loss": 0.6837, "step": 14264 }, { "epoch": 1.060200668896321, "grad_norm": 1.8555955627749408, "learning_rate": 1.4998146895456778e-05, "loss": 0.5364, "step": 14265 }, { "epoch": 1.0602749907097733, "grad_norm": 2.379370550242327, "learning_rate": 1.4997451922255857e-05, "loss": 0.5824, "step": 14266 }, { "epoch": 1.0603493125232255, "grad_norm": 1.2978690736116234, "learning_rate": 1.4996756916881601e-05, "loss": 0.4653, "step": 14267 }, { "epoch": 1.0604236343366777, "grad_norm": 1.9007411410778465, "learning_rate": 1.4996061879338486e-05, "loss": 0.5605, "step": 14268 }, { "epoch": 1.06049795615013, "grad_norm": 2.3909773887531136, "learning_rate": 1.499536680963098e-05, "loss": 0.65, "step": 14269 }, { "epoch": 1.0605722779635822, "grad_norm": 1.978583558167466, "learning_rate": 1.4994671707763564e-05, "loss": 0.6133, "step": 14270 }, { "epoch": 1.0606465997770345, "grad_norm": 2.276898301088941, "learning_rate": 1.4993976573740711e-05, "loss": 0.729, "step": 14271 }, { "epoch": 1.060720921590487, "grad_norm": 3.590192973519874, "learning_rate": 1.4993281407566895e-05, "loss": 0.771, "step": 14272 }, { "epoch": 1.0607952434039392, "grad_norm": 2.0016829335298367, "learning_rate": 1.4992586209246594e-05, "loss": 0.7149, "step": 14273 }, { "epoch": 1.0608695652173914, "grad_norm": 1.7923227695922879, "learning_rate": 1.4991890978784281e-05, "loss": 0.648, "step": 14274 }, { "epoch": 1.0609438870308436, "grad_norm": 2.2590576394654494, "learning_rate": 1.499119571618443e-05, "loss": 0.6619, "step": 14275 }, { "epoch": 1.0610182088442959, "grad_norm": 2.113331749441177, "learning_rate": 1.4990500421451524e-05, "loss": 0.7138, "step": 14276 }, { "epoch": 1.0610925306577481, "grad_norm": 1.6441730718094014, "learning_rate": 1.4989805094590033e-05, "loss": 0.5491, "step": 14277 }, { "epoch": 1.0611668524712004, "grad_norm": 1.4847772875142675, "learning_rate": 1.4989109735604436e-05, "loss": 0.515, "step": 14278 }, { "epoch": 1.0612411742846526, "grad_norm": 2.0371671641360676, "learning_rate": 1.4988414344499211e-05, "loss": 0.6489, "step": 14279 }, { "epoch": 1.0613154960981048, "grad_norm": 2.0728846652395005, "learning_rate": 1.4987718921278834e-05, "loss": 0.687, "step": 14280 }, { "epoch": 1.061389817911557, "grad_norm": 2.064133346746106, "learning_rate": 1.4987023465947776e-05, "loss": 0.596, "step": 14281 }, { "epoch": 1.0614641397250093, "grad_norm": 1.598011740003186, "learning_rate": 1.4986327978510526e-05, "loss": 0.6105, "step": 14282 }, { "epoch": 1.0615384615384615, "grad_norm": 1.8680632374886903, "learning_rate": 1.498563245897155e-05, "loss": 0.6445, "step": 14283 }, { "epoch": 1.0616127833519138, "grad_norm": 1.5431833012973608, "learning_rate": 1.4984936907335337e-05, "loss": 0.4012, "step": 14284 }, { "epoch": 1.061687105165366, "grad_norm": 1.8370291434562798, "learning_rate": 1.4984241323606354e-05, "loss": 0.7017, "step": 14285 }, { "epoch": 1.0617614269788183, "grad_norm": 1.9088865638686043, "learning_rate": 1.4983545707789084e-05, "loss": 0.56, "step": 14286 }, { "epoch": 1.0618357487922705, "grad_norm": 1.8117007439876023, "learning_rate": 1.4982850059888008e-05, "loss": 0.5877, "step": 14287 }, { "epoch": 1.0619100706057227, "grad_norm": 1.9174077960972027, "learning_rate": 1.4982154379907598e-05, "loss": 0.6103, "step": 14288 }, { "epoch": 1.061984392419175, "grad_norm": 1.829166239677923, "learning_rate": 1.498145866785234e-05, "loss": 0.6154, "step": 14289 }, { "epoch": 1.0620587142326272, "grad_norm": 1.936794548616536, "learning_rate": 1.4980762923726709e-05, "loss": 0.6079, "step": 14290 }, { "epoch": 1.0621330360460794, "grad_norm": 1.9847726387477975, "learning_rate": 1.4980067147535185e-05, "loss": 0.5528, "step": 14291 }, { "epoch": 1.0622073578595317, "grad_norm": 2.3264685968135317, "learning_rate": 1.4979371339282245e-05, "loss": 0.653, "step": 14292 }, { "epoch": 1.062281679672984, "grad_norm": 1.9826665517139577, "learning_rate": 1.4978675498972372e-05, "loss": 0.6046, "step": 14293 }, { "epoch": 1.0623560014864362, "grad_norm": 2.143011998061156, "learning_rate": 1.4977979626610043e-05, "loss": 0.7263, "step": 14294 }, { "epoch": 1.0624303232998886, "grad_norm": 2.627393485421043, "learning_rate": 1.4977283722199742e-05, "loss": 0.6722, "step": 14295 }, { "epoch": 1.0625046451133409, "grad_norm": 1.8416251407607773, "learning_rate": 1.4976587785745947e-05, "loss": 0.6113, "step": 14296 }, { "epoch": 1.062578966926793, "grad_norm": 2.5091823323702607, "learning_rate": 1.4975891817253136e-05, "loss": 0.8272, "step": 14297 }, { "epoch": 1.0626532887402453, "grad_norm": 1.984181945732191, "learning_rate": 1.4975195816725793e-05, "loss": 0.596, "step": 14298 }, { "epoch": 1.0627276105536976, "grad_norm": 2.0865842610634435, "learning_rate": 1.4974499784168397e-05, "loss": 0.633, "step": 14299 }, { "epoch": 1.0628019323671498, "grad_norm": 1.7883939972987077, "learning_rate": 1.497380371958543e-05, "loss": 0.5023, "step": 14300 }, { "epoch": 1.062876254180602, "grad_norm": 1.988832890010378, "learning_rate": 1.497310762298137e-05, "loss": 0.6228, "step": 14301 }, { "epoch": 1.0629505759940543, "grad_norm": 1.8837722957460314, "learning_rate": 1.4972411494360704e-05, "loss": 0.5649, "step": 14302 }, { "epoch": 1.0630248978075065, "grad_norm": 1.7810458462412906, "learning_rate": 1.497171533372791e-05, "loss": 0.6075, "step": 14303 }, { "epoch": 1.0630992196209588, "grad_norm": 1.9524086266426521, "learning_rate": 1.497101914108747e-05, "loss": 0.7136, "step": 14304 }, { "epoch": 1.063173541434411, "grad_norm": 2.091707449916367, "learning_rate": 1.4970322916443868e-05, "loss": 0.6127, "step": 14305 }, { "epoch": 1.0632478632478632, "grad_norm": 2.0145143648276878, "learning_rate": 1.4969626659801584e-05, "loss": 0.7487, "step": 14306 }, { "epoch": 1.0633221850613155, "grad_norm": 1.9437269007571611, "learning_rate": 1.4968930371165103e-05, "loss": 0.6125, "step": 14307 }, { "epoch": 1.0633965068747677, "grad_norm": 2.3077496635475883, "learning_rate": 1.4968234050538903e-05, "loss": 0.776, "step": 14308 }, { "epoch": 1.06347082868822, "grad_norm": 2.1720208563702275, "learning_rate": 1.4967537697927474e-05, "loss": 0.7166, "step": 14309 }, { "epoch": 1.0635451505016722, "grad_norm": 2.0319125755952854, "learning_rate": 1.4966841313335291e-05, "loss": 0.7734, "step": 14310 }, { "epoch": 1.0636194723151244, "grad_norm": 2.0669606347134395, "learning_rate": 1.4966144896766843e-05, "loss": 0.7426, "step": 14311 }, { "epoch": 1.0636937941285767, "grad_norm": 1.778786370114128, "learning_rate": 1.4965448448226612e-05, "loss": 0.7286, "step": 14312 }, { "epoch": 1.063768115942029, "grad_norm": 2.00765697888814, "learning_rate": 1.4964751967719083e-05, "loss": 0.6331, "step": 14313 }, { "epoch": 1.0638424377554812, "grad_norm": 2.8996144993613013, "learning_rate": 1.4964055455248737e-05, "loss": 0.6331, "step": 14314 }, { "epoch": 1.0639167595689334, "grad_norm": 2.4017275351214655, "learning_rate": 1.4963358910820057e-05, "loss": 0.708, "step": 14315 }, { "epoch": 1.0639910813823856, "grad_norm": 2.1342760375518264, "learning_rate": 1.4962662334437535e-05, "loss": 0.6065, "step": 14316 }, { "epoch": 1.0640654031958379, "grad_norm": 1.8904470328234546, "learning_rate": 1.4961965726105646e-05, "loss": 0.6009, "step": 14317 }, { "epoch": 1.0641397250092903, "grad_norm": 1.70598208701546, "learning_rate": 1.4961269085828882e-05, "loss": 0.5647, "step": 14318 }, { "epoch": 1.0642140468227426, "grad_norm": 2.1000658908706193, "learning_rate": 1.4960572413611723e-05, "loss": 0.8541, "step": 14319 }, { "epoch": 1.0642883686361948, "grad_norm": 1.8819095015738452, "learning_rate": 1.4959875709458658e-05, "loss": 0.6931, "step": 14320 }, { "epoch": 1.064362690449647, "grad_norm": 1.8299751576532763, "learning_rate": 1.4959178973374168e-05, "loss": 0.6015, "step": 14321 }, { "epoch": 1.0644370122630993, "grad_norm": 1.864546869757306, "learning_rate": 1.4958482205362743e-05, "loss": 0.6825, "step": 14322 }, { "epoch": 1.0645113340765515, "grad_norm": 2.0535162032629932, "learning_rate": 1.4957785405428866e-05, "loss": 0.6906, "step": 14323 }, { "epoch": 1.0645856558900038, "grad_norm": 2.0678731195651823, "learning_rate": 1.4957088573577025e-05, "loss": 0.6153, "step": 14324 }, { "epoch": 1.064659977703456, "grad_norm": 1.6076142038478902, "learning_rate": 1.4956391709811704e-05, "loss": 0.5052, "step": 14325 }, { "epoch": 1.0647342995169082, "grad_norm": 1.6927343830272252, "learning_rate": 1.4955694814137391e-05, "loss": 0.551, "step": 14326 }, { "epoch": 1.0648086213303605, "grad_norm": 1.7201525297438391, "learning_rate": 1.4954997886558573e-05, "loss": 0.7128, "step": 14327 }, { "epoch": 1.0648829431438127, "grad_norm": 1.6747440624404635, "learning_rate": 1.4954300927079733e-05, "loss": 0.4942, "step": 14328 }, { "epoch": 1.064957264957265, "grad_norm": 1.768048958600146, "learning_rate": 1.495360393570536e-05, "loss": 0.6595, "step": 14329 }, { "epoch": 1.0650315867707172, "grad_norm": 1.6605471847219884, "learning_rate": 1.4952906912439946e-05, "loss": 0.5672, "step": 14330 }, { "epoch": 1.0651059085841694, "grad_norm": 2.0976684020599867, "learning_rate": 1.4952209857287974e-05, "loss": 0.7376, "step": 14331 }, { "epoch": 1.0651802303976217, "grad_norm": 1.9515899334089246, "learning_rate": 1.4951512770253929e-05, "loss": 0.6439, "step": 14332 }, { "epoch": 1.065254552211074, "grad_norm": 5.625568912587169, "learning_rate": 1.49508156513423e-05, "loss": 0.7069, "step": 14333 }, { "epoch": 1.0653288740245261, "grad_norm": 1.8171810494313243, "learning_rate": 1.495011850055758e-05, "loss": 0.6238, "step": 14334 }, { "epoch": 1.0654031958379784, "grad_norm": 1.6216487508338926, "learning_rate": 1.4949421317904256e-05, "loss": 0.6089, "step": 14335 }, { "epoch": 1.0654775176514306, "grad_norm": 1.7957225358709814, "learning_rate": 1.4948724103386809e-05, "loss": 0.5335, "step": 14336 }, { "epoch": 1.0655518394648829, "grad_norm": 1.5284559902295554, "learning_rate": 1.494802685700974e-05, "loss": 0.456, "step": 14337 }, { "epoch": 1.065626161278335, "grad_norm": 1.7639532347903413, "learning_rate": 1.4947329578777522e-05, "loss": 0.6451, "step": 14338 }, { "epoch": 1.0657004830917876, "grad_norm": 1.6601545923271865, "learning_rate": 1.494663226869466e-05, "loss": 0.5428, "step": 14339 }, { "epoch": 1.0657748049052396, "grad_norm": 2.3108597903796055, "learning_rate": 1.4945934926765632e-05, "loss": 0.7671, "step": 14340 }, { "epoch": 1.065849126718692, "grad_norm": 1.7995223994447376, "learning_rate": 1.4945237552994933e-05, "loss": 0.6629, "step": 14341 }, { "epoch": 1.0659234485321443, "grad_norm": 1.910777156726313, "learning_rate": 1.4944540147387055e-05, "loss": 0.5783, "step": 14342 }, { "epoch": 1.0659977703455965, "grad_norm": 1.634347414581655, "learning_rate": 1.494384270994648e-05, "loss": 0.6021, "step": 14343 }, { "epoch": 1.0660720921590487, "grad_norm": 1.8713587410426349, "learning_rate": 1.49431452406777e-05, "loss": 0.5928, "step": 14344 }, { "epoch": 1.066146413972501, "grad_norm": 2.2726569110969184, "learning_rate": 1.4942447739585212e-05, "loss": 0.7394, "step": 14345 }, { "epoch": 1.0662207357859532, "grad_norm": 2.119119498267825, "learning_rate": 1.4941750206673498e-05, "loss": 0.706, "step": 14346 }, { "epoch": 1.0662950575994055, "grad_norm": 2.1896789854381122, "learning_rate": 1.4941052641947055e-05, "loss": 0.8134, "step": 14347 }, { "epoch": 1.0663693794128577, "grad_norm": 1.58057562468274, "learning_rate": 1.4940355045410373e-05, "loss": 0.5259, "step": 14348 }, { "epoch": 1.06644370122631, "grad_norm": 2.5945911931222314, "learning_rate": 1.4939657417067937e-05, "loss": 0.534, "step": 14349 }, { "epoch": 1.0665180230397622, "grad_norm": 2.484513898580611, "learning_rate": 1.4938959756924246e-05, "loss": 0.6684, "step": 14350 }, { "epoch": 1.0665923448532144, "grad_norm": 1.6790692418108115, "learning_rate": 1.4938262064983787e-05, "loss": 0.5621, "step": 14351 }, { "epoch": 1.0666666666666667, "grad_norm": 2.417939952528963, "learning_rate": 1.4937564341251054e-05, "loss": 0.7173, "step": 14352 }, { "epoch": 1.066740988480119, "grad_norm": 2.147720713376971, "learning_rate": 1.4936866585730538e-05, "loss": 0.5652, "step": 14353 }, { "epoch": 1.0668153102935711, "grad_norm": 1.8243738755938796, "learning_rate": 1.4936168798426731e-05, "loss": 0.6443, "step": 14354 }, { "epoch": 1.0668896321070234, "grad_norm": 1.603129979298422, "learning_rate": 1.4935470979344126e-05, "loss": 0.5569, "step": 14355 }, { "epoch": 1.0669639539204756, "grad_norm": 1.9866243939902264, "learning_rate": 1.4934773128487213e-05, "loss": 0.7476, "step": 14356 }, { "epoch": 1.0670382757339278, "grad_norm": 1.8844334176745894, "learning_rate": 1.4934075245860486e-05, "loss": 0.6152, "step": 14357 }, { "epoch": 1.06711259754738, "grad_norm": 2.1727902780672954, "learning_rate": 1.4933377331468441e-05, "loss": 0.7527, "step": 14358 }, { "epoch": 1.0671869193608323, "grad_norm": 2.433488524689897, "learning_rate": 1.493267938531557e-05, "loss": 0.6168, "step": 14359 }, { "epoch": 1.0672612411742846, "grad_norm": 1.8659272467611905, "learning_rate": 1.4931981407406361e-05, "loss": 0.6165, "step": 14360 }, { "epoch": 1.0673355629877368, "grad_norm": 1.765593362883163, "learning_rate": 1.4931283397745316e-05, "loss": 0.5328, "step": 14361 }, { "epoch": 1.0674098848011893, "grad_norm": 1.936672103266244, "learning_rate": 1.4930585356336921e-05, "loss": 0.5695, "step": 14362 }, { "epoch": 1.0674842066146415, "grad_norm": 2.18413403239763, "learning_rate": 1.4929887283185676e-05, "loss": 0.7236, "step": 14363 }, { "epoch": 1.0675585284280937, "grad_norm": 2.0281477922026756, "learning_rate": 1.492918917829607e-05, "loss": 0.6841, "step": 14364 }, { "epoch": 1.067632850241546, "grad_norm": 1.8681065386780686, "learning_rate": 1.4928491041672601e-05, "loss": 0.4979, "step": 14365 }, { "epoch": 1.0677071720549982, "grad_norm": 1.8246335305935186, "learning_rate": 1.4927792873319762e-05, "loss": 0.5836, "step": 14366 }, { "epoch": 1.0677814938684504, "grad_norm": 1.9135595135636894, "learning_rate": 1.492709467324205e-05, "loss": 0.6975, "step": 14367 }, { "epoch": 1.0678558156819027, "grad_norm": 1.9388778807615035, "learning_rate": 1.4926396441443954e-05, "loss": 0.5518, "step": 14368 }, { "epoch": 1.067930137495355, "grad_norm": 2.2618667616215453, "learning_rate": 1.4925698177929978e-05, "loss": 0.5489, "step": 14369 }, { "epoch": 1.0680044593088072, "grad_norm": 1.88704507441393, "learning_rate": 1.4924999882704609e-05, "loss": 0.5378, "step": 14370 }, { "epoch": 1.0680787811222594, "grad_norm": 2.0080051996614046, "learning_rate": 1.4924301555772347e-05, "loss": 0.5583, "step": 14371 }, { "epoch": 1.0681531029357116, "grad_norm": 1.9045726923699775, "learning_rate": 1.4923603197137688e-05, "loss": 0.6115, "step": 14372 }, { "epoch": 1.0682274247491639, "grad_norm": 1.9592207018427026, "learning_rate": 1.4922904806805125e-05, "loss": 0.6902, "step": 14373 }, { "epoch": 1.0683017465626161, "grad_norm": 1.7298504139984923, "learning_rate": 1.492220638477916e-05, "loss": 0.5966, "step": 14374 }, { "epoch": 1.0683760683760684, "grad_norm": 2.2182662102483963, "learning_rate": 1.4921507931064286e-05, "loss": 0.5384, "step": 14375 }, { "epoch": 1.0684503901895206, "grad_norm": 2.252244286692854, "learning_rate": 1.4920809445664997e-05, "loss": 0.5676, "step": 14376 }, { "epoch": 1.0685247120029728, "grad_norm": 2.981346948627095, "learning_rate": 1.492011092858579e-05, "loss": 0.6481, "step": 14377 }, { "epoch": 1.068599033816425, "grad_norm": 2.143018326846244, "learning_rate": 1.4919412379831167e-05, "loss": 0.7493, "step": 14378 }, { "epoch": 1.0686733556298773, "grad_norm": 1.9104134683786258, "learning_rate": 1.4918713799405622e-05, "loss": 0.6676, "step": 14379 }, { "epoch": 1.0687476774433295, "grad_norm": 1.9321973006063042, "learning_rate": 1.4918015187313653e-05, "loss": 0.6437, "step": 14380 }, { "epoch": 1.0688219992567818, "grad_norm": 1.857127342526124, "learning_rate": 1.4917316543559757e-05, "loss": 0.5516, "step": 14381 }, { "epoch": 1.068896321070234, "grad_norm": 1.6523261004589864, "learning_rate": 1.4916617868148434e-05, "loss": 0.4882, "step": 14382 }, { "epoch": 1.0689706428836863, "grad_norm": 1.8530614328777568, "learning_rate": 1.4915919161084177e-05, "loss": 0.5663, "step": 14383 }, { "epoch": 1.0690449646971385, "grad_norm": 2.206731210731746, "learning_rate": 1.4915220422371489e-05, "loss": 0.7888, "step": 14384 }, { "epoch": 1.069119286510591, "grad_norm": 2.0712138033173804, "learning_rate": 1.4914521652014867e-05, "loss": 0.6928, "step": 14385 }, { "epoch": 1.0691936083240432, "grad_norm": 2.127747719070863, "learning_rate": 1.491382285001881e-05, "loss": 0.7203, "step": 14386 }, { "epoch": 1.0692679301374954, "grad_norm": 2.0759747231214414, "learning_rate": 1.4913124016387814e-05, "loss": 0.6316, "step": 14387 }, { "epoch": 1.0693422519509477, "grad_norm": 2.1627005270527606, "learning_rate": 1.4912425151126382e-05, "loss": 0.7108, "step": 14388 }, { "epoch": 1.0694165737644, "grad_norm": 2.220993639000048, "learning_rate": 1.4911726254239012e-05, "loss": 0.5301, "step": 14389 }, { "epoch": 1.0694908955778522, "grad_norm": 2.1327623487299614, "learning_rate": 1.4911027325730205e-05, "loss": 0.7662, "step": 14390 }, { "epoch": 1.0695652173913044, "grad_norm": 2.0205812496809736, "learning_rate": 1.4910328365604457e-05, "loss": 0.7323, "step": 14391 }, { "epoch": 1.0696395392047566, "grad_norm": 2.041681431198451, "learning_rate": 1.4909629373866272e-05, "loss": 0.5708, "step": 14392 }, { "epoch": 1.0697138610182089, "grad_norm": 2.6111729715236116, "learning_rate": 1.4908930350520146e-05, "loss": 0.5878, "step": 14393 }, { "epoch": 1.069788182831661, "grad_norm": 1.844650688459434, "learning_rate": 1.4908231295570579e-05, "loss": 0.6401, "step": 14394 }, { "epoch": 1.0698625046451133, "grad_norm": 2.006422624499987, "learning_rate": 1.4907532209022075e-05, "loss": 0.6306, "step": 14395 }, { "epoch": 1.0699368264585656, "grad_norm": 1.8744019291503133, "learning_rate": 1.4906833090879133e-05, "loss": 0.6338, "step": 14396 }, { "epoch": 1.0700111482720178, "grad_norm": 2.302670498460816, "learning_rate": 1.4906133941146258e-05, "loss": 0.7543, "step": 14397 }, { "epoch": 1.07008547008547, "grad_norm": 2.4880971744842277, "learning_rate": 1.4905434759827943e-05, "loss": 0.6958, "step": 14398 }, { "epoch": 1.0701597918989223, "grad_norm": 2.0884626613400186, "learning_rate": 1.4904735546928695e-05, "loss": 0.7219, "step": 14399 }, { "epoch": 1.0702341137123745, "grad_norm": 1.8855737232682004, "learning_rate": 1.4904036302453012e-05, "loss": 0.5956, "step": 14400 }, { "epoch": 1.0703084355258268, "grad_norm": 2.028308250145196, "learning_rate": 1.4903337026405403e-05, "loss": 0.7786, "step": 14401 }, { "epoch": 1.070382757339279, "grad_norm": 1.4994566146109516, "learning_rate": 1.490263771879036e-05, "loss": 0.4207, "step": 14402 }, { "epoch": 1.0704570791527313, "grad_norm": 2.0274274487537376, "learning_rate": 1.4901938379612392e-05, "loss": 0.666, "step": 14403 }, { "epoch": 1.0705314009661835, "grad_norm": 2.114656292120347, "learning_rate": 1.4901239008875998e-05, "loss": 0.7226, "step": 14404 }, { "epoch": 1.0706057227796357, "grad_norm": 1.5654300176922518, "learning_rate": 1.490053960658568e-05, "loss": 0.4959, "step": 14405 }, { "epoch": 1.0706800445930882, "grad_norm": 1.8930990424884575, "learning_rate": 1.4899840172745943e-05, "loss": 0.6143, "step": 14406 }, { "epoch": 1.0707543664065402, "grad_norm": 1.6597961907150032, "learning_rate": 1.4899140707361292e-05, "loss": 0.531, "step": 14407 }, { "epoch": 1.0708286882199927, "grad_norm": 1.9888963119079228, "learning_rate": 1.4898441210436225e-05, "loss": 0.584, "step": 14408 }, { "epoch": 1.070903010033445, "grad_norm": 1.903938855651894, "learning_rate": 1.4897741681975249e-05, "loss": 0.5445, "step": 14409 }, { "epoch": 1.0709773318468971, "grad_norm": 2.0901446099165963, "learning_rate": 1.4897042121982866e-05, "loss": 0.5773, "step": 14410 }, { "epoch": 1.0710516536603494, "grad_norm": 1.8417790789098878, "learning_rate": 1.4896342530463577e-05, "loss": 0.5863, "step": 14411 }, { "epoch": 1.0711259754738016, "grad_norm": 2.005312988144693, "learning_rate": 1.4895642907421892e-05, "loss": 0.5426, "step": 14412 }, { "epoch": 1.0712002972872539, "grad_norm": 2.0571783070899317, "learning_rate": 1.4894943252862311e-05, "loss": 0.6808, "step": 14413 }, { "epoch": 1.071274619100706, "grad_norm": 2.040837385096578, "learning_rate": 1.4894243566789342e-05, "loss": 0.5981, "step": 14414 }, { "epoch": 1.0713489409141583, "grad_norm": 2.2516389436725595, "learning_rate": 1.489354384920748e-05, "loss": 0.7236, "step": 14415 }, { "epoch": 1.0714232627276106, "grad_norm": 2.0428209593011335, "learning_rate": 1.4892844100121242e-05, "loss": 0.6567, "step": 14416 }, { "epoch": 1.0714975845410628, "grad_norm": 1.7979475291780758, "learning_rate": 1.4892144319535127e-05, "loss": 0.5924, "step": 14417 }, { "epoch": 1.071571906354515, "grad_norm": 2.013776245236264, "learning_rate": 1.489144450745364e-05, "loss": 0.7449, "step": 14418 }, { "epoch": 1.0716462281679673, "grad_norm": 2.0490202291279602, "learning_rate": 1.4890744663881286e-05, "loss": 0.6718, "step": 14419 }, { "epoch": 1.0717205499814195, "grad_norm": 1.9521135996900882, "learning_rate": 1.4890044788822575e-05, "loss": 0.5469, "step": 14420 }, { "epoch": 1.0717948717948718, "grad_norm": 2.1484923118095085, "learning_rate": 1.4889344882282007e-05, "loss": 0.6321, "step": 14421 }, { "epoch": 1.071869193608324, "grad_norm": 1.9060301024839827, "learning_rate": 1.4888644944264088e-05, "loss": 0.7176, "step": 14422 }, { "epoch": 1.0719435154217762, "grad_norm": 1.6789448455340694, "learning_rate": 1.4887944974773331e-05, "loss": 0.4567, "step": 14423 }, { "epoch": 1.0720178372352285, "grad_norm": 2.004678780187452, "learning_rate": 1.4887244973814237e-05, "loss": 0.6423, "step": 14424 }, { "epoch": 1.0720921590486807, "grad_norm": 2.5608720678204273, "learning_rate": 1.488654494139131e-05, "loss": 0.5954, "step": 14425 }, { "epoch": 1.072166480862133, "grad_norm": 2.2611666658354945, "learning_rate": 1.4885844877509063e-05, "loss": 0.6483, "step": 14426 }, { "epoch": 1.0722408026755852, "grad_norm": 2.0927011449029536, "learning_rate": 1.4885144782172e-05, "loss": 0.6402, "step": 14427 }, { "epoch": 1.0723151244890374, "grad_norm": 1.7970507371636337, "learning_rate": 1.4884444655384626e-05, "loss": 0.5934, "step": 14428 }, { "epoch": 1.07238944630249, "grad_norm": 2.0104645164056363, "learning_rate": 1.4883744497151455e-05, "loss": 0.6874, "step": 14429 }, { "epoch": 1.0724637681159421, "grad_norm": 2.185127847730738, "learning_rate": 1.4883044307476988e-05, "loss": 0.632, "step": 14430 }, { "epoch": 1.0725380899293944, "grad_norm": 1.9405654160531765, "learning_rate": 1.4882344086365736e-05, "loss": 0.787, "step": 14431 }, { "epoch": 1.0726124117428466, "grad_norm": 2.086587403777906, "learning_rate": 1.4881643833822206e-05, "loss": 0.6473, "step": 14432 }, { "epoch": 1.0726867335562988, "grad_norm": 2.394426344020897, "learning_rate": 1.4880943549850903e-05, "loss": 0.7748, "step": 14433 }, { "epoch": 1.072761055369751, "grad_norm": 1.6190133299373015, "learning_rate": 1.4880243234456343e-05, "loss": 0.4659, "step": 14434 }, { "epoch": 1.0728353771832033, "grad_norm": 1.8070179537200486, "learning_rate": 1.4879542887643027e-05, "loss": 0.7098, "step": 14435 }, { "epoch": 1.0729096989966556, "grad_norm": 2.394604529254244, "learning_rate": 1.487884250941547e-05, "loss": 0.8008, "step": 14436 }, { "epoch": 1.0729840208101078, "grad_norm": 2.0008031176992467, "learning_rate": 1.4878142099778176e-05, "loss": 0.5705, "step": 14437 }, { "epoch": 1.07305834262356, "grad_norm": 2.0141478032353066, "learning_rate": 1.487744165873566e-05, "loss": 0.6863, "step": 14438 }, { "epoch": 1.0731326644370123, "grad_norm": 1.8673327058658749, "learning_rate": 1.4876741186292423e-05, "loss": 0.4307, "step": 14439 }, { "epoch": 1.0732069862504645, "grad_norm": 2.395210864162624, "learning_rate": 1.4876040682452981e-05, "loss": 0.632, "step": 14440 }, { "epoch": 1.0732813080639168, "grad_norm": 1.7328554364233224, "learning_rate": 1.4875340147221842e-05, "loss": 0.5718, "step": 14441 }, { "epoch": 1.073355629877369, "grad_norm": 2.4248417023775186, "learning_rate": 1.4874639580603514e-05, "loss": 0.6091, "step": 14442 }, { "epoch": 1.0734299516908212, "grad_norm": 1.8342134831807693, "learning_rate": 1.4873938982602514e-05, "loss": 0.5213, "step": 14443 }, { "epoch": 1.0735042735042735, "grad_norm": 2.2710942479543244, "learning_rate": 1.4873238353223345e-05, "loss": 0.7204, "step": 14444 }, { "epoch": 1.0735785953177257, "grad_norm": 1.6828459047405477, "learning_rate": 1.4872537692470519e-05, "loss": 0.5988, "step": 14445 }, { "epoch": 1.073652917131178, "grad_norm": 1.8823336200281362, "learning_rate": 1.487183700034855e-05, "loss": 0.6598, "step": 14446 }, { "epoch": 1.0737272389446302, "grad_norm": 2.3745634355548093, "learning_rate": 1.4871136276861945e-05, "loss": 0.7117, "step": 14447 }, { "epoch": 1.0738015607580824, "grad_norm": 1.8314821129271046, "learning_rate": 1.4870435522015223e-05, "loss": 0.5239, "step": 14448 }, { "epoch": 1.0738758825715347, "grad_norm": 2.298087455866226, "learning_rate": 1.4869734735812884e-05, "loss": 0.6504, "step": 14449 }, { "epoch": 1.073950204384987, "grad_norm": 1.6802305230826107, "learning_rate": 1.4869033918259448e-05, "loss": 0.5333, "step": 14450 }, { "epoch": 1.0740245261984391, "grad_norm": 1.7593464447936575, "learning_rate": 1.4868333069359425e-05, "loss": 0.5593, "step": 14451 }, { "epoch": 1.0740988480118916, "grad_norm": 2.27831483517441, "learning_rate": 1.4867632189117323e-05, "loss": 0.7166, "step": 14452 }, { "epoch": 1.0741731698253438, "grad_norm": 2.0589135387427695, "learning_rate": 1.486693127753766e-05, "loss": 0.6433, "step": 14453 }, { "epoch": 1.074247491638796, "grad_norm": 2.800745753829826, "learning_rate": 1.4866230334624945e-05, "loss": 0.7682, "step": 14454 }, { "epoch": 1.0743218134522483, "grad_norm": 2.1197182924941007, "learning_rate": 1.4865529360383692e-05, "loss": 0.5584, "step": 14455 }, { "epoch": 1.0743961352657005, "grad_norm": 1.8975023192323024, "learning_rate": 1.4864828354818414e-05, "loss": 0.699, "step": 14456 }, { "epoch": 1.0744704570791528, "grad_norm": 1.9297764483107793, "learning_rate": 1.4864127317933623e-05, "loss": 0.4441, "step": 14457 }, { "epoch": 1.074544778892605, "grad_norm": 1.8093783930070808, "learning_rate": 1.4863426249733833e-05, "loss": 0.6884, "step": 14458 }, { "epoch": 1.0746191007060573, "grad_norm": 2.5788584283130267, "learning_rate": 1.4862725150223556e-05, "loss": 0.9087, "step": 14459 }, { "epoch": 1.0746934225195095, "grad_norm": 2.0017699777008726, "learning_rate": 1.4862024019407309e-05, "loss": 0.5284, "step": 14460 }, { "epoch": 1.0747677443329617, "grad_norm": 1.8475999093999083, "learning_rate": 1.4861322857289601e-05, "loss": 0.495, "step": 14461 }, { "epoch": 1.074842066146414, "grad_norm": 1.8226099925286368, "learning_rate": 1.4860621663874953e-05, "loss": 0.5849, "step": 14462 }, { "epoch": 1.0749163879598662, "grad_norm": 2.200130673187512, "learning_rate": 1.485992043916787e-05, "loss": 0.7795, "step": 14463 }, { "epoch": 1.0749907097733185, "grad_norm": 1.824268650356332, "learning_rate": 1.4859219183172872e-05, "loss": 0.6586, "step": 14464 }, { "epoch": 1.0750650315867707, "grad_norm": 2.52668118260566, "learning_rate": 1.4858517895894475e-05, "loss": 0.7142, "step": 14465 }, { "epoch": 1.075139353400223, "grad_norm": 2.097159878917101, "learning_rate": 1.4857816577337192e-05, "loss": 0.6731, "step": 14466 }, { "epoch": 1.0752136752136752, "grad_norm": 1.517223337014693, "learning_rate": 1.4857115227505538e-05, "loss": 0.5386, "step": 14467 }, { "epoch": 1.0752879970271274, "grad_norm": 1.7435721841288263, "learning_rate": 1.4856413846404028e-05, "loss": 0.5157, "step": 14468 }, { "epoch": 1.0753623188405796, "grad_norm": 1.6780572144466543, "learning_rate": 1.4855712434037175e-05, "loss": 0.5866, "step": 14469 }, { "epoch": 1.0754366406540319, "grad_norm": 1.7032684268837623, "learning_rate": 1.48550109904095e-05, "loss": 0.4961, "step": 14470 }, { "epoch": 1.0755109624674841, "grad_norm": 2.313438773781867, "learning_rate": 1.4854309515525516e-05, "loss": 0.8208, "step": 14471 }, { "epoch": 1.0755852842809364, "grad_norm": 1.626456571238229, "learning_rate": 1.4853608009389738e-05, "loss": 0.5451, "step": 14472 }, { "epoch": 1.0756596060943888, "grad_norm": 2.2973599009131065, "learning_rate": 1.4852906472006685e-05, "loss": 0.6012, "step": 14473 }, { "epoch": 1.0757339279078408, "grad_norm": 2.3043727618904395, "learning_rate": 1.4852204903380871e-05, "loss": 0.6601, "step": 14474 }, { "epoch": 1.0758082497212933, "grad_norm": 1.9432640485254655, "learning_rate": 1.4851503303516813e-05, "loss": 0.5382, "step": 14475 }, { "epoch": 1.0758825715347455, "grad_norm": 2.517083815483173, "learning_rate": 1.485080167241903e-05, "loss": 0.664, "step": 14476 }, { "epoch": 1.0759568933481978, "grad_norm": 2.002795206726217, "learning_rate": 1.4850100010092034e-05, "loss": 0.7779, "step": 14477 }, { "epoch": 1.07603121516165, "grad_norm": 2.5508312412129324, "learning_rate": 1.4849398316540351e-05, "loss": 0.8487, "step": 14478 }, { "epoch": 1.0761055369751023, "grad_norm": 1.697233169325526, "learning_rate": 1.484869659176849e-05, "loss": 0.5873, "step": 14479 }, { "epoch": 1.0761798587885545, "grad_norm": 2.5322024096971076, "learning_rate": 1.4847994835780973e-05, "loss": 0.6703, "step": 14480 }, { "epoch": 1.0762541806020067, "grad_norm": 2.000730375357272, "learning_rate": 1.4847293048582313e-05, "loss": 0.5909, "step": 14481 }, { "epoch": 1.076328502415459, "grad_norm": 1.6087462877341292, "learning_rate": 1.484659123017704e-05, "loss": 0.4057, "step": 14482 }, { "epoch": 1.0764028242289112, "grad_norm": 2.276576516759915, "learning_rate": 1.4845889380569658e-05, "loss": 0.5837, "step": 14483 }, { "epoch": 1.0764771460423634, "grad_norm": 1.8936660956914284, "learning_rate": 1.4845187499764693e-05, "loss": 0.6269, "step": 14484 }, { "epoch": 1.0765514678558157, "grad_norm": 2.308853244934875, "learning_rate": 1.484448558776666e-05, "loss": 0.6437, "step": 14485 }, { "epoch": 1.076625789669268, "grad_norm": 1.760129313159126, "learning_rate": 1.4843783644580083e-05, "loss": 0.6155, "step": 14486 }, { "epoch": 1.0767001114827202, "grad_norm": 1.9739112735626685, "learning_rate": 1.4843081670209478e-05, "loss": 0.771, "step": 14487 }, { "epoch": 1.0767744332961724, "grad_norm": 2.0143264029283405, "learning_rate": 1.4842379664659365e-05, "loss": 0.5498, "step": 14488 }, { "epoch": 1.0768487551096246, "grad_norm": 2.130270749619585, "learning_rate": 1.4841677627934263e-05, "loss": 0.718, "step": 14489 }, { "epoch": 1.0769230769230769, "grad_norm": 1.988654254048677, "learning_rate": 1.484097556003869e-05, "loss": 0.7125, "step": 14490 }, { "epoch": 1.076997398736529, "grad_norm": 1.9936042475677949, "learning_rate": 1.4840273460977168e-05, "loss": 0.6366, "step": 14491 }, { "epoch": 1.0770717205499813, "grad_norm": 1.915760206266639, "learning_rate": 1.4839571330754216e-05, "loss": 0.7417, "step": 14492 }, { "epoch": 1.0771460423634336, "grad_norm": 2.191930419907818, "learning_rate": 1.4838869169374356e-05, "loss": 0.5347, "step": 14493 }, { "epoch": 1.0772203641768858, "grad_norm": 2.0517820966610385, "learning_rate": 1.4838166976842108e-05, "loss": 0.6513, "step": 14494 }, { "epoch": 1.077294685990338, "grad_norm": 2.031595962589148, "learning_rate": 1.4837464753161993e-05, "loss": 0.8025, "step": 14495 }, { "epoch": 1.0773690078037905, "grad_norm": 1.8670001077995524, "learning_rate": 1.4836762498338529e-05, "loss": 0.7181, "step": 14496 }, { "epoch": 1.0774433296172428, "grad_norm": 1.9841035417290795, "learning_rate": 1.4836060212376238e-05, "loss": 0.5162, "step": 14497 }, { "epoch": 1.077517651430695, "grad_norm": 1.988090925228087, "learning_rate": 1.4835357895279643e-05, "loss": 0.4815, "step": 14498 }, { "epoch": 1.0775919732441472, "grad_norm": 1.653114641351919, "learning_rate": 1.4834655547053265e-05, "loss": 0.4725, "step": 14499 }, { "epoch": 1.0776662950575995, "grad_norm": 1.7057156565095872, "learning_rate": 1.4833953167701628e-05, "loss": 0.5548, "step": 14500 }, { "epoch": 1.0777406168710517, "grad_norm": 1.844150111744303, "learning_rate": 1.4833250757229248e-05, "loss": 0.585, "step": 14501 }, { "epoch": 1.077814938684504, "grad_norm": 1.9634948137970856, "learning_rate": 1.483254831564065e-05, "loss": 0.7249, "step": 14502 }, { "epoch": 1.0778892604979562, "grad_norm": 1.694128517639093, "learning_rate": 1.4831845842940357e-05, "loss": 0.5449, "step": 14503 }, { "epoch": 1.0779635823114084, "grad_norm": 2.109916461281272, "learning_rate": 1.4831143339132892e-05, "loss": 0.6209, "step": 14504 }, { "epoch": 1.0780379041248607, "grad_norm": 1.409898597029915, "learning_rate": 1.483044080422278e-05, "loss": 0.4737, "step": 14505 }, { "epoch": 1.078112225938313, "grad_norm": 2.54057330725884, "learning_rate": 1.4829738238214537e-05, "loss": 0.7111, "step": 14506 }, { "epoch": 1.0781865477517651, "grad_norm": 2.356216893829184, "learning_rate": 1.482903564111269e-05, "loss": 0.6193, "step": 14507 }, { "epoch": 1.0782608695652174, "grad_norm": 2.6898871912003406, "learning_rate": 1.482833301292176e-05, "loss": 0.5845, "step": 14508 }, { "epoch": 1.0783351913786696, "grad_norm": 1.624328442520998, "learning_rate": 1.4827630353646276e-05, "loss": 0.5251, "step": 14509 }, { "epoch": 1.0784095131921219, "grad_norm": 2.0705359619529116, "learning_rate": 1.482692766329076e-05, "loss": 0.6913, "step": 14510 }, { "epoch": 1.078483835005574, "grad_norm": 2.2539852238626974, "learning_rate": 1.4826224941859732e-05, "loss": 0.7181, "step": 14511 }, { "epoch": 1.0785581568190263, "grad_norm": 1.7495993303194615, "learning_rate": 1.4825522189357718e-05, "loss": 0.5247, "step": 14512 }, { "epoch": 1.0786324786324786, "grad_norm": 2.342258458990717, "learning_rate": 1.482481940578924e-05, "loss": 0.6576, "step": 14513 }, { "epoch": 1.0787068004459308, "grad_norm": 1.835299813001246, "learning_rate": 1.4824116591158829e-05, "loss": 0.6, "step": 14514 }, { "epoch": 1.078781122259383, "grad_norm": 1.7876114737120505, "learning_rate": 1.4823413745471005e-05, "loss": 0.6798, "step": 14515 }, { "epoch": 1.0788554440728353, "grad_norm": 3.5147038601191265, "learning_rate": 1.4822710868730293e-05, "loss": 0.7693, "step": 14516 }, { "epoch": 1.0789297658862875, "grad_norm": 2.5772255319433603, "learning_rate": 1.4822007960941219e-05, "loss": 0.7925, "step": 14517 }, { "epoch": 1.0790040876997398, "grad_norm": 2.077819225001568, "learning_rate": 1.4821305022108306e-05, "loss": 0.5805, "step": 14518 }, { "epoch": 1.0790784095131922, "grad_norm": 1.955826260808032, "learning_rate": 1.4820602052236082e-05, "loss": 0.7652, "step": 14519 }, { "epoch": 1.0791527313266445, "grad_norm": 2.354313808206503, "learning_rate": 1.4819899051329073e-05, "loss": 0.6291, "step": 14520 }, { "epoch": 1.0792270531400967, "grad_norm": 1.9028005704632096, "learning_rate": 1.4819196019391804e-05, "loss": 0.6224, "step": 14521 }, { "epoch": 1.079301374953549, "grad_norm": 1.5716843792664823, "learning_rate": 1.4818492956428803e-05, "loss": 0.4672, "step": 14522 }, { "epoch": 1.0793756967670012, "grad_norm": 2.2370558782589436, "learning_rate": 1.4817789862444592e-05, "loss": 0.6967, "step": 14523 }, { "epoch": 1.0794500185804534, "grad_norm": 1.864975870480966, "learning_rate": 1.4817086737443698e-05, "loss": 0.5726, "step": 14524 }, { "epoch": 1.0795243403939057, "grad_norm": 1.500351197018214, "learning_rate": 1.4816383581430652e-05, "loss": 0.5825, "step": 14525 }, { "epoch": 1.079598662207358, "grad_norm": 1.7861149078024092, "learning_rate": 1.4815680394409978e-05, "loss": 0.5872, "step": 14526 }, { "epoch": 1.0796729840208101, "grad_norm": 2.0613473467872763, "learning_rate": 1.4814977176386205e-05, "loss": 0.5169, "step": 14527 }, { "epoch": 1.0797473058342624, "grad_norm": 1.9843746639277475, "learning_rate": 1.4814273927363859e-05, "loss": 0.5935, "step": 14528 }, { "epoch": 1.0798216276477146, "grad_norm": 1.8619366035194822, "learning_rate": 1.4813570647347462e-05, "loss": 0.5813, "step": 14529 }, { "epoch": 1.0798959494611668, "grad_norm": 2.314770402152868, "learning_rate": 1.4812867336341552e-05, "loss": 0.7166, "step": 14530 }, { "epoch": 1.079970271274619, "grad_norm": 2.039839985255729, "learning_rate": 1.4812163994350648e-05, "loss": 0.6492, "step": 14531 }, { "epoch": 1.0800445930880713, "grad_norm": 2.5771531363172264, "learning_rate": 1.4811460621379285e-05, "loss": 0.7057, "step": 14532 }, { "epoch": 1.0801189149015236, "grad_norm": 1.9679915059493052, "learning_rate": 1.481075721743199e-05, "loss": 0.5804, "step": 14533 }, { "epoch": 1.0801932367149758, "grad_norm": 1.8630578003541194, "learning_rate": 1.4810053782513284e-05, "loss": 0.5737, "step": 14534 }, { "epoch": 1.080267558528428, "grad_norm": 1.7620252451838827, "learning_rate": 1.4809350316627705e-05, "loss": 0.5603, "step": 14535 }, { "epoch": 1.0803418803418803, "grad_norm": 1.946891995405999, "learning_rate": 1.4808646819779777e-05, "loss": 0.6234, "step": 14536 }, { "epoch": 1.0804162021553325, "grad_norm": 2.034204032469341, "learning_rate": 1.4807943291974033e-05, "loss": 0.6907, "step": 14537 }, { "epoch": 1.0804905239687848, "grad_norm": 2.3356975959341755, "learning_rate": 1.4807239733214997e-05, "loss": 0.662, "step": 14538 }, { "epoch": 1.080564845782237, "grad_norm": 2.3284948709934086, "learning_rate": 1.4806536143507202e-05, "loss": 0.7023, "step": 14539 }, { "epoch": 1.0806391675956892, "grad_norm": 2.0062227624349274, "learning_rate": 1.4805832522855175e-05, "loss": 0.6349, "step": 14540 }, { "epoch": 1.0807134894091415, "grad_norm": 1.9124928899657025, "learning_rate": 1.4805128871263448e-05, "loss": 0.5075, "step": 14541 }, { "epoch": 1.080787811222594, "grad_norm": 1.7776497537666924, "learning_rate": 1.480442518873655e-05, "loss": 0.4815, "step": 14542 }, { "epoch": 1.0808621330360462, "grad_norm": 1.91837677894974, "learning_rate": 1.4803721475279016e-05, "loss": 0.5849, "step": 14543 }, { "epoch": 1.0809364548494984, "grad_norm": 1.969784711642555, "learning_rate": 1.480301773089537e-05, "loss": 0.6932, "step": 14544 }, { "epoch": 1.0810107766629506, "grad_norm": 1.7867733156218413, "learning_rate": 1.4802313955590144e-05, "loss": 0.6249, "step": 14545 }, { "epoch": 1.0810850984764029, "grad_norm": 1.9473724723679022, "learning_rate": 1.4801610149367872e-05, "loss": 0.7386, "step": 14546 }, { "epoch": 1.0811594202898551, "grad_norm": 1.834430866320522, "learning_rate": 1.4800906312233079e-05, "loss": 0.6441, "step": 14547 }, { "epoch": 1.0812337421033074, "grad_norm": 1.9032818032329786, "learning_rate": 1.4800202444190306e-05, "loss": 0.5073, "step": 14548 }, { "epoch": 1.0813080639167596, "grad_norm": 2.078811537786816, "learning_rate": 1.4799498545244074e-05, "loss": 0.5933, "step": 14549 }, { "epoch": 1.0813823857302118, "grad_norm": 1.8400800393281078, "learning_rate": 1.4798794615398921e-05, "loss": 0.6401, "step": 14550 }, { "epoch": 1.081456707543664, "grad_norm": 1.9898669509889821, "learning_rate": 1.4798090654659379e-05, "loss": 0.6457, "step": 14551 }, { "epoch": 1.0815310293571163, "grad_norm": 2.219728902068001, "learning_rate": 1.4797386663029976e-05, "loss": 0.6207, "step": 14552 }, { "epoch": 1.0816053511705686, "grad_norm": 1.734469037467393, "learning_rate": 1.4796682640515245e-05, "loss": 0.5916, "step": 14553 }, { "epoch": 1.0816796729840208, "grad_norm": 3.645399507465559, "learning_rate": 1.4795978587119724e-05, "loss": 0.5435, "step": 14554 }, { "epoch": 1.081753994797473, "grad_norm": 2.0514160859886434, "learning_rate": 1.4795274502847939e-05, "loss": 0.6707, "step": 14555 }, { "epoch": 1.0818283166109253, "grad_norm": 1.9301687621494175, "learning_rate": 1.4794570387704427e-05, "loss": 0.5641, "step": 14556 }, { "epoch": 1.0819026384243775, "grad_norm": 1.9681939534946915, "learning_rate": 1.4793866241693719e-05, "loss": 0.6719, "step": 14557 }, { "epoch": 1.0819769602378297, "grad_norm": 2.1962685595556537, "learning_rate": 1.4793162064820349e-05, "loss": 0.71, "step": 14558 }, { "epoch": 1.082051282051282, "grad_norm": 1.8171676569350395, "learning_rate": 1.4792457857088851e-05, "loss": 0.6812, "step": 14559 }, { "epoch": 1.0821256038647342, "grad_norm": 2.0002562962002783, "learning_rate": 1.4791753618503757e-05, "loss": 0.5951, "step": 14560 }, { "epoch": 1.0821999256781865, "grad_norm": 2.021545096082179, "learning_rate": 1.4791049349069605e-05, "loss": 0.5438, "step": 14561 }, { "epoch": 1.0822742474916387, "grad_norm": 2.0013444187359943, "learning_rate": 1.479034504879092e-05, "loss": 0.6198, "step": 14562 }, { "epoch": 1.0823485693050912, "grad_norm": 2.680881533380499, "learning_rate": 1.4789640717672245e-05, "loss": 0.6516, "step": 14563 }, { "epoch": 1.0824228911185434, "grad_norm": 2.0091325514569647, "learning_rate": 1.478893635571811e-05, "loss": 0.6944, "step": 14564 }, { "epoch": 1.0824972129319956, "grad_norm": 2.033473658828756, "learning_rate": 1.4788231962933055e-05, "loss": 0.712, "step": 14565 }, { "epoch": 1.0825715347454479, "grad_norm": 2.285891612547702, "learning_rate": 1.478752753932161e-05, "loss": 0.6301, "step": 14566 }, { "epoch": 1.0826458565589, "grad_norm": 1.498518266985865, "learning_rate": 1.478682308488831e-05, "loss": 0.4403, "step": 14567 }, { "epoch": 1.0827201783723523, "grad_norm": 1.7112205681669592, "learning_rate": 1.4786118599637691e-05, "loss": 0.6099, "step": 14568 }, { "epoch": 1.0827945001858046, "grad_norm": 1.675434922941782, "learning_rate": 1.4785414083574287e-05, "loss": 0.4812, "step": 14569 }, { "epoch": 1.0828688219992568, "grad_norm": 2.116207441884361, "learning_rate": 1.478470953670264e-05, "loss": 0.6564, "step": 14570 }, { "epoch": 1.082943143812709, "grad_norm": 1.7096816499471126, "learning_rate": 1.4784004959027278e-05, "loss": 0.513, "step": 14571 }, { "epoch": 1.0830174656261613, "grad_norm": 1.8978099797632673, "learning_rate": 1.478330035055274e-05, "loss": 0.7175, "step": 14572 }, { "epoch": 1.0830917874396135, "grad_norm": 2.1252492667086957, "learning_rate": 1.4782595711283567e-05, "loss": 0.6708, "step": 14573 }, { "epoch": 1.0831661092530658, "grad_norm": 1.9741034213495494, "learning_rate": 1.4781891041224286e-05, "loss": 0.6498, "step": 14574 }, { "epoch": 1.083240431066518, "grad_norm": 2.3023216237556765, "learning_rate": 1.478118634037944e-05, "loss": 0.6392, "step": 14575 }, { "epoch": 1.0833147528799703, "grad_norm": 2.14404380620069, "learning_rate": 1.4780481608753565e-05, "loss": 0.6868, "step": 14576 }, { "epoch": 1.0833890746934225, "grad_norm": 1.6588624242749508, "learning_rate": 1.4779776846351197e-05, "loss": 0.5486, "step": 14577 }, { "epoch": 1.0834633965068747, "grad_norm": 1.9790028932721857, "learning_rate": 1.4779072053176875e-05, "loss": 0.5439, "step": 14578 }, { "epoch": 1.083537718320327, "grad_norm": 2.1862852825901005, "learning_rate": 1.4778367229235132e-05, "loss": 0.7241, "step": 14579 }, { "epoch": 1.0836120401337792, "grad_norm": 1.7468648122417754, "learning_rate": 1.477766237453051e-05, "loss": 0.4689, "step": 14580 }, { "epoch": 1.0836863619472314, "grad_norm": 2.119085181957705, "learning_rate": 1.4776957489067549e-05, "loss": 0.5221, "step": 14581 }, { "epoch": 1.0837606837606837, "grad_norm": 1.9688225538279884, "learning_rate": 1.477625257285078e-05, "loss": 0.5517, "step": 14582 }, { "epoch": 1.083835005574136, "grad_norm": 1.6198651023451864, "learning_rate": 1.4775547625884745e-05, "loss": 0.5306, "step": 14583 }, { "epoch": 1.0839093273875882, "grad_norm": 2.185851257327901, "learning_rate": 1.4774842648173983e-05, "loss": 0.7659, "step": 14584 }, { "epoch": 1.0839836492010404, "grad_norm": 1.9509642100179043, "learning_rate": 1.4774137639723032e-05, "loss": 0.6154, "step": 14585 }, { "epoch": 1.0840579710144929, "grad_norm": 1.5615295389477561, "learning_rate": 1.4773432600536428e-05, "loss": 0.4326, "step": 14586 }, { "epoch": 1.084132292827945, "grad_norm": 2.1459699368255287, "learning_rate": 1.4772727530618718e-05, "loss": 0.6129, "step": 14587 }, { "epoch": 1.0842066146413973, "grad_norm": 1.7832503073724282, "learning_rate": 1.4772022429974431e-05, "loss": 0.567, "step": 14588 }, { "epoch": 1.0842809364548496, "grad_norm": 1.9952109687905073, "learning_rate": 1.4771317298608115e-05, "loss": 0.51, "step": 14589 }, { "epoch": 1.0843552582683018, "grad_norm": 2.171846250800416, "learning_rate": 1.4770612136524301e-05, "loss": 0.6018, "step": 14590 }, { "epoch": 1.084429580081754, "grad_norm": 2.5777406755334087, "learning_rate": 1.4769906943727534e-05, "loss": 0.6875, "step": 14591 }, { "epoch": 1.0845039018952063, "grad_norm": 1.8503741226844546, "learning_rate": 1.4769201720222359e-05, "loss": 0.6139, "step": 14592 }, { "epoch": 1.0845782237086585, "grad_norm": 1.8108375864834658, "learning_rate": 1.4768496466013308e-05, "loss": 0.6185, "step": 14593 }, { "epoch": 1.0846525455221108, "grad_norm": 2.4210283707215106, "learning_rate": 1.4767791181104922e-05, "loss": 0.7704, "step": 14594 }, { "epoch": 1.084726867335563, "grad_norm": 1.2744775704499798, "learning_rate": 1.4767085865501747e-05, "loss": 0.3584, "step": 14595 }, { "epoch": 1.0848011891490152, "grad_norm": 2.276482934323369, "learning_rate": 1.4766380519208317e-05, "loss": 0.5895, "step": 14596 }, { "epoch": 1.0848755109624675, "grad_norm": 4.169666231810736, "learning_rate": 1.4765675142229178e-05, "loss": 0.5751, "step": 14597 }, { "epoch": 1.0849498327759197, "grad_norm": 2.244695849759309, "learning_rate": 1.476496973456887e-05, "loss": 0.6782, "step": 14598 }, { "epoch": 1.085024154589372, "grad_norm": 2.337708996658382, "learning_rate": 1.4764264296231933e-05, "loss": 0.6478, "step": 14599 }, { "epoch": 1.0850984764028242, "grad_norm": 1.5076864983802607, "learning_rate": 1.4763558827222912e-05, "loss": 0.4223, "step": 14600 }, { "epoch": 1.0851727982162764, "grad_norm": 1.929090194580688, "learning_rate": 1.4762853327546347e-05, "loss": 0.6199, "step": 14601 }, { "epoch": 1.0852471200297287, "grad_norm": 1.9797078208954775, "learning_rate": 1.4762147797206777e-05, "loss": 0.7517, "step": 14602 }, { "epoch": 1.085321441843181, "grad_norm": 2.7213125789963506, "learning_rate": 1.4761442236208748e-05, "loss": 0.6396, "step": 14603 }, { "epoch": 1.0853957636566332, "grad_norm": 2.2453120284204897, "learning_rate": 1.4760736644556798e-05, "loss": 0.6932, "step": 14604 }, { "epoch": 1.0854700854700854, "grad_norm": 1.6801955532799215, "learning_rate": 1.4760031022255476e-05, "loss": 0.631, "step": 14605 }, { "epoch": 1.0855444072835376, "grad_norm": 1.7542196929352742, "learning_rate": 1.475932536930932e-05, "loss": 0.5474, "step": 14606 }, { "epoch": 1.0856187290969899, "grad_norm": 2.0304763234408725, "learning_rate": 1.4758619685722875e-05, "loss": 0.6735, "step": 14607 }, { "epoch": 1.085693050910442, "grad_norm": 1.9687503885163584, "learning_rate": 1.4757913971500682e-05, "loss": 0.7089, "step": 14608 }, { "epoch": 1.0857673727238946, "grad_norm": 2.1984908539684023, "learning_rate": 1.4757208226647286e-05, "loss": 0.703, "step": 14609 }, { "epoch": 1.0858416945373468, "grad_norm": 1.7147297762482334, "learning_rate": 1.475650245116723e-05, "loss": 0.5598, "step": 14610 }, { "epoch": 1.085916016350799, "grad_norm": 1.8833024117694033, "learning_rate": 1.4755796645065059e-05, "loss": 0.6095, "step": 14611 }, { "epoch": 1.0859903381642513, "grad_norm": 1.8627495923850932, "learning_rate": 1.4755090808345315e-05, "loss": 0.6076, "step": 14612 }, { "epoch": 1.0860646599777035, "grad_norm": 2.2273450261378365, "learning_rate": 1.4754384941012542e-05, "loss": 0.6064, "step": 14613 }, { "epoch": 1.0861389817911558, "grad_norm": 2.3434826498812944, "learning_rate": 1.4753679043071292e-05, "loss": 0.7842, "step": 14614 }, { "epoch": 1.086213303604608, "grad_norm": 1.5825176685786404, "learning_rate": 1.4752973114526095e-05, "loss": 0.5112, "step": 14615 }, { "epoch": 1.0862876254180602, "grad_norm": 2.146828713035445, "learning_rate": 1.4752267155381508e-05, "loss": 0.6223, "step": 14616 }, { "epoch": 1.0863619472315125, "grad_norm": 2.4388965105686764, "learning_rate": 1.475156116564207e-05, "loss": 0.6075, "step": 14617 }, { "epoch": 1.0864362690449647, "grad_norm": 1.8199243675481174, "learning_rate": 1.4750855145312329e-05, "loss": 0.6082, "step": 14618 }, { "epoch": 1.086510590858417, "grad_norm": 2.2791675550237174, "learning_rate": 1.4750149094396826e-05, "loss": 0.7655, "step": 14619 }, { "epoch": 1.0865849126718692, "grad_norm": 2.0351540640644754, "learning_rate": 1.4749443012900113e-05, "loss": 0.6314, "step": 14620 }, { "epoch": 1.0866592344853214, "grad_norm": 1.9735486933809403, "learning_rate": 1.474873690082673e-05, "loss": 0.5409, "step": 14621 }, { "epoch": 1.0867335562987737, "grad_norm": 2.08980229604279, "learning_rate": 1.4748030758181226e-05, "loss": 0.5558, "step": 14622 }, { "epoch": 1.086807878112226, "grad_norm": 2.2097401531830614, "learning_rate": 1.4747324584968147e-05, "loss": 0.5863, "step": 14623 }, { "epoch": 1.0868821999256781, "grad_norm": 1.792371723889988, "learning_rate": 1.474661838119204e-05, "loss": 0.6571, "step": 14624 }, { "epoch": 1.0869565217391304, "grad_norm": 2.1805724749131006, "learning_rate": 1.474591214685745e-05, "loss": 0.7141, "step": 14625 }, { "epoch": 1.0870308435525826, "grad_norm": 1.8321897589510516, "learning_rate": 1.474520588196892e-05, "loss": 0.5712, "step": 14626 }, { "epoch": 1.0871051653660349, "grad_norm": 1.764467729789328, "learning_rate": 1.4744499586531003e-05, "loss": 0.5449, "step": 14627 }, { "epoch": 1.087179487179487, "grad_norm": 1.753467154023395, "learning_rate": 1.4743793260548244e-05, "loss": 0.6249, "step": 14628 }, { "epoch": 1.0872538089929393, "grad_norm": 1.9213083847729697, "learning_rate": 1.474308690402519e-05, "loss": 0.6078, "step": 14629 }, { "epoch": 1.0873281308063918, "grad_norm": 1.6401999219556116, "learning_rate": 1.474238051696639e-05, "loss": 0.5254, "step": 14630 }, { "epoch": 1.0874024526198438, "grad_norm": 1.8439034537640082, "learning_rate": 1.474167409937639e-05, "loss": 0.51, "step": 14631 }, { "epoch": 1.0874767744332963, "grad_norm": 1.642131216175554, "learning_rate": 1.4740967651259736e-05, "loss": 0.6239, "step": 14632 }, { "epoch": 1.0875510962467485, "grad_norm": 1.7064459220585946, "learning_rate": 1.4740261172620978e-05, "loss": 0.5325, "step": 14633 }, { "epoch": 1.0876254180602007, "grad_norm": 1.399669659370902, "learning_rate": 1.4739554663464666e-05, "loss": 0.3741, "step": 14634 }, { "epoch": 1.087699739873653, "grad_norm": 1.7407351992440439, "learning_rate": 1.4738848123795349e-05, "loss": 0.5859, "step": 14635 }, { "epoch": 1.0877740616871052, "grad_norm": 1.5863720141746724, "learning_rate": 1.473814155361757e-05, "loss": 0.4702, "step": 14636 }, { "epoch": 1.0878483835005575, "grad_norm": 2.012751837569827, "learning_rate": 1.4737434952935883e-05, "loss": 0.6013, "step": 14637 }, { "epoch": 1.0879227053140097, "grad_norm": 1.9738919325539328, "learning_rate": 1.4736728321754836e-05, "loss": 0.5994, "step": 14638 }, { "epoch": 1.087997027127462, "grad_norm": 1.9660276530399947, "learning_rate": 1.4736021660078976e-05, "loss": 0.5407, "step": 14639 }, { "epoch": 1.0880713489409142, "grad_norm": 2.1228091074421656, "learning_rate": 1.4735314967912858e-05, "loss": 0.6401, "step": 14640 }, { "epoch": 1.0881456707543664, "grad_norm": 2.553715298028532, "learning_rate": 1.4734608245261026e-05, "loss": 0.7084, "step": 14641 }, { "epoch": 1.0882199925678187, "grad_norm": 3.0162390869962246, "learning_rate": 1.473390149212803e-05, "loss": 0.802, "step": 14642 }, { "epoch": 1.0882943143812709, "grad_norm": 2.1375595402671026, "learning_rate": 1.4733194708518424e-05, "loss": 0.562, "step": 14643 }, { "epoch": 1.0883686361947231, "grad_norm": 2.054280178953277, "learning_rate": 1.4732487894436756e-05, "loss": 0.6316, "step": 14644 }, { "epoch": 1.0884429580081754, "grad_norm": 1.667277869795269, "learning_rate": 1.4731781049887574e-05, "loss": 0.4239, "step": 14645 }, { "epoch": 1.0885172798216276, "grad_norm": 1.7228911543388596, "learning_rate": 1.4731074174875435e-05, "loss": 0.5394, "step": 14646 }, { "epoch": 1.0885916016350798, "grad_norm": 2.122437820392977, "learning_rate": 1.4730367269404884e-05, "loss": 0.6831, "step": 14647 }, { "epoch": 1.088665923448532, "grad_norm": 2.0786251221475998, "learning_rate": 1.4729660333480474e-05, "loss": 0.6589, "step": 14648 }, { "epoch": 1.0887402452619843, "grad_norm": 1.9180210293315383, "learning_rate": 1.4728953367106755e-05, "loss": 0.5958, "step": 14649 }, { "epoch": 1.0888145670754366, "grad_norm": 1.7758549964961667, "learning_rate": 1.472824637028828e-05, "loss": 0.6024, "step": 14650 }, { "epoch": 1.0888888888888888, "grad_norm": 2.4085394176654584, "learning_rate": 1.4727539343029601e-05, "loss": 0.6608, "step": 14651 }, { "epoch": 1.088963210702341, "grad_norm": 1.9627288315786433, "learning_rate": 1.4726832285335271e-05, "loss": 0.7498, "step": 14652 }, { "epoch": 1.0890375325157935, "grad_norm": 2.0687585450536523, "learning_rate": 1.4726125197209837e-05, "loss": 0.7471, "step": 14653 }, { "epoch": 1.0891118543292457, "grad_norm": 2.544023147811618, "learning_rate": 1.4725418078657857e-05, "loss": 0.689, "step": 14654 }, { "epoch": 1.089186176142698, "grad_norm": 1.8827923169336593, "learning_rate": 1.4724710929683877e-05, "loss": 0.58, "step": 14655 }, { "epoch": 1.0892604979561502, "grad_norm": 2.103979774966957, "learning_rate": 1.4724003750292452e-05, "loss": 0.7406, "step": 14656 }, { "epoch": 1.0893348197696024, "grad_norm": 1.954568898570469, "learning_rate": 1.4723296540488142e-05, "loss": 0.5844, "step": 14657 }, { "epoch": 1.0894091415830547, "grad_norm": 2.2249614606088524, "learning_rate": 1.4722589300275492e-05, "loss": 0.6887, "step": 14658 }, { "epoch": 1.089483463396507, "grad_norm": 1.9141143501283473, "learning_rate": 1.4721882029659053e-05, "loss": 0.6036, "step": 14659 }, { "epoch": 1.0895577852099592, "grad_norm": 1.67545992040623, "learning_rate": 1.4721174728643385e-05, "loss": 0.457, "step": 14660 }, { "epoch": 1.0896321070234114, "grad_norm": 2.1328334888548692, "learning_rate": 1.472046739723304e-05, "loss": 0.7303, "step": 14661 }, { "epoch": 1.0897064288368636, "grad_norm": 2.1160188197948813, "learning_rate": 1.471976003543257e-05, "loss": 0.5386, "step": 14662 }, { "epoch": 1.0897807506503159, "grad_norm": 1.9953862609717157, "learning_rate": 1.4719052643246534e-05, "loss": 0.7845, "step": 14663 }, { "epoch": 1.0898550724637681, "grad_norm": 1.8000705838036253, "learning_rate": 1.4718345220679475e-05, "loss": 0.6131, "step": 14664 }, { "epoch": 1.0899293942772204, "grad_norm": 1.9737327002674294, "learning_rate": 1.4717637767735958e-05, "loss": 0.6194, "step": 14665 }, { "epoch": 1.0900037160906726, "grad_norm": 1.7810796990035942, "learning_rate": 1.4716930284420531e-05, "loss": 0.5792, "step": 14666 }, { "epoch": 1.0900780379041248, "grad_norm": 2.5009473874892465, "learning_rate": 1.4716222770737754e-05, "loss": 0.7315, "step": 14667 }, { "epoch": 1.090152359717577, "grad_norm": 2.843256408016801, "learning_rate": 1.4715515226692181e-05, "loss": 0.6072, "step": 14668 }, { "epoch": 1.0902266815310293, "grad_norm": 2.018206086059385, "learning_rate": 1.4714807652288366e-05, "loss": 0.6141, "step": 14669 }, { "epoch": 1.0903010033444815, "grad_norm": 1.7213783720437432, "learning_rate": 1.471410004753086e-05, "loss": 0.4715, "step": 14670 }, { "epoch": 1.0903753251579338, "grad_norm": 2.4022042881802292, "learning_rate": 1.4713392412424223e-05, "loss": 0.6211, "step": 14671 }, { "epoch": 1.090449646971386, "grad_norm": 2.10748738549172, "learning_rate": 1.4712684746973011e-05, "loss": 0.4837, "step": 14672 }, { "epoch": 1.0905239687848383, "grad_norm": 2.1935891611887564, "learning_rate": 1.471197705118178e-05, "loss": 0.7186, "step": 14673 }, { "epoch": 1.0905982905982905, "grad_norm": 2.290491543364004, "learning_rate": 1.4711269325055088e-05, "loss": 0.6067, "step": 14674 }, { "epoch": 1.0906726124117427, "grad_norm": 1.5232844752234476, "learning_rate": 1.4710561568597484e-05, "loss": 0.4959, "step": 14675 }, { "epoch": 1.0907469342251952, "grad_norm": 1.8952583824241587, "learning_rate": 1.470985378181353e-05, "loss": 0.5856, "step": 14676 }, { "epoch": 1.0908212560386474, "grad_norm": 1.8231287571418269, "learning_rate": 1.4709145964707781e-05, "loss": 0.6075, "step": 14677 }, { "epoch": 1.0908955778520997, "grad_norm": 1.6455274172177028, "learning_rate": 1.4708438117284795e-05, "loss": 0.5102, "step": 14678 }, { "epoch": 1.090969899665552, "grad_norm": 2.1728531204684525, "learning_rate": 1.4707730239549132e-05, "loss": 0.6622, "step": 14679 }, { "epoch": 1.0910442214790042, "grad_norm": 2.3735732475000666, "learning_rate": 1.4707022331505343e-05, "loss": 0.6996, "step": 14680 }, { "epoch": 1.0911185432924564, "grad_norm": 2.457355313792717, "learning_rate": 1.470631439315799e-05, "loss": 0.5487, "step": 14681 }, { "epoch": 1.0911928651059086, "grad_norm": 2.201443986407488, "learning_rate": 1.4705606424511627e-05, "loss": 0.5458, "step": 14682 }, { "epoch": 1.0912671869193609, "grad_norm": 2.0191198394453935, "learning_rate": 1.4704898425570813e-05, "loss": 0.64, "step": 14683 }, { "epoch": 1.091341508732813, "grad_norm": 2.09283703492802, "learning_rate": 1.470419039634011e-05, "loss": 0.7467, "step": 14684 }, { "epoch": 1.0914158305462653, "grad_norm": 2.015190603708212, "learning_rate": 1.4703482336824073e-05, "loss": 0.559, "step": 14685 }, { "epoch": 1.0914901523597176, "grad_norm": 2.00786855479641, "learning_rate": 1.4702774247027262e-05, "loss": 0.6147, "step": 14686 }, { "epoch": 1.0915644741731698, "grad_norm": 2.11852033523114, "learning_rate": 1.4702066126954231e-05, "loss": 0.5461, "step": 14687 }, { "epoch": 1.091638795986622, "grad_norm": 2.089320574346062, "learning_rate": 1.4701357976609542e-05, "loss": 0.6613, "step": 14688 }, { "epoch": 1.0917131178000743, "grad_norm": 1.7887974504459547, "learning_rate": 1.4700649795997757e-05, "loss": 0.5702, "step": 14689 }, { "epoch": 1.0917874396135265, "grad_norm": 1.7849250599180484, "learning_rate": 1.4699941585123431e-05, "loss": 0.6084, "step": 14690 }, { "epoch": 1.0918617614269788, "grad_norm": 2.0862042290393537, "learning_rate": 1.4699233343991129e-05, "loss": 0.5293, "step": 14691 }, { "epoch": 1.091936083240431, "grad_norm": 1.897019461313172, "learning_rate": 1.4698525072605403e-05, "loss": 0.5362, "step": 14692 }, { "epoch": 1.0920104050538832, "grad_norm": 2.031766200997529, "learning_rate": 1.4697816770970814e-05, "loss": 0.6746, "step": 14693 }, { "epoch": 1.0920847268673355, "grad_norm": 2.0227211333331354, "learning_rate": 1.4697108439091928e-05, "loss": 0.7814, "step": 14694 }, { "epoch": 1.0921590486807877, "grad_norm": 1.8586674215902326, "learning_rate": 1.4696400076973301e-05, "loss": 0.6408, "step": 14695 }, { "epoch": 1.09223337049424, "grad_norm": 1.6481186476382992, "learning_rate": 1.4695691684619493e-05, "loss": 0.6707, "step": 14696 }, { "epoch": 1.0923076923076924, "grad_norm": 1.4748458373406765, "learning_rate": 1.4694983262035068e-05, "loss": 0.495, "step": 14697 }, { "epoch": 1.0923820141211444, "grad_norm": 2.179993788025816, "learning_rate": 1.469427480922458e-05, "loss": 0.5789, "step": 14698 }, { "epoch": 1.092456335934597, "grad_norm": 1.7060209540664608, "learning_rate": 1.4693566326192596e-05, "loss": 0.4902, "step": 14699 }, { "epoch": 1.0925306577480491, "grad_norm": 2.6650065541379013, "learning_rate": 1.4692857812943678e-05, "loss": 0.7076, "step": 14700 }, { "epoch": 1.0926049795615014, "grad_norm": 1.9481926380403236, "learning_rate": 1.4692149269482384e-05, "loss": 0.5912, "step": 14701 }, { "epoch": 1.0926793013749536, "grad_norm": 1.7077770933635228, "learning_rate": 1.4691440695813276e-05, "loss": 0.5791, "step": 14702 }, { "epoch": 1.0927536231884059, "grad_norm": 2.05692933413769, "learning_rate": 1.4690732091940916e-05, "loss": 0.5865, "step": 14703 }, { "epoch": 1.092827945001858, "grad_norm": 2.045656962220463, "learning_rate": 1.4690023457869865e-05, "loss": 0.6185, "step": 14704 }, { "epoch": 1.0929022668153103, "grad_norm": 1.6033243254265992, "learning_rate": 1.4689314793604687e-05, "loss": 0.4736, "step": 14705 }, { "epoch": 1.0929765886287626, "grad_norm": 2.0450602233660264, "learning_rate": 1.4688606099149945e-05, "loss": 0.5617, "step": 14706 }, { "epoch": 1.0930509104422148, "grad_norm": 2.5425407765665087, "learning_rate": 1.4687897374510199e-05, "loss": 0.6451, "step": 14707 }, { "epoch": 1.093125232255667, "grad_norm": 1.5778046348407992, "learning_rate": 1.4687188619690013e-05, "loss": 0.5343, "step": 14708 }, { "epoch": 1.0931995540691193, "grad_norm": 2.121032397209401, "learning_rate": 1.4686479834693949e-05, "loss": 0.6725, "step": 14709 }, { "epoch": 1.0932738758825715, "grad_norm": 2.0072520952276607, "learning_rate": 1.4685771019526574e-05, "loss": 0.6571, "step": 14710 }, { "epoch": 1.0933481976960238, "grad_norm": 1.5735933442200296, "learning_rate": 1.4685062174192443e-05, "loss": 0.5762, "step": 14711 }, { "epoch": 1.093422519509476, "grad_norm": 1.8474186946272446, "learning_rate": 1.468435329869613e-05, "loss": 0.5502, "step": 14712 }, { "epoch": 1.0934968413229282, "grad_norm": 1.5798943216347603, "learning_rate": 1.4683644393042191e-05, "loss": 0.594, "step": 14713 }, { "epoch": 1.0935711631363805, "grad_norm": 1.8070591833227194, "learning_rate": 1.4682935457235194e-05, "loss": 0.696, "step": 14714 }, { "epoch": 1.0936454849498327, "grad_norm": 1.5489445521711513, "learning_rate": 1.4682226491279702e-05, "loss": 0.4735, "step": 14715 }, { "epoch": 1.093719806763285, "grad_norm": 1.7892194997587572, "learning_rate": 1.4681517495180275e-05, "loss": 0.6486, "step": 14716 }, { "epoch": 1.0937941285767372, "grad_norm": 2.6097679823790556, "learning_rate": 1.4680808468941486e-05, "loss": 0.609, "step": 14717 }, { "epoch": 1.0938684503901894, "grad_norm": 1.6973996821257027, "learning_rate": 1.4680099412567893e-05, "loss": 0.5212, "step": 14718 }, { "epoch": 1.0939427722036417, "grad_norm": 1.8526477756677897, "learning_rate": 1.4679390326064062e-05, "loss": 0.5443, "step": 14719 }, { "epoch": 1.0940170940170941, "grad_norm": 1.7393674499247518, "learning_rate": 1.467868120943456e-05, "loss": 0.4062, "step": 14720 }, { "epoch": 1.0940914158305464, "grad_norm": 1.9388926265232735, "learning_rate": 1.4677972062683949e-05, "loss": 0.5721, "step": 14721 }, { "epoch": 1.0941657376439986, "grad_norm": 1.9994200956448498, "learning_rate": 1.4677262885816797e-05, "loss": 0.6543, "step": 14722 }, { "epoch": 1.0942400594574508, "grad_norm": 1.747496295688165, "learning_rate": 1.4676553678837673e-05, "loss": 0.6048, "step": 14723 }, { "epoch": 1.094314381270903, "grad_norm": 2.0544361354791802, "learning_rate": 1.4675844441751134e-05, "loss": 0.3845, "step": 14724 }, { "epoch": 1.0943887030843553, "grad_norm": 1.9313565521670393, "learning_rate": 1.4675135174561754e-05, "loss": 0.7233, "step": 14725 }, { "epoch": 1.0944630248978076, "grad_norm": 3.3907980375678872, "learning_rate": 1.4674425877274097e-05, "loss": 0.6917, "step": 14726 }, { "epoch": 1.0945373467112598, "grad_norm": 2.126543701991382, "learning_rate": 1.4673716549892727e-05, "loss": 0.7489, "step": 14727 }, { "epoch": 1.094611668524712, "grad_norm": 1.9915610393472643, "learning_rate": 1.4673007192422214e-05, "loss": 0.7282, "step": 14728 }, { "epoch": 1.0946859903381643, "grad_norm": 1.7188947150064873, "learning_rate": 1.4672297804867119e-05, "loss": 0.4938, "step": 14729 }, { "epoch": 1.0947603121516165, "grad_norm": 2.5999887303417295, "learning_rate": 1.4671588387232016e-05, "loss": 0.6486, "step": 14730 }, { "epoch": 1.0948346339650687, "grad_norm": 1.7429026663483482, "learning_rate": 1.4670878939521471e-05, "loss": 0.5893, "step": 14731 }, { "epoch": 1.094908955778521, "grad_norm": 1.983267202877864, "learning_rate": 1.4670169461740047e-05, "loss": 0.7474, "step": 14732 }, { "epoch": 1.0949832775919732, "grad_norm": 2.3175882937888304, "learning_rate": 1.4669459953892315e-05, "loss": 0.6656, "step": 14733 }, { "epoch": 1.0950575994054255, "grad_norm": 1.8390120249681694, "learning_rate": 1.4668750415982845e-05, "loss": 0.5654, "step": 14734 }, { "epoch": 1.0951319212188777, "grad_norm": 2.530270370917986, "learning_rate": 1.4668040848016199e-05, "loss": 0.7881, "step": 14735 }, { "epoch": 1.09520624303233, "grad_norm": 1.8811988537007587, "learning_rate": 1.466733124999695e-05, "loss": 0.616, "step": 14736 }, { "epoch": 1.0952805648457822, "grad_norm": 2.7178459371484527, "learning_rate": 1.4666621621929663e-05, "loss": 0.754, "step": 14737 }, { "epoch": 1.0953548866592344, "grad_norm": 1.8150309916195588, "learning_rate": 1.4665911963818909e-05, "loss": 0.6594, "step": 14738 }, { "epoch": 1.0954292084726867, "grad_norm": 1.8464587897959794, "learning_rate": 1.4665202275669257e-05, "loss": 0.591, "step": 14739 }, { "epoch": 1.095503530286139, "grad_norm": 2.1530100743577107, "learning_rate": 1.4664492557485273e-05, "loss": 0.6962, "step": 14740 }, { "epoch": 1.0955778520995911, "grad_norm": 3.654760512157499, "learning_rate": 1.4663782809271525e-05, "loss": 0.7384, "step": 14741 }, { "epoch": 1.0956521739130434, "grad_norm": 1.5974492307733736, "learning_rate": 1.4663073031032592e-05, "loss": 0.5536, "step": 14742 }, { "epoch": 1.0957264957264958, "grad_norm": 2.6607771474146484, "learning_rate": 1.466236322277303e-05, "loss": 0.8438, "step": 14743 }, { "epoch": 1.095800817539948, "grad_norm": 1.299015968165843, "learning_rate": 1.466165338449742e-05, "loss": 0.3895, "step": 14744 }, { "epoch": 1.0958751393534003, "grad_norm": 1.8672584592624335, "learning_rate": 1.4660943516210328e-05, "loss": 0.6062, "step": 14745 }, { "epoch": 1.0959494611668525, "grad_norm": 2.262353753569427, "learning_rate": 1.4660233617916321e-05, "loss": 0.6704, "step": 14746 }, { "epoch": 1.0960237829803048, "grad_norm": 2.004751975156535, "learning_rate": 1.4659523689619972e-05, "loss": 0.5553, "step": 14747 }, { "epoch": 1.096098104793757, "grad_norm": 1.5933095957986407, "learning_rate": 1.4658813731325855e-05, "loss": 0.6801, "step": 14748 }, { "epoch": 1.0961724266072093, "grad_norm": 1.929964368956932, "learning_rate": 1.4658103743038529e-05, "loss": 0.5514, "step": 14749 }, { "epoch": 1.0962467484206615, "grad_norm": 1.716781811835268, "learning_rate": 1.4657393724762581e-05, "loss": 0.5833, "step": 14750 }, { "epoch": 1.0963210702341137, "grad_norm": 1.7257831409741127, "learning_rate": 1.465668367650257e-05, "loss": 0.7209, "step": 14751 }, { "epoch": 1.096395392047566, "grad_norm": 1.7466660613725309, "learning_rate": 1.465597359826307e-05, "loss": 0.646, "step": 14752 }, { "epoch": 1.0964697138610182, "grad_norm": 1.5441747318290842, "learning_rate": 1.4655263490048657e-05, "loss": 0.5354, "step": 14753 }, { "epoch": 1.0965440356744705, "grad_norm": 1.8714107706182883, "learning_rate": 1.4654553351863897e-05, "loss": 0.5616, "step": 14754 }, { "epoch": 1.0966183574879227, "grad_norm": 2.00868881513863, "learning_rate": 1.4653843183713363e-05, "loss": 0.5584, "step": 14755 }, { "epoch": 1.096692679301375, "grad_norm": 1.6813291552808054, "learning_rate": 1.465313298560163e-05, "loss": 0.6018, "step": 14756 }, { "epoch": 1.0967670011148272, "grad_norm": 2.085733583362472, "learning_rate": 1.4652422757533266e-05, "loss": 0.5513, "step": 14757 }, { "epoch": 1.0968413229282794, "grad_norm": 2.220901268920548, "learning_rate": 1.4651712499512847e-05, "loss": 0.6956, "step": 14758 }, { "epoch": 1.0969156447417316, "grad_norm": 1.7461160344506081, "learning_rate": 1.4651002211544945e-05, "loss": 0.6484, "step": 14759 }, { "epoch": 1.0969899665551839, "grad_norm": 2.963114875983902, "learning_rate": 1.4650291893634132e-05, "loss": 0.671, "step": 14760 }, { "epoch": 1.0970642883686361, "grad_norm": 2.7268894458030237, "learning_rate": 1.464958154578498e-05, "loss": 0.5844, "step": 14761 }, { "epoch": 1.0971386101820884, "grad_norm": 2.162484617626319, "learning_rate": 1.4648871168002065e-05, "loss": 0.7486, "step": 14762 }, { "epoch": 1.0972129319955406, "grad_norm": 2.02517202420153, "learning_rate": 1.4648160760289955e-05, "loss": 0.7104, "step": 14763 }, { "epoch": 1.097287253808993, "grad_norm": 1.8136471165867607, "learning_rate": 1.4647450322653232e-05, "loss": 0.5679, "step": 14764 }, { "epoch": 1.097361575622445, "grad_norm": 1.9122055361532597, "learning_rate": 1.4646739855096464e-05, "loss": 0.5732, "step": 14765 }, { "epoch": 1.0974358974358975, "grad_norm": 7.157554392519738, "learning_rate": 1.4646029357624225e-05, "loss": 0.6563, "step": 14766 }, { "epoch": 1.0975102192493498, "grad_norm": 1.629263261104065, "learning_rate": 1.464531883024109e-05, "loss": 0.5057, "step": 14767 }, { "epoch": 1.097584541062802, "grad_norm": 2.085989661633724, "learning_rate": 1.464460827295163e-05, "loss": 0.7545, "step": 14768 }, { "epoch": 1.0976588628762542, "grad_norm": 2.1623433286085647, "learning_rate": 1.4643897685760426e-05, "loss": 0.6382, "step": 14769 }, { "epoch": 1.0977331846897065, "grad_norm": 2.127451651776686, "learning_rate": 1.4643187068672051e-05, "loss": 0.7728, "step": 14770 }, { "epoch": 1.0978075065031587, "grad_norm": 1.7396526680883744, "learning_rate": 1.4642476421691077e-05, "loss": 0.459, "step": 14771 }, { "epoch": 1.097881828316611, "grad_norm": 1.9956719383341468, "learning_rate": 1.4641765744822083e-05, "loss": 0.6333, "step": 14772 }, { "epoch": 1.0979561501300632, "grad_norm": 2.544567221508628, "learning_rate": 1.4641055038069638e-05, "loss": 0.7438, "step": 14773 }, { "epoch": 1.0980304719435154, "grad_norm": 1.4719454641103034, "learning_rate": 1.4640344301438323e-05, "loss": 0.5553, "step": 14774 }, { "epoch": 1.0981047937569677, "grad_norm": 1.9220128144775157, "learning_rate": 1.463963353493271e-05, "loss": 0.6576, "step": 14775 }, { "epoch": 1.09817911557042, "grad_norm": 1.859780896609634, "learning_rate": 1.4638922738557381e-05, "loss": 0.6446, "step": 14776 }, { "epoch": 1.0982534373838722, "grad_norm": 1.6707620641120604, "learning_rate": 1.4638211912316909e-05, "loss": 0.6889, "step": 14777 }, { "epoch": 1.0983277591973244, "grad_norm": 2.0464081591587235, "learning_rate": 1.4637501056215869e-05, "loss": 0.676, "step": 14778 }, { "epoch": 1.0984020810107766, "grad_norm": 1.7781430328366234, "learning_rate": 1.4636790170258835e-05, "loss": 0.6767, "step": 14779 }, { "epoch": 1.0984764028242289, "grad_norm": 2.1535414039372016, "learning_rate": 1.4636079254450386e-05, "loss": 0.6795, "step": 14780 }, { "epoch": 1.098550724637681, "grad_norm": 2.0368033735461535, "learning_rate": 1.46353683087951e-05, "loss": 0.5799, "step": 14781 }, { "epoch": 1.0986250464511333, "grad_norm": 1.8475689560011137, "learning_rate": 1.4634657333297554e-05, "loss": 0.5518, "step": 14782 }, { "epoch": 1.0986993682645856, "grad_norm": 2.02125571366645, "learning_rate": 1.4633946327962326e-05, "loss": 0.6842, "step": 14783 }, { "epoch": 1.0987736900780378, "grad_norm": 2.5251225120699683, "learning_rate": 1.463323529279399e-05, "loss": 0.6867, "step": 14784 }, { "epoch": 1.09884801189149, "grad_norm": 1.9427718599641133, "learning_rate": 1.4632524227797124e-05, "loss": 0.6214, "step": 14785 }, { "epoch": 1.0989223337049423, "grad_norm": 1.8398322328866068, "learning_rate": 1.463181313297631e-05, "loss": 0.6057, "step": 14786 }, { "epoch": 1.0989966555183948, "grad_norm": 2.2609627701633115, "learning_rate": 1.4631102008336124e-05, "loss": 0.5794, "step": 14787 }, { "epoch": 1.099070977331847, "grad_norm": 2.165489729457324, "learning_rate": 1.4630390853881145e-05, "loss": 0.5443, "step": 14788 }, { "epoch": 1.0991452991452992, "grad_norm": 1.931911477060351, "learning_rate": 1.4629679669615946e-05, "loss": 0.6432, "step": 14789 }, { "epoch": 1.0992196209587515, "grad_norm": 1.934335718807755, "learning_rate": 1.462896845554511e-05, "loss": 0.6917, "step": 14790 }, { "epoch": 1.0992939427722037, "grad_norm": 2.084240006258321, "learning_rate": 1.4628257211673216e-05, "loss": 0.4832, "step": 14791 }, { "epoch": 1.099368264585656, "grad_norm": 1.8147497944064936, "learning_rate": 1.4627545938004841e-05, "loss": 0.531, "step": 14792 }, { "epoch": 1.0994425863991082, "grad_norm": 1.9933206266911936, "learning_rate": 1.462683463454457e-05, "loss": 0.65, "step": 14793 }, { "epoch": 1.0995169082125604, "grad_norm": 3.9321968756640393, "learning_rate": 1.4626123301296975e-05, "loss": 0.6253, "step": 14794 }, { "epoch": 1.0995912300260127, "grad_norm": 1.8668101984035508, "learning_rate": 1.4625411938266636e-05, "loss": 0.586, "step": 14795 }, { "epoch": 1.099665551839465, "grad_norm": 2.463322925842807, "learning_rate": 1.4624700545458134e-05, "loss": 0.5903, "step": 14796 }, { "epoch": 1.0997398736529171, "grad_norm": 2.1549360719221236, "learning_rate": 1.4623989122876055e-05, "loss": 0.7173, "step": 14797 }, { "epoch": 1.0998141954663694, "grad_norm": 1.8572682539308147, "learning_rate": 1.462327767052497e-05, "loss": 0.5675, "step": 14798 }, { "epoch": 1.0998885172798216, "grad_norm": 2.5337515850332113, "learning_rate": 1.4622566188409469e-05, "loss": 0.6892, "step": 14799 }, { "epoch": 1.0999628390932739, "grad_norm": 1.6668189168455905, "learning_rate": 1.462185467653412e-05, "loss": 0.5689, "step": 14800 }, { "epoch": 1.100037160906726, "grad_norm": 1.9766742659804073, "learning_rate": 1.4621143134903513e-05, "loss": 0.6817, "step": 14801 }, { "epoch": 1.1001114827201783, "grad_norm": 1.8505736418591476, "learning_rate": 1.4620431563522224e-05, "loss": 0.5984, "step": 14802 }, { "epoch": 1.1001858045336306, "grad_norm": 1.4740843024564931, "learning_rate": 1.4619719962394839e-05, "loss": 0.4031, "step": 14803 }, { "epoch": 1.1002601263470828, "grad_norm": 1.637690425046334, "learning_rate": 1.4619008331525936e-05, "loss": 0.4996, "step": 14804 }, { "epoch": 1.100334448160535, "grad_norm": 1.9053694440410156, "learning_rate": 1.4618296670920097e-05, "loss": 0.6399, "step": 14805 }, { "epoch": 1.1004087699739873, "grad_norm": 1.8765907615403874, "learning_rate": 1.4617584980581905e-05, "loss": 0.5917, "step": 14806 }, { "epoch": 1.1004830917874395, "grad_norm": 1.9143322534559528, "learning_rate": 1.4616873260515939e-05, "loss": 0.5124, "step": 14807 }, { "epoch": 1.1005574136008918, "grad_norm": 1.7470571611315149, "learning_rate": 1.461616151072678e-05, "loss": 0.6152, "step": 14808 }, { "epoch": 1.100631735414344, "grad_norm": 1.7001283584266689, "learning_rate": 1.4615449731219018e-05, "loss": 0.6613, "step": 14809 }, { "epoch": 1.1007060572277965, "grad_norm": 1.569524275169705, "learning_rate": 1.4614737921997229e-05, "loss": 0.5464, "step": 14810 }, { "epoch": 1.1007803790412487, "grad_norm": 1.8755673297743638, "learning_rate": 1.4614026083065996e-05, "loss": 0.4402, "step": 14811 }, { "epoch": 1.100854700854701, "grad_norm": 1.6199391697384287, "learning_rate": 1.4613314214429898e-05, "loss": 0.455, "step": 14812 }, { "epoch": 1.1009290226681532, "grad_norm": 1.8594998373665794, "learning_rate": 1.4612602316093528e-05, "loss": 0.6278, "step": 14813 }, { "epoch": 1.1010033444816054, "grad_norm": 2.1222920092282105, "learning_rate": 1.461189038806146e-05, "loss": 0.6033, "step": 14814 }, { "epoch": 1.1010776662950577, "grad_norm": 2.0848110171018917, "learning_rate": 1.4611178430338284e-05, "loss": 0.6786, "step": 14815 }, { "epoch": 1.10115198810851, "grad_norm": 1.8835462948046107, "learning_rate": 1.4610466442928579e-05, "loss": 0.6539, "step": 14816 }, { "epoch": 1.1012263099219621, "grad_norm": 2.0753173342939113, "learning_rate": 1.460975442583693e-05, "loss": 0.6983, "step": 14817 }, { "epoch": 1.1013006317354144, "grad_norm": 2.3762179813532103, "learning_rate": 1.4609042379067922e-05, "loss": 0.7948, "step": 14818 }, { "epoch": 1.1013749535488666, "grad_norm": 2.036560603714737, "learning_rate": 1.4608330302626136e-05, "loss": 0.6929, "step": 14819 }, { "epoch": 1.1014492753623188, "grad_norm": 1.9615207733043603, "learning_rate": 1.4607618196516162e-05, "loss": 0.6629, "step": 14820 }, { "epoch": 1.101523597175771, "grad_norm": 1.8842824135234673, "learning_rate": 1.4606906060742581e-05, "loss": 0.6666, "step": 14821 }, { "epoch": 1.1015979189892233, "grad_norm": 1.89861100681881, "learning_rate": 1.4606193895309976e-05, "loss": 0.6873, "step": 14822 }, { "epoch": 1.1016722408026756, "grad_norm": 1.8610369299477987, "learning_rate": 1.4605481700222934e-05, "loss": 0.6916, "step": 14823 }, { "epoch": 1.1017465626161278, "grad_norm": 1.8363412214723749, "learning_rate": 1.4604769475486041e-05, "loss": 0.658, "step": 14824 }, { "epoch": 1.10182088442958, "grad_norm": 2.292569942423223, "learning_rate": 1.4604057221103879e-05, "loss": 0.6767, "step": 14825 }, { "epoch": 1.1018952062430323, "grad_norm": 2.817266240920109, "learning_rate": 1.4603344937081037e-05, "loss": 0.6524, "step": 14826 }, { "epoch": 1.1019695280564845, "grad_norm": 2.5603047906555063, "learning_rate": 1.46026326234221e-05, "loss": 0.7611, "step": 14827 }, { "epoch": 1.1020438498699368, "grad_norm": 2.32262201685079, "learning_rate": 1.4601920280131649e-05, "loss": 0.5093, "step": 14828 }, { "epoch": 1.102118171683389, "grad_norm": 6.143633819389882, "learning_rate": 1.4601207907214278e-05, "loss": 0.5082, "step": 14829 }, { "epoch": 1.1021924934968412, "grad_norm": 1.6821043570731038, "learning_rate": 1.4600495504674568e-05, "loss": 0.4974, "step": 14830 }, { "epoch": 1.1022668153102935, "grad_norm": 3.1365862168070127, "learning_rate": 1.4599783072517107e-05, "loss": 0.6498, "step": 14831 }, { "epoch": 1.1023411371237457, "grad_norm": 2.5202944382043935, "learning_rate": 1.4599070610746481e-05, "loss": 0.6959, "step": 14832 }, { "epoch": 1.1024154589371982, "grad_norm": 2.0337118713892948, "learning_rate": 1.4598358119367277e-05, "loss": 0.6739, "step": 14833 }, { "epoch": 1.1024897807506504, "grad_norm": 2.061510069240137, "learning_rate": 1.459764559838408e-05, "loss": 0.6008, "step": 14834 }, { "epoch": 1.1025641025641026, "grad_norm": 2.635882135287251, "learning_rate": 1.459693304780148e-05, "loss": 0.6774, "step": 14835 }, { "epoch": 1.1026384243775549, "grad_norm": 2.260422692916343, "learning_rate": 1.4596220467624065e-05, "loss": 0.6168, "step": 14836 }, { "epoch": 1.1027127461910071, "grad_norm": 2.1207798008616705, "learning_rate": 1.4595507857856421e-05, "loss": 0.6362, "step": 14837 }, { "epoch": 1.1027870680044594, "grad_norm": 2.6732529651021824, "learning_rate": 1.4594795218503137e-05, "loss": 0.5494, "step": 14838 }, { "epoch": 1.1028613898179116, "grad_norm": 2.31091522573319, "learning_rate": 1.45940825495688e-05, "loss": 0.5349, "step": 14839 }, { "epoch": 1.1029357116313638, "grad_norm": 1.9564525785930447, "learning_rate": 1.4593369851057995e-05, "loss": 0.5805, "step": 14840 }, { "epoch": 1.103010033444816, "grad_norm": 2.2208538538408913, "learning_rate": 1.4592657122975314e-05, "loss": 0.6804, "step": 14841 }, { "epoch": 1.1030843552582683, "grad_norm": 2.2309535184213574, "learning_rate": 1.4591944365325346e-05, "loss": 0.6509, "step": 14842 }, { "epoch": 1.1031586770717206, "grad_norm": 1.5025161404016878, "learning_rate": 1.4591231578112677e-05, "loss": 0.5376, "step": 14843 }, { "epoch": 1.1032329988851728, "grad_norm": 1.7695838360760539, "learning_rate": 1.4590518761341898e-05, "loss": 0.6856, "step": 14844 }, { "epoch": 1.103307320698625, "grad_norm": 1.815099262444218, "learning_rate": 1.4589805915017598e-05, "loss": 0.7377, "step": 14845 }, { "epoch": 1.1033816425120773, "grad_norm": 2.1664486756005825, "learning_rate": 1.4589093039144364e-05, "loss": 0.7772, "step": 14846 }, { "epoch": 1.1034559643255295, "grad_norm": 1.9883417668568915, "learning_rate": 1.4588380133726788e-05, "loss": 0.6275, "step": 14847 }, { "epoch": 1.1035302861389817, "grad_norm": 1.9309768609919276, "learning_rate": 1.458766719876946e-05, "loss": 0.5939, "step": 14848 }, { "epoch": 1.103604607952434, "grad_norm": 2.1570313513963852, "learning_rate": 1.4586954234276969e-05, "loss": 0.652, "step": 14849 }, { "epoch": 1.1036789297658862, "grad_norm": 2.3190744124207576, "learning_rate": 1.4586241240253902e-05, "loss": 0.6695, "step": 14850 }, { "epoch": 1.1037532515793385, "grad_norm": 2.8116967922183562, "learning_rate": 1.4585528216704854e-05, "loss": 0.6147, "step": 14851 }, { "epoch": 1.1038275733927907, "grad_norm": 1.7787070918151662, "learning_rate": 1.4584815163634411e-05, "loss": 0.5148, "step": 14852 }, { "epoch": 1.103901895206243, "grad_norm": 2.4427485458446068, "learning_rate": 1.4584102081047169e-05, "loss": 0.7554, "step": 14853 }, { "epoch": 1.1039762170196954, "grad_norm": 2.0774391703348547, "learning_rate": 1.4583388968947714e-05, "loss": 0.5388, "step": 14854 }, { "epoch": 1.1040505388331476, "grad_norm": 1.9617082610640366, "learning_rate": 1.458267582734064e-05, "loss": 0.7121, "step": 14855 }, { "epoch": 1.1041248606465999, "grad_norm": 1.9504080930504115, "learning_rate": 1.4581962656230532e-05, "loss": 0.7671, "step": 14856 }, { "epoch": 1.104199182460052, "grad_norm": 1.7538325315419068, "learning_rate": 1.4581249455621991e-05, "loss": 0.5834, "step": 14857 }, { "epoch": 1.1042735042735043, "grad_norm": 1.836884420853236, "learning_rate": 1.4580536225519599e-05, "loss": 0.7379, "step": 14858 }, { "epoch": 1.1043478260869566, "grad_norm": 1.6381352000925875, "learning_rate": 1.457982296592796e-05, "loss": 0.5858, "step": 14859 }, { "epoch": 1.1044221479004088, "grad_norm": 1.9374597350018876, "learning_rate": 1.4579109676851653e-05, "loss": 0.532, "step": 14860 }, { "epoch": 1.104496469713861, "grad_norm": 1.7930773662979582, "learning_rate": 1.4578396358295277e-05, "loss": 0.5791, "step": 14861 }, { "epoch": 1.1045707915273133, "grad_norm": 1.9255853771997782, "learning_rate": 1.4577683010263422e-05, "loss": 0.6154, "step": 14862 }, { "epoch": 1.1046451133407655, "grad_norm": 2.0997356093180555, "learning_rate": 1.457696963276068e-05, "loss": 0.4635, "step": 14863 }, { "epoch": 1.1047194351542178, "grad_norm": 2.173471340923867, "learning_rate": 1.457625622579165e-05, "loss": 0.7695, "step": 14864 }, { "epoch": 1.10479375696767, "grad_norm": 1.667765050248572, "learning_rate": 1.4575542789360915e-05, "loss": 0.5512, "step": 14865 }, { "epoch": 1.1048680787811223, "grad_norm": 2.407822059016761, "learning_rate": 1.4574829323473074e-05, "loss": 0.5756, "step": 14866 }, { "epoch": 1.1049424005945745, "grad_norm": 2.267187390652793, "learning_rate": 1.457411582813272e-05, "loss": 0.7046, "step": 14867 }, { "epoch": 1.1050167224080267, "grad_norm": 1.9046251116214468, "learning_rate": 1.4573402303344444e-05, "loss": 0.5511, "step": 14868 }, { "epoch": 1.105091044221479, "grad_norm": 2.182594281935678, "learning_rate": 1.4572688749112843e-05, "loss": 0.7888, "step": 14869 }, { "epoch": 1.1051653660349312, "grad_norm": 1.952948427102112, "learning_rate": 1.457197516544251e-05, "loss": 0.6587, "step": 14870 }, { "epoch": 1.1052396878483834, "grad_norm": 1.5421759766175829, "learning_rate": 1.4571261552338034e-05, "loss": 0.3974, "step": 14871 }, { "epoch": 1.1053140096618357, "grad_norm": 1.9399560144823196, "learning_rate": 1.4570547909804019e-05, "loss": 0.7575, "step": 14872 }, { "epoch": 1.105388331475288, "grad_norm": 1.6774486607361971, "learning_rate": 1.4569834237845048e-05, "loss": 0.5311, "step": 14873 }, { "epoch": 1.1054626532887402, "grad_norm": 1.9981976563751869, "learning_rate": 1.4569120536465724e-05, "loss": 0.5071, "step": 14874 }, { "epoch": 1.1055369751021924, "grad_norm": 1.6871547899146997, "learning_rate": 1.456840680567064e-05, "loss": 0.5999, "step": 14875 }, { "epoch": 1.1056112969156446, "grad_norm": 1.9962389559718432, "learning_rate": 1.4567693045464389e-05, "loss": 0.6013, "step": 14876 }, { "epoch": 1.105685618729097, "grad_norm": 2.407622465346092, "learning_rate": 1.4566979255851567e-05, "loss": 0.772, "step": 14877 }, { "epoch": 1.1057599405425493, "grad_norm": 2.1285604206220374, "learning_rate": 1.456626543683677e-05, "loss": 0.6503, "step": 14878 }, { "epoch": 1.1058342623560016, "grad_norm": 2.0240601628876056, "learning_rate": 1.4565551588424594e-05, "loss": 0.6974, "step": 14879 }, { "epoch": 1.1059085841694538, "grad_norm": 2.022818330804617, "learning_rate": 1.456483771061963e-05, "loss": 0.8028, "step": 14880 }, { "epoch": 1.105982905982906, "grad_norm": 2.037162736399827, "learning_rate": 1.4564123803426483e-05, "loss": 0.7246, "step": 14881 }, { "epoch": 1.1060572277963583, "grad_norm": 1.829013974269306, "learning_rate": 1.456340986684974e-05, "loss": 0.5461, "step": 14882 }, { "epoch": 1.1061315496098105, "grad_norm": 2.421381746684609, "learning_rate": 1.4562695900894004e-05, "loss": 0.689, "step": 14883 }, { "epoch": 1.1062058714232628, "grad_norm": 1.9719424589727208, "learning_rate": 1.4561981905563864e-05, "loss": 0.6144, "step": 14884 }, { "epoch": 1.106280193236715, "grad_norm": 1.8585996852169508, "learning_rate": 1.4561267880863925e-05, "loss": 0.6094, "step": 14885 }, { "epoch": 1.1063545150501672, "grad_norm": 1.8542038801604352, "learning_rate": 1.4560553826798781e-05, "loss": 0.8366, "step": 14886 }, { "epoch": 1.1064288368636195, "grad_norm": 1.7591765797326002, "learning_rate": 1.4559839743373024e-05, "loss": 0.742, "step": 14887 }, { "epoch": 1.1065031586770717, "grad_norm": 1.767677167150684, "learning_rate": 1.4559125630591258e-05, "loss": 0.3989, "step": 14888 }, { "epoch": 1.106577480490524, "grad_norm": 2.181496290904463, "learning_rate": 1.4558411488458078e-05, "loss": 0.5984, "step": 14889 }, { "epoch": 1.1066518023039762, "grad_norm": 2.2831868081592037, "learning_rate": 1.4557697316978079e-05, "loss": 0.6277, "step": 14890 }, { "epoch": 1.1067261241174284, "grad_norm": 1.9825846067550421, "learning_rate": 1.4556983116155862e-05, "loss": 0.6749, "step": 14891 }, { "epoch": 1.1068004459308807, "grad_norm": 1.6922384005423343, "learning_rate": 1.4556268885996025e-05, "loss": 0.6096, "step": 14892 }, { "epoch": 1.106874767744333, "grad_norm": 1.9418561087808757, "learning_rate": 1.4555554626503163e-05, "loss": 0.6401, "step": 14893 }, { "epoch": 1.1069490895577851, "grad_norm": 1.6250327527686688, "learning_rate": 1.455484033768188e-05, "loss": 0.4338, "step": 14894 }, { "epoch": 1.1070234113712374, "grad_norm": 1.8224352740789371, "learning_rate": 1.4554126019536772e-05, "loss": 0.62, "step": 14895 }, { "epoch": 1.1070977331846896, "grad_norm": 2.545565432336526, "learning_rate": 1.4553411672072433e-05, "loss": 0.5799, "step": 14896 }, { "epoch": 1.1071720549981419, "grad_norm": 2.087297176483005, "learning_rate": 1.455269729529347e-05, "loss": 0.7965, "step": 14897 }, { "epoch": 1.107246376811594, "grad_norm": 2.219459346621331, "learning_rate": 1.4551982889204474e-05, "loss": 0.7138, "step": 14898 }, { "epoch": 1.1073206986250463, "grad_norm": 1.815180059931977, "learning_rate": 1.455126845381005e-05, "loss": 0.5553, "step": 14899 }, { "epoch": 1.1073950204384988, "grad_norm": 2.149479539780478, "learning_rate": 1.4550553989114797e-05, "loss": 0.7159, "step": 14900 }, { "epoch": 1.107469342251951, "grad_norm": 2.0912939618165303, "learning_rate": 1.4549839495123312e-05, "loss": 0.6873, "step": 14901 }, { "epoch": 1.1075436640654033, "grad_norm": 2.511114234998214, "learning_rate": 1.4549124971840198e-05, "loss": 0.6988, "step": 14902 }, { "epoch": 1.1076179858788555, "grad_norm": 1.770747032388687, "learning_rate": 1.4548410419270052e-05, "loss": 0.5702, "step": 14903 }, { "epoch": 1.1076923076923078, "grad_norm": 1.7760966291621183, "learning_rate": 1.4547695837417478e-05, "loss": 0.6293, "step": 14904 }, { "epoch": 1.10776662950576, "grad_norm": 1.72751306265307, "learning_rate": 1.4546981226287069e-05, "loss": 0.5787, "step": 14905 }, { "epoch": 1.1078409513192122, "grad_norm": 1.8387660303177749, "learning_rate": 1.4546266585883437e-05, "loss": 0.5874, "step": 14906 }, { "epoch": 1.1079152731326645, "grad_norm": 1.7930930786328485, "learning_rate": 1.4545551916211171e-05, "loss": 0.5775, "step": 14907 }, { "epoch": 1.1079895949461167, "grad_norm": 4.239647137214083, "learning_rate": 1.4544837217274882e-05, "loss": 0.6505, "step": 14908 }, { "epoch": 1.108063916759569, "grad_norm": 2.221368916258985, "learning_rate": 1.4544122489079167e-05, "loss": 0.6788, "step": 14909 }, { "epoch": 1.1081382385730212, "grad_norm": 1.9164510922348827, "learning_rate": 1.4543407731628623e-05, "loss": 0.7354, "step": 14910 }, { "epoch": 1.1082125603864734, "grad_norm": 2.40621907489261, "learning_rate": 1.4542692944927859e-05, "loss": 0.7787, "step": 14911 }, { "epoch": 1.1082868821999257, "grad_norm": 2.0166435465693757, "learning_rate": 1.4541978128981473e-05, "loss": 0.7245, "step": 14912 }, { "epoch": 1.108361204013378, "grad_norm": 2.0859820222839356, "learning_rate": 1.4541263283794067e-05, "loss": 0.8106, "step": 14913 }, { "epoch": 1.1084355258268301, "grad_norm": 2.3836714294983756, "learning_rate": 1.4540548409370243e-05, "loss": 0.6904, "step": 14914 }, { "epoch": 1.1085098476402824, "grad_norm": 2.3120564757140465, "learning_rate": 1.4539833505714605e-05, "loss": 0.7019, "step": 14915 }, { "epoch": 1.1085841694537346, "grad_norm": 2.091903982929771, "learning_rate": 1.4539118572831754e-05, "loss": 0.7961, "step": 14916 }, { "epoch": 1.1086584912671869, "grad_norm": 2.1109216403727307, "learning_rate": 1.4538403610726292e-05, "loss": 0.6121, "step": 14917 }, { "epoch": 1.108732813080639, "grad_norm": 1.9775398587601845, "learning_rate": 1.4537688619402826e-05, "loss": 0.5949, "step": 14918 }, { "epoch": 1.1088071348940913, "grad_norm": 2.449410930663757, "learning_rate": 1.4536973598865956e-05, "loss": 0.7204, "step": 14919 }, { "epoch": 1.1088814567075436, "grad_norm": 1.8007565019035974, "learning_rate": 1.453625854912028e-05, "loss": 0.4989, "step": 14920 }, { "epoch": 1.108955778520996, "grad_norm": 2.132331885837893, "learning_rate": 1.4535543470170412e-05, "loss": 0.5842, "step": 14921 }, { "epoch": 1.109030100334448, "grad_norm": 1.719409066331317, "learning_rate": 1.453482836202095e-05, "loss": 0.5764, "step": 14922 }, { "epoch": 1.1091044221479005, "grad_norm": 1.7617528720959854, "learning_rate": 1.4534113224676498e-05, "loss": 0.5217, "step": 14923 }, { "epoch": 1.1091787439613527, "grad_norm": 2.0978113405306704, "learning_rate": 1.4533398058141661e-05, "loss": 0.7145, "step": 14924 }, { "epoch": 1.109253065774805, "grad_norm": 1.7652416445145636, "learning_rate": 1.4532682862421039e-05, "loss": 0.5637, "step": 14925 }, { "epoch": 1.1093273875882572, "grad_norm": 1.9574451600533518, "learning_rate": 1.4531967637519242e-05, "loss": 0.7034, "step": 14926 }, { "epoch": 1.1094017094017095, "grad_norm": 2.3447298001734533, "learning_rate": 1.4531252383440873e-05, "loss": 0.6662, "step": 14927 }, { "epoch": 1.1094760312151617, "grad_norm": 1.8685445992952507, "learning_rate": 1.4530537100190535e-05, "loss": 0.5002, "step": 14928 }, { "epoch": 1.109550353028614, "grad_norm": 2.2529797730677323, "learning_rate": 1.4529821787772837e-05, "loss": 0.7726, "step": 14929 }, { "epoch": 1.1096246748420662, "grad_norm": 1.993972374866931, "learning_rate": 1.452910644619238e-05, "loss": 0.6143, "step": 14930 }, { "epoch": 1.1096989966555184, "grad_norm": 2.1225273213020897, "learning_rate": 1.452839107545377e-05, "loss": 0.5907, "step": 14931 }, { "epoch": 1.1097733184689706, "grad_norm": 2.5505110657438363, "learning_rate": 1.4527675675561611e-05, "loss": 0.6916, "step": 14932 }, { "epoch": 1.1098476402824229, "grad_norm": 2.419098056457874, "learning_rate": 1.4526960246520514e-05, "loss": 0.8272, "step": 14933 }, { "epoch": 1.1099219620958751, "grad_norm": 1.8053913940348336, "learning_rate": 1.4526244788335082e-05, "loss": 0.6335, "step": 14934 }, { "epoch": 1.1099962839093274, "grad_norm": 1.7193849711615976, "learning_rate": 1.4525529301009922e-05, "loss": 0.6584, "step": 14935 }, { "epoch": 1.1100706057227796, "grad_norm": 2.7479996492513648, "learning_rate": 1.4524813784549637e-05, "loss": 0.5864, "step": 14936 }, { "epoch": 1.1101449275362318, "grad_norm": 2.0433104341196935, "learning_rate": 1.4524098238958835e-05, "loss": 0.4718, "step": 14937 }, { "epoch": 1.110219249349684, "grad_norm": 1.960597844403164, "learning_rate": 1.4523382664242122e-05, "loss": 0.6237, "step": 14938 }, { "epoch": 1.1102935711631363, "grad_norm": 1.7829670924309182, "learning_rate": 1.452266706040411e-05, "loss": 0.632, "step": 14939 }, { "epoch": 1.1103678929765886, "grad_norm": 1.69272279972128, "learning_rate": 1.4521951427449402e-05, "loss": 0.6018, "step": 14940 }, { "epoch": 1.1104422147900408, "grad_norm": 1.8246784945390504, "learning_rate": 1.4521235765382605e-05, "loss": 0.567, "step": 14941 }, { "epoch": 1.110516536603493, "grad_norm": 1.9925784251038574, "learning_rate": 1.4520520074208323e-05, "loss": 0.6438, "step": 14942 }, { "epoch": 1.1105908584169453, "grad_norm": 1.7764052216938335, "learning_rate": 1.451980435393117e-05, "loss": 0.5231, "step": 14943 }, { "epoch": 1.1106651802303977, "grad_norm": 1.9614833189683956, "learning_rate": 1.451908860455575e-05, "loss": 0.6638, "step": 14944 }, { "epoch": 1.11073950204385, "grad_norm": 1.8620232756736455, "learning_rate": 1.4518372826086674e-05, "loss": 0.5661, "step": 14945 }, { "epoch": 1.1108138238573022, "grad_norm": 2.1586449764960993, "learning_rate": 1.4517657018528548e-05, "loss": 0.5501, "step": 14946 }, { "epoch": 1.1108881456707544, "grad_norm": 5.346515489618783, "learning_rate": 1.4516941181885979e-05, "loss": 0.7479, "step": 14947 }, { "epoch": 1.1109624674842067, "grad_norm": 1.7532086482881364, "learning_rate": 1.4516225316163577e-05, "loss": 0.6859, "step": 14948 }, { "epoch": 1.111036789297659, "grad_norm": 2.138807736905618, "learning_rate": 1.4515509421365952e-05, "loss": 0.6223, "step": 14949 }, { "epoch": 1.1111111111111112, "grad_norm": 2.042949125892563, "learning_rate": 1.451479349749771e-05, "loss": 0.6927, "step": 14950 }, { "epoch": 1.1111854329245634, "grad_norm": 2.3995791862006723, "learning_rate": 1.4514077544563463e-05, "loss": 0.591, "step": 14951 }, { "epoch": 1.1112597547380156, "grad_norm": 2.052097032163525, "learning_rate": 1.451336156256782e-05, "loss": 0.5843, "step": 14952 }, { "epoch": 1.1113340765514679, "grad_norm": 1.5856438927371401, "learning_rate": 1.4512645551515387e-05, "loss": 0.5314, "step": 14953 }, { "epoch": 1.1114083983649201, "grad_norm": 1.9319648471672397, "learning_rate": 1.4511929511410779e-05, "loss": 0.668, "step": 14954 }, { "epoch": 1.1114827201783724, "grad_norm": 2.058691181546367, "learning_rate": 1.45112134422586e-05, "loss": 0.6492, "step": 14955 }, { "epoch": 1.1115570419918246, "grad_norm": 2.0969024592365013, "learning_rate": 1.4510497344063463e-05, "loss": 0.6051, "step": 14956 }, { "epoch": 1.1116313638052768, "grad_norm": 2.1263828263955746, "learning_rate": 1.4509781216829981e-05, "loss": 0.5314, "step": 14957 }, { "epoch": 1.111705685618729, "grad_norm": 1.922085128501206, "learning_rate": 1.450906506056276e-05, "loss": 0.7665, "step": 14958 }, { "epoch": 1.1117800074321813, "grad_norm": 2.0250192393472206, "learning_rate": 1.4508348875266411e-05, "loss": 0.5879, "step": 14959 }, { "epoch": 1.1118543292456335, "grad_norm": 2.018677941265622, "learning_rate": 1.4507632660945547e-05, "loss": 0.6589, "step": 14960 }, { "epoch": 1.1119286510590858, "grad_norm": 2.149000239201936, "learning_rate": 1.4506916417604777e-05, "loss": 0.6236, "step": 14961 }, { "epoch": 1.112002972872538, "grad_norm": 2.147633398353727, "learning_rate": 1.4506200145248715e-05, "loss": 0.6852, "step": 14962 }, { "epoch": 1.1120772946859903, "grad_norm": 2.1553555546250984, "learning_rate": 1.4505483843881968e-05, "loss": 0.6733, "step": 14963 }, { "epoch": 1.1121516164994425, "grad_norm": 1.6237363884225005, "learning_rate": 1.450476751350915e-05, "loss": 0.5691, "step": 14964 }, { "epoch": 1.1122259383128947, "grad_norm": 1.8814967366853064, "learning_rate": 1.4504051154134873e-05, "loss": 0.7243, "step": 14965 }, { "epoch": 1.112300260126347, "grad_norm": 1.7102187585651742, "learning_rate": 1.4503334765763748e-05, "loss": 0.5782, "step": 14966 }, { "epoch": 1.1123745819397994, "grad_norm": 1.6720259080648545, "learning_rate": 1.450261834840039e-05, "loss": 0.5333, "step": 14967 }, { "epoch": 1.1124489037532517, "grad_norm": 1.6623947044833898, "learning_rate": 1.4501901902049406e-05, "loss": 0.6509, "step": 14968 }, { "epoch": 1.112523225566704, "grad_norm": 2.4983136181360868, "learning_rate": 1.4501185426715412e-05, "loss": 0.6972, "step": 14969 }, { "epoch": 1.1125975473801561, "grad_norm": 1.8711194355939365, "learning_rate": 1.450046892240302e-05, "loss": 0.6159, "step": 14970 }, { "epoch": 1.1126718691936084, "grad_norm": 2.0167071581438267, "learning_rate": 1.4499752389116841e-05, "loss": 0.6413, "step": 14971 }, { "epoch": 1.1127461910070606, "grad_norm": 1.8928138723229344, "learning_rate": 1.449903582686149e-05, "loss": 0.6577, "step": 14972 }, { "epoch": 1.1128205128205129, "grad_norm": 1.6613037276543352, "learning_rate": 1.4498319235641583e-05, "loss": 0.5256, "step": 14973 }, { "epoch": 1.112894834633965, "grad_norm": 1.6020931872402304, "learning_rate": 1.4497602615461728e-05, "loss": 0.4486, "step": 14974 }, { "epoch": 1.1129691564474173, "grad_norm": 1.6607505292190126, "learning_rate": 1.4496885966326539e-05, "loss": 0.5261, "step": 14975 }, { "epoch": 1.1130434782608696, "grad_norm": 2.8295589104773238, "learning_rate": 1.4496169288240632e-05, "loss": 0.7715, "step": 14976 }, { "epoch": 1.1131178000743218, "grad_norm": 1.5939975469716092, "learning_rate": 1.449545258120862e-05, "loss": 0.5125, "step": 14977 }, { "epoch": 1.113192121887774, "grad_norm": 1.9367807647370952, "learning_rate": 1.4494735845235121e-05, "loss": 0.6755, "step": 14978 }, { "epoch": 1.1132664437012263, "grad_norm": 1.9925178584630883, "learning_rate": 1.4494019080324747e-05, "loss": 0.5755, "step": 14979 }, { "epoch": 1.1133407655146785, "grad_norm": 2.0770333724376653, "learning_rate": 1.4493302286482106e-05, "loss": 0.6862, "step": 14980 }, { "epoch": 1.1134150873281308, "grad_norm": 1.9526065344594443, "learning_rate": 1.4492585463711819e-05, "loss": 0.7517, "step": 14981 }, { "epoch": 1.113489409141583, "grad_norm": 1.6046646556073032, "learning_rate": 1.4491868612018501e-05, "loss": 0.5882, "step": 14982 }, { "epoch": 1.1135637309550352, "grad_norm": 2.5942953735148895, "learning_rate": 1.4491151731406764e-05, "loss": 0.7804, "step": 14983 }, { "epoch": 1.1136380527684875, "grad_norm": 2.1396538751584258, "learning_rate": 1.4490434821881228e-05, "loss": 0.7669, "step": 14984 }, { "epoch": 1.1137123745819397, "grad_norm": 1.9683804725135583, "learning_rate": 1.4489717883446505e-05, "loss": 0.6424, "step": 14985 }, { "epoch": 1.113786696395392, "grad_norm": 1.70624008903382, "learning_rate": 1.448900091610721e-05, "loss": 0.5303, "step": 14986 }, { "epoch": 1.1138610182088442, "grad_norm": 1.7284017198867128, "learning_rate": 1.448828391986796e-05, "loss": 0.6284, "step": 14987 }, { "epoch": 1.1139353400222967, "grad_norm": 2.1114366723888938, "learning_rate": 1.448756689473337e-05, "loss": 0.6446, "step": 14988 }, { "epoch": 1.1140096618357487, "grad_norm": 2.010782340514567, "learning_rate": 1.4486849840708063e-05, "loss": 0.711, "step": 14989 }, { "epoch": 1.1140839836492011, "grad_norm": 1.844358943122773, "learning_rate": 1.4486132757796645e-05, "loss": 0.4893, "step": 14990 }, { "epoch": 1.1141583054626534, "grad_norm": 2.186093095293984, "learning_rate": 1.4485415646003738e-05, "loss": 0.68, "step": 14991 }, { "epoch": 1.1142326272761056, "grad_norm": 1.9161117208978995, "learning_rate": 1.4484698505333956e-05, "loss": 0.6979, "step": 14992 }, { "epoch": 1.1143069490895579, "grad_norm": 2.1316981931415206, "learning_rate": 1.448398133579192e-05, "loss": 0.458, "step": 14993 }, { "epoch": 1.11438127090301, "grad_norm": 2.304391760063943, "learning_rate": 1.4483264137382243e-05, "loss": 0.7337, "step": 14994 }, { "epoch": 1.1144555927164623, "grad_norm": 2.138613400781558, "learning_rate": 1.4482546910109548e-05, "loss": 0.7058, "step": 14995 }, { "epoch": 1.1145299145299146, "grad_norm": 2.6639827543076633, "learning_rate": 1.4481829653978445e-05, "loss": 0.7551, "step": 14996 }, { "epoch": 1.1146042363433668, "grad_norm": 1.763162989274948, "learning_rate": 1.4481112368993555e-05, "loss": 0.6426, "step": 14997 }, { "epoch": 1.114678558156819, "grad_norm": 2.599874772933614, "learning_rate": 1.4480395055159498e-05, "loss": 0.5941, "step": 14998 }, { "epoch": 1.1147528799702713, "grad_norm": 2.0297074011758776, "learning_rate": 1.4479677712480888e-05, "loss": 0.6278, "step": 14999 }, { "epoch": 1.1148272017837235, "grad_norm": 2.011786271973383, "learning_rate": 1.4478960340962347e-05, "loss": 0.6904, "step": 15000 }, { "epoch": 1.1149015235971758, "grad_norm": 1.8396317694630386, "learning_rate": 1.4478242940608488e-05, "loss": 0.6401, "step": 15001 }, { "epoch": 1.114975845410628, "grad_norm": 1.692766198868059, "learning_rate": 1.447752551142394e-05, "loss": 0.5674, "step": 15002 }, { "epoch": 1.1150501672240802, "grad_norm": 1.9285507037611758, "learning_rate": 1.4476808053413312e-05, "loss": 0.6784, "step": 15003 }, { "epoch": 1.1151244890375325, "grad_norm": 1.7385714107229493, "learning_rate": 1.4476090566581222e-05, "loss": 0.5227, "step": 15004 }, { "epoch": 1.1151988108509847, "grad_norm": 2.2629160373795534, "learning_rate": 1.4475373050932298e-05, "loss": 0.7847, "step": 15005 }, { "epoch": 1.115273132664437, "grad_norm": 2.1201883920213382, "learning_rate": 1.4474655506471154e-05, "loss": 0.6301, "step": 15006 }, { "epoch": 1.1153474544778892, "grad_norm": 1.7709501668239074, "learning_rate": 1.447393793320241e-05, "loss": 0.6327, "step": 15007 }, { "epoch": 1.1154217762913414, "grad_norm": 1.9338756805694464, "learning_rate": 1.4473220331130687e-05, "loss": 0.6519, "step": 15008 }, { "epoch": 1.1154960981047937, "grad_norm": 1.8621077168760254, "learning_rate": 1.4472502700260597e-05, "loss": 0.6456, "step": 15009 }, { "epoch": 1.115570419918246, "grad_norm": 1.5659191296615795, "learning_rate": 1.4471785040596773e-05, "loss": 0.4788, "step": 15010 }, { "epoch": 1.1156447417316984, "grad_norm": 1.8357464639837717, "learning_rate": 1.4471067352143828e-05, "loss": 0.6864, "step": 15011 }, { "epoch": 1.1157190635451506, "grad_norm": 1.7437456632053958, "learning_rate": 1.4470349634906382e-05, "loss": 0.668, "step": 15012 }, { "epoch": 1.1157933853586028, "grad_norm": 1.970493670408802, "learning_rate": 1.446963188888906e-05, "loss": 0.5953, "step": 15013 }, { "epoch": 1.115867707172055, "grad_norm": 1.758553489435938, "learning_rate": 1.4468914114096474e-05, "loss": 0.5768, "step": 15014 }, { "epoch": 1.1159420289855073, "grad_norm": 1.7528647814922722, "learning_rate": 1.4468196310533255e-05, "loss": 0.6321, "step": 15015 }, { "epoch": 1.1160163507989596, "grad_norm": 3.1352257516185422, "learning_rate": 1.4467478478204017e-05, "loss": 0.6648, "step": 15016 }, { "epoch": 1.1160906726124118, "grad_norm": 3.1099557528682493, "learning_rate": 1.446676061711339e-05, "loss": 0.6016, "step": 15017 }, { "epoch": 1.116164994425864, "grad_norm": 1.819139636226548, "learning_rate": 1.4466042727265985e-05, "loss": 0.4873, "step": 15018 }, { "epoch": 1.1162393162393163, "grad_norm": 2.4981374422335305, "learning_rate": 1.4465324808666432e-05, "loss": 0.5594, "step": 15019 }, { "epoch": 1.1163136380527685, "grad_norm": 1.5637129054536953, "learning_rate": 1.4464606861319346e-05, "loss": 0.5294, "step": 15020 }, { "epoch": 1.1163879598662207, "grad_norm": 2.1912669916013097, "learning_rate": 1.4463888885229354e-05, "loss": 0.6163, "step": 15021 }, { "epoch": 1.116462281679673, "grad_norm": 2.06737951446964, "learning_rate": 1.4463170880401079e-05, "loss": 0.6026, "step": 15022 }, { "epoch": 1.1165366034931252, "grad_norm": 2.0948453264179254, "learning_rate": 1.4462452846839139e-05, "loss": 0.7031, "step": 15023 }, { "epoch": 1.1166109253065775, "grad_norm": 2.0924363558390286, "learning_rate": 1.4461734784548162e-05, "loss": 0.7729, "step": 15024 }, { "epoch": 1.1166852471200297, "grad_norm": 2.190898952670279, "learning_rate": 1.4461016693532766e-05, "loss": 0.6456, "step": 15025 }, { "epoch": 1.116759568933482, "grad_norm": 2.0236480083650408, "learning_rate": 1.4460298573797573e-05, "loss": 0.5313, "step": 15026 }, { "epoch": 1.1168338907469342, "grad_norm": 1.952012859397029, "learning_rate": 1.4459580425347213e-05, "loss": 0.5995, "step": 15027 }, { "epoch": 1.1169082125603864, "grad_norm": 1.9244439917894247, "learning_rate": 1.4458862248186306e-05, "loss": 0.5294, "step": 15028 }, { "epoch": 1.1169825343738387, "grad_norm": 1.7852097679642867, "learning_rate": 1.4458144042319474e-05, "loss": 0.7107, "step": 15029 }, { "epoch": 1.117056856187291, "grad_norm": 1.5701550500338093, "learning_rate": 1.4457425807751342e-05, "loss": 0.4644, "step": 15030 }, { "epoch": 1.1171311780007431, "grad_norm": 2.0410939740152174, "learning_rate": 1.4456707544486532e-05, "loss": 0.6843, "step": 15031 }, { "epoch": 1.1172054998141954, "grad_norm": 1.9864293198236398, "learning_rate": 1.4455989252529671e-05, "loss": 0.6304, "step": 15032 }, { "epoch": 1.1172798216276476, "grad_norm": 1.796760438197741, "learning_rate": 1.4455270931885386e-05, "loss": 0.5783, "step": 15033 }, { "epoch": 1.1173541434411, "grad_norm": 1.6566229812823015, "learning_rate": 1.4454552582558294e-05, "loss": 0.5497, "step": 15034 }, { "epoch": 1.1174284652545523, "grad_norm": 2.0665545990527945, "learning_rate": 1.4453834204553024e-05, "loss": 0.661, "step": 15035 }, { "epoch": 1.1175027870680045, "grad_norm": 2.529410231438369, "learning_rate": 1.4453115797874203e-05, "loss": 0.6939, "step": 15036 }, { "epoch": 1.1175771088814568, "grad_norm": 2.012912834048524, "learning_rate": 1.4452397362526452e-05, "loss": 0.6466, "step": 15037 }, { "epoch": 1.117651430694909, "grad_norm": 1.7323233554116788, "learning_rate": 1.44516788985144e-05, "loss": 0.6092, "step": 15038 }, { "epoch": 1.1177257525083613, "grad_norm": 2.140150320918098, "learning_rate": 1.4450960405842665e-05, "loss": 0.5796, "step": 15039 }, { "epoch": 1.1178000743218135, "grad_norm": 2.2249138114720046, "learning_rate": 1.445024188451588e-05, "loss": 0.4829, "step": 15040 }, { "epoch": 1.1178743961352657, "grad_norm": 2.4505605839768587, "learning_rate": 1.444952333453867e-05, "loss": 0.7023, "step": 15041 }, { "epoch": 1.117948717948718, "grad_norm": 2.086311866941125, "learning_rate": 1.4448804755915657e-05, "loss": 0.5004, "step": 15042 }, { "epoch": 1.1180230397621702, "grad_norm": 1.761610141743318, "learning_rate": 1.4448086148651472e-05, "loss": 0.4596, "step": 15043 }, { "epoch": 1.1180973615756225, "grad_norm": 2.1263251170465307, "learning_rate": 1.444736751275074e-05, "loss": 0.6641, "step": 15044 }, { "epoch": 1.1181716833890747, "grad_norm": 2.635665894194872, "learning_rate": 1.4446648848218085e-05, "loss": 0.739, "step": 15045 }, { "epoch": 1.118246005202527, "grad_norm": 1.3914699403845814, "learning_rate": 1.4445930155058136e-05, "loss": 0.4394, "step": 15046 }, { "epoch": 1.1183203270159792, "grad_norm": 1.9596151931555927, "learning_rate": 1.4445211433275522e-05, "loss": 0.6828, "step": 15047 }, { "epoch": 1.1183946488294314, "grad_norm": 1.660787009324103, "learning_rate": 1.4444492682874862e-05, "loss": 0.5229, "step": 15048 }, { "epoch": 1.1184689706428836, "grad_norm": 2.3446136430180626, "learning_rate": 1.4443773903860794e-05, "loss": 0.574, "step": 15049 }, { "epoch": 1.1185432924563359, "grad_norm": 1.7873948405089817, "learning_rate": 1.4443055096237936e-05, "loss": 0.5309, "step": 15050 }, { "epoch": 1.1186176142697881, "grad_norm": 2.040271670627443, "learning_rate": 1.4442336260010921e-05, "loss": 0.67, "step": 15051 }, { "epoch": 1.1186919360832404, "grad_norm": 2.039204678916144, "learning_rate": 1.4441617395184378e-05, "loss": 0.6544, "step": 15052 }, { "epoch": 1.1187662578966926, "grad_norm": 1.8708573751088384, "learning_rate": 1.444089850176293e-05, "loss": 0.6313, "step": 15053 }, { "epoch": 1.1188405797101448, "grad_norm": 1.8951926667803598, "learning_rate": 1.4440179579751208e-05, "loss": 0.6311, "step": 15054 }, { "epoch": 1.118914901523597, "grad_norm": 1.7677928335012252, "learning_rate": 1.4439460629153844e-05, "loss": 0.6218, "step": 15055 }, { "epoch": 1.1189892233370493, "grad_norm": 2.3523414979701376, "learning_rate": 1.4438741649975459e-05, "loss": 0.7586, "step": 15056 }, { "epoch": 1.1190635451505018, "grad_norm": 2.915223661400027, "learning_rate": 1.4438022642220685e-05, "loss": 0.619, "step": 15057 }, { "epoch": 1.119137866963954, "grad_norm": 2.432082667295828, "learning_rate": 1.4437303605894154e-05, "loss": 0.5408, "step": 15058 }, { "epoch": 1.1192121887774062, "grad_norm": 1.9694065642510425, "learning_rate": 1.4436584541000492e-05, "loss": 0.6429, "step": 15059 }, { "epoch": 1.1192865105908585, "grad_norm": 1.8219422991947367, "learning_rate": 1.4435865447544331e-05, "loss": 0.6637, "step": 15060 }, { "epoch": 1.1193608324043107, "grad_norm": 1.568491827733067, "learning_rate": 1.4435146325530294e-05, "loss": 0.472, "step": 15061 }, { "epoch": 1.119435154217763, "grad_norm": 1.8044852304273586, "learning_rate": 1.4434427174963018e-05, "loss": 0.6233, "step": 15062 }, { "epoch": 1.1195094760312152, "grad_norm": 2.196392298826949, "learning_rate": 1.4433707995847127e-05, "loss": 0.6937, "step": 15063 }, { "epoch": 1.1195837978446674, "grad_norm": 1.975875350885171, "learning_rate": 1.4432988788187257e-05, "loss": 0.6809, "step": 15064 }, { "epoch": 1.1196581196581197, "grad_norm": 1.9644571132503321, "learning_rate": 1.4432269551988032e-05, "loss": 0.4594, "step": 15065 }, { "epoch": 1.119732441471572, "grad_norm": 2.0200575248953077, "learning_rate": 1.4431550287254091e-05, "loss": 0.6119, "step": 15066 }, { "epoch": 1.1198067632850242, "grad_norm": 2.437335146087503, "learning_rate": 1.4430830993990053e-05, "loss": 0.6576, "step": 15067 }, { "epoch": 1.1198810850984764, "grad_norm": 1.9496804972961175, "learning_rate": 1.4430111672200557e-05, "loss": 0.5753, "step": 15068 }, { "epoch": 1.1199554069119286, "grad_norm": 2.0436671872597816, "learning_rate": 1.4429392321890232e-05, "loss": 0.6806, "step": 15069 }, { "epoch": 1.1200297287253809, "grad_norm": 1.568264622398676, "learning_rate": 1.4428672943063713e-05, "loss": 0.5423, "step": 15070 }, { "epoch": 1.120104050538833, "grad_norm": 2.0662005181157745, "learning_rate": 1.4427953535725625e-05, "loss": 0.4953, "step": 15071 }, { "epoch": 1.1201783723522853, "grad_norm": 1.5516017507019773, "learning_rate": 1.44272340998806e-05, "loss": 0.4995, "step": 15072 }, { "epoch": 1.1202526941657376, "grad_norm": 1.9224654465312074, "learning_rate": 1.442651463553327e-05, "loss": 0.6802, "step": 15073 }, { "epoch": 1.1203270159791898, "grad_norm": 1.9423993629441603, "learning_rate": 1.4425795142688273e-05, "loss": 0.6499, "step": 15074 }, { "epoch": 1.120401337792642, "grad_norm": 1.6122705400880415, "learning_rate": 1.4425075621350236e-05, "loss": 0.4815, "step": 15075 }, { "epoch": 1.1204756596060943, "grad_norm": 2.372586744320348, "learning_rate": 1.4424356071523789e-05, "loss": 0.654, "step": 15076 }, { "epoch": 1.1205499814195465, "grad_norm": 2.411509820717075, "learning_rate": 1.442363649321357e-05, "loss": 0.5839, "step": 15077 }, { "epoch": 1.120624303232999, "grad_norm": 1.8241921111148351, "learning_rate": 1.4422916886424205e-05, "loss": 0.5672, "step": 15078 }, { "epoch": 1.1206986250464512, "grad_norm": 1.4020733894024524, "learning_rate": 1.4422197251160333e-05, "loss": 0.3748, "step": 15079 }, { "epoch": 1.1207729468599035, "grad_norm": 1.9532632810211776, "learning_rate": 1.4421477587426584e-05, "loss": 0.563, "step": 15080 }, { "epoch": 1.1208472686733557, "grad_norm": 1.5574819489110139, "learning_rate": 1.4420757895227592e-05, "loss": 0.5036, "step": 15081 }, { "epoch": 1.120921590486808, "grad_norm": 1.6031920410658587, "learning_rate": 1.4420038174567995e-05, "loss": 0.45, "step": 15082 }, { "epoch": 1.1209959123002602, "grad_norm": 1.8389597297554676, "learning_rate": 1.4419318425452415e-05, "loss": 0.4974, "step": 15083 }, { "epoch": 1.1210702341137124, "grad_norm": 1.8000056192429528, "learning_rate": 1.4418598647885494e-05, "loss": 0.6432, "step": 15084 }, { "epoch": 1.1211445559271647, "grad_norm": 2.099243927140357, "learning_rate": 1.4417878841871861e-05, "loss": 0.6338, "step": 15085 }, { "epoch": 1.121218877740617, "grad_norm": 1.8932129834871154, "learning_rate": 1.4417159007416157e-05, "loss": 0.5309, "step": 15086 }, { "epoch": 1.1212931995540691, "grad_norm": 1.9542913971753983, "learning_rate": 1.4416439144523013e-05, "loss": 0.6249, "step": 15087 }, { "epoch": 1.1213675213675214, "grad_norm": 2.194724049980827, "learning_rate": 1.4415719253197064e-05, "loss": 0.7454, "step": 15088 }, { "epoch": 1.1214418431809736, "grad_norm": 1.761942461574766, "learning_rate": 1.441499933344294e-05, "loss": 0.4163, "step": 15089 }, { "epoch": 1.1215161649944259, "grad_norm": 2.035185568242915, "learning_rate": 1.4414279385265282e-05, "loss": 0.6199, "step": 15090 }, { "epoch": 1.121590486807878, "grad_norm": 1.5915005694425821, "learning_rate": 1.4413559408668718e-05, "loss": 0.5786, "step": 15091 }, { "epoch": 1.1216648086213303, "grad_norm": 1.7828007231532919, "learning_rate": 1.4412839403657892e-05, "loss": 0.5289, "step": 15092 }, { "epoch": 1.1217391304347826, "grad_norm": 1.7906783974028408, "learning_rate": 1.4412119370237436e-05, "loss": 0.5797, "step": 15093 }, { "epoch": 1.1218134522482348, "grad_norm": 2.130877436489858, "learning_rate": 1.4411399308411982e-05, "loss": 0.5475, "step": 15094 }, { "epoch": 1.121887774061687, "grad_norm": 1.90964776267273, "learning_rate": 1.4410679218186168e-05, "loss": 0.6285, "step": 15095 }, { "epoch": 1.1219620958751393, "grad_norm": 1.8469638147688971, "learning_rate": 1.4409959099564627e-05, "loss": 0.4518, "step": 15096 }, { "epoch": 1.1220364176885915, "grad_norm": 2.0495615972382804, "learning_rate": 1.4409238952552002e-05, "loss": 0.4534, "step": 15097 }, { "epoch": 1.1221107395020438, "grad_norm": 2.095299939569929, "learning_rate": 1.4408518777152926e-05, "loss": 0.6233, "step": 15098 }, { "epoch": 1.122185061315496, "grad_norm": 2.032163583707884, "learning_rate": 1.4407798573372036e-05, "loss": 0.8161, "step": 15099 }, { "epoch": 1.1222593831289482, "grad_norm": 1.9252601550125847, "learning_rate": 1.4407078341213963e-05, "loss": 0.5771, "step": 15100 }, { "epoch": 1.1223337049424007, "grad_norm": 2.2106885200938238, "learning_rate": 1.4406358080683349e-05, "loss": 0.788, "step": 15101 }, { "epoch": 1.122408026755853, "grad_norm": 1.6106664451287775, "learning_rate": 1.4405637791784833e-05, "loss": 0.4909, "step": 15102 }, { "epoch": 1.1224823485693052, "grad_norm": 1.8696025762737574, "learning_rate": 1.4404917474523048e-05, "loss": 0.7, "step": 15103 }, { "epoch": 1.1225566703827574, "grad_norm": 1.8590857335243596, "learning_rate": 1.4404197128902635e-05, "loss": 0.5383, "step": 15104 }, { "epoch": 1.1226309921962097, "grad_norm": 2.0108594650252765, "learning_rate": 1.4403476754928227e-05, "loss": 0.5294, "step": 15105 }, { "epoch": 1.122705314009662, "grad_norm": 1.9842381503456232, "learning_rate": 1.4402756352604463e-05, "loss": 0.7308, "step": 15106 }, { "epoch": 1.1227796358231141, "grad_norm": 1.7750026229878944, "learning_rate": 1.4402035921935984e-05, "loss": 0.6214, "step": 15107 }, { "epoch": 1.1228539576365664, "grad_norm": 2.328067748676677, "learning_rate": 1.4401315462927426e-05, "loss": 0.6312, "step": 15108 }, { "epoch": 1.1229282794500186, "grad_norm": 1.7528730185033499, "learning_rate": 1.4400594975583429e-05, "loss": 0.5432, "step": 15109 }, { "epoch": 1.1230026012634708, "grad_norm": 1.9134048944908773, "learning_rate": 1.4399874459908629e-05, "loss": 0.6427, "step": 15110 }, { "epoch": 1.123076923076923, "grad_norm": 1.9845722276616244, "learning_rate": 1.4399153915907662e-05, "loss": 0.5354, "step": 15111 }, { "epoch": 1.1231512448903753, "grad_norm": 2.064832392612773, "learning_rate": 1.4398433343585174e-05, "loss": 0.6575, "step": 15112 }, { "epoch": 1.1232255667038276, "grad_norm": 1.8700731303193234, "learning_rate": 1.4397712742945797e-05, "loss": 0.6445, "step": 15113 }, { "epoch": 1.1232998885172798, "grad_norm": 1.788066496749601, "learning_rate": 1.4396992113994178e-05, "loss": 0.5568, "step": 15114 }, { "epoch": 1.123374210330732, "grad_norm": 1.8514475564750632, "learning_rate": 1.4396271456734951e-05, "loss": 0.6167, "step": 15115 }, { "epoch": 1.1234485321441843, "grad_norm": 1.7779359004238156, "learning_rate": 1.4395550771172754e-05, "loss": 0.7069, "step": 15116 }, { "epoch": 1.1235228539576365, "grad_norm": 1.7778084528484503, "learning_rate": 1.439483005731223e-05, "loss": 0.5237, "step": 15117 }, { "epoch": 1.1235971757710888, "grad_norm": 1.8045415722446998, "learning_rate": 1.4394109315158018e-05, "loss": 0.5184, "step": 15118 }, { "epoch": 1.123671497584541, "grad_norm": 1.8268073251253443, "learning_rate": 1.439338854471476e-05, "loss": 0.3253, "step": 15119 }, { "epoch": 1.1237458193979932, "grad_norm": 1.9386819324507312, "learning_rate": 1.4392667745987092e-05, "loss": 0.6232, "step": 15120 }, { "epoch": 1.1238201412114455, "grad_norm": 1.5742240831971666, "learning_rate": 1.4391946918979661e-05, "loss": 0.4248, "step": 15121 }, { "epoch": 1.1238944630248977, "grad_norm": 1.6082284774515434, "learning_rate": 1.43912260636971e-05, "loss": 0.6613, "step": 15122 }, { "epoch": 1.12396878483835, "grad_norm": 2.156421955128426, "learning_rate": 1.4390505180144054e-05, "loss": 0.6081, "step": 15123 }, { "epoch": 1.1240431066518024, "grad_norm": 2.163179484812376, "learning_rate": 1.4389784268325162e-05, "loss": 0.5137, "step": 15124 }, { "epoch": 1.1241174284652546, "grad_norm": 2.3846671871841725, "learning_rate": 1.438906332824507e-05, "loss": 0.607, "step": 15125 }, { "epoch": 1.1241917502787069, "grad_norm": 1.7040895506094964, "learning_rate": 1.4388342359908417e-05, "loss": 0.6158, "step": 15126 }, { "epoch": 1.1242660720921591, "grad_norm": 1.7985760447757662, "learning_rate": 1.438762136331984e-05, "loss": 0.6014, "step": 15127 }, { "epoch": 1.1243403939056114, "grad_norm": 2.1391458349201815, "learning_rate": 1.4386900338483984e-05, "loss": 0.4334, "step": 15128 }, { "epoch": 1.1244147157190636, "grad_norm": 2.048668261145069, "learning_rate": 1.4386179285405492e-05, "loss": 0.6016, "step": 15129 }, { "epoch": 1.1244890375325158, "grad_norm": 1.8991926540559336, "learning_rate": 1.4385458204089004e-05, "loss": 0.5604, "step": 15130 }, { "epoch": 1.124563359345968, "grad_norm": 2.341094282932588, "learning_rate": 1.4384737094539168e-05, "loss": 0.6806, "step": 15131 }, { "epoch": 1.1246376811594203, "grad_norm": 2.154982284508539, "learning_rate": 1.4384015956760622e-05, "loss": 0.6272, "step": 15132 }, { "epoch": 1.1247120029728725, "grad_norm": 2.561242108151853, "learning_rate": 1.4383294790758005e-05, "loss": 0.7158, "step": 15133 }, { "epoch": 1.1247863247863248, "grad_norm": 3.538360423080628, "learning_rate": 1.4382573596535965e-05, "loss": 0.7299, "step": 15134 }, { "epoch": 1.124860646599777, "grad_norm": 1.8761411264812133, "learning_rate": 1.4381852374099144e-05, "loss": 0.5811, "step": 15135 }, { "epoch": 1.1249349684132293, "grad_norm": 2.0627821856531043, "learning_rate": 1.4381131123452184e-05, "loss": 0.6618, "step": 15136 }, { "epoch": 1.1250092902266815, "grad_norm": 2.025082597793103, "learning_rate": 1.4380409844599732e-05, "loss": 0.6737, "step": 15137 }, { "epoch": 1.1250836120401337, "grad_norm": 1.5063077634851512, "learning_rate": 1.4379688537546423e-05, "loss": 0.45, "step": 15138 }, { "epoch": 1.125157933853586, "grad_norm": 1.9651421335712091, "learning_rate": 1.437896720229691e-05, "loss": 0.6949, "step": 15139 }, { "epoch": 1.1252322556670382, "grad_norm": 2.1487076541033723, "learning_rate": 1.4378245838855834e-05, "loss": 0.6359, "step": 15140 }, { "epoch": 1.1253065774804905, "grad_norm": 1.697004337924505, "learning_rate": 1.4377524447227836e-05, "loss": 0.4989, "step": 15141 }, { "epoch": 1.1253808992939427, "grad_norm": 1.6168226956248326, "learning_rate": 1.4376803027417565e-05, "loss": 0.5041, "step": 15142 }, { "epoch": 1.125455221107395, "grad_norm": 1.9474936656022768, "learning_rate": 1.4376081579429663e-05, "loss": 0.5001, "step": 15143 }, { "epoch": 1.1255295429208472, "grad_norm": 1.9076834174429573, "learning_rate": 1.4375360103268773e-05, "loss": 0.4816, "step": 15144 }, { "epoch": 1.1256038647342996, "grad_norm": 2.108812065321495, "learning_rate": 1.4374638598939543e-05, "loss": 0.7044, "step": 15145 }, { "epoch": 1.1256781865477516, "grad_norm": 1.9434528497483818, "learning_rate": 1.4373917066446617e-05, "loss": 0.6739, "step": 15146 }, { "epoch": 1.125752508361204, "grad_norm": 1.7760792091929407, "learning_rate": 1.437319550579464e-05, "loss": 0.5797, "step": 15147 }, { "epoch": 1.1258268301746563, "grad_norm": 1.7092885474043422, "learning_rate": 1.4372473916988256e-05, "loss": 0.5601, "step": 15148 }, { "epoch": 1.1259011519881086, "grad_norm": 1.8622109038799597, "learning_rate": 1.4371752300032113e-05, "loss": 0.6019, "step": 15149 }, { "epoch": 1.1259754738015608, "grad_norm": 1.859174753486051, "learning_rate": 1.4371030654930856e-05, "loss": 0.6525, "step": 15150 }, { "epoch": 1.126049795615013, "grad_norm": 1.9157033335178584, "learning_rate": 1.4370308981689128e-05, "loss": 0.5874, "step": 15151 }, { "epoch": 1.1261241174284653, "grad_norm": 1.7945166128705214, "learning_rate": 1.4369587280311581e-05, "loss": 0.4786, "step": 15152 }, { "epoch": 1.1261984392419175, "grad_norm": 2.009029877529081, "learning_rate": 1.4368865550802855e-05, "loss": 0.6827, "step": 15153 }, { "epoch": 1.1262727610553698, "grad_norm": 2.075482950153149, "learning_rate": 1.4368143793167601e-05, "loss": 0.6201, "step": 15154 }, { "epoch": 1.126347082868822, "grad_norm": 1.9254462643427768, "learning_rate": 1.4367422007410463e-05, "loss": 0.6712, "step": 15155 }, { "epoch": 1.1264214046822743, "grad_norm": 2.329793435910836, "learning_rate": 1.4366700193536087e-05, "loss": 0.6335, "step": 15156 }, { "epoch": 1.1264957264957265, "grad_norm": 2.082034562651548, "learning_rate": 1.4365978351549123e-05, "loss": 0.6616, "step": 15157 }, { "epoch": 1.1265700483091787, "grad_norm": 2.16771125813983, "learning_rate": 1.436525648145422e-05, "loss": 0.6143, "step": 15158 }, { "epoch": 1.126644370122631, "grad_norm": 2.0297276306115757, "learning_rate": 1.4364534583256017e-05, "loss": 0.643, "step": 15159 }, { "epoch": 1.1267186919360832, "grad_norm": 1.867467089105224, "learning_rate": 1.4363812656959173e-05, "loss": 0.5616, "step": 15160 }, { "epoch": 1.1267930137495354, "grad_norm": 2.0893732150476954, "learning_rate": 1.4363090702568324e-05, "loss": 0.6407, "step": 15161 }, { "epoch": 1.1268673355629877, "grad_norm": 2.5853651871379144, "learning_rate": 1.4362368720088125e-05, "loss": 0.7823, "step": 15162 }, { "epoch": 1.12694165737644, "grad_norm": 2.1216508555752838, "learning_rate": 1.4361646709523226e-05, "loss": 0.6435, "step": 15163 }, { "epoch": 1.1270159791898922, "grad_norm": 1.7028871272508252, "learning_rate": 1.4360924670878267e-05, "loss": 0.5585, "step": 15164 }, { "epoch": 1.1270903010033444, "grad_norm": 1.7399392009656134, "learning_rate": 1.4360202604157902e-05, "loss": 0.5308, "step": 15165 }, { "epoch": 1.1271646228167966, "grad_norm": 1.8386627317511588, "learning_rate": 1.435948050936678e-05, "loss": 0.4808, "step": 15166 }, { "epoch": 1.1272389446302489, "grad_norm": 2.1233562595653592, "learning_rate": 1.4358758386509549e-05, "loss": 0.706, "step": 15167 }, { "epoch": 1.1273132664437013, "grad_norm": 2.049203789620988, "learning_rate": 1.4358036235590858e-05, "loss": 0.6494, "step": 15168 }, { "epoch": 1.1273875882571534, "grad_norm": 2.1904277791777576, "learning_rate": 1.4357314056615354e-05, "loss": 0.7505, "step": 15169 }, { "epoch": 1.1274619100706058, "grad_norm": 2.09092141377316, "learning_rate": 1.435659184958769e-05, "loss": 0.716, "step": 15170 }, { "epoch": 1.127536231884058, "grad_norm": 2.0999854578420973, "learning_rate": 1.4355869614512514e-05, "loss": 0.5958, "step": 15171 }, { "epoch": 1.1276105536975103, "grad_norm": 1.8592526284194013, "learning_rate": 1.4355147351394474e-05, "loss": 0.6309, "step": 15172 }, { "epoch": 1.1276848755109625, "grad_norm": 1.936735730216794, "learning_rate": 1.4354425060238221e-05, "loss": 0.6066, "step": 15173 }, { "epoch": 1.1277591973244148, "grad_norm": 1.8744638083763612, "learning_rate": 1.4353702741048407e-05, "loss": 0.5734, "step": 15174 }, { "epoch": 1.127833519137867, "grad_norm": 1.841083388704402, "learning_rate": 1.4352980393829678e-05, "loss": 0.6316, "step": 15175 }, { "epoch": 1.1279078409513192, "grad_norm": 1.9203547658495816, "learning_rate": 1.435225801858669e-05, "loss": 0.5376, "step": 15176 }, { "epoch": 1.1279821627647715, "grad_norm": 2.0288264881709095, "learning_rate": 1.435153561532409e-05, "loss": 0.5875, "step": 15177 }, { "epoch": 1.1280564845782237, "grad_norm": 3.787032865474538, "learning_rate": 1.4350813184046528e-05, "loss": 0.6927, "step": 15178 }, { "epoch": 1.128130806391676, "grad_norm": 1.810807985957564, "learning_rate": 1.4350090724758658e-05, "loss": 0.6742, "step": 15179 }, { "epoch": 1.1282051282051282, "grad_norm": 1.9703575155273736, "learning_rate": 1.4349368237465129e-05, "loss": 0.556, "step": 15180 }, { "epoch": 1.1282794500185804, "grad_norm": 1.9554047590678247, "learning_rate": 1.4348645722170594e-05, "loss": 0.4502, "step": 15181 }, { "epoch": 1.1283537718320327, "grad_norm": 1.8534481926292834, "learning_rate": 1.43479231788797e-05, "loss": 0.6359, "step": 15182 }, { "epoch": 1.128428093645485, "grad_norm": 1.9046845874256852, "learning_rate": 1.4347200607597106e-05, "loss": 0.5679, "step": 15183 }, { "epoch": 1.1285024154589371, "grad_norm": 2.836868697908973, "learning_rate": 1.4346478008327461e-05, "loss": 0.7912, "step": 15184 }, { "epoch": 1.1285767372723894, "grad_norm": 2.060047014670673, "learning_rate": 1.4345755381075414e-05, "loss": 0.6707, "step": 15185 }, { "epoch": 1.1286510590858416, "grad_norm": 2.233261429943235, "learning_rate": 1.4345032725845617e-05, "loss": 0.6417, "step": 15186 }, { "epoch": 1.1287253808992939, "grad_norm": 2.6619312347734994, "learning_rate": 1.4344310042642728e-05, "loss": 0.5558, "step": 15187 }, { "epoch": 1.128799702712746, "grad_norm": 2.71746206881081, "learning_rate": 1.4343587331471396e-05, "loss": 0.6353, "step": 15188 }, { "epoch": 1.1288740245261986, "grad_norm": 2.480308536263241, "learning_rate": 1.4342864592336274e-05, "loss": 0.6037, "step": 15189 }, { "epoch": 1.1289483463396506, "grad_norm": 2.1380851205707856, "learning_rate": 1.4342141825242013e-05, "loss": 0.5406, "step": 15190 }, { "epoch": 1.129022668153103, "grad_norm": 1.5974018862655202, "learning_rate": 1.434141903019327e-05, "loss": 0.4563, "step": 15191 }, { "epoch": 1.1290969899665553, "grad_norm": 2.0207363115535935, "learning_rate": 1.4340696207194698e-05, "loss": 0.7211, "step": 15192 }, { "epoch": 1.1291713117800075, "grad_norm": 1.8104078913388066, "learning_rate": 1.4339973356250947e-05, "loss": 0.5589, "step": 15193 }, { "epoch": 1.1292456335934598, "grad_norm": 2.2522838312213707, "learning_rate": 1.4339250477366673e-05, "loss": 0.6624, "step": 15194 }, { "epoch": 1.129319955406912, "grad_norm": 2.0354395942013195, "learning_rate": 1.433852757054653e-05, "loss": 0.5144, "step": 15195 }, { "epoch": 1.1293942772203642, "grad_norm": 1.7807111583692532, "learning_rate": 1.4337804635795171e-05, "loss": 0.5309, "step": 15196 }, { "epoch": 1.1294685990338165, "grad_norm": 2.0430004658180905, "learning_rate": 1.4337081673117252e-05, "loss": 0.7144, "step": 15197 }, { "epoch": 1.1295429208472687, "grad_norm": 2.0591789468306354, "learning_rate": 1.4336358682517423e-05, "loss": 0.5978, "step": 15198 }, { "epoch": 1.129617242660721, "grad_norm": 2.6321048930775173, "learning_rate": 1.4335635664000345e-05, "loss": 0.8162, "step": 15199 }, { "epoch": 1.1296915644741732, "grad_norm": 2.0803956077891494, "learning_rate": 1.4334912617570668e-05, "loss": 0.5633, "step": 15200 }, { "epoch": 1.1297658862876254, "grad_norm": 2.269307666090883, "learning_rate": 1.433418954323305e-05, "loss": 0.6806, "step": 15201 }, { "epoch": 1.1298402081010777, "grad_norm": 1.8670091714028165, "learning_rate": 1.4333466440992145e-05, "loss": 0.6419, "step": 15202 }, { "epoch": 1.12991452991453, "grad_norm": 2.441167435729085, "learning_rate": 1.4332743310852606e-05, "loss": 0.7026, "step": 15203 }, { "epoch": 1.1299888517279821, "grad_norm": 1.6344819702911413, "learning_rate": 1.433202015281909e-05, "loss": 0.5432, "step": 15204 }, { "epoch": 1.1300631735414344, "grad_norm": 1.9066968017165244, "learning_rate": 1.4331296966896258e-05, "loss": 0.6529, "step": 15205 }, { "epoch": 1.1301374953548866, "grad_norm": 2.0521881840105936, "learning_rate": 1.4330573753088754e-05, "loss": 0.5664, "step": 15206 }, { "epoch": 1.1302118171683389, "grad_norm": 2.22912751205508, "learning_rate": 1.4329850511401246e-05, "loss": 0.6473, "step": 15207 }, { "epoch": 1.130286138981791, "grad_norm": 1.9669656908214537, "learning_rate": 1.4329127241838383e-05, "loss": 0.5908, "step": 15208 }, { "epoch": 1.1303604607952433, "grad_norm": 1.9927645477842768, "learning_rate": 1.4328403944404822e-05, "loss": 0.6667, "step": 15209 }, { "epoch": 1.1304347826086956, "grad_norm": 1.8206617369146676, "learning_rate": 1.4327680619105223e-05, "loss": 0.6343, "step": 15210 }, { "epoch": 1.1305091044221478, "grad_norm": 2.644616878825944, "learning_rate": 1.4326957265944242e-05, "loss": 0.6509, "step": 15211 }, { "epoch": 1.1305834262356003, "grad_norm": 1.5775010705007972, "learning_rate": 1.432623388492653e-05, "loss": 0.5229, "step": 15212 }, { "epoch": 1.1306577480490523, "grad_norm": 1.9079280581063125, "learning_rate": 1.4325510476056753e-05, "loss": 0.7205, "step": 15213 }, { "epoch": 1.1307320698625047, "grad_norm": 1.6968512650521297, "learning_rate": 1.4324787039339563e-05, "loss": 0.3793, "step": 15214 }, { "epoch": 1.130806391675957, "grad_norm": 1.8285620320655165, "learning_rate": 1.4324063574779617e-05, "loss": 0.5063, "step": 15215 }, { "epoch": 1.1308807134894092, "grad_norm": 1.923473531581058, "learning_rate": 1.4323340082381574e-05, "loss": 0.656, "step": 15216 }, { "epoch": 1.1309550353028615, "grad_norm": 1.8485813706969372, "learning_rate": 1.4322616562150093e-05, "loss": 0.5495, "step": 15217 }, { "epoch": 1.1310293571163137, "grad_norm": 1.8703001475827221, "learning_rate": 1.4321893014089832e-05, "loss": 0.6708, "step": 15218 }, { "epoch": 1.131103678929766, "grad_norm": 1.4856289332466763, "learning_rate": 1.4321169438205448e-05, "loss": 0.5805, "step": 15219 }, { "epoch": 1.1311780007432182, "grad_norm": 2.142383647593811, "learning_rate": 1.4320445834501595e-05, "loss": 0.6158, "step": 15220 }, { "epoch": 1.1312523225566704, "grad_norm": 1.6467666742255587, "learning_rate": 1.431972220298294e-05, "loss": 0.5224, "step": 15221 }, { "epoch": 1.1313266443701226, "grad_norm": 2.219599401004995, "learning_rate": 1.4318998543654137e-05, "loss": 0.4995, "step": 15222 }, { "epoch": 1.1314009661835749, "grad_norm": 2.3055438850816, "learning_rate": 1.4318274856519845e-05, "loss": 0.544, "step": 15223 }, { "epoch": 1.1314752879970271, "grad_norm": 1.9475413605013325, "learning_rate": 1.4317551141584724e-05, "loss": 0.6659, "step": 15224 }, { "epoch": 1.1315496098104794, "grad_norm": 1.90163338238777, "learning_rate": 1.4316827398853433e-05, "loss": 0.615, "step": 15225 }, { "epoch": 1.1316239316239316, "grad_norm": 2.1782146176487034, "learning_rate": 1.431610362833063e-05, "loss": 0.7208, "step": 15226 }, { "epoch": 1.1316982534373838, "grad_norm": 1.940649406040461, "learning_rate": 1.4315379830020974e-05, "loss": 0.6217, "step": 15227 }, { "epoch": 1.131772575250836, "grad_norm": 1.842519407974134, "learning_rate": 1.431465600392913e-05, "loss": 0.551, "step": 15228 }, { "epoch": 1.1318468970642883, "grad_norm": 2.201455425997795, "learning_rate": 1.4313932150059755e-05, "loss": 0.6456, "step": 15229 }, { "epoch": 1.1319212188777406, "grad_norm": 1.9816952318762673, "learning_rate": 1.4313208268417506e-05, "loss": 0.7082, "step": 15230 }, { "epoch": 1.1319955406911928, "grad_norm": 2.0785025852310723, "learning_rate": 1.4312484359007046e-05, "loss": 0.7211, "step": 15231 }, { "epoch": 1.132069862504645, "grad_norm": 1.974708881905847, "learning_rate": 1.4311760421833036e-05, "loss": 0.6087, "step": 15232 }, { "epoch": 1.1321441843180973, "grad_norm": 2.0695620326952096, "learning_rate": 1.4311036456900138e-05, "loss": 0.6487, "step": 15233 }, { "epoch": 1.1322185061315495, "grad_norm": 1.8466965058300258, "learning_rate": 1.431031246421301e-05, "loss": 0.5454, "step": 15234 }, { "epoch": 1.132292827945002, "grad_norm": 2.230473367179969, "learning_rate": 1.4309588443776314e-05, "loss": 0.5554, "step": 15235 }, { "epoch": 1.132367149758454, "grad_norm": 1.9079931149700167, "learning_rate": 1.4308864395594711e-05, "loss": 0.5244, "step": 15236 }, { "epoch": 1.1324414715719064, "grad_norm": 2.2072235236215705, "learning_rate": 1.430814031967286e-05, "loss": 0.7465, "step": 15237 }, { "epoch": 1.1325157933853587, "grad_norm": 1.951392704857902, "learning_rate": 1.4307416216015428e-05, "loss": 0.6747, "step": 15238 }, { "epoch": 1.132590115198811, "grad_norm": 2.530573243042242, "learning_rate": 1.4306692084627075e-05, "loss": 0.8104, "step": 15239 }, { "epoch": 1.1326644370122632, "grad_norm": 1.901798790964279, "learning_rate": 1.430596792551246e-05, "loss": 0.5915, "step": 15240 }, { "epoch": 1.1327387588257154, "grad_norm": 1.7329091786082516, "learning_rate": 1.4305243738676246e-05, "loss": 0.518, "step": 15241 }, { "epoch": 1.1328130806391676, "grad_norm": 2.1343030148868727, "learning_rate": 1.4304519524123097e-05, "loss": 0.6616, "step": 15242 }, { "epoch": 1.1328874024526199, "grad_norm": 2.2005141540841264, "learning_rate": 1.4303795281857675e-05, "loss": 0.7617, "step": 15243 }, { "epoch": 1.1329617242660721, "grad_norm": 1.8211350886145248, "learning_rate": 1.4303071011884642e-05, "loss": 0.6291, "step": 15244 }, { "epoch": 1.1330360460795244, "grad_norm": 1.9438068704921465, "learning_rate": 1.4302346714208663e-05, "loss": 0.6916, "step": 15245 }, { "epoch": 1.1331103678929766, "grad_norm": 1.5717524922685038, "learning_rate": 1.43016223888344e-05, "loss": 0.4291, "step": 15246 }, { "epoch": 1.1331846897064288, "grad_norm": 2.32401529155445, "learning_rate": 1.430089803576651e-05, "loss": 0.6917, "step": 15247 }, { "epoch": 1.133259011519881, "grad_norm": 1.9931004501265002, "learning_rate": 1.4300173655009664e-05, "loss": 0.7266, "step": 15248 }, { "epoch": 1.1333333333333333, "grad_norm": 1.9405470625752308, "learning_rate": 1.4299449246568524e-05, "loss": 0.7023, "step": 15249 }, { "epoch": 1.1334076551467855, "grad_norm": 2.172741717654473, "learning_rate": 1.4298724810447753e-05, "loss": 0.7322, "step": 15250 }, { "epoch": 1.1334819769602378, "grad_norm": 1.6092364493369193, "learning_rate": 1.4298000346652018e-05, "loss": 0.4697, "step": 15251 }, { "epoch": 1.13355629877369, "grad_norm": 1.792029238629715, "learning_rate": 1.4297275855185973e-05, "loss": 0.6046, "step": 15252 }, { "epoch": 1.1336306205871423, "grad_norm": 2.284659537898519, "learning_rate": 1.429655133605429e-05, "loss": 0.7486, "step": 15253 }, { "epoch": 1.1337049424005945, "grad_norm": 1.6434737779490476, "learning_rate": 1.4295826789261633e-05, "loss": 0.4978, "step": 15254 }, { "epoch": 1.1337792642140467, "grad_norm": 1.98862605179216, "learning_rate": 1.4295102214812668e-05, "loss": 0.6077, "step": 15255 }, { "epoch": 1.1338535860274992, "grad_norm": 1.5417440611390203, "learning_rate": 1.4294377612712059e-05, "loss": 0.5148, "step": 15256 }, { "epoch": 1.1339279078409512, "grad_norm": 2.124372214543777, "learning_rate": 1.4293652982964468e-05, "loss": 0.5218, "step": 15257 }, { "epoch": 1.1340022296544037, "grad_norm": 1.8233248450113495, "learning_rate": 1.4292928325574562e-05, "loss": 0.6605, "step": 15258 }, { "epoch": 1.134076551467856, "grad_norm": 1.8984484858102364, "learning_rate": 1.4292203640547004e-05, "loss": 0.5878, "step": 15259 }, { "epoch": 1.1341508732813081, "grad_norm": 1.4304457769799424, "learning_rate": 1.4291478927886464e-05, "loss": 0.337, "step": 15260 }, { "epoch": 1.1342251950947604, "grad_norm": 2.2414793070320296, "learning_rate": 1.4290754187597606e-05, "loss": 0.6496, "step": 15261 }, { "epoch": 1.1342995169082126, "grad_norm": 1.7594091383819392, "learning_rate": 1.4290029419685094e-05, "loss": 0.7117, "step": 15262 }, { "epoch": 1.1343738387216649, "grad_norm": 1.8039537573739135, "learning_rate": 1.4289304624153595e-05, "loss": 0.6499, "step": 15263 }, { "epoch": 1.134448160535117, "grad_norm": 1.961217582699373, "learning_rate": 1.4288579801007776e-05, "loss": 0.7295, "step": 15264 }, { "epoch": 1.1345224823485693, "grad_norm": 1.9255944496401938, "learning_rate": 1.4287854950252303e-05, "loss": 0.6064, "step": 15265 }, { "epoch": 1.1345968041620216, "grad_norm": 2.17813800049953, "learning_rate": 1.428713007189184e-05, "loss": 0.6333, "step": 15266 }, { "epoch": 1.1346711259754738, "grad_norm": 2.2377372622475016, "learning_rate": 1.4286405165931059e-05, "loss": 0.5796, "step": 15267 }, { "epoch": 1.134745447788926, "grad_norm": 2.1514849484098675, "learning_rate": 1.4285680232374624e-05, "loss": 0.6475, "step": 15268 }, { "epoch": 1.1348197696023783, "grad_norm": 2.0052713549531402, "learning_rate": 1.4284955271227198e-05, "loss": 0.5586, "step": 15269 }, { "epoch": 1.1348940914158305, "grad_norm": 1.934402737482475, "learning_rate": 1.4284230282493457e-05, "loss": 0.5759, "step": 15270 }, { "epoch": 1.1349684132292828, "grad_norm": 2.2323683602352986, "learning_rate": 1.4283505266178059e-05, "loss": 0.6768, "step": 15271 }, { "epoch": 1.135042735042735, "grad_norm": 2.005126724029499, "learning_rate": 1.428278022228568e-05, "loss": 0.632, "step": 15272 }, { "epoch": 1.1351170568561872, "grad_norm": 2.4845740621873174, "learning_rate": 1.4282055150820986e-05, "loss": 0.7204, "step": 15273 }, { "epoch": 1.1351913786696395, "grad_norm": 1.725302347739883, "learning_rate": 1.428133005178864e-05, "loss": 0.4903, "step": 15274 }, { "epoch": 1.1352657004830917, "grad_norm": 3.5097572760193274, "learning_rate": 1.4280604925193313e-05, "loss": 0.7219, "step": 15275 }, { "epoch": 1.135340022296544, "grad_norm": 1.992950040865562, "learning_rate": 1.4279879771039673e-05, "loss": 0.7223, "step": 15276 }, { "epoch": 1.1354143441099962, "grad_norm": 2.1614077250088966, "learning_rate": 1.4279154589332388e-05, "loss": 0.6371, "step": 15277 }, { "epoch": 1.1354886659234484, "grad_norm": 1.4740837918676246, "learning_rate": 1.4278429380076133e-05, "loss": 0.4644, "step": 15278 }, { "epoch": 1.135562987736901, "grad_norm": 1.8696752158017358, "learning_rate": 1.427770414327557e-05, "loss": 0.6408, "step": 15279 }, { "epoch": 1.135637309550353, "grad_norm": 2.135908819131156, "learning_rate": 1.4276978878935368e-05, "loss": 0.69, "step": 15280 }, { "epoch": 1.1357116313638054, "grad_norm": 1.8620485050323057, "learning_rate": 1.4276253587060198e-05, "loss": 0.6585, "step": 15281 }, { "epoch": 1.1357859531772576, "grad_norm": 2.2810773347711972, "learning_rate": 1.4275528267654728e-05, "loss": 0.7381, "step": 15282 }, { "epoch": 1.1358602749907099, "grad_norm": 2.1177016267953075, "learning_rate": 1.4274802920723631e-05, "loss": 0.6651, "step": 15283 }, { "epoch": 1.135934596804162, "grad_norm": 1.7337548619026732, "learning_rate": 1.4274077546271576e-05, "loss": 0.5453, "step": 15284 }, { "epoch": 1.1360089186176143, "grad_norm": 2.27943412165333, "learning_rate": 1.427335214430323e-05, "loss": 0.6817, "step": 15285 }, { "epoch": 1.1360832404310666, "grad_norm": 2.019471178449751, "learning_rate": 1.4272626714823263e-05, "loss": 0.6519, "step": 15286 }, { "epoch": 1.1361575622445188, "grad_norm": 3.6525694457849944, "learning_rate": 1.4271901257836347e-05, "loss": 0.4903, "step": 15287 }, { "epoch": 1.136231884057971, "grad_norm": 2.491942631625563, "learning_rate": 1.4271175773347157e-05, "loss": 0.6748, "step": 15288 }, { "epoch": 1.1363062058714233, "grad_norm": 2.5362780137700818, "learning_rate": 1.4270450261360354e-05, "loss": 0.676, "step": 15289 }, { "epoch": 1.1363805276848755, "grad_norm": 1.9922159731179445, "learning_rate": 1.4269724721880616e-05, "loss": 0.6066, "step": 15290 }, { "epoch": 1.1364548494983278, "grad_norm": 1.6699150334917157, "learning_rate": 1.4268999154912611e-05, "loss": 0.5899, "step": 15291 }, { "epoch": 1.13652917131178, "grad_norm": 2.280134154098563, "learning_rate": 1.426827356046101e-05, "loss": 0.6365, "step": 15292 }, { "epoch": 1.1366034931252322, "grad_norm": 2.0229031403385447, "learning_rate": 1.4267547938530487e-05, "loss": 0.7173, "step": 15293 }, { "epoch": 1.1366778149386845, "grad_norm": 1.9181533633029284, "learning_rate": 1.426682228912571e-05, "loss": 0.6822, "step": 15294 }, { "epoch": 1.1367521367521367, "grad_norm": 1.6863014179324567, "learning_rate": 1.4266096612251352e-05, "loss": 0.5992, "step": 15295 }, { "epoch": 1.136826458565589, "grad_norm": 1.6217277565415043, "learning_rate": 1.426537090791209e-05, "loss": 0.622, "step": 15296 }, { "epoch": 1.1369007803790412, "grad_norm": 2.1865126802833985, "learning_rate": 1.4264645176112589e-05, "loss": 0.6463, "step": 15297 }, { "epoch": 1.1369751021924934, "grad_norm": 2.14711965675689, "learning_rate": 1.4263919416857522e-05, "loss": 0.6455, "step": 15298 }, { "epoch": 1.1370494240059457, "grad_norm": 1.877306928989751, "learning_rate": 1.4263193630151564e-05, "loss": 0.5544, "step": 15299 }, { "epoch": 1.137123745819398, "grad_norm": 3.085457231207979, "learning_rate": 1.4262467815999385e-05, "loss": 0.692, "step": 15300 }, { "epoch": 1.1371980676328501, "grad_norm": 2.2884349541496736, "learning_rate": 1.4261741974405662e-05, "loss": 0.6947, "step": 15301 }, { "epoch": 1.1372723894463026, "grad_norm": 1.7596837777858525, "learning_rate": 1.4261016105375064e-05, "loss": 0.5776, "step": 15302 }, { "epoch": 1.1373467112597546, "grad_norm": 1.9817386828667913, "learning_rate": 1.4260290208912266e-05, "loss": 0.6158, "step": 15303 }, { "epoch": 1.137421033073207, "grad_norm": 1.57035095193618, "learning_rate": 1.4259564285021941e-05, "loss": 0.5204, "step": 15304 }, { "epoch": 1.1374953548866593, "grad_norm": 1.8265879471315056, "learning_rate": 1.4258838333708762e-05, "loss": 0.7649, "step": 15305 }, { "epoch": 1.1375696767001116, "grad_norm": 1.9785505891262645, "learning_rate": 1.4258112354977401e-05, "loss": 0.7414, "step": 15306 }, { "epoch": 1.1376439985135638, "grad_norm": 2.468684373227947, "learning_rate": 1.4257386348832535e-05, "loss": 0.7634, "step": 15307 }, { "epoch": 1.137718320327016, "grad_norm": 4.66227712978582, "learning_rate": 1.4256660315278837e-05, "loss": 0.6571, "step": 15308 }, { "epoch": 1.1377926421404683, "grad_norm": 2.189843737097011, "learning_rate": 1.425593425432098e-05, "loss": 0.8083, "step": 15309 }, { "epoch": 1.1378669639539205, "grad_norm": 1.69033755178836, "learning_rate": 1.4255208165963642e-05, "loss": 0.5873, "step": 15310 }, { "epoch": 1.1379412857673727, "grad_norm": 1.9253933971323507, "learning_rate": 1.425448205021149e-05, "loss": 0.6757, "step": 15311 }, { "epoch": 1.138015607580825, "grad_norm": 1.8000220527748123, "learning_rate": 1.4253755907069207e-05, "loss": 0.5974, "step": 15312 }, { "epoch": 1.1380899293942772, "grad_norm": 1.7720855737744265, "learning_rate": 1.4253029736541462e-05, "loss": 0.5218, "step": 15313 }, { "epoch": 1.1381642512077295, "grad_norm": 2.3241609366999767, "learning_rate": 1.425230353863293e-05, "loss": 0.7782, "step": 15314 }, { "epoch": 1.1382385730211817, "grad_norm": 2.0872759337310764, "learning_rate": 1.4251577313348293e-05, "loss": 0.6206, "step": 15315 }, { "epoch": 1.138312894834634, "grad_norm": 2.0220676592032554, "learning_rate": 1.4250851060692218e-05, "loss": 0.483, "step": 15316 }, { "epoch": 1.1383872166480862, "grad_norm": 2.4372260125592744, "learning_rate": 1.4250124780669388e-05, "loss": 0.7701, "step": 15317 }, { "epoch": 1.1384615384615384, "grad_norm": 1.5334777731551272, "learning_rate": 1.4249398473284474e-05, "loss": 0.4433, "step": 15318 }, { "epoch": 1.1385358602749907, "grad_norm": 1.9098276109190098, "learning_rate": 1.4248672138542152e-05, "loss": 0.4842, "step": 15319 }, { "epoch": 1.138610182088443, "grad_norm": 2.2925140905245396, "learning_rate": 1.42479457764471e-05, "loss": 0.7202, "step": 15320 }, { "epoch": 1.1386845039018951, "grad_norm": 2.0437793107267748, "learning_rate": 1.4247219387003994e-05, "loss": 0.602, "step": 15321 }, { "epoch": 1.1387588257153474, "grad_norm": 2.610508257585556, "learning_rate": 1.424649297021751e-05, "loss": 0.7635, "step": 15322 }, { "epoch": 1.1388331475287998, "grad_norm": 2.4910128151255218, "learning_rate": 1.4245766526092321e-05, "loss": 0.6396, "step": 15323 }, { "epoch": 1.1389074693422518, "grad_norm": 1.6805646590174022, "learning_rate": 1.4245040054633115e-05, "loss": 0.5023, "step": 15324 }, { "epoch": 1.1389817911557043, "grad_norm": 1.6920204643413956, "learning_rate": 1.4244313555844556e-05, "loss": 0.4521, "step": 15325 }, { "epoch": 1.1390561129691565, "grad_norm": 2.0222472210641995, "learning_rate": 1.4243587029731325e-05, "loss": 0.7295, "step": 15326 }, { "epoch": 1.1391304347826088, "grad_norm": 1.6113802508327255, "learning_rate": 1.4242860476298105e-05, "loss": 0.5002, "step": 15327 }, { "epoch": 1.139204756596061, "grad_norm": 1.7792377993174682, "learning_rate": 1.4242133895549565e-05, "loss": 0.6796, "step": 15328 }, { "epoch": 1.1392790784095133, "grad_norm": 2.0586505830571693, "learning_rate": 1.4241407287490392e-05, "loss": 0.7467, "step": 15329 }, { "epoch": 1.1393534002229655, "grad_norm": 1.9348195547269693, "learning_rate": 1.4240680652125256e-05, "loss": 0.659, "step": 15330 }, { "epoch": 1.1394277220364177, "grad_norm": 1.9828307424587837, "learning_rate": 1.423995398945884e-05, "loss": 0.6577, "step": 15331 }, { "epoch": 1.13950204384987, "grad_norm": 2.0622898310893376, "learning_rate": 1.4239227299495818e-05, "loss": 0.7281, "step": 15332 }, { "epoch": 1.1395763656633222, "grad_norm": 1.959642555325847, "learning_rate": 1.4238500582240872e-05, "loss": 0.6891, "step": 15333 }, { "epoch": 1.1396506874767744, "grad_norm": 1.9550507893092886, "learning_rate": 1.4237773837698679e-05, "loss": 0.6506, "step": 15334 }, { "epoch": 1.1397250092902267, "grad_norm": 1.7417761156978622, "learning_rate": 1.4237047065873916e-05, "loss": 0.5687, "step": 15335 }, { "epoch": 1.139799331103679, "grad_norm": 1.9065576435994607, "learning_rate": 1.4236320266771266e-05, "loss": 0.604, "step": 15336 }, { "epoch": 1.1398736529171312, "grad_norm": 2.155091257677686, "learning_rate": 1.4235593440395405e-05, "loss": 0.6658, "step": 15337 }, { "epoch": 1.1399479747305834, "grad_norm": 1.8752406945539954, "learning_rate": 1.4234866586751015e-05, "loss": 0.5196, "step": 15338 }, { "epoch": 1.1400222965440356, "grad_norm": 1.7819541413959126, "learning_rate": 1.4234139705842772e-05, "loss": 0.5655, "step": 15339 }, { "epoch": 1.1400966183574879, "grad_norm": 1.8041900495440426, "learning_rate": 1.4233412797675356e-05, "loss": 0.4213, "step": 15340 }, { "epoch": 1.1401709401709401, "grad_norm": 2.039542342294865, "learning_rate": 1.423268586225345e-05, "loss": 0.6529, "step": 15341 }, { "epoch": 1.1402452619843924, "grad_norm": 2.3472964446863207, "learning_rate": 1.4231958899581733e-05, "loss": 0.6394, "step": 15342 }, { "epoch": 1.1403195837978446, "grad_norm": 1.919445981631608, "learning_rate": 1.4231231909664883e-05, "loss": 0.6035, "step": 15343 }, { "epoch": 1.1403939056112968, "grad_norm": 2.01324851257891, "learning_rate": 1.4230504892507581e-05, "loss": 0.6248, "step": 15344 }, { "epoch": 1.140468227424749, "grad_norm": 1.7484094248807847, "learning_rate": 1.4229777848114508e-05, "loss": 0.5546, "step": 15345 }, { "epoch": 1.1405425492382015, "grad_norm": 1.7941597775116342, "learning_rate": 1.4229050776490345e-05, "loss": 0.5686, "step": 15346 }, { "epoch": 1.1406168710516535, "grad_norm": 1.5462952646966048, "learning_rate": 1.4228323677639773e-05, "loss": 0.5082, "step": 15347 }, { "epoch": 1.140691192865106, "grad_norm": 1.792726778054504, "learning_rate": 1.422759655156747e-05, "loss": 0.6064, "step": 15348 }, { "epoch": 1.1407655146785582, "grad_norm": 2.236172045812821, "learning_rate": 1.4226869398278122e-05, "loss": 0.6072, "step": 15349 }, { "epoch": 1.1408398364920105, "grad_norm": 2.102836166828355, "learning_rate": 1.4226142217776408e-05, "loss": 0.7258, "step": 15350 }, { "epoch": 1.1409141583054627, "grad_norm": 2.3051687284504427, "learning_rate": 1.4225415010067009e-05, "loss": 0.6596, "step": 15351 }, { "epoch": 1.140988480118915, "grad_norm": 1.7369861439708167, "learning_rate": 1.4224687775154611e-05, "loss": 0.5266, "step": 15352 }, { "epoch": 1.1410628019323672, "grad_norm": 2.1518091272262736, "learning_rate": 1.4223960513043889e-05, "loss": 0.6779, "step": 15353 }, { "epoch": 1.1411371237458194, "grad_norm": 1.834221014952274, "learning_rate": 1.422323322373953e-05, "loss": 0.748, "step": 15354 }, { "epoch": 1.1412114455592717, "grad_norm": 1.6672297213329448, "learning_rate": 1.422250590724621e-05, "loss": 0.5922, "step": 15355 }, { "epoch": 1.141285767372724, "grad_norm": 2.095687627731361, "learning_rate": 1.422177856356862e-05, "loss": 0.7209, "step": 15356 }, { "epoch": 1.1413600891861762, "grad_norm": 2.1555453927613364, "learning_rate": 1.4221051192711437e-05, "loss": 0.5561, "step": 15357 }, { "epoch": 1.1414344109996284, "grad_norm": 2.062162020885539, "learning_rate": 1.4220323794679346e-05, "loss": 0.5934, "step": 15358 }, { "epoch": 1.1415087328130806, "grad_norm": 1.7429394035737358, "learning_rate": 1.4219596369477031e-05, "loss": 0.552, "step": 15359 }, { "epoch": 1.1415830546265329, "grad_norm": 2.5663207285947096, "learning_rate": 1.4218868917109175e-05, "loss": 0.709, "step": 15360 }, { "epoch": 1.141657376439985, "grad_norm": 1.8363356141613947, "learning_rate": 1.4218141437580455e-05, "loss": 0.5825, "step": 15361 }, { "epoch": 1.1417316982534373, "grad_norm": 1.8927281022374807, "learning_rate": 1.4217413930895561e-05, "loss": 0.6502, "step": 15362 }, { "epoch": 1.1418060200668896, "grad_norm": 1.8773503559009221, "learning_rate": 1.4216686397059177e-05, "loss": 0.7228, "step": 15363 }, { "epoch": 1.1418803418803418, "grad_norm": 1.7960769418329734, "learning_rate": 1.4215958836075982e-05, "loss": 0.6679, "step": 15364 }, { "epoch": 1.141954663693794, "grad_norm": 1.7975180740477936, "learning_rate": 1.4215231247950664e-05, "loss": 0.5687, "step": 15365 }, { "epoch": 1.1420289855072463, "grad_norm": 1.9376112750279364, "learning_rate": 1.4214503632687908e-05, "loss": 0.6444, "step": 15366 }, { "epoch": 1.1421033073206985, "grad_norm": 1.9462997208496113, "learning_rate": 1.4213775990292393e-05, "loss": 0.6261, "step": 15367 }, { "epoch": 1.1421776291341508, "grad_norm": 1.7685903399513778, "learning_rate": 1.4213048320768808e-05, "loss": 0.6706, "step": 15368 }, { "epoch": 1.1422519509476032, "grad_norm": 2.6907693209384504, "learning_rate": 1.4212320624121839e-05, "loss": 0.8706, "step": 15369 }, { "epoch": 1.1423262727610553, "grad_norm": 1.8643474013813621, "learning_rate": 1.4211592900356164e-05, "loss": 0.6407, "step": 15370 }, { "epoch": 1.1424005945745077, "grad_norm": 1.862473608411932, "learning_rate": 1.4210865149476476e-05, "loss": 0.7048, "step": 15371 }, { "epoch": 1.14247491638796, "grad_norm": 2.3249609415447723, "learning_rate": 1.4210137371487454e-05, "loss": 0.6574, "step": 15372 }, { "epoch": 1.1425492382014122, "grad_norm": 2.2758993681608923, "learning_rate": 1.4209409566393786e-05, "loss": 0.7334, "step": 15373 }, { "epoch": 1.1426235600148644, "grad_norm": 2.1947274746781327, "learning_rate": 1.4208681734200159e-05, "loss": 0.6759, "step": 15374 }, { "epoch": 1.1426978818283167, "grad_norm": 1.612374767233597, "learning_rate": 1.4207953874911256e-05, "loss": 0.47, "step": 15375 }, { "epoch": 1.142772203641769, "grad_norm": 1.7358303496473568, "learning_rate": 1.4207225988531768e-05, "loss": 0.6325, "step": 15376 }, { "epoch": 1.1428465254552211, "grad_norm": 2.64185527901332, "learning_rate": 1.4206498075066372e-05, "loss": 0.6667, "step": 15377 }, { "epoch": 1.1429208472686734, "grad_norm": 2.1388155615178475, "learning_rate": 1.4205770134519762e-05, "loss": 0.6814, "step": 15378 }, { "epoch": 1.1429951690821256, "grad_norm": 1.9993718245430152, "learning_rate": 1.4205042166896623e-05, "loss": 0.6424, "step": 15379 }, { "epoch": 1.1430694908955779, "grad_norm": 2.196358017131669, "learning_rate": 1.4204314172201641e-05, "loss": 0.7933, "step": 15380 }, { "epoch": 1.14314381270903, "grad_norm": 2.1575325313050717, "learning_rate": 1.4203586150439501e-05, "loss": 0.7486, "step": 15381 }, { "epoch": 1.1432181345224823, "grad_norm": 2.053974214233758, "learning_rate": 1.4202858101614893e-05, "loss": 0.6331, "step": 15382 }, { "epoch": 1.1432924563359346, "grad_norm": 1.6510850945516045, "learning_rate": 1.4202130025732501e-05, "loss": 0.5433, "step": 15383 }, { "epoch": 1.1433667781493868, "grad_norm": 1.832520777731086, "learning_rate": 1.4201401922797016e-05, "loss": 0.7373, "step": 15384 }, { "epoch": 1.143441099962839, "grad_norm": 2.2110740828840627, "learning_rate": 1.420067379281312e-05, "loss": 0.7139, "step": 15385 }, { "epoch": 1.1435154217762913, "grad_norm": 2.1796820409113593, "learning_rate": 1.4199945635785508e-05, "loss": 0.6587, "step": 15386 }, { "epoch": 1.1435897435897435, "grad_norm": 2.2045029780180516, "learning_rate": 1.4199217451718864e-05, "loss": 0.6961, "step": 15387 }, { "epoch": 1.1436640654031958, "grad_norm": 2.1854705362900195, "learning_rate": 1.4198489240617872e-05, "loss": 0.7544, "step": 15388 }, { "epoch": 1.143738387216648, "grad_norm": 2.0513174532724103, "learning_rate": 1.4197761002487227e-05, "loss": 0.4853, "step": 15389 }, { "epoch": 1.1438127090301002, "grad_norm": 2.286931479209067, "learning_rate": 1.4197032737331614e-05, "loss": 0.8095, "step": 15390 }, { "epoch": 1.1438870308435525, "grad_norm": 1.8490559113977667, "learning_rate": 1.419630444515572e-05, "loss": 0.699, "step": 15391 }, { "epoch": 1.143961352657005, "grad_norm": 2.2642460479888253, "learning_rate": 1.4195576125964236e-05, "loss": 0.6851, "step": 15392 }, { "epoch": 1.1440356744704572, "grad_norm": 1.8990968526275664, "learning_rate": 1.4194847779761855e-05, "loss": 0.5894, "step": 15393 }, { "epoch": 1.1441099962839094, "grad_norm": 1.697396554349911, "learning_rate": 1.4194119406553257e-05, "loss": 0.4387, "step": 15394 }, { "epoch": 1.1441843180973617, "grad_norm": 1.8100712346071781, "learning_rate": 1.4193391006343137e-05, "loss": 0.6284, "step": 15395 }, { "epoch": 1.144258639910814, "grad_norm": 1.8798604547953202, "learning_rate": 1.4192662579136185e-05, "loss": 0.6807, "step": 15396 }, { "epoch": 1.1443329617242661, "grad_norm": 1.9414464720983773, "learning_rate": 1.4191934124937087e-05, "loss": 0.6331, "step": 15397 }, { "epoch": 1.1444072835377184, "grad_norm": 2.2036718684374, "learning_rate": 1.4191205643750538e-05, "loss": 0.6794, "step": 15398 }, { "epoch": 1.1444816053511706, "grad_norm": 1.8181870236531783, "learning_rate": 1.419047713558122e-05, "loss": 0.6148, "step": 15399 }, { "epoch": 1.1445559271646228, "grad_norm": 2.098154580725548, "learning_rate": 1.4189748600433828e-05, "loss": 0.6068, "step": 15400 }, { "epoch": 1.144630248978075, "grad_norm": 2.3869435064815194, "learning_rate": 1.4189020038313052e-05, "loss": 0.539, "step": 15401 }, { "epoch": 1.1447045707915273, "grad_norm": 1.6861612206309171, "learning_rate": 1.4188291449223583e-05, "loss": 0.5468, "step": 15402 }, { "epoch": 1.1447788926049796, "grad_norm": 1.654278084019911, "learning_rate": 1.4187562833170114e-05, "loss": 0.5533, "step": 15403 }, { "epoch": 1.1448532144184318, "grad_norm": 1.6375887673435119, "learning_rate": 1.4186834190157332e-05, "loss": 0.4251, "step": 15404 }, { "epoch": 1.144927536231884, "grad_norm": 2.1680751037723756, "learning_rate": 1.4186105520189926e-05, "loss": 0.7048, "step": 15405 }, { "epoch": 1.1450018580453363, "grad_norm": 1.9747367009056611, "learning_rate": 1.418537682327259e-05, "loss": 0.6475, "step": 15406 }, { "epoch": 1.1450761798587885, "grad_norm": 2.287320251961011, "learning_rate": 1.4184648099410017e-05, "loss": 0.7969, "step": 15407 }, { "epoch": 1.1451505016722408, "grad_norm": 1.9761880289020668, "learning_rate": 1.4183919348606896e-05, "loss": 0.6295, "step": 15408 }, { "epoch": 1.145224823485693, "grad_norm": 3.688365646145583, "learning_rate": 1.4183190570867922e-05, "loss": 0.6879, "step": 15409 }, { "epoch": 1.1452991452991452, "grad_norm": 1.444928646044344, "learning_rate": 1.418246176619778e-05, "loss": 0.5441, "step": 15410 }, { "epoch": 1.1453734671125975, "grad_norm": 1.8390242887309698, "learning_rate": 1.4181732934601166e-05, "loss": 0.6396, "step": 15411 }, { "epoch": 1.1454477889260497, "grad_norm": 1.8706541297497608, "learning_rate": 1.4181004076082774e-05, "loss": 0.6159, "step": 15412 }, { "epoch": 1.1455221107395022, "grad_norm": 2.0049764951543882, "learning_rate": 1.4180275190647295e-05, "loss": 0.6418, "step": 15413 }, { "epoch": 1.1455964325529542, "grad_norm": 1.9777392921138834, "learning_rate": 1.4179546278299423e-05, "loss": 0.5581, "step": 15414 }, { "epoch": 1.1456707543664066, "grad_norm": 2.2684411600197856, "learning_rate": 1.4178817339043845e-05, "loss": 0.8249, "step": 15415 }, { "epoch": 1.1457450761798589, "grad_norm": 1.7534745179102604, "learning_rate": 1.417808837288526e-05, "loss": 0.6238, "step": 15416 }, { "epoch": 1.1458193979933111, "grad_norm": 1.8957342579150132, "learning_rate": 1.4177359379828354e-05, "loss": 0.5268, "step": 15417 }, { "epoch": 1.1458937198067634, "grad_norm": 2.4255064324573654, "learning_rate": 1.417663035987783e-05, "loss": 0.8471, "step": 15418 }, { "epoch": 1.1459680416202156, "grad_norm": 1.4921855675163918, "learning_rate": 1.4175901313038378e-05, "loss": 0.495, "step": 15419 }, { "epoch": 1.1460423634336678, "grad_norm": 1.949227215275477, "learning_rate": 1.417517223931469e-05, "loss": 0.7537, "step": 15420 }, { "epoch": 1.14611668524712, "grad_norm": 1.8204208769815258, "learning_rate": 1.4174443138711456e-05, "loss": 0.5114, "step": 15421 }, { "epoch": 1.1461910070605723, "grad_norm": 4.806177974671894, "learning_rate": 1.4173714011233374e-05, "loss": 0.5473, "step": 15422 }, { "epoch": 1.1462653288740245, "grad_norm": 1.4694182768773392, "learning_rate": 1.4172984856885139e-05, "loss": 0.4781, "step": 15423 }, { "epoch": 1.1463396506874768, "grad_norm": 1.7683464595229657, "learning_rate": 1.4172255675671445e-05, "loss": 0.5856, "step": 15424 }, { "epoch": 1.146413972500929, "grad_norm": 2.0886419992356977, "learning_rate": 1.4171526467596986e-05, "loss": 0.6281, "step": 15425 }, { "epoch": 1.1464882943143813, "grad_norm": 1.923583278969646, "learning_rate": 1.4170797232666454e-05, "loss": 0.7485, "step": 15426 }, { "epoch": 1.1465626161278335, "grad_norm": 2.427180470136461, "learning_rate": 1.4170067970884544e-05, "loss": 0.6763, "step": 15427 }, { "epoch": 1.1466369379412857, "grad_norm": 2.1614483710546417, "learning_rate": 1.4169338682255956e-05, "loss": 0.6448, "step": 15428 }, { "epoch": 1.146711259754738, "grad_norm": 2.0154813035301515, "learning_rate": 1.416860936678538e-05, "loss": 0.7116, "step": 15429 }, { "epoch": 1.1467855815681902, "grad_norm": 2.117602275160043, "learning_rate": 1.4167880024477515e-05, "loss": 0.7111, "step": 15430 }, { "epoch": 1.1468599033816425, "grad_norm": 2.0768126779420935, "learning_rate": 1.4167150655337057e-05, "loss": 0.5491, "step": 15431 }, { "epoch": 1.1469342251950947, "grad_norm": 1.4464719493640168, "learning_rate": 1.4166421259368695e-05, "loss": 0.4876, "step": 15432 }, { "epoch": 1.147008547008547, "grad_norm": 1.6178987226208803, "learning_rate": 1.4165691836577126e-05, "loss": 0.506, "step": 15433 }, { "epoch": 1.1470828688219992, "grad_norm": 1.800204607533549, "learning_rate": 1.4164962386967055e-05, "loss": 0.5591, "step": 15434 }, { "epoch": 1.1471571906354514, "grad_norm": 1.9008259467438402, "learning_rate": 1.4164232910543173e-05, "loss": 0.6377, "step": 15435 }, { "epoch": 1.1472315124489039, "grad_norm": 2.052115003090535, "learning_rate": 1.4163503407310171e-05, "loss": 0.5981, "step": 15436 }, { "epoch": 1.1473058342623559, "grad_norm": 2.1061992927229816, "learning_rate": 1.4162773877272752e-05, "loss": 0.6806, "step": 15437 }, { "epoch": 1.1473801560758083, "grad_norm": 5.073768038888544, "learning_rate": 1.4162044320435611e-05, "loss": 0.6849, "step": 15438 }, { "epoch": 1.1474544778892606, "grad_norm": 1.9828535736454513, "learning_rate": 1.4161314736803445e-05, "loss": 0.6974, "step": 15439 }, { "epoch": 1.1475287997027128, "grad_norm": 2.06955771304827, "learning_rate": 1.416058512638095e-05, "loss": 0.5809, "step": 15440 }, { "epoch": 1.147603121516165, "grad_norm": 2.215780366440788, "learning_rate": 1.4159855489172825e-05, "loss": 0.8245, "step": 15441 }, { "epoch": 1.1476774433296173, "grad_norm": 1.8573558751363144, "learning_rate": 1.4159125825183766e-05, "loss": 0.6436, "step": 15442 }, { "epoch": 1.1477517651430695, "grad_norm": 2.3225003439281218, "learning_rate": 1.4158396134418471e-05, "loss": 0.6546, "step": 15443 }, { "epoch": 1.1478260869565218, "grad_norm": 2.796364128198932, "learning_rate": 1.4157666416881634e-05, "loss": 0.7861, "step": 15444 }, { "epoch": 1.147900408769974, "grad_norm": 1.954982462575629, "learning_rate": 1.4156936672577959e-05, "loss": 0.6392, "step": 15445 }, { "epoch": 1.1479747305834263, "grad_norm": 2.1833304445013457, "learning_rate": 1.4156206901512141e-05, "loss": 0.8549, "step": 15446 }, { "epoch": 1.1480490523968785, "grad_norm": 2.115452855106629, "learning_rate": 1.415547710368888e-05, "loss": 0.7639, "step": 15447 }, { "epoch": 1.1481233742103307, "grad_norm": 1.9949440180488196, "learning_rate": 1.4154747279112872e-05, "loss": 0.6249, "step": 15448 }, { "epoch": 1.148197696023783, "grad_norm": 2.2500103558699203, "learning_rate": 1.4154017427788818e-05, "loss": 0.7487, "step": 15449 }, { "epoch": 1.1482720178372352, "grad_norm": 1.9275902397840308, "learning_rate": 1.4153287549721411e-05, "loss": 0.6525, "step": 15450 }, { "epoch": 1.1483463396506874, "grad_norm": 1.7327817601091624, "learning_rate": 1.4152557644915356e-05, "loss": 0.5776, "step": 15451 }, { "epoch": 1.1484206614641397, "grad_norm": 2.403558582744225, "learning_rate": 1.4151827713375354e-05, "loss": 0.6712, "step": 15452 }, { "epoch": 1.148494983277592, "grad_norm": 2.1960967263523252, "learning_rate": 1.4151097755106097e-05, "loss": 0.6329, "step": 15453 }, { "epoch": 1.1485693050910442, "grad_norm": 1.8106687392338068, "learning_rate": 1.415036777011229e-05, "loss": 0.557, "step": 15454 }, { "epoch": 1.1486436269044964, "grad_norm": 1.9000542814094952, "learning_rate": 1.4149637758398629e-05, "loss": 0.5914, "step": 15455 }, { "epoch": 1.1487179487179486, "grad_norm": 1.6721027194370446, "learning_rate": 1.4148907719969816e-05, "loss": 0.58, "step": 15456 }, { "epoch": 1.1487922705314009, "grad_norm": 2.100729835592676, "learning_rate": 1.4148177654830551e-05, "loss": 0.581, "step": 15457 }, { "epoch": 1.148866592344853, "grad_norm": 1.8395063638679834, "learning_rate": 1.4147447562985532e-05, "loss": 0.7427, "step": 15458 }, { "epoch": 1.1489409141583056, "grad_norm": 2.1863159848470977, "learning_rate": 1.4146717444439464e-05, "loss": 0.594, "step": 15459 }, { "epoch": 1.1490152359717576, "grad_norm": 1.8841150879631046, "learning_rate": 1.4145987299197042e-05, "loss": 0.6452, "step": 15460 }, { "epoch": 1.14908955778521, "grad_norm": 2.0617901557001423, "learning_rate": 1.4145257127262967e-05, "loss": 0.5984, "step": 15461 }, { "epoch": 1.1491638795986623, "grad_norm": 2.2995966535437735, "learning_rate": 1.4144526928641944e-05, "loss": 0.6095, "step": 15462 }, { "epoch": 1.1492382014121145, "grad_norm": 2.1944166282490154, "learning_rate": 1.4143796703338672e-05, "loss": 0.6838, "step": 15463 }, { "epoch": 1.1493125232255668, "grad_norm": 2.4555444478197064, "learning_rate": 1.4143066451357849e-05, "loss": 0.7005, "step": 15464 }, { "epoch": 1.149386845039019, "grad_norm": 2.0984714084866956, "learning_rate": 1.4142336172704184e-05, "loss": 0.5975, "step": 15465 }, { "epoch": 1.1494611668524712, "grad_norm": 1.9296503790079909, "learning_rate": 1.414160586738237e-05, "loss": 0.5441, "step": 15466 }, { "epoch": 1.1495354886659235, "grad_norm": 2.421468767030377, "learning_rate": 1.4140875535397114e-05, "loss": 0.8105, "step": 15467 }, { "epoch": 1.1496098104793757, "grad_norm": 1.9336007008697724, "learning_rate": 1.4140145176753115e-05, "loss": 0.6835, "step": 15468 }, { "epoch": 1.149684132292828, "grad_norm": 1.8358648382434284, "learning_rate": 1.4139414791455077e-05, "loss": 0.5489, "step": 15469 }, { "epoch": 1.1497584541062802, "grad_norm": 1.5292254321836602, "learning_rate": 1.41386843795077e-05, "loss": 0.4634, "step": 15470 }, { "epoch": 1.1498327759197324, "grad_norm": 1.9903504244785393, "learning_rate": 1.413795394091569e-05, "loss": 0.4399, "step": 15471 }, { "epoch": 1.1499070977331847, "grad_norm": 2.0580274282030198, "learning_rate": 1.4137223475683746e-05, "loss": 0.512, "step": 15472 }, { "epoch": 1.149981419546637, "grad_norm": 1.90197580850056, "learning_rate": 1.413649298381657e-05, "loss": 0.6876, "step": 15473 }, { "epoch": 1.1500557413600891, "grad_norm": 2.2848258094501146, "learning_rate": 1.413576246531887e-05, "loss": 0.7248, "step": 15474 }, { "epoch": 1.1501300631735414, "grad_norm": 1.9406534089450895, "learning_rate": 1.4135031920195343e-05, "loss": 0.5674, "step": 15475 }, { "epoch": 1.1502043849869936, "grad_norm": 1.9149865348961994, "learning_rate": 1.4134301348450698e-05, "loss": 0.5897, "step": 15476 }, { "epoch": 1.1502787068004459, "grad_norm": 2.2797426850449454, "learning_rate": 1.4133570750089634e-05, "loss": 0.7164, "step": 15477 }, { "epoch": 1.150353028613898, "grad_norm": 2.003934336827221, "learning_rate": 1.4132840125116855e-05, "loss": 0.704, "step": 15478 }, { "epoch": 1.1504273504273503, "grad_norm": 1.7202936107370885, "learning_rate": 1.4132109473537069e-05, "loss": 0.5785, "step": 15479 }, { "epoch": 1.1505016722408028, "grad_norm": 1.902640979904284, "learning_rate": 1.4131378795354972e-05, "loss": 0.6286, "step": 15480 }, { "epoch": 1.1505759940542548, "grad_norm": 1.9891878977804145, "learning_rate": 1.4130648090575274e-05, "loss": 0.6624, "step": 15481 }, { "epoch": 1.1506503158677073, "grad_norm": 1.8539929798932384, "learning_rate": 1.4129917359202681e-05, "loss": 0.6431, "step": 15482 }, { "epoch": 1.1507246376811595, "grad_norm": 1.4863734045630566, "learning_rate": 1.412918660124189e-05, "loss": 0.3772, "step": 15483 }, { "epoch": 1.1507989594946118, "grad_norm": 1.5300747411952462, "learning_rate": 1.4128455816697613e-05, "loss": 0.5389, "step": 15484 }, { "epoch": 1.150873281308064, "grad_norm": 2.086500897265734, "learning_rate": 1.4127725005574551e-05, "loss": 0.6742, "step": 15485 }, { "epoch": 1.1509476031215162, "grad_norm": 2.4389306213354622, "learning_rate": 1.4126994167877408e-05, "loss": 0.7453, "step": 15486 }, { "epoch": 1.1510219249349685, "grad_norm": 2.292933413730089, "learning_rate": 1.4126263303610892e-05, "loss": 0.6579, "step": 15487 }, { "epoch": 1.1510962467484207, "grad_norm": 1.7395456611706728, "learning_rate": 1.4125532412779707e-05, "loss": 0.5928, "step": 15488 }, { "epoch": 1.151170568561873, "grad_norm": 2.0136691470178225, "learning_rate": 1.4124801495388558e-05, "loss": 0.5651, "step": 15489 }, { "epoch": 1.1512448903753252, "grad_norm": 1.7047521181080052, "learning_rate": 1.4124070551442152e-05, "loss": 0.5911, "step": 15490 }, { "epoch": 1.1513192121887774, "grad_norm": 2.2239362097974076, "learning_rate": 1.4123339580945195e-05, "loss": 0.7811, "step": 15491 }, { "epoch": 1.1513935340022297, "grad_norm": 1.7791318658505295, "learning_rate": 1.4122608583902389e-05, "loss": 0.5056, "step": 15492 }, { "epoch": 1.151467855815682, "grad_norm": 2.50506049177625, "learning_rate": 1.4121877560318444e-05, "loss": 0.8034, "step": 15493 }, { "epoch": 1.1515421776291341, "grad_norm": 1.9617008659371467, "learning_rate": 1.4121146510198065e-05, "loss": 0.6413, "step": 15494 }, { "epoch": 1.1516164994425864, "grad_norm": 8.665419096821704, "learning_rate": 1.4120415433545958e-05, "loss": 0.6421, "step": 15495 }, { "epoch": 1.1516908212560386, "grad_norm": 3.085957748049107, "learning_rate": 1.411968433036683e-05, "loss": 0.7606, "step": 15496 }, { "epoch": 1.1517651430694908, "grad_norm": 1.418814243293169, "learning_rate": 1.4118953200665388e-05, "loss": 0.4395, "step": 15497 }, { "epoch": 1.151839464882943, "grad_norm": 1.3857189741216154, "learning_rate": 1.411822204444634e-05, "loss": 0.4185, "step": 15498 }, { "epoch": 1.1519137866963953, "grad_norm": 1.8319527309684291, "learning_rate": 1.4117490861714394e-05, "loss": 0.5715, "step": 15499 }, { "epoch": 1.1519881085098476, "grad_norm": 2.0648171101470307, "learning_rate": 1.411675965247425e-05, "loss": 0.6024, "step": 15500 }, { "epoch": 1.1520624303232998, "grad_norm": 2.0064923501825684, "learning_rate": 1.4116028416730627e-05, "loss": 0.7349, "step": 15501 }, { "epoch": 1.152136752136752, "grad_norm": 2.104383754904209, "learning_rate": 1.4115297154488224e-05, "loss": 0.5165, "step": 15502 }, { "epoch": 1.1522110739502045, "grad_norm": 1.9086807935918204, "learning_rate": 1.411456586575175e-05, "loss": 0.5897, "step": 15503 }, { "epoch": 1.1522853957636565, "grad_norm": 2.542628946620879, "learning_rate": 1.4113834550525915e-05, "loss": 0.8018, "step": 15504 }, { "epoch": 1.152359717577109, "grad_norm": 2.0618313752543416, "learning_rate": 1.4113103208815428e-05, "loss": 0.579, "step": 15505 }, { "epoch": 1.1524340393905612, "grad_norm": 2.1358796129671136, "learning_rate": 1.4112371840624994e-05, "loss": 0.6332, "step": 15506 }, { "epoch": 1.1525083612040135, "grad_norm": 2.0414217814088906, "learning_rate": 1.4111640445959326e-05, "loss": 0.6196, "step": 15507 }, { "epoch": 1.1525826830174657, "grad_norm": 2.724514397813469, "learning_rate": 1.411090902482313e-05, "loss": 0.575, "step": 15508 }, { "epoch": 1.152657004830918, "grad_norm": 1.7141457124267667, "learning_rate": 1.4110177577221113e-05, "loss": 0.562, "step": 15509 }, { "epoch": 1.1527313266443702, "grad_norm": 2.01518844048792, "learning_rate": 1.410944610315799e-05, "loss": 0.6364, "step": 15510 }, { "epoch": 1.1528056484578224, "grad_norm": 1.6150986853174238, "learning_rate": 1.410871460263846e-05, "loss": 0.5148, "step": 15511 }, { "epoch": 1.1528799702712746, "grad_norm": 13.472177807118197, "learning_rate": 1.4107983075667243e-05, "loss": 0.6433, "step": 15512 }, { "epoch": 1.1529542920847269, "grad_norm": 1.9946697734437198, "learning_rate": 1.4107251522249043e-05, "loss": 0.5957, "step": 15513 }, { "epoch": 1.1530286138981791, "grad_norm": 1.921894140515597, "learning_rate": 1.410651994238857e-05, "loss": 0.6682, "step": 15514 }, { "epoch": 1.1531029357116314, "grad_norm": 2.0026322596307544, "learning_rate": 1.4105788336090536e-05, "loss": 0.7364, "step": 15515 }, { "epoch": 1.1531772575250836, "grad_norm": 2.2402876043266144, "learning_rate": 1.410505670335965e-05, "loss": 0.6307, "step": 15516 }, { "epoch": 1.1532515793385358, "grad_norm": 1.9223497322785084, "learning_rate": 1.410432504420062e-05, "loss": 0.6218, "step": 15517 }, { "epoch": 1.153325901151988, "grad_norm": 1.792396733784046, "learning_rate": 1.410359335861816e-05, "loss": 0.5962, "step": 15518 }, { "epoch": 1.1534002229654403, "grad_norm": 1.8500194835897492, "learning_rate": 1.4102861646616978e-05, "loss": 0.5927, "step": 15519 }, { "epoch": 1.1534745447788926, "grad_norm": 2.364113918558092, "learning_rate": 1.4102129908201785e-05, "loss": 0.7068, "step": 15520 }, { "epoch": 1.1535488665923448, "grad_norm": 1.5466375189666903, "learning_rate": 1.4101398143377294e-05, "loss": 0.473, "step": 15521 }, { "epoch": 1.153623188405797, "grad_norm": 1.9706840879845706, "learning_rate": 1.4100666352148214e-05, "loss": 0.6676, "step": 15522 }, { "epoch": 1.1536975102192493, "grad_norm": 2.1022575785147843, "learning_rate": 1.4099934534519257e-05, "loss": 0.7263, "step": 15523 }, { "epoch": 1.1537718320327015, "grad_norm": 2.19755646392076, "learning_rate": 1.4099202690495135e-05, "loss": 0.5885, "step": 15524 }, { "epoch": 1.1538461538461537, "grad_norm": 1.7634521828759149, "learning_rate": 1.4098470820080556e-05, "loss": 0.4469, "step": 15525 }, { "epoch": 1.1539204756596062, "grad_norm": 1.9783911259997884, "learning_rate": 1.4097738923280235e-05, "loss": 0.6091, "step": 15526 }, { "epoch": 1.1539947974730582, "grad_norm": 1.8798072041478302, "learning_rate": 1.4097007000098886e-05, "loss": 0.5377, "step": 15527 }, { "epoch": 1.1540691192865107, "grad_norm": 2.407867043099073, "learning_rate": 1.4096275050541217e-05, "loss": 0.7309, "step": 15528 }, { "epoch": 1.154143441099963, "grad_norm": 2.471231082445631, "learning_rate": 1.4095543074611944e-05, "loss": 0.6967, "step": 15529 }, { "epoch": 1.1542177629134152, "grad_norm": 1.9933458577831313, "learning_rate": 1.4094811072315773e-05, "loss": 0.7036, "step": 15530 }, { "epoch": 1.1542920847268674, "grad_norm": 2.7175889202457335, "learning_rate": 1.4094079043657422e-05, "loss": 0.4608, "step": 15531 }, { "epoch": 1.1543664065403196, "grad_norm": 2.8799638605662072, "learning_rate": 1.4093346988641603e-05, "loss": 0.6335, "step": 15532 }, { "epoch": 1.1544407283537719, "grad_norm": 17.029335101433432, "learning_rate": 1.409261490727303e-05, "loss": 0.6604, "step": 15533 }, { "epoch": 1.154515050167224, "grad_norm": 1.8954829501673573, "learning_rate": 1.4091882799556416e-05, "loss": 0.4718, "step": 15534 }, { "epoch": 1.1545893719806763, "grad_norm": 1.8836709528877855, "learning_rate": 1.409115066549647e-05, "loss": 0.5993, "step": 15535 }, { "epoch": 1.1546636937941286, "grad_norm": 1.8118352274670357, "learning_rate": 1.4090418505097907e-05, "loss": 0.5731, "step": 15536 }, { "epoch": 1.1547380156075808, "grad_norm": 1.6617295355489783, "learning_rate": 1.4089686318365443e-05, "loss": 0.6075, "step": 15537 }, { "epoch": 1.154812337421033, "grad_norm": 1.6375726696810964, "learning_rate": 1.4088954105303791e-05, "loss": 0.5495, "step": 15538 }, { "epoch": 1.1548866592344853, "grad_norm": 2.6397180445996997, "learning_rate": 1.4088221865917666e-05, "loss": 0.547, "step": 15539 }, { "epoch": 1.1549609810479375, "grad_norm": 1.8228765050119302, "learning_rate": 1.4087489600211778e-05, "loss": 0.6352, "step": 15540 }, { "epoch": 1.1550353028613898, "grad_norm": 1.9507863174117044, "learning_rate": 1.4086757308190847e-05, "loss": 0.6463, "step": 15541 }, { "epoch": 1.155109624674842, "grad_norm": 3.4897777230360876, "learning_rate": 1.4086024989859581e-05, "loss": 0.4925, "step": 15542 }, { "epoch": 1.1551839464882943, "grad_norm": 3.100710511390514, "learning_rate": 1.4085292645222701e-05, "loss": 0.6288, "step": 15543 }, { "epoch": 1.1552582683017465, "grad_norm": 2.020471113259775, "learning_rate": 1.408456027428492e-05, "loss": 0.5624, "step": 15544 }, { "epoch": 1.1553325901151987, "grad_norm": 1.824182936217178, "learning_rate": 1.408382787705095e-05, "loss": 0.6154, "step": 15545 }, { "epoch": 1.155406911928651, "grad_norm": 2.3536761729865665, "learning_rate": 1.4083095453525508e-05, "loss": 0.7429, "step": 15546 }, { "epoch": 1.1554812337421034, "grad_norm": 1.7431499542301117, "learning_rate": 1.4082363003713309e-05, "loss": 0.5484, "step": 15547 }, { "epoch": 1.1555555555555554, "grad_norm": 2.213604243086052, "learning_rate": 1.4081630527619069e-05, "loss": 0.6366, "step": 15548 }, { "epoch": 1.155629877369008, "grad_norm": 2.6655559726627662, "learning_rate": 1.4080898025247505e-05, "loss": 0.5424, "step": 15549 }, { "epoch": 1.1557041991824601, "grad_norm": 2.177469907823098, "learning_rate": 1.408016549660333e-05, "loss": 0.6646, "step": 15550 }, { "epoch": 1.1557785209959124, "grad_norm": 2.0095194426377923, "learning_rate": 1.407943294169126e-05, "loss": 0.4937, "step": 15551 }, { "epoch": 1.1558528428093646, "grad_norm": 1.7411113772581408, "learning_rate": 1.4078700360516012e-05, "loss": 0.5516, "step": 15552 }, { "epoch": 1.1559271646228169, "grad_norm": 2.280133682225726, "learning_rate": 1.4077967753082305e-05, "loss": 0.7023, "step": 15553 }, { "epoch": 1.156001486436269, "grad_norm": 1.8611627343062918, "learning_rate": 1.4077235119394853e-05, "loss": 0.5803, "step": 15554 }, { "epoch": 1.1560758082497213, "grad_norm": 2.0555909280822062, "learning_rate": 1.4076502459458372e-05, "loss": 0.5648, "step": 15555 }, { "epoch": 1.1561501300631736, "grad_norm": 2.169294338031453, "learning_rate": 1.407576977327758e-05, "loss": 0.5133, "step": 15556 }, { "epoch": 1.1562244518766258, "grad_norm": 2.6993962502812243, "learning_rate": 1.4075037060857196e-05, "loss": 0.7674, "step": 15557 }, { "epoch": 1.156298773690078, "grad_norm": 2.445596930583229, "learning_rate": 1.407430432220193e-05, "loss": 0.5698, "step": 15558 }, { "epoch": 1.1563730955035303, "grad_norm": 2.265061152289241, "learning_rate": 1.4073571557316506e-05, "loss": 0.6025, "step": 15559 }, { "epoch": 1.1564474173169825, "grad_norm": 1.7306968051996219, "learning_rate": 1.4072838766205644e-05, "loss": 0.5499, "step": 15560 }, { "epoch": 1.1565217391304348, "grad_norm": 2.063964403208384, "learning_rate": 1.4072105948874054e-05, "loss": 0.5807, "step": 15561 }, { "epoch": 1.156596060943887, "grad_norm": 1.6575161680410107, "learning_rate": 1.4071373105326456e-05, "loss": 0.4993, "step": 15562 }, { "epoch": 1.1566703827573392, "grad_norm": 2.402412661442967, "learning_rate": 1.4070640235567567e-05, "loss": 0.7539, "step": 15563 }, { "epoch": 1.1567447045707915, "grad_norm": 2.068507857274009, "learning_rate": 1.4069907339602112e-05, "loss": 0.679, "step": 15564 }, { "epoch": 1.1568190263842437, "grad_norm": 2.003689778596259, "learning_rate": 1.40691744174348e-05, "loss": 0.6047, "step": 15565 }, { "epoch": 1.156893348197696, "grad_norm": 2.091817880051096, "learning_rate": 1.4068441469070359e-05, "loss": 0.6806, "step": 15566 }, { "epoch": 1.1569676700111482, "grad_norm": 1.8593079614951775, "learning_rate": 1.4067708494513504e-05, "loss": 0.7102, "step": 15567 }, { "epoch": 1.1570419918246004, "grad_norm": 1.733666821682116, "learning_rate": 1.4066975493768948e-05, "loss": 0.6246, "step": 15568 }, { "epoch": 1.1571163136380527, "grad_norm": 2.075212478376508, "learning_rate": 1.4066242466841417e-05, "loss": 0.74, "step": 15569 }, { "epoch": 1.1571906354515051, "grad_norm": 1.9069576277916656, "learning_rate": 1.4065509413735627e-05, "loss": 0.5512, "step": 15570 }, { "epoch": 1.1572649572649572, "grad_norm": 2.0235796778016604, "learning_rate": 1.4064776334456301e-05, "loss": 0.5593, "step": 15571 }, { "epoch": 1.1573392790784096, "grad_norm": 1.4078406479028052, "learning_rate": 1.4064043229008155e-05, "loss": 0.3833, "step": 15572 }, { "epoch": 1.1574136008918618, "grad_norm": 2.397985213487717, "learning_rate": 1.4063310097395908e-05, "loss": 0.6897, "step": 15573 }, { "epoch": 1.157487922705314, "grad_norm": 1.7376028797703134, "learning_rate": 1.4062576939624281e-05, "loss": 0.5422, "step": 15574 }, { "epoch": 1.1575622445187663, "grad_norm": 1.8434399387830926, "learning_rate": 1.4061843755697995e-05, "loss": 0.7061, "step": 15575 }, { "epoch": 1.1576365663322186, "grad_norm": 2.074012912341686, "learning_rate": 1.4061110545621771e-05, "loss": 0.6097, "step": 15576 }, { "epoch": 1.1577108881456708, "grad_norm": 2.120951312542953, "learning_rate": 1.4060377309400328e-05, "loss": 0.5113, "step": 15577 }, { "epoch": 1.157785209959123, "grad_norm": 1.7728408489900989, "learning_rate": 1.4059644047038386e-05, "loss": 0.7169, "step": 15578 }, { "epoch": 1.1578595317725753, "grad_norm": 2.1130792601051405, "learning_rate": 1.4058910758540667e-05, "loss": 0.5429, "step": 15579 }, { "epoch": 1.1579338535860275, "grad_norm": 2.168913334799958, "learning_rate": 1.405817744391189e-05, "loss": 0.7177, "step": 15580 }, { "epoch": 1.1580081753994798, "grad_norm": 1.8936238543518065, "learning_rate": 1.4057444103156774e-05, "loss": 0.6767, "step": 15581 }, { "epoch": 1.158082497212932, "grad_norm": 2.3359372953104045, "learning_rate": 1.405671073628005e-05, "loss": 0.6819, "step": 15582 }, { "epoch": 1.1581568190263842, "grad_norm": 2.2919436818271497, "learning_rate": 1.4055977343286427e-05, "loss": 0.6326, "step": 15583 }, { "epoch": 1.1582311408398365, "grad_norm": 1.9813490753773344, "learning_rate": 1.4055243924180637e-05, "loss": 0.543, "step": 15584 }, { "epoch": 1.1583054626532887, "grad_norm": 2.042187184273703, "learning_rate": 1.4054510478967394e-05, "loss": 0.7291, "step": 15585 }, { "epoch": 1.158379784466741, "grad_norm": 1.8444471905192297, "learning_rate": 1.4053777007651423e-05, "loss": 0.4954, "step": 15586 }, { "epoch": 1.1584541062801932, "grad_norm": 1.786149338678344, "learning_rate": 1.4053043510237445e-05, "loss": 0.6347, "step": 15587 }, { "epoch": 1.1585284280936454, "grad_norm": 2.356823288370055, "learning_rate": 1.4052309986730185e-05, "loss": 0.6893, "step": 15588 }, { "epoch": 1.1586027499070977, "grad_norm": 2.4886400308776055, "learning_rate": 1.4051576437134367e-05, "loss": 0.7434, "step": 15589 }, { "epoch": 1.15867707172055, "grad_norm": 1.8432161902419342, "learning_rate": 1.4050842861454706e-05, "loss": 0.5582, "step": 15590 }, { "epoch": 1.1587513935340021, "grad_norm": 1.628275041184866, "learning_rate": 1.405010925969593e-05, "loss": 0.5256, "step": 15591 }, { "epoch": 1.1588257153474544, "grad_norm": 2.061850556925853, "learning_rate": 1.4049375631862758e-05, "loss": 0.622, "step": 15592 }, { "epoch": 1.1589000371609068, "grad_norm": 1.9763626685642146, "learning_rate": 1.4048641977959921e-05, "loss": 0.7471, "step": 15593 }, { "epoch": 1.1589743589743589, "grad_norm": 2.153700504869617, "learning_rate": 1.4047908297992132e-05, "loss": 0.8055, "step": 15594 }, { "epoch": 1.1590486807878113, "grad_norm": 2.1080704654293654, "learning_rate": 1.4047174591964124e-05, "loss": 0.6007, "step": 15595 }, { "epoch": 1.1591230026012636, "grad_norm": 2.1514198841628778, "learning_rate": 1.404644085988061e-05, "loss": 0.6601, "step": 15596 }, { "epoch": 1.1591973244147158, "grad_norm": 1.8573376775043327, "learning_rate": 1.4045707101746322e-05, "loss": 0.6219, "step": 15597 }, { "epoch": 1.159271646228168, "grad_norm": 2.1266207697606805, "learning_rate": 1.4044973317565984e-05, "loss": 0.7057, "step": 15598 }, { "epoch": 1.1593459680416203, "grad_norm": 2.5237253641672917, "learning_rate": 1.4044239507344318e-05, "loss": 0.7436, "step": 15599 }, { "epoch": 1.1594202898550725, "grad_norm": 2.3489260070725373, "learning_rate": 1.4043505671086044e-05, "loss": 0.6256, "step": 15600 }, { "epoch": 1.1594946116685247, "grad_norm": 1.9166566725676173, "learning_rate": 1.4042771808795894e-05, "loss": 0.6432, "step": 15601 }, { "epoch": 1.159568933481977, "grad_norm": 1.9648304023247636, "learning_rate": 1.4042037920478586e-05, "loss": 0.6956, "step": 15602 }, { "epoch": 1.1596432552954292, "grad_norm": 2.484190200724444, "learning_rate": 1.4041304006138849e-05, "loss": 0.5846, "step": 15603 }, { "epoch": 1.1597175771088815, "grad_norm": 2.0939185824596, "learning_rate": 1.4040570065781407e-05, "loss": 0.5204, "step": 15604 }, { "epoch": 1.1597918989223337, "grad_norm": 1.8676914904588868, "learning_rate": 1.4039836099410984e-05, "loss": 0.6924, "step": 15605 }, { "epoch": 1.159866220735786, "grad_norm": 1.953352470922397, "learning_rate": 1.4039102107032308e-05, "loss": 0.5735, "step": 15606 }, { "epoch": 1.1599405425492382, "grad_norm": 1.688131684551454, "learning_rate": 1.4038368088650099e-05, "loss": 0.5408, "step": 15607 }, { "epoch": 1.1600148643626904, "grad_norm": 2.0211192125823136, "learning_rate": 1.4037634044269087e-05, "loss": 0.6183, "step": 15608 }, { "epoch": 1.1600891861761427, "grad_norm": 1.752357080764578, "learning_rate": 1.4036899973893993e-05, "loss": 0.6474, "step": 15609 }, { "epoch": 1.1601635079895949, "grad_norm": 2.0073692919430064, "learning_rate": 1.4036165877529552e-05, "loss": 0.6495, "step": 15610 }, { "epoch": 1.1602378298030471, "grad_norm": 2.459878444148248, "learning_rate": 1.4035431755180482e-05, "loss": 0.7149, "step": 15611 }, { "epoch": 1.1603121516164994, "grad_norm": 1.6999748369631584, "learning_rate": 1.4034697606851513e-05, "loss": 0.6483, "step": 15612 }, { "epoch": 1.1603864734299516, "grad_norm": 1.5958218947824947, "learning_rate": 1.4033963432547368e-05, "loss": 0.6131, "step": 15613 }, { "epoch": 1.1604607952434038, "grad_norm": 2.3052036252965395, "learning_rate": 1.4033229232272776e-05, "loss": 0.6063, "step": 15614 }, { "epoch": 1.160535117056856, "grad_norm": 1.957143573762959, "learning_rate": 1.4032495006032467e-05, "loss": 0.7239, "step": 15615 }, { "epoch": 1.1606094388703085, "grad_norm": 1.8470948575144144, "learning_rate": 1.403176075383116e-05, "loss": 0.5338, "step": 15616 }, { "epoch": 1.1606837606837608, "grad_norm": 2.1086443394721295, "learning_rate": 1.403102647567359e-05, "loss": 0.7059, "step": 15617 }, { "epoch": 1.160758082497213, "grad_norm": 1.9182440120389779, "learning_rate": 1.4030292171564477e-05, "loss": 0.7102, "step": 15618 }, { "epoch": 1.1608324043106653, "grad_norm": 1.779045408539383, "learning_rate": 1.4029557841508554e-05, "loss": 0.6214, "step": 15619 }, { "epoch": 1.1609067261241175, "grad_norm": 1.9741542936579746, "learning_rate": 1.4028823485510546e-05, "loss": 0.5928, "step": 15620 }, { "epoch": 1.1609810479375697, "grad_norm": 2.0518282516448028, "learning_rate": 1.4028089103575182e-05, "loss": 0.6545, "step": 15621 }, { "epoch": 1.161055369751022, "grad_norm": 1.9099962705377378, "learning_rate": 1.4027354695707192e-05, "loss": 0.5856, "step": 15622 }, { "epoch": 1.1611296915644742, "grad_norm": 2.013477218801912, "learning_rate": 1.4026620261911298e-05, "loss": 0.6439, "step": 15623 }, { "epoch": 1.1612040133779264, "grad_norm": 2.1955415246068317, "learning_rate": 1.4025885802192232e-05, "loss": 0.7907, "step": 15624 }, { "epoch": 1.1612783351913787, "grad_norm": 1.8979600550112352, "learning_rate": 1.4025151316554722e-05, "loss": 0.6543, "step": 15625 }, { "epoch": 1.161352657004831, "grad_norm": 2.004200290321789, "learning_rate": 1.40244168050035e-05, "loss": 0.615, "step": 15626 }, { "epoch": 1.1614269788182832, "grad_norm": 1.609505166806258, "learning_rate": 1.4023682267543288e-05, "loss": 0.5656, "step": 15627 }, { "epoch": 1.1615013006317354, "grad_norm": 1.7598100119464755, "learning_rate": 1.4022947704178817e-05, "loss": 0.5803, "step": 15628 }, { "epoch": 1.1615756224451876, "grad_norm": 2.084708251216082, "learning_rate": 1.4022213114914822e-05, "loss": 0.7511, "step": 15629 }, { "epoch": 1.1616499442586399, "grad_norm": 2.0216393536729194, "learning_rate": 1.4021478499756023e-05, "loss": 0.6863, "step": 15630 }, { "epoch": 1.1617242660720921, "grad_norm": 2.434025187980868, "learning_rate": 1.4020743858707157e-05, "loss": 0.6346, "step": 15631 }, { "epoch": 1.1617985878855444, "grad_norm": 1.8543959908111372, "learning_rate": 1.402000919177295e-05, "loss": 0.469, "step": 15632 }, { "epoch": 1.1618729096989966, "grad_norm": 2.357924060997524, "learning_rate": 1.4019274498958133e-05, "loss": 0.6849, "step": 15633 }, { "epoch": 1.1619472315124488, "grad_norm": 2.5724600877122654, "learning_rate": 1.4018539780267435e-05, "loss": 0.7624, "step": 15634 }, { "epoch": 1.162021553325901, "grad_norm": 1.7206931829222434, "learning_rate": 1.4017805035705587e-05, "loss": 0.6234, "step": 15635 }, { "epoch": 1.1620958751393533, "grad_norm": 2.1251727162918104, "learning_rate": 1.4017070265277317e-05, "loss": 0.6302, "step": 15636 }, { "epoch": 1.1621701969528058, "grad_norm": 1.920563631005132, "learning_rate": 1.401633546898736e-05, "loss": 0.5969, "step": 15637 }, { "epoch": 1.1622445187662578, "grad_norm": 3.0840896986041635, "learning_rate": 1.4015600646840439e-05, "loss": 0.6305, "step": 15638 }, { "epoch": 1.1623188405797102, "grad_norm": 1.640269714286643, "learning_rate": 1.401486579884129e-05, "loss": 0.4907, "step": 15639 }, { "epoch": 1.1623931623931625, "grad_norm": 2.368898119609766, "learning_rate": 1.4014130924994646e-05, "loss": 0.6417, "step": 15640 }, { "epoch": 1.1624674842066147, "grad_norm": 3.5775359337968937, "learning_rate": 1.4013396025305234e-05, "loss": 0.6408, "step": 15641 }, { "epoch": 1.162541806020067, "grad_norm": 3.7390189286472344, "learning_rate": 1.4012661099777786e-05, "loss": 0.6723, "step": 15642 }, { "epoch": 1.1626161278335192, "grad_norm": 2.4945072011995015, "learning_rate": 1.4011926148417038e-05, "loss": 0.5635, "step": 15643 }, { "epoch": 1.1626904496469714, "grad_norm": 2.0144331513057394, "learning_rate": 1.4011191171227713e-05, "loss": 0.6091, "step": 15644 }, { "epoch": 1.1627647714604237, "grad_norm": 2.0330315603796754, "learning_rate": 1.401045616821455e-05, "loss": 0.6082, "step": 15645 }, { "epoch": 1.162839093273876, "grad_norm": 1.8588216684729908, "learning_rate": 1.4009721139382277e-05, "loss": 0.5774, "step": 15646 }, { "epoch": 1.1629134150873282, "grad_norm": 2.6463730133358054, "learning_rate": 1.400898608473563e-05, "loss": 0.826, "step": 15647 }, { "epoch": 1.1629877369007804, "grad_norm": 2.195407663137048, "learning_rate": 1.4008251004279336e-05, "loss": 0.7209, "step": 15648 }, { "epoch": 1.1630620587142326, "grad_norm": 2.190331697565299, "learning_rate": 1.4007515898018129e-05, "loss": 0.6153, "step": 15649 }, { "epoch": 1.1631363805276849, "grad_norm": 1.6142224891085253, "learning_rate": 1.4006780765956744e-05, "loss": 0.4905, "step": 15650 }, { "epoch": 1.163210702341137, "grad_norm": 2.4284508089521575, "learning_rate": 1.4006045608099914e-05, "loss": 0.6358, "step": 15651 }, { "epoch": 1.1632850241545893, "grad_norm": 1.9775750296711716, "learning_rate": 1.4005310424452367e-05, "loss": 0.7036, "step": 15652 }, { "epoch": 1.1633593459680416, "grad_norm": 1.6571302507809942, "learning_rate": 1.400457521501884e-05, "loss": 0.5807, "step": 15653 }, { "epoch": 1.1634336677814938, "grad_norm": 2.1794623298080134, "learning_rate": 1.400383997980407e-05, "loss": 0.6746, "step": 15654 }, { "epoch": 1.163507989594946, "grad_norm": 1.7397432081918351, "learning_rate": 1.4003104718812782e-05, "loss": 0.5833, "step": 15655 }, { "epoch": 1.1635823114083983, "grad_norm": 1.8126314220325719, "learning_rate": 1.4002369432049712e-05, "loss": 0.486, "step": 15656 }, { "epoch": 1.1636566332218505, "grad_norm": 2.4610724412468183, "learning_rate": 1.4001634119519598e-05, "loss": 0.6618, "step": 15657 }, { "epoch": 1.1637309550353028, "grad_norm": 1.8904550920555325, "learning_rate": 1.400089878122717e-05, "loss": 0.7011, "step": 15658 }, { "epoch": 1.163805276848755, "grad_norm": 1.8760315638689378, "learning_rate": 1.4000163417177165e-05, "loss": 0.5312, "step": 15659 }, { "epoch": 1.1638795986622075, "grad_norm": 2.1197595866290326, "learning_rate": 1.3999428027374313e-05, "loss": 0.6976, "step": 15660 }, { "epoch": 1.1639539204756595, "grad_norm": 1.9035269969900164, "learning_rate": 1.3998692611823353e-05, "loss": 0.6398, "step": 15661 }, { "epoch": 1.164028242289112, "grad_norm": 2.064309160077955, "learning_rate": 1.3997957170529013e-05, "loss": 0.6585, "step": 15662 }, { "epoch": 1.1641025641025642, "grad_norm": 2.222800352425854, "learning_rate": 1.3997221703496038e-05, "loss": 0.5819, "step": 15663 }, { "epoch": 1.1641768859160164, "grad_norm": 2.0092304530048612, "learning_rate": 1.3996486210729155e-05, "loss": 0.6275, "step": 15664 }, { "epoch": 1.1642512077294687, "grad_norm": 2.110681144910519, "learning_rate": 1.3995750692233101e-05, "loss": 0.5136, "step": 15665 }, { "epoch": 1.164325529542921, "grad_norm": 2.3467805202185734, "learning_rate": 1.3995015148012612e-05, "loss": 0.8738, "step": 15666 }, { "epoch": 1.1643998513563731, "grad_norm": 1.8369648414447202, "learning_rate": 1.3994279578072419e-05, "loss": 0.5446, "step": 15667 }, { "epoch": 1.1644741731698254, "grad_norm": 1.8176004321436856, "learning_rate": 1.3993543982417266e-05, "loss": 0.562, "step": 15668 }, { "epoch": 1.1645484949832776, "grad_norm": 1.626777969251538, "learning_rate": 1.3992808361051881e-05, "loss": 0.5938, "step": 15669 }, { "epoch": 1.1646228167967299, "grad_norm": 1.7812899085602243, "learning_rate": 1.3992072713981006e-05, "loss": 0.4686, "step": 15670 }, { "epoch": 1.164697138610182, "grad_norm": 2.05778659446461, "learning_rate": 1.3991337041209371e-05, "loss": 0.7689, "step": 15671 }, { "epoch": 1.1647714604236343, "grad_norm": 1.9483547662523808, "learning_rate": 1.3990601342741717e-05, "loss": 0.6011, "step": 15672 }, { "epoch": 1.1648457822370866, "grad_norm": 1.8761260817361884, "learning_rate": 1.3989865618582777e-05, "loss": 0.6337, "step": 15673 }, { "epoch": 1.1649201040505388, "grad_norm": 1.896407053475985, "learning_rate": 1.3989129868737293e-05, "loss": 0.5286, "step": 15674 }, { "epoch": 1.164994425863991, "grad_norm": 2.0050608522472184, "learning_rate": 1.3988394093209996e-05, "loss": 0.6062, "step": 15675 }, { "epoch": 1.1650687476774433, "grad_norm": 1.7749327655464129, "learning_rate": 1.3987658292005622e-05, "loss": 0.575, "step": 15676 }, { "epoch": 1.1651430694908955, "grad_norm": 1.7921666300480965, "learning_rate": 1.3986922465128912e-05, "loss": 0.582, "step": 15677 }, { "epoch": 1.1652173913043478, "grad_norm": 2.361252697147581, "learning_rate": 1.3986186612584602e-05, "loss": 0.7902, "step": 15678 }, { "epoch": 1.1652917131178, "grad_norm": 1.726127298811091, "learning_rate": 1.398545073437743e-05, "loss": 0.6409, "step": 15679 }, { "epoch": 1.1653660349312522, "grad_norm": 1.9126584067658194, "learning_rate": 1.3984714830512134e-05, "loss": 0.4844, "step": 15680 }, { "epoch": 1.1654403567447045, "grad_norm": 2.0862859382507826, "learning_rate": 1.398397890099345e-05, "loss": 0.7956, "step": 15681 }, { "epoch": 1.1655146785581567, "grad_norm": 2.1684904270733774, "learning_rate": 1.3983242945826117e-05, "loss": 0.7563, "step": 15682 }, { "epoch": 1.1655890003716092, "grad_norm": 1.991922822537117, "learning_rate": 1.398250696501487e-05, "loss": 0.6499, "step": 15683 }, { "epoch": 1.1656633221850614, "grad_norm": 2.2620632447351734, "learning_rate": 1.398177095856445e-05, "loss": 0.749, "step": 15684 }, { "epoch": 1.1657376439985137, "grad_norm": 2.21488867645756, "learning_rate": 1.39810349264796e-05, "loss": 0.6546, "step": 15685 }, { "epoch": 1.165811965811966, "grad_norm": 1.903801592753599, "learning_rate": 1.3980298868765052e-05, "loss": 0.659, "step": 15686 }, { "epoch": 1.1658862876254181, "grad_norm": 2.473916648806841, "learning_rate": 1.3979562785425544e-05, "loss": 0.8002, "step": 15687 }, { "epoch": 1.1659606094388704, "grad_norm": 1.9700901755427311, "learning_rate": 1.3978826676465818e-05, "loss": 0.4931, "step": 15688 }, { "epoch": 1.1660349312523226, "grad_norm": 1.660987796898335, "learning_rate": 1.3978090541890612e-05, "loss": 0.5283, "step": 15689 }, { "epoch": 1.1661092530657748, "grad_norm": 2.1627055830595765, "learning_rate": 1.3977354381704666e-05, "loss": 0.616, "step": 15690 }, { "epoch": 1.166183574879227, "grad_norm": 2.2253012987643483, "learning_rate": 1.3976618195912719e-05, "loss": 0.6538, "step": 15691 }, { "epoch": 1.1662578966926793, "grad_norm": 2.3496689969092843, "learning_rate": 1.3975881984519513e-05, "loss": 0.5014, "step": 15692 }, { "epoch": 1.1663322185061316, "grad_norm": 1.9500149313470756, "learning_rate": 1.3975145747529782e-05, "loss": 0.5838, "step": 15693 }, { "epoch": 1.1664065403195838, "grad_norm": 2.1496795692529855, "learning_rate": 1.3974409484948267e-05, "loss": 0.6819, "step": 15694 }, { "epoch": 1.166480862133036, "grad_norm": 2.912804380987881, "learning_rate": 1.3973673196779714e-05, "loss": 0.7803, "step": 15695 }, { "epoch": 1.1665551839464883, "grad_norm": 2.239097002994967, "learning_rate": 1.3972936883028857e-05, "loss": 0.8602, "step": 15696 }, { "epoch": 1.1666295057599405, "grad_norm": 2.2589425183130505, "learning_rate": 1.397220054370044e-05, "loss": 0.7225, "step": 15697 }, { "epoch": 1.1667038275733927, "grad_norm": 1.6977988379250228, "learning_rate": 1.39714641787992e-05, "loss": 0.5195, "step": 15698 }, { "epoch": 1.166778149386845, "grad_norm": 2.0070606314105235, "learning_rate": 1.3970727788329882e-05, "loss": 0.6493, "step": 15699 }, { "epoch": 1.1668524712002972, "grad_norm": 1.9423188660830446, "learning_rate": 1.396999137229722e-05, "loss": 0.6814, "step": 15700 }, { "epoch": 1.1669267930137495, "grad_norm": 1.666979564737355, "learning_rate": 1.3969254930705965e-05, "loss": 0.4272, "step": 15701 }, { "epoch": 1.1670011148272017, "grad_norm": 1.5288523027484697, "learning_rate": 1.396851846356085e-05, "loss": 0.5211, "step": 15702 }, { "epoch": 1.167075436640654, "grad_norm": 1.8305501778595068, "learning_rate": 1.3967781970866621e-05, "loss": 0.553, "step": 15703 }, { "epoch": 1.1671497584541064, "grad_norm": 2.1095336993079115, "learning_rate": 1.3967045452628014e-05, "loss": 0.6782, "step": 15704 }, { "epoch": 1.1672240802675584, "grad_norm": 1.7280892325421293, "learning_rate": 1.3966308908849775e-05, "loss": 0.5181, "step": 15705 }, { "epoch": 1.1672984020810109, "grad_norm": 1.8828289065638881, "learning_rate": 1.3965572339536646e-05, "loss": 0.6507, "step": 15706 }, { "epoch": 1.1673727238944631, "grad_norm": 2.219138858992839, "learning_rate": 1.396483574469337e-05, "loss": 0.5725, "step": 15707 }, { "epoch": 1.1674470457079154, "grad_norm": 1.7717268412583693, "learning_rate": 1.3964099124324685e-05, "loss": 0.6242, "step": 15708 }, { "epoch": 1.1675213675213676, "grad_norm": 1.9970272124352104, "learning_rate": 1.3963362478435339e-05, "loss": 0.6604, "step": 15709 }, { "epoch": 1.1675956893348198, "grad_norm": 2.3893788527157067, "learning_rate": 1.3962625807030066e-05, "loss": 0.626, "step": 15710 }, { "epoch": 1.167670011148272, "grad_norm": 2.62302702525558, "learning_rate": 1.3961889110113615e-05, "loss": 0.713, "step": 15711 }, { "epoch": 1.1677443329617243, "grad_norm": 2.3030926989774954, "learning_rate": 1.3961152387690727e-05, "loss": 0.7517, "step": 15712 }, { "epoch": 1.1678186547751765, "grad_norm": 1.9957496148130272, "learning_rate": 1.3960415639766147e-05, "loss": 0.7778, "step": 15713 }, { "epoch": 1.1678929765886288, "grad_norm": 1.7469418599960578, "learning_rate": 1.3959678866344618e-05, "loss": 0.6521, "step": 15714 }, { "epoch": 1.167967298402081, "grad_norm": 1.7299347770254176, "learning_rate": 1.3958942067430881e-05, "loss": 0.476, "step": 15715 }, { "epoch": 1.1680416202155333, "grad_norm": 2.386428005603264, "learning_rate": 1.3958205243029678e-05, "loss": 0.6743, "step": 15716 }, { "epoch": 1.1681159420289855, "grad_norm": 2.054881579602445, "learning_rate": 1.3957468393145757e-05, "loss": 0.7105, "step": 15717 }, { "epoch": 1.1681902638424377, "grad_norm": 1.792575646146666, "learning_rate": 1.3956731517783859e-05, "loss": 0.5264, "step": 15718 }, { "epoch": 1.16826458565589, "grad_norm": 2.2899805040454244, "learning_rate": 1.3955994616948731e-05, "loss": 0.653, "step": 15719 }, { "epoch": 1.1683389074693422, "grad_norm": 1.7382144877617727, "learning_rate": 1.3955257690645113e-05, "loss": 0.5345, "step": 15720 }, { "epoch": 1.1684132292827945, "grad_norm": 1.91463353926846, "learning_rate": 1.3954520738877751e-05, "loss": 0.6348, "step": 15721 }, { "epoch": 1.1684875510962467, "grad_norm": 1.712368479711659, "learning_rate": 1.3953783761651388e-05, "loss": 0.5803, "step": 15722 }, { "epoch": 1.168561872909699, "grad_norm": 1.6960145967036635, "learning_rate": 1.3953046758970771e-05, "loss": 0.5701, "step": 15723 }, { "epoch": 1.1686361947231512, "grad_norm": 2.356654364466925, "learning_rate": 1.3952309730840647e-05, "loss": 0.7273, "step": 15724 }, { "epoch": 1.1687105165366034, "grad_norm": 1.6606989411258606, "learning_rate": 1.395157267726576e-05, "loss": 0.5571, "step": 15725 }, { "epoch": 1.1687848383500556, "grad_norm": 2.1311372617322646, "learning_rate": 1.3950835598250847e-05, "loss": 0.69, "step": 15726 }, { "epoch": 1.168859160163508, "grad_norm": 2.076941895620597, "learning_rate": 1.395009849380066e-05, "loss": 0.6704, "step": 15727 }, { "epoch": 1.1689334819769601, "grad_norm": 1.7460067141109328, "learning_rate": 1.3949361363919946e-05, "loss": 0.6211, "step": 15728 }, { "epoch": 1.1690078037904126, "grad_norm": 2.252933572732404, "learning_rate": 1.3948624208613449e-05, "loss": 0.531, "step": 15729 }, { "epoch": 1.1690821256038648, "grad_norm": 2.2919994237814376, "learning_rate": 1.3947887027885916e-05, "loss": 0.7078, "step": 15730 }, { "epoch": 1.169156447417317, "grad_norm": 2.0614774646159386, "learning_rate": 1.3947149821742088e-05, "loss": 0.8127, "step": 15731 }, { "epoch": 1.1692307692307693, "grad_norm": 2.3622557921982024, "learning_rate": 1.3946412590186713e-05, "loss": 0.6494, "step": 15732 }, { "epoch": 1.1693050910442215, "grad_norm": 8.526062990503378, "learning_rate": 1.394567533322454e-05, "loss": 0.7321, "step": 15733 }, { "epoch": 1.1693794128576738, "grad_norm": 1.7390785497915044, "learning_rate": 1.3944938050860313e-05, "loss": 0.5335, "step": 15734 }, { "epoch": 1.169453734671126, "grad_norm": 2.0141588192377875, "learning_rate": 1.3944200743098782e-05, "loss": 0.5728, "step": 15735 }, { "epoch": 1.1695280564845782, "grad_norm": 1.627537195651493, "learning_rate": 1.394346340994469e-05, "loss": 0.5204, "step": 15736 }, { "epoch": 1.1696023782980305, "grad_norm": 2.1493684592201925, "learning_rate": 1.3942726051402786e-05, "loss": 0.6577, "step": 15737 }, { "epoch": 1.1696767001114827, "grad_norm": 2.0549486359198994, "learning_rate": 1.3941988667477816e-05, "loss": 0.7592, "step": 15738 }, { "epoch": 1.169751021924935, "grad_norm": 1.9208245332453797, "learning_rate": 1.3941251258174523e-05, "loss": 0.5861, "step": 15739 }, { "epoch": 1.1698253437383872, "grad_norm": 2.1208141766138318, "learning_rate": 1.3940513823497666e-05, "loss": 0.5746, "step": 15740 }, { "epoch": 1.1698996655518394, "grad_norm": 1.785585330854449, "learning_rate": 1.3939776363451983e-05, "loss": 0.4976, "step": 15741 }, { "epoch": 1.1699739873652917, "grad_norm": 1.9849769098760364, "learning_rate": 1.3939038878042224e-05, "loss": 0.4976, "step": 15742 }, { "epoch": 1.170048309178744, "grad_norm": 2.346279963798956, "learning_rate": 1.3938301367273135e-05, "loss": 0.6049, "step": 15743 }, { "epoch": 1.1701226309921962, "grad_norm": 2.155175231065629, "learning_rate": 1.3937563831149467e-05, "loss": 0.7285, "step": 15744 }, { "epoch": 1.1701969528056484, "grad_norm": 3.6472928293933813, "learning_rate": 1.3936826269675967e-05, "loss": 0.5771, "step": 15745 }, { "epoch": 1.1702712746191006, "grad_norm": 2.1358667401633613, "learning_rate": 1.3936088682857387e-05, "loss": 0.7265, "step": 15746 }, { "epoch": 1.1703455964325529, "grad_norm": 2.316657948433181, "learning_rate": 1.3935351070698474e-05, "loss": 0.6749, "step": 15747 }, { "epoch": 1.170419918246005, "grad_norm": 1.747390565975891, "learning_rate": 1.393461343320397e-05, "loss": 0.6575, "step": 15748 }, { "epoch": 1.1704942400594573, "grad_norm": 2.1030180791397846, "learning_rate": 1.3933875770378626e-05, "loss": 0.6423, "step": 15749 }, { "epoch": 1.1705685618729098, "grad_norm": 4.57206088455541, "learning_rate": 1.39331380822272e-05, "loss": 0.5512, "step": 15750 }, { "epoch": 1.1706428836863618, "grad_norm": 2.1829211047585066, "learning_rate": 1.3932400368754432e-05, "loss": 0.554, "step": 15751 }, { "epoch": 1.1707172054998143, "grad_norm": 1.863262379214052, "learning_rate": 1.3931662629965078e-05, "loss": 0.4932, "step": 15752 }, { "epoch": 1.1707915273132665, "grad_norm": 2.0252002037618135, "learning_rate": 1.393092486586388e-05, "loss": 0.6027, "step": 15753 }, { "epoch": 1.1708658491267188, "grad_norm": 2.484277579575559, "learning_rate": 1.3930187076455593e-05, "loss": 0.4842, "step": 15754 }, { "epoch": 1.170940170940171, "grad_norm": 2.079696128139912, "learning_rate": 1.3929449261744964e-05, "loss": 0.7138, "step": 15755 }, { "epoch": 1.1710144927536232, "grad_norm": 2.3492344491928905, "learning_rate": 1.3928711421736747e-05, "loss": 0.6564, "step": 15756 }, { "epoch": 1.1710888145670755, "grad_norm": 2.0047008958822627, "learning_rate": 1.392797355643569e-05, "loss": 0.7195, "step": 15757 }, { "epoch": 1.1711631363805277, "grad_norm": 2.2463963993341802, "learning_rate": 1.3927235665846542e-05, "loss": 0.6181, "step": 15758 }, { "epoch": 1.17123745819398, "grad_norm": 1.9890092302508213, "learning_rate": 1.3926497749974055e-05, "loss": 0.657, "step": 15759 }, { "epoch": 1.1713117800074322, "grad_norm": 1.6234551678458489, "learning_rate": 1.3925759808822979e-05, "loss": 0.5448, "step": 15760 }, { "epoch": 1.1713861018208844, "grad_norm": 2.2931630636325098, "learning_rate": 1.3925021842398063e-05, "loss": 0.6834, "step": 15761 }, { "epoch": 1.1714604236343367, "grad_norm": 1.9708227118086394, "learning_rate": 1.3924283850704063e-05, "loss": 0.7366, "step": 15762 }, { "epoch": 1.171534745447789, "grad_norm": 1.7700234385378633, "learning_rate": 1.3923545833745726e-05, "loss": 0.5302, "step": 15763 }, { "epoch": 1.1716090672612411, "grad_norm": 2.2031648295122075, "learning_rate": 1.3922807791527806e-05, "loss": 0.6446, "step": 15764 }, { "epoch": 1.1716833890746934, "grad_norm": 1.8526001066106021, "learning_rate": 1.392206972405505e-05, "loss": 0.6225, "step": 15765 }, { "epoch": 1.1717577108881456, "grad_norm": 1.561576658069726, "learning_rate": 1.3921331631332214e-05, "loss": 0.4913, "step": 15766 }, { "epoch": 1.1718320327015979, "grad_norm": 1.7362502319461623, "learning_rate": 1.3920593513364046e-05, "loss": 0.5008, "step": 15767 }, { "epoch": 1.17190635451505, "grad_norm": 2.184416988629258, "learning_rate": 1.3919855370155306e-05, "loss": 0.616, "step": 15768 }, { "epoch": 1.1719806763285023, "grad_norm": 1.7264516782482355, "learning_rate": 1.3919117201710737e-05, "loss": 0.5508, "step": 15769 }, { "epoch": 1.1720549981419546, "grad_norm": 1.7087068503803655, "learning_rate": 1.3918379008035096e-05, "loss": 0.6646, "step": 15770 }, { "epoch": 1.172129319955407, "grad_norm": 3.092676644474917, "learning_rate": 1.3917640789133134e-05, "loss": 0.7115, "step": 15771 }, { "epoch": 1.172203641768859, "grad_norm": 2.063509194944454, "learning_rate": 1.39169025450096e-05, "loss": 0.5786, "step": 15772 }, { "epoch": 1.1722779635823115, "grad_norm": 1.7877694053693818, "learning_rate": 1.3916164275669257e-05, "loss": 0.518, "step": 15773 }, { "epoch": 1.1723522853957637, "grad_norm": 1.6992882111870107, "learning_rate": 1.3915425981116847e-05, "loss": 0.4891, "step": 15774 }, { "epoch": 1.172426607209216, "grad_norm": 1.9913624900932252, "learning_rate": 1.3914687661357129e-05, "loss": 0.5755, "step": 15775 }, { "epoch": 1.1725009290226682, "grad_norm": 6.942929363119825, "learning_rate": 1.3913949316394858e-05, "loss": 0.6366, "step": 15776 }, { "epoch": 1.1725752508361205, "grad_norm": 1.8393784735812309, "learning_rate": 1.3913210946234781e-05, "loss": 0.6525, "step": 15777 }, { "epoch": 1.1726495726495727, "grad_norm": 2.10208573630576, "learning_rate": 1.3912472550881653e-05, "loss": 0.6699, "step": 15778 }, { "epoch": 1.172723894463025, "grad_norm": 1.810907263820856, "learning_rate": 1.3911734130340235e-05, "loss": 0.5336, "step": 15779 }, { "epoch": 1.1727982162764772, "grad_norm": 1.7320612822244028, "learning_rate": 1.3910995684615271e-05, "loss": 0.5728, "step": 15780 }, { "epoch": 1.1728725380899294, "grad_norm": 1.712482620645013, "learning_rate": 1.3910257213711524e-05, "loss": 0.5828, "step": 15781 }, { "epoch": 1.1729468599033817, "grad_norm": 2.0033641828115156, "learning_rate": 1.390951871763374e-05, "loss": 0.5471, "step": 15782 }, { "epoch": 1.173021181716834, "grad_norm": 1.9731715082607626, "learning_rate": 1.390878019638668e-05, "loss": 0.6145, "step": 15783 }, { "epoch": 1.1730955035302861, "grad_norm": 2.1813002709438134, "learning_rate": 1.3908041649975094e-05, "loss": 0.6273, "step": 15784 }, { "epoch": 1.1731698253437384, "grad_norm": 3.647806562649504, "learning_rate": 1.390730307840374e-05, "loss": 0.7224, "step": 15785 }, { "epoch": 1.1732441471571906, "grad_norm": 1.7970912601506004, "learning_rate": 1.390656448167737e-05, "loss": 0.6966, "step": 15786 }, { "epoch": 1.1733184689706428, "grad_norm": 2.0722117497028485, "learning_rate": 1.3905825859800744e-05, "loss": 0.6951, "step": 15787 }, { "epoch": 1.173392790784095, "grad_norm": 1.8550939737693302, "learning_rate": 1.390508721277861e-05, "loss": 0.6822, "step": 15788 }, { "epoch": 1.1734671125975473, "grad_norm": 1.439020743374864, "learning_rate": 1.3904348540615729e-05, "loss": 0.4338, "step": 15789 }, { "epoch": 1.1735414344109996, "grad_norm": 1.8707283319262475, "learning_rate": 1.3903609843316854e-05, "loss": 0.7135, "step": 15790 }, { "epoch": 1.1736157562244518, "grad_norm": 1.9326254638081126, "learning_rate": 1.390287112088674e-05, "loss": 0.7352, "step": 15791 }, { "epoch": 1.173690078037904, "grad_norm": 2.198282268834417, "learning_rate": 1.3902132373330145e-05, "loss": 0.5818, "step": 15792 }, { "epoch": 1.1737643998513563, "grad_norm": 1.9598256657223836, "learning_rate": 1.3901393600651827e-05, "loss": 0.5207, "step": 15793 }, { "epoch": 1.1738387216648087, "grad_norm": 1.7676367075470543, "learning_rate": 1.3900654802856537e-05, "loss": 0.5222, "step": 15794 }, { "epoch": 1.1739130434782608, "grad_norm": 1.9815048181050614, "learning_rate": 1.3899915979949037e-05, "loss": 0.675, "step": 15795 }, { "epoch": 1.1739873652917132, "grad_norm": 3.1200833810310735, "learning_rate": 1.3899177131934077e-05, "loss": 0.7313, "step": 15796 }, { "epoch": 1.1740616871051655, "grad_norm": 1.9236823704210941, "learning_rate": 1.3898438258816416e-05, "loss": 0.5819, "step": 15797 }, { "epoch": 1.1741360089186177, "grad_norm": 2.0304290259847053, "learning_rate": 1.3897699360600814e-05, "loss": 0.6692, "step": 15798 }, { "epoch": 1.17421033073207, "grad_norm": 1.5869914732060557, "learning_rate": 1.3896960437292024e-05, "loss": 0.4925, "step": 15799 }, { "epoch": 1.1742846525455222, "grad_norm": 2.0817816708917216, "learning_rate": 1.3896221488894808e-05, "loss": 0.6331, "step": 15800 }, { "epoch": 1.1743589743589744, "grad_norm": 2.017441809422411, "learning_rate": 1.3895482515413919e-05, "loss": 0.7197, "step": 15801 }, { "epoch": 1.1744332961724266, "grad_norm": 1.6389206883935978, "learning_rate": 1.3894743516854112e-05, "loss": 0.5545, "step": 15802 }, { "epoch": 1.1745076179858789, "grad_norm": 1.8040179907968283, "learning_rate": 1.389400449322015e-05, "loss": 0.562, "step": 15803 }, { "epoch": 1.1745819397993311, "grad_norm": 2.2126401366753172, "learning_rate": 1.3893265444516793e-05, "loss": 0.694, "step": 15804 }, { "epoch": 1.1746562616127834, "grad_norm": 2.1221338780269905, "learning_rate": 1.3892526370748792e-05, "loss": 0.6774, "step": 15805 }, { "epoch": 1.1747305834262356, "grad_norm": 2.2479486862674807, "learning_rate": 1.389178727192091e-05, "loss": 0.6993, "step": 15806 }, { "epoch": 1.1748049052396878, "grad_norm": 1.9582475958335908, "learning_rate": 1.38910481480379e-05, "loss": 0.7266, "step": 15807 }, { "epoch": 1.17487922705314, "grad_norm": 1.7510200685161394, "learning_rate": 1.3890308999104525e-05, "loss": 0.4968, "step": 15808 }, { "epoch": 1.1749535488665923, "grad_norm": 2.36329629881622, "learning_rate": 1.3889569825125545e-05, "loss": 0.708, "step": 15809 }, { "epoch": 1.1750278706800446, "grad_norm": 2.0351523429201595, "learning_rate": 1.3888830626105713e-05, "loss": 0.7012, "step": 15810 }, { "epoch": 1.1751021924934968, "grad_norm": 1.8278888490341338, "learning_rate": 1.3888091402049795e-05, "loss": 0.5752, "step": 15811 }, { "epoch": 1.175176514306949, "grad_norm": 1.7790141509008657, "learning_rate": 1.3887352152962543e-05, "loss": 0.6517, "step": 15812 }, { "epoch": 1.1752508361204013, "grad_norm": 2.1257049775730623, "learning_rate": 1.3886612878848719e-05, "loss": 0.5299, "step": 15813 }, { "epoch": 1.1753251579338535, "grad_norm": 1.9604653327609172, "learning_rate": 1.3885873579713084e-05, "loss": 0.6602, "step": 15814 }, { "epoch": 1.1753994797473057, "grad_norm": 2.361444985478235, "learning_rate": 1.38851342555604e-05, "loss": 0.4634, "step": 15815 }, { "epoch": 1.175473801560758, "grad_norm": 1.7034510058905004, "learning_rate": 1.3884394906395419e-05, "loss": 0.593, "step": 15816 }, { "epoch": 1.1755481233742104, "grad_norm": 1.5864337659091314, "learning_rate": 1.3883655532222908e-05, "loss": 0.4441, "step": 15817 }, { "epoch": 1.1756224451876625, "grad_norm": 2.507201980249589, "learning_rate": 1.3882916133047623e-05, "loss": 0.6295, "step": 15818 }, { "epoch": 1.175696767001115, "grad_norm": 2.040914204576183, "learning_rate": 1.3882176708874325e-05, "loss": 0.5484, "step": 15819 }, { "epoch": 1.1757710888145672, "grad_norm": 1.5637243381279955, "learning_rate": 1.3881437259707773e-05, "loss": 0.478, "step": 15820 }, { "epoch": 1.1758454106280194, "grad_norm": 1.7292921061084514, "learning_rate": 1.3880697785552732e-05, "loss": 0.5834, "step": 15821 }, { "epoch": 1.1759197324414716, "grad_norm": 2.1624841202117113, "learning_rate": 1.3879958286413962e-05, "loss": 0.6688, "step": 15822 }, { "epoch": 1.1759940542549239, "grad_norm": 2.0521650039959134, "learning_rate": 1.3879218762296219e-05, "loss": 0.6345, "step": 15823 }, { "epoch": 1.176068376068376, "grad_norm": 1.842524144997368, "learning_rate": 1.3878479213204266e-05, "loss": 0.5694, "step": 15824 }, { "epoch": 1.1761426978818283, "grad_norm": 1.9714948165810953, "learning_rate": 1.3877739639142864e-05, "loss": 0.7991, "step": 15825 }, { "epoch": 1.1762170196952806, "grad_norm": 2.5264146384620716, "learning_rate": 1.387700004011678e-05, "loss": 0.7279, "step": 15826 }, { "epoch": 1.1762913415087328, "grad_norm": 1.550403702115198, "learning_rate": 1.3876260416130767e-05, "loss": 0.5594, "step": 15827 }, { "epoch": 1.176365663322185, "grad_norm": 1.6485589571383046, "learning_rate": 1.3875520767189595e-05, "loss": 0.4984, "step": 15828 }, { "epoch": 1.1764399851356373, "grad_norm": 1.8023847729641191, "learning_rate": 1.3874781093298015e-05, "loss": 0.647, "step": 15829 }, { "epoch": 1.1765143069490895, "grad_norm": 2.1049969511079665, "learning_rate": 1.3874041394460799e-05, "loss": 0.5963, "step": 15830 }, { "epoch": 1.1765886287625418, "grad_norm": 1.9815752180181736, "learning_rate": 1.3873301670682704e-05, "loss": 0.5712, "step": 15831 }, { "epoch": 1.176662950575994, "grad_norm": 2.3109170615731154, "learning_rate": 1.3872561921968497e-05, "loss": 0.5184, "step": 15832 }, { "epoch": 1.1767372723894463, "grad_norm": 2.159599386796174, "learning_rate": 1.3871822148322938e-05, "loss": 0.7025, "step": 15833 }, { "epoch": 1.1768115942028985, "grad_norm": 1.8661527046212476, "learning_rate": 1.3871082349750784e-05, "loss": 0.4677, "step": 15834 }, { "epoch": 1.1768859160163507, "grad_norm": 1.801155467000061, "learning_rate": 1.3870342526256806e-05, "loss": 0.4848, "step": 15835 }, { "epoch": 1.176960237829803, "grad_norm": 2.054953320894298, "learning_rate": 1.3869602677845762e-05, "loss": 0.5953, "step": 15836 }, { "epoch": 1.1770345596432552, "grad_norm": 2.0813230733458816, "learning_rate": 1.3868862804522417e-05, "loss": 0.5845, "step": 15837 }, { "epoch": 1.1771088814567077, "grad_norm": 1.8490059954242348, "learning_rate": 1.3868122906291535e-05, "loss": 0.6318, "step": 15838 }, { "epoch": 1.1771832032701597, "grad_norm": 2.0424581815394314, "learning_rate": 1.3867382983157877e-05, "loss": 0.6142, "step": 15839 }, { "epoch": 1.1772575250836121, "grad_norm": 2.098413758791549, "learning_rate": 1.386664303512621e-05, "loss": 0.6899, "step": 15840 }, { "epoch": 1.1773318468970644, "grad_norm": 1.9631035607350693, "learning_rate": 1.3865903062201294e-05, "loss": 0.6727, "step": 15841 }, { "epoch": 1.1774061687105166, "grad_norm": 2.2205369356567446, "learning_rate": 1.3865163064387895e-05, "loss": 0.7846, "step": 15842 }, { "epoch": 1.1774804905239689, "grad_norm": 1.933229195863097, "learning_rate": 1.3864423041690777e-05, "loss": 0.7026, "step": 15843 }, { "epoch": 1.177554812337421, "grad_norm": 2.1776253262193412, "learning_rate": 1.3863682994114704e-05, "loss": 0.7155, "step": 15844 }, { "epoch": 1.1776291341508733, "grad_norm": 3.8743836028575167, "learning_rate": 1.3862942921664441e-05, "loss": 0.6096, "step": 15845 }, { "epoch": 1.1777034559643256, "grad_norm": 1.7978294474078622, "learning_rate": 1.3862202824344751e-05, "loss": 0.5489, "step": 15846 }, { "epoch": 1.1777777777777778, "grad_norm": 1.698219252572613, "learning_rate": 1.38614627021604e-05, "loss": 0.6166, "step": 15847 }, { "epoch": 1.17785209959123, "grad_norm": 1.927985370671468, "learning_rate": 1.386072255511615e-05, "loss": 0.6064, "step": 15848 }, { "epoch": 1.1779264214046823, "grad_norm": 2.4354326183691315, "learning_rate": 1.3859982383216772e-05, "loss": 0.7001, "step": 15849 }, { "epoch": 1.1780007432181345, "grad_norm": 1.8996661050267796, "learning_rate": 1.3859242186467028e-05, "loss": 0.5716, "step": 15850 }, { "epoch": 1.1780750650315868, "grad_norm": 1.8408707357127403, "learning_rate": 1.385850196487168e-05, "loss": 0.5853, "step": 15851 }, { "epoch": 1.178149386845039, "grad_norm": 2.2193631704745744, "learning_rate": 1.3857761718435499e-05, "loss": 0.7631, "step": 15852 }, { "epoch": 1.1782237086584912, "grad_norm": 1.964286961545533, "learning_rate": 1.3857021447163245e-05, "loss": 0.548, "step": 15853 }, { "epoch": 1.1782980304719435, "grad_norm": 1.6878721450684313, "learning_rate": 1.3856281151059691e-05, "loss": 0.514, "step": 15854 }, { "epoch": 1.1783723522853957, "grad_norm": 2.085386980176165, "learning_rate": 1.38555408301296e-05, "loss": 0.6857, "step": 15855 }, { "epoch": 1.178446674098848, "grad_norm": 1.6603605512107193, "learning_rate": 1.3854800484377732e-05, "loss": 0.5278, "step": 15856 }, { "epoch": 1.1785209959123002, "grad_norm": 2.247063531183789, "learning_rate": 1.3854060113808862e-05, "loss": 0.7102, "step": 15857 }, { "epoch": 1.1785953177257524, "grad_norm": 1.9626465819800691, "learning_rate": 1.385331971842775e-05, "loss": 0.6485, "step": 15858 }, { "epoch": 1.1786696395392047, "grad_norm": 1.8158138388194265, "learning_rate": 1.3852579298239168e-05, "loss": 0.614, "step": 15859 }, { "epoch": 1.178743961352657, "grad_norm": 1.7005906075780912, "learning_rate": 1.3851838853247881e-05, "loss": 0.5346, "step": 15860 }, { "epoch": 1.1788182831661094, "grad_norm": 1.5350113457359442, "learning_rate": 1.3851098383458657e-05, "loss": 0.5034, "step": 15861 }, { "epoch": 1.1788926049795614, "grad_norm": 1.482968022001691, "learning_rate": 1.3850357888876257e-05, "loss": 0.5611, "step": 15862 }, { "epoch": 1.1789669267930138, "grad_norm": 2.101982909929623, "learning_rate": 1.3849617369505452e-05, "loss": 0.5109, "step": 15863 }, { "epoch": 1.179041248606466, "grad_norm": 2.0013460877105818, "learning_rate": 1.3848876825351014e-05, "loss": 0.6567, "step": 15864 }, { "epoch": 1.1791155704199183, "grad_norm": 2.316044516189307, "learning_rate": 1.3848136256417706e-05, "loss": 0.6287, "step": 15865 }, { "epoch": 1.1791898922333706, "grad_norm": 1.854650058166479, "learning_rate": 1.3847395662710298e-05, "loss": 0.547, "step": 15866 }, { "epoch": 1.1792642140468228, "grad_norm": 1.936350263366846, "learning_rate": 1.3846655044233552e-05, "loss": 0.6514, "step": 15867 }, { "epoch": 1.179338535860275, "grad_norm": 2.5402398125323473, "learning_rate": 1.3845914400992242e-05, "loss": 0.7426, "step": 15868 }, { "epoch": 1.1794128576737273, "grad_norm": 2.1296470082320904, "learning_rate": 1.3845173732991135e-05, "loss": 0.7383, "step": 15869 }, { "epoch": 1.1794871794871795, "grad_norm": 1.7546352367236608, "learning_rate": 1.3844433040235e-05, "loss": 0.6369, "step": 15870 }, { "epoch": 1.1795615013006318, "grad_norm": 1.9258885874701612, "learning_rate": 1.3843692322728603e-05, "loss": 0.6829, "step": 15871 }, { "epoch": 1.179635823114084, "grad_norm": 2.2010336629218794, "learning_rate": 1.3842951580476717e-05, "loss": 0.7533, "step": 15872 }, { "epoch": 1.1797101449275362, "grad_norm": 2.1105289353292047, "learning_rate": 1.3842210813484105e-05, "loss": 0.7584, "step": 15873 }, { "epoch": 1.1797844667409885, "grad_norm": 2.219558760066328, "learning_rate": 1.384147002175554e-05, "loss": 0.7287, "step": 15874 }, { "epoch": 1.1798587885544407, "grad_norm": 1.958992429449193, "learning_rate": 1.384072920529579e-05, "loss": 0.5973, "step": 15875 }, { "epoch": 1.179933110367893, "grad_norm": 1.840655323505752, "learning_rate": 1.3839988364109627e-05, "loss": 0.5843, "step": 15876 }, { "epoch": 1.1800074321813452, "grad_norm": 1.8634177930653273, "learning_rate": 1.3839247498201815e-05, "loss": 0.4438, "step": 15877 }, { "epoch": 1.1800817539947974, "grad_norm": 2.345310165108178, "learning_rate": 1.3838506607577129e-05, "loss": 0.739, "step": 15878 }, { "epoch": 1.1801560758082497, "grad_norm": 1.9397422816561487, "learning_rate": 1.3837765692240335e-05, "loss": 0.7394, "step": 15879 }, { "epoch": 1.180230397621702, "grad_norm": 1.7122867611416426, "learning_rate": 1.3837024752196205e-05, "loss": 0.4994, "step": 15880 }, { "epoch": 1.1803047194351541, "grad_norm": 2.2334537019530645, "learning_rate": 1.3836283787449509e-05, "loss": 0.7525, "step": 15881 }, { "epoch": 1.1803790412486064, "grad_norm": 1.769442126734572, "learning_rate": 1.3835542798005017e-05, "loss": 0.5025, "step": 15882 }, { "epoch": 1.1804533630620586, "grad_norm": 2.052580996601009, "learning_rate": 1.3834801783867503e-05, "loss": 0.6314, "step": 15883 }, { "epoch": 1.180527684875511, "grad_norm": 3.224999736132681, "learning_rate": 1.3834060745041728e-05, "loss": 0.5726, "step": 15884 }, { "epoch": 1.180602006688963, "grad_norm": 1.6419147226822235, "learning_rate": 1.3833319681532471e-05, "loss": 0.6592, "step": 15885 }, { "epoch": 1.1806763285024156, "grad_norm": 1.7780352576926186, "learning_rate": 1.3832578593344501e-05, "loss": 0.5317, "step": 15886 }, { "epoch": 1.1807506503158678, "grad_norm": 1.8686391285646722, "learning_rate": 1.383183748048259e-05, "loss": 0.5042, "step": 15887 }, { "epoch": 1.18082497212932, "grad_norm": 1.9626811229514256, "learning_rate": 1.3831096342951509e-05, "loss": 0.5576, "step": 15888 }, { "epoch": 1.1808992939427723, "grad_norm": 2.0798779984307765, "learning_rate": 1.3830355180756027e-05, "loss": 0.6014, "step": 15889 }, { "epoch": 1.1809736157562245, "grad_norm": 1.8317831381271852, "learning_rate": 1.3829613993900916e-05, "loss": 0.6036, "step": 15890 }, { "epoch": 1.1810479375696767, "grad_norm": 1.9715311323238889, "learning_rate": 1.382887278239095e-05, "loss": 0.5461, "step": 15891 }, { "epoch": 1.181122259383129, "grad_norm": 2.0332678582471826, "learning_rate": 1.3828131546230899e-05, "loss": 0.6376, "step": 15892 }, { "epoch": 1.1811965811965812, "grad_norm": 1.9497559432610891, "learning_rate": 1.3827390285425538e-05, "loss": 0.5761, "step": 15893 }, { "epoch": 1.1812709030100335, "grad_norm": 1.9394944971390513, "learning_rate": 1.3826648999979635e-05, "loss": 0.5168, "step": 15894 }, { "epoch": 1.1813452248234857, "grad_norm": 1.6564059708182042, "learning_rate": 1.3825907689897964e-05, "loss": 0.4761, "step": 15895 }, { "epoch": 1.181419546636938, "grad_norm": 1.7096005663049896, "learning_rate": 1.3825166355185297e-05, "loss": 0.5908, "step": 15896 }, { "epoch": 1.1814938684503902, "grad_norm": 2.050403044382607, "learning_rate": 1.3824424995846409e-05, "loss": 0.5048, "step": 15897 }, { "epoch": 1.1815681902638424, "grad_norm": 2.140298033162636, "learning_rate": 1.3823683611886072e-05, "loss": 0.689, "step": 15898 }, { "epoch": 1.1816425120772946, "grad_norm": 1.8382162800859023, "learning_rate": 1.3822942203309055e-05, "loss": 0.5448, "step": 15899 }, { "epoch": 1.1817168338907469, "grad_norm": 1.8510784671466425, "learning_rate": 1.382220077012014e-05, "loss": 0.4836, "step": 15900 }, { "epoch": 1.1817911557041991, "grad_norm": 1.8509758576268023, "learning_rate": 1.382145931232409e-05, "loss": 0.4797, "step": 15901 }, { "epoch": 1.1818654775176514, "grad_norm": 2.056280113496333, "learning_rate": 1.3820717829925685e-05, "loss": 0.5659, "step": 15902 }, { "epoch": 1.1819397993311036, "grad_norm": 2.740718907969204, "learning_rate": 1.3819976322929696e-05, "loss": 0.717, "step": 15903 }, { "epoch": 1.1820141211445558, "grad_norm": 1.9454781599680808, "learning_rate": 1.3819234791340898e-05, "loss": 0.6397, "step": 15904 }, { "epoch": 1.182088442958008, "grad_norm": 1.6755345596494347, "learning_rate": 1.3818493235164065e-05, "loss": 0.555, "step": 15905 }, { "epoch": 1.1821627647714603, "grad_norm": 1.4940319656721104, "learning_rate": 1.3817751654403973e-05, "loss": 0.3717, "step": 15906 }, { "epoch": 1.1822370865849128, "grad_norm": 2.0379688173175308, "learning_rate": 1.3817010049065389e-05, "loss": 0.76, "step": 15907 }, { "epoch": 1.182311408398365, "grad_norm": 2.1904202543622335, "learning_rate": 1.3816268419153096e-05, "loss": 0.6672, "step": 15908 }, { "epoch": 1.1823857302118173, "grad_norm": 2.3131257181022384, "learning_rate": 1.3815526764671864e-05, "loss": 0.794, "step": 15909 }, { "epoch": 1.1824600520252695, "grad_norm": 2.242568292506421, "learning_rate": 1.3814785085626468e-05, "loss": 0.865, "step": 15910 }, { "epoch": 1.1825343738387217, "grad_norm": 1.6360527127276374, "learning_rate": 1.3814043382021684e-05, "loss": 0.5217, "step": 15911 }, { "epoch": 1.182608695652174, "grad_norm": 1.9551738920368424, "learning_rate": 1.3813301653862287e-05, "loss": 0.6957, "step": 15912 }, { "epoch": 1.1826830174656262, "grad_norm": 1.7845300125188372, "learning_rate": 1.381255990115305e-05, "loss": 0.6586, "step": 15913 }, { "epoch": 1.1827573392790784, "grad_norm": 2.1974611313379793, "learning_rate": 1.3811818123898753e-05, "loss": 0.675, "step": 15914 }, { "epoch": 1.1828316610925307, "grad_norm": 1.8972880207498843, "learning_rate": 1.3811076322104166e-05, "loss": 0.6359, "step": 15915 }, { "epoch": 1.182905982905983, "grad_norm": 19.19714786392391, "learning_rate": 1.381033449577407e-05, "loss": 0.7767, "step": 15916 }, { "epoch": 1.1829803047194352, "grad_norm": 1.8623699885501412, "learning_rate": 1.380959264491324e-05, "loss": 0.4604, "step": 15917 }, { "epoch": 1.1830546265328874, "grad_norm": 1.8711760027159166, "learning_rate": 1.3808850769526445e-05, "loss": 0.7005, "step": 15918 }, { "epoch": 1.1831289483463396, "grad_norm": 2.8512468031007163, "learning_rate": 1.3808108869618467e-05, "loss": 0.5969, "step": 15919 }, { "epoch": 1.1832032701597919, "grad_norm": 3.0403696354993923, "learning_rate": 1.3807366945194086e-05, "loss": 0.6658, "step": 15920 }, { "epoch": 1.1832775919732441, "grad_norm": 1.991917616859489, "learning_rate": 1.3806624996258069e-05, "loss": 0.6515, "step": 15921 }, { "epoch": 1.1833519137866964, "grad_norm": 2.366098484116792, "learning_rate": 1.3805883022815203e-05, "loss": 0.7017, "step": 15922 }, { "epoch": 1.1834262356001486, "grad_norm": 1.8791599139700583, "learning_rate": 1.3805141024870254e-05, "loss": 0.6801, "step": 15923 }, { "epoch": 1.1835005574136008, "grad_norm": 2.3468290626378994, "learning_rate": 1.3804399002428008e-05, "loss": 0.7487, "step": 15924 }, { "epoch": 1.183574879227053, "grad_norm": 2.002755237498487, "learning_rate": 1.3803656955493237e-05, "loss": 0.6502, "step": 15925 }, { "epoch": 1.1836492010405053, "grad_norm": 2.0307191749053954, "learning_rate": 1.3802914884070721e-05, "loss": 0.7395, "step": 15926 }, { "epoch": 1.1837235228539575, "grad_norm": 1.9942601899331744, "learning_rate": 1.3802172788165236e-05, "loss": 0.6745, "step": 15927 }, { "epoch": 1.18379784466741, "grad_norm": 1.9431953855845618, "learning_rate": 1.3801430667781562e-05, "loss": 0.5799, "step": 15928 }, { "epoch": 1.183872166480862, "grad_norm": 2.145873693313115, "learning_rate": 1.380068852292447e-05, "loss": 0.5631, "step": 15929 }, { "epoch": 1.1839464882943145, "grad_norm": 1.9533156618356633, "learning_rate": 1.3799946353598746e-05, "loss": 0.7166, "step": 15930 }, { "epoch": 1.1840208101077667, "grad_norm": 2.4905642536813546, "learning_rate": 1.3799204159809163e-05, "loss": 0.6409, "step": 15931 }, { "epoch": 1.184095131921219, "grad_norm": 2.2620456579901025, "learning_rate": 1.3798461941560501e-05, "loss": 0.7938, "step": 15932 }, { "epoch": 1.1841694537346712, "grad_norm": 9.005861668663428, "learning_rate": 1.3797719698857539e-05, "loss": 0.7279, "step": 15933 }, { "epoch": 1.1842437755481234, "grad_norm": 1.7839880641946197, "learning_rate": 1.3796977431705053e-05, "loss": 0.5792, "step": 15934 }, { "epoch": 1.1843180973615757, "grad_norm": 1.9362218436357375, "learning_rate": 1.3796235140107826e-05, "loss": 0.6479, "step": 15935 }, { "epoch": 1.184392419175028, "grad_norm": 2.076776599403578, "learning_rate": 1.3795492824070633e-05, "loss": 0.6368, "step": 15936 }, { "epoch": 1.1844667409884801, "grad_norm": 1.8965993240421275, "learning_rate": 1.3794750483598254e-05, "loss": 0.5738, "step": 15937 }, { "epoch": 1.1845410628019324, "grad_norm": 1.9083635182820486, "learning_rate": 1.3794008118695466e-05, "loss": 0.5608, "step": 15938 }, { "epoch": 1.1846153846153846, "grad_norm": 2.194993053192872, "learning_rate": 1.3793265729367053e-05, "loss": 0.5566, "step": 15939 }, { "epoch": 1.1846897064288369, "grad_norm": 1.8289887948856995, "learning_rate": 1.3792523315617792e-05, "loss": 0.6458, "step": 15940 }, { "epoch": 1.184764028242289, "grad_norm": 1.890359395407262, "learning_rate": 1.3791780877452461e-05, "loss": 0.4998, "step": 15941 }, { "epoch": 1.1848383500557413, "grad_norm": 1.6718156869912082, "learning_rate": 1.3791038414875844e-05, "loss": 0.4467, "step": 15942 }, { "epoch": 1.1849126718691936, "grad_norm": 2.0531713455032183, "learning_rate": 1.3790295927892717e-05, "loss": 0.6457, "step": 15943 }, { "epoch": 1.1849869936826458, "grad_norm": 1.9078734271630364, "learning_rate": 1.378955341650786e-05, "loss": 0.491, "step": 15944 }, { "epoch": 1.185061315496098, "grad_norm": 1.6783669061606994, "learning_rate": 1.3788810880726058e-05, "loss": 0.5768, "step": 15945 }, { "epoch": 1.1851356373095503, "grad_norm": 1.879672436271403, "learning_rate": 1.3788068320552084e-05, "loss": 0.6515, "step": 15946 }, { "epoch": 1.1852099591230025, "grad_norm": 2.3140401079012323, "learning_rate": 1.3787325735990727e-05, "loss": 0.4673, "step": 15947 }, { "epoch": 1.1852842809364548, "grad_norm": 2.219985699637321, "learning_rate": 1.3786583127046763e-05, "loss": 0.7123, "step": 15948 }, { "epoch": 1.185358602749907, "grad_norm": 2.3751527285788523, "learning_rate": 1.3785840493724968e-05, "loss": 0.775, "step": 15949 }, { "epoch": 1.1854329245633592, "grad_norm": 1.8836528619370287, "learning_rate": 1.3785097836030133e-05, "loss": 0.6541, "step": 15950 }, { "epoch": 1.1855072463768117, "grad_norm": 2.410901855349292, "learning_rate": 1.3784355153967035e-05, "loss": 0.7702, "step": 15951 }, { "epoch": 1.1855815681902637, "grad_norm": 1.7169752818803856, "learning_rate": 1.3783612447540453e-05, "loss": 0.5492, "step": 15952 }, { "epoch": 1.1856558900037162, "grad_norm": 1.6567219044825876, "learning_rate": 1.3782869716755172e-05, "loss": 0.6563, "step": 15953 }, { "epoch": 1.1857302118171684, "grad_norm": 2.04824071895179, "learning_rate": 1.3782126961615969e-05, "loss": 0.6172, "step": 15954 }, { "epoch": 1.1858045336306207, "grad_norm": 2.034500882084915, "learning_rate": 1.3781384182127631e-05, "loss": 0.7025, "step": 15955 }, { "epoch": 1.185878855444073, "grad_norm": 2.3607977060708483, "learning_rate": 1.3780641378294938e-05, "loss": 0.5249, "step": 15956 }, { "epoch": 1.1859531772575251, "grad_norm": 2.109078137178249, "learning_rate": 1.3779898550122672e-05, "loss": 0.7475, "step": 15957 }, { "epoch": 1.1860274990709774, "grad_norm": 2.3998561921161654, "learning_rate": 1.3779155697615616e-05, "loss": 0.5747, "step": 15958 }, { "epoch": 1.1861018208844296, "grad_norm": 1.9158310428432794, "learning_rate": 1.377841282077855e-05, "loss": 0.513, "step": 15959 }, { "epoch": 1.1861761426978819, "grad_norm": 1.6920001275601426, "learning_rate": 1.3777669919616258e-05, "loss": 0.5941, "step": 15960 }, { "epoch": 1.186250464511334, "grad_norm": 1.5097074688420729, "learning_rate": 1.3776926994133523e-05, "loss": 0.4729, "step": 15961 }, { "epoch": 1.1863247863247863, "grad_norm": 1.6745607881560645, "learning_rate": 1.377618404433513e-05, "loss": 0.5857, "step": 15962 }, { "epoch": 1.1863991081382386, "grad_norm": 1.798385924712119, "learning_rate": 1.377544107022586e-05, "loss": 0.5962, "step": 15963 }, { "epoch": 1.1864734299516908, "grad_norm": 2.5560004626425488, "learning_rate": 1.3774698071810498e-05, "loss": 0.6877, "step": 15964 }, { "epoch": 1.186547751765143, "grad_norm": 2.2012744737694665, "learning_rate": 1.3773955049093819e-05, "loss": 0.688, "step": 15965 }, { "epoch": 1.1866220735785953, "grad_norm": 2.04784196816015, "learning_rate": 1.3773212002080618e-05, "loss": 0.6486, "step": 15966 }, { "epoch": 1.1866963953920475, "grad_norm": 1.4146139051062616, "learning_rate": 1.3772468930775671e-05, "loss": 0.5133, "step": 15967 }, { "epoch": 1.1867707172054998, "grad_norm": 1.9146457565132347, "learning_rate": 1.3771725835183768e-05, "loss": 0.6575, "step": 15968 }, { "epoch": 1.186845039018952, "grad_norm": 2.018517757710862, "learning_rate": 1.3770982715309691e-05, "loss": 0.5467, "step": 15969 }, { "epoch": 1.1869193608324042, "grad_norm": 1.9655882766670147, "learning_rate": 1.3770239571158219e-05, "loss": 0.5988, "step": 15970 }, { "epoch": 1.1869936826458565, "grad_norm": 1.6480100449082538, "learning_rate": 1.376949640273414e-05, "loss": 0.5003, "step": 15971 }, { "epoch": 1.1870680044593087, "grad_norm": 2.67985387916518, "learning_rate": 1.3768753210042242e-05, "loss": 0.8349, "step": 15972 }, { "epoch": 1.187142326272761, "grad_norm": 1.6978688193883116, "learning_rate": 1.3768009993087307e-05, "loss": 0.5365, "step": 15973 }, { "epoch": 1.1872166480862134, "grad_norm": 2.149226580999515, "learning_rate": 1.3767266751874115e-05, "loss": 0.6334, "step": 15974 }, { "epoch": 1.1872909698996654, "grad_norm": 2.1720725206887415, "learning_rate": 1.376652348640746e-05, "loss": 0.6404, "step": 15975 }, { "epoch": 1.1873652917131179, "grad_norm": 1.7645611429337567, "learning_rate": 1.3765780196692118e-05, "loss": 0.5155, "step": 15976 }, { "epoch": 1.1874396135265701, "grad_norm": 2.2044096712674253, "learning_rate": 1.3765036882732878e-05, "loss": 0.6019, "step": 15977 }, { "epoch": 1.1875139353400224, "grad_norm": 1.6871693949190665, "learning_rate": 1.3764293544534528e-05, "loss": 0.502, "step": 15978 }, { "epoch": 1.1875882571534746, "grad_norm": 1.9793923277690357, "learning_rate": 1.3763550182101853e-05, "loss": 0.6636, "step": 15979 }, { "epoch": 1.1876625789669268, "grad_norm": 2.0242153740253923, "learning_rate": 1.3762806795439638e-05, "loss": 0.6969, "step": 15980 }, { "epoch": 1.187736900780379, "grad_norm": 2.611792275876741, "learning_rate": 1.3762063384552664e-05, "loss": 0.5806, "step": 15981 }, { "epoch": 1.1878112225938313, "grad_norm": 1.9111362008577926, "learning_rate": 1.3761319949445722e-05, "loss": 0.6179, "step": 15982 }, { "epoch": 1.1878855444072836, "grad_norm": 2.0473919319051443, "learning_rate": 1.3760576490123598e-05, "loss": 0.62, "step": 15983 }, { "epoch": 1.1879598662207358, "grad_norm": 1.8436901164982231, "learning_rate": 1.3759833006591077e-05, "loss": 0.6177, "step": 15984 }, { "epoch": 1.188034188034188, "grad_norm": 2.1648223576525, "learning_rate": 1.375908949885295e-05, "loss": 0.7308, "step": 15985 }, { "epoch": 1.1881085098476403, "grad_norm": 3.1129276083859714, "learning_rate": 1.3758345966913994e-05, "loss": 0.6236, "step": 15986 }, { "epoch": 1.1881828316610925, "grad_norm": 2.052235074989731, "learning_rate": 1.3757602410779005e-05, "loss": 0.6379, "step": 15987 }, { "epoch": 1.1882571534745447, "grad_norm": 2.3075566554465667, "learning_rate": 1.3756858830452766e-05, "loss": 0.7172, "step": 15988 }, { "epoch": 1.188331475287997, "grad_norm": 1.660163016610869, "learning_rate": 1.3756115225940065e-05, "loss": 0.378, "step": 15989 }, { "epoch": 1.1884057971014492, "grad_norm": 1.8005074552466354, "learning_rate": 1.375537159724569e-05, "loss": 0.6222, "step": 15990 }, { "epoch": 1.1884801189149015, "grad_norm": 2.12637530933552, "learning_rate": 1.3754627944374429e-05, "loss": 0.5795, "step": 15991 }, { "epoch": 1.1885544407283537, "grad_norm": 1.6796895959951563, "learning_rate": 1.3753884267331064e-05, "loss": 0.5269, "step": 15992 }, { "epoch": 1.188628762541806, "grad_norm": 2.111023949645459, "learning_rate": 1.3753140566120386e-05, "loss": 0.7157, "step": 15993 }, { "epoch": 1.1887030843552582, "grad_norm": 2.0391932431939344, "learning_rate": 1.3752396840747187e-05, "loss": 0.5927, "step": 15994 }, { "epoch": 1.1887774061687106, "grad_norm": 1.7810931886129016, "learning_rate": 1.375165309121625e-05, "loss": 0.6028, "step": 15995 }, { "epoch": 1.1888517279821627, "grad_norm": 1.7870673923392983, "learning_rate": 1.3750909317532368e-05, "loss": 0.6202, "step": 15996 }, { "epoch": 1.1889260497956151, "grad_norm": 1.8502447093288181, "learning_rate": 1.3750165519700324e-05, "loss": 0.7189, "step": 15997 }, { "epoch": 1.1890003716090674, "grad_norm": 2.025976951781114, "learning_rate": 1.374942169772491e-05, "loss": 0.6173, "step": 15998 }, { "epoch": 1.1890746934225196, "grad_norm": 1.8185644573974389, "learning_rate": 1.3748677851610913e-05, "loss": 0.6155, "step": 15999 }, { "epoch": 1.1891490152359718, "grad_norm": 1.8368464973631566, "learning_rate": 1.3747933981363123e-05, "loss": 0.4173, "step": 16000 }, { "epoch": 1.189223337049424, "grad_norm": 2.281408652380787, "learning_rate": 1.3747190086986328e-05, "loss": 0.7678, "step": 16001 }, { "epoch": 1.1892976588628763, "grad_norm": 2.6528461654439948, "learning_rate": 1.374644616848532e-05, "loss": 0.7234, "step": 16002 }, { "epoch": 1.1893719806763285, "grad_norm": 1.9502631309489016, "learning_rate": 1.3745702225864883e-05, "loss": 0.7326, "step": 16003 }, { "epoch": 1.1894463024897808, "grad_norm": 2.0909599548269893, "learning_rate": 1.3744958259129808e-05, "loss": 0.5979, "step": 16004 }, { "epoch": 1.189520624303233, "grad_norm": 1.6218566309647733, "learning_rate": 1.3744214268284887e-05, "loss": 0.5296, "step": 16005 }, { "epoch": 1.1895949461166853, "grad_norm": 1.8897074946194798, "learning_rate": 1.3743470253334911e-05, "loss": 0.3899, "step": 16006 }, { "epoch": 1.1896692679301375, "grad_norm": 2.49476646958452, "learning_rate": 1.3742726214284668e-05, "loss": 0.5804, "step": 16007 }, { "epoch": 1.1897435897435897, "grad_norm": 2.394566148566236, "learning_rate": 1.3741982151138949e-05, "loss": 0.6581, "step": 16008 }, { "epoch": 1.189817911557042, "grad_norm": 1.7758396529819482, "learning_rate": 1.3741238063902538e-05, "loss": 0.6017, "step": 16009 }, { "epoch": 1.1898922333704942, "grad_norm": 2.358263155434812, "learning_rate": 1.3740493952580235e-05, "loss": 0.7444, "step": 16010 }, { "epoch": 1.1899665551839465, "grad_norm": 2.00419849320522, "learning_rate": 1.3739749817176822e-05, "loss": 0.6632, "step": 16011 }, { "epoch": 1.1900408769973987, "grad_norm": 1.795276714874885, "learning_rate": 1.3739005657697098e-05, "loss": 0.4977, "step": 16012 }, { "epoch": 1.190115198810851, "grad_norm": 2.1654268574379145, "learning_rate": 1.3738261474145849e-05, "loss": 0.6628, "step": 16013 }, { "epoch": 1.1901895206243032, "grad_norm": 2.3927760289728557, "learning_rate": 1.3737517266527865e-05, "loss": 0.6676, "step": 16014 }, { "epoch": 1.1902638424377554, "grad_norm": 2.0101908754176736, "learning_rate": 1.3736773034847936e-05, "loss": 0.6949, "step": 16015 }, { "epoch": 1.1903381642512076, "grad_norm": 1.8297864225277949, "learning_rate": 1.373602877911086e-05, "loss": 0.5511, "step": 16016 }, { "epoch": 1.1904124860646599, "grad_norm": 1.734381357910463, "learning_rate": 1.3735284499321421e-05, "loss": 0.6149, "step": 16017 }, { "epoch": 1.1904868078781123, "grad_norm": 2.184760153470083, "learning_rate": 1.3734540195484418e-05, "loss": 0.5927, "step": 16018 }, { "epoch": 1.1905611296915644, "grad_norm": 2.184441284315347, "learning_rate": 1.373379586760464e-05, "loss": 0.7208, "step": 16019 }, { "epoch": 1.1906354515050168, "grad_norm": 2.183212441149736, "learning_rate": 1.3733051515686875e-05, "loss": 0.6653, "step": 16020 }, { "epoch": 1.190709773318469, "grad_norm": 2.118333794174131, "learning_rate": 1.3732307139735918e-05, "loss": 0.6305, "step": 16021 }, { "epoch": 1.1907840951319213, "grad_norm": 2.0513841101561683, "learning_rate": 1.3731562739756562e-05, "loss": 0.6018, "step": 16022 }, { "epoch": 1.1908584169453735, "grad_norm": 2.284283216958018, "learning_rate": 1.3730818315753598e-05, "loss": 0.6056, "step": 16023 }, { "epoch": 1.1909327387588258, "grad_norm": 1.9098076937735926, "learning_rate": 1.373007386773182e-05, "loss": 0.6456, "step": 16024 }, { "epoch": 1.191007060572278, "grad_norm": 2.099292000087574, "learning_rate": 1.372932939569602e-05, "loss": 0.5758, "step": 16025 }, { "epoch": 1.1910813823857302, "grad_norm": 1.8068297537726654, "learning_rate": 1.3728584899650992e-05, "loss": 0.5458, "step": 16026 }, { "epoch": 1.1911557041991825, "grad_norm": 1.7341666706550705, "learning_rate": 1.3727840379601524e-05, "loss": 0.6399, "step": 16027 }, { "epoch": 1.1912300260126347, "grad_norm": 1.9459742099905135, "learning_rate": 1.3727095835552416e-05, "loss": 0.5109, "step": 16028 }, { "epoch": 1.191304347826087, "grad_norm": 2.203913981861159, "learning_rate": 1.372635126750846e-05, "loss": 0.6065, "step": 16029 }, { "epoch": 1.1913786696395392, "grad_norm": 1.7564818064683472, "learning_rate": 1.3725606675474448e-05, "loss": 0.5629, "step": 16030 }, { "epoch": 1.1914529914529914, "grad_norm": 1.8882149922892615, "learning_rate": 1.3724862059455171e-05, "loss": 0.5694, "step": 16031 }, { "epoch": 1.1915273132664437, "grad_norm": 1.853552668669026, "learning_rate": 1.3724117419455424e-05, "loss": 0.5902, "step": 16032 }, { "epoch": 1.191601635079896, "grad_norm": 2.1546301568604576, "learning_rate": 1.3723372755480005e-05, "loss": 0.8228, "step": 16033 }, { "epoch": 1.1916759568933482, "grad_norm": 2.717412121890572, "learning_rate": 1.3722628067533706e-05, "loss": 0.6839, "step": 16034 }, { "epoch": 1.1917502787068004, "grad_norm": 1.9422948273523561, "learning_rate": 1.372188335562132e-05, "loss": 0.657, "step": 16035 }, { "epoch": 1.1918246005202526, "grad_norm": 1.7780440018321788, "learning_rate": 1.3721138619747638e-05, "loss": 0.3932, "step": 16036 }, { "epoch": 1.1918989223337049, "grad_norm": 2.468234350140484, "learning_rate": 1.3720393859917463e-05, "loss": 0.692, "step": 16037 }, { "epoch": 1.191973244147157, "grad_norm": 1.934568602284155, "learning_rate": 1.3719649076135584e-05, "loss": 0.7094, "step": 16038 }, { "epoch": 1.1920475659606093, "grad_norm": 1.763324648542244, "learning_rate": 1.3718904268406797e-05, "loss": 0.502, "step": 16039 }, { "epoch": 1.1921218877740616, "grad_norm": 1.920035345915876, "learning_rate": 1.3718159436735898e-05, "loss": 0.6393, "step": 16040 }, { "epoch": 1.192196209587514, "grad_norm": 1.9191430863399443, "learning_rate": 1.3717414581127684e-05, "loss": 0.6423, "step": 16041 }, { "epoch": 1.192270531400966, "grad_norm": 2.7378697130211025, "learning_rate": 1.3716669701586942e-05, "loss": 0.6174, "step": 16042 }, { "epoch": 1.1923448532144185, "grad_norm": 1.841509567300689, "learning_rate": 1.3715924798118476e-05, "loss": 0.5455, "step": 16043 }, { "epoch": 1.1924191750278708, "grad_norm": 1.9000681796877106, "learning_rate": 1.371517987072708e-05, "loss": 0.5209, "step": 16044 }, { "epoch": 1.192493496841323, "grad_norm": 1.942320410945409, "learning_rate": 1.3714434919417547e-05, "loss": 0.4925, "step": 16045 }, { "epoch": 1.1925678186547752, "grad_norm": 1.775201254860941, "learning_rate": 1.3713689944194677e-05, "loss": 0.5778, "step": 16046 }, { "epoch": 1.1926421404682275, "grad_norm": 1.6314944965379357, "learning_rate": 1.3712944945063264e-05, "loss": 0.5012, "step": 16047 }, { "epoch": 1.1927164622816797, "grad_norm": 1.7832597113137123, "learning_rate": 1.3712199922028102e-05, "loss": 0.5477, "step": 16048 }, { "epoch": 1.192790784095132, "grad_norm": 2.7260765564549456, "learning_rate": 1.371145487509399e-05, "loss": 0.5497, "step": 16049 }, { "epoch": 1.1928651059085842, "grad_norm": 2.0879830060802256, "learning_rate": 1.3710709804265722e-05, "loss": 0.7058, "step": 16050 }, { "epoch": 1.1929394277220364, "grad_norm": 1.7054464165109076, "learning_rate": 1.3709964709548103e-05, "loss": 0.5938, "step": 16051 }, { "epoch": 1.1930137495354887, "grad_norm": 1.8734895760034265, "learning_rate": 1.370921959094592e-05, "loss": 0.6777, "step": 16052 }, { "epoch": 1.193088071348941, "grad_norm": 1.6277716257822148, "learning_rate": 1.3708474448463974e-05, "loss": 0.5083, "step": 16053 }, { "epoch": 1.1931623931623931, "grad_norm": 1.6927799334677776, "learning_rate": 1.370772928210706e-05, "loss": 0.614, "step": 16054 }, { "epoch": 1.1932367149758454, "grad_norm": 2.230179991003319, "learning_rate": 1.3706984091879979e-05, "loss": 0.7278, "step": 16055 }, { "epoch": 1.1933110367892976, "grad_norm": 1.5993603581079119, "learning_rate": 1.3706238877787526e-05, "loss": 0.4631, "step": 16056 }, { "epoch": 1.1933853586027499, "grad_norm": 1.8445309759606427, "learning_rate": 1.37054936398345e-05, "loss": 0.5282, "step": 16057 }, { "epoch": 1.193459680416202, "grad_norm": 2.098613867521622, "learning_rate": 1.37047483780257e-05, "loss": 0.6544, "step": 16058 }, { "epoch": 1.1935340022296543, "grad_norm": 1.6847219935731224, "learning_rate": 1.3704003092365918e-05, "loss": 0.5484, "step": 16059 }, { "epoch": 1.1936083240431066, "grad_norm": 2.268006473878668, "learning_rate": 1.3703257782859958e-05, "loss": 0.4884, "step": 16060 }, { "epoch": 1.1936826458565588, "grad_norm": 2.107008882069903, "learning_rate": 1.3702512449512619e-05, "loss": 0.6021, "step": 16061 }, { "epoch": 1.1937569676700113, "grad_norm": 1.785845219759955, "learning_rate": 1.3701767092328693e-05, "loss": 0.6003, "step": 16062 }, { "epoch": 1.1938312894834633, "grad_norm": 1.463190987614875, "learning_rate": 1.3701021711312986e-05, "loss": 0.4799, "step": 16063 }, { "epoch": 1.1939056112969157, "grad_norm": 1.9827198111200874, "learning_rate": 1.3700276306470292e-05, "loss": 0.6126, "step": 16064 }, { "epoch": 1.193979933110368, "grad_norm": 2.1039003912909964, "learning_rate": 1.3699530877805409e-05, "loss": 0.6028, "step": 16065 }, { "epoch": 1.1940542549238202, "grad_norm": 2.4776924657299704, "learning_rate": 1.3698785425323143e-05, "loss": 0.6674, "step": 16066 }, { "epoch": 1.1941285767372725, "grad_norm": 1.773231481506384, "learning_rate": 1.3698039949028286e-05, "loss": 0.6017, "step": 16067 }, { "epoch": 1.1942028985507247, "grad_norm": 1.7465919095933398, "learning_rate": 1.3697294448925638e-05, "loss": 0.5248, "step": 16068 }, { "epoch": 1.194277220364177, "grad_norm": 1.9610360491650842, "learning_rate": 1.369654892502e-05, "loss": 0.6165, "step": 16069 }, { "epoch": 1.1943515421776292, "grad_norm": 1.5655420011819292, "learning_rate": 1.3695803377316175e-05, "loss": 0.551, "step": 16070 }, { "epoch": 1.1944258639910814, "grad_norm": 2.074012715139003, "learning_rate": 1.3695057805818956e-05, "loss": 0.5931, "step": 16071 }, { "epoch": 1.1945001858045337, "grad_norm": 1.6597361483062687, "learning_rate": 1.369431221053315e-05, "loss": 0.5433, "step": 16072 }, { "epoch": 1.194574507617986, "grad_norm": 1.9062246417077024, "learning_rate": 1.3693566591463552e-05, "loss": 0.7399, "step": 16073 }, { "epoch": 1.1946488294314381, "grad_norm": 1.7270517518143214, "learning_rate": 1.3692820948614963e-05, "loss": 0.6259, "step": 16074 }, { "epoch": 1.1947231512448904, "grad_norm": 1.9504853010441559, "learning_rate": 1.3692075281992188e-05, "loss": 0.6744, "step": 16075 }, { "epoch": 1.1947974730583426, "grad_norm": 1.8407609185979563, "learning_rate": 1.369132959160002e-05, "loss": 0.5837, "step": 16076 }, { "epoch": 1.1948717948717948, "grad_norm": 2.2951804545142145, "learning_rate": 1.3690583877443264e-05, "loss": 0.5564, "step": 16077 }, { "epoch": 1.194946116685247, "grad_norm": 2.1960444375208548, "learning_rate": 1.368983813952672e-05, "loss": 0.6059, "step": 16078 }, { "epoch": 1.1950204384986993, "grad_norm": 1.8230267808717413, "learning_rate": 1.3689092377855193e-05, "loss": 0.7201, "step": 16079 }, { "epoch": 1.1950947603121516, "grad_norm": 1.698069664320691, "learning_rate": 1.3688346592433476e-05, "loss": 0.505, "step": 16080 }, { "epoch": 1.1951690821256038, "grad_norm": 1.9057652129312088, "learning_rate": 1.368760078326638e-05, "loss": 0.6542, "step": 16081 }, { "epoch": 1.195243403939056, "grad_norm": 2.0872445104889965, "learning_rate": 1.3686854950358699e-05, "loss": 0.6782, "step": 16082 }, { "epoch": 1.1953177257525083, "grad_norm": 2.1681344005220122, "learning_rate": 1.3686109093715238e-05, "loss": 0.75, "step": 16083 }, { "epoch": 1.1953920475659605, "grad_norm": 1.4480447683204407, "learning_rate": 1.3685363213340796e-05, "loss": 0.4375, "step": 16084 }, { "epoch": 1.195466369379413, "grad_norm": 1.621827123462259, "learning_rate": 1.3684617309240178e-05, "loss": 0.5412, "step": 16085 }, { "epoch": 1.195540691192865, "grad_norm": 2.331375783755228, "learning_rate": 1.3683871381418186e-05, "loss": 0.8624, "step": 16086 }, { "epoch": 1.1956150130063175, "grad_norm": 2.0310392032502262, "learning_rate": 1.3683125429879618e-05, "loss": 0.546, "step": 16087 }, { "epoch": 1.1956893348197697, "grad_norm": 1.8253754042288197, "learning_rate": 1.3682379454629282e-05, "loss": 0.4669, "step": 16088 }, { "epoch": 1.195763656633222, "grad_norm": 2.197500837592675, "learning_rate": 1.3681633455671979e-05, "loss": 0.701, "step": 16089 }, { "epoch": 1.1958379784466742, "grad_norm": 1.7422005819058612, "learning_rate": 1.3680887433012508e-05, "loss": 0.5179, "step": 16090 }, { "epoch": 1.1959123002601264, "grad_norm": 1.7923625298941972, "learning_rate": 1.3680141386655677e-05, "loss": 0.5407, "step": 16091 }, { "epoch": 1.1959866220735786, "grad_norm": 2.453957489847236, "learning_rate": 1.3679395316606287e-05, "loss": 0.6412, "step": 16092 }, { "epoch": 1.1960609438870309, "grad_norm": 1.4161604291636771, "learning_rate": 1.367864922286914e-05, "loss": 0.5494, "step": 16093 }, { "epoch": 1.1961352657004831, "grad_norm": 2.032314490275248, "learning_rate": 1.3677903105449039e-05, "loss": 0.661, "step": 16094 }, { "epoch": 1.1962095875139354, "grad_norm": 2.1243662639690113, "learning_rate": 1.3677156964350791e-05, "loss": 0.8092, "step": 16095 }, { "epoch": 1.1962839093273876, "grad_norm": 1.9439093969829693, "learning_rate": 1.3676410799579194e-05, "loss": 0.6259, "step": 16096 }, { "epoch": 1.1963582311408398, "grad_norm": 1.9337607410744835, "learning_rate": 1.3675664611139057e-05, "loss": 0.5583, "step": 16097 }, { "epoch": 1.196432552954292, "grad_norm": 2.099733241047461, "learning_rate": 1.3674918399035185e-05, "loss": 0.6566, "step": 16098 }, { "epoch": 1.1965068747677443, "grad_norm": 2.2601778998621005, "learning_rate": 1.3674172163272374e-05, "loss": 0.6297, "step": 16099 }, { "epoch": 1.1965811965811965, "grad_norm": 2.1663123415169103, "learning_rate": 1.3673425903855438e-05, "loss": 0.5884, "step": 16100 }, { "epoch": 1.1966555183946488, "grad_norm": 1.8946475256196627, "learning_rate": 1.3672679620789171e-05, "loss": 0.6692, "step": 16101 }, { "epoch": 1.196729840208101, "grad_norm": 1.6310172587186604, "learning_rate": 1.3671933314078384e-05, "loss": 0.5477, "step": 16102 }, { "epoch": 1.1968041620215533, "grad_norm": 1.7665465374855518, "learning_rate": 1.3671186983727887e-05, "loss": 0.5609, "step": 16103 }, { "epoch": 1.1968784838350055, "grad_norm": 2.156599581344196, "learning_rate": 1.3670440629742472e-05, "loss": 0.63, "step": 16104 }, { "epoch": 1.1969528056484577, "grad_norm": 2.1041455301362335, "learning_rate": 1.3669694252126955e-05, "loss": 0.668, "step": 16105 }, { "epoch": 1.19702712746191, "grad_norm": 2.605016730870069, "learning_rate": 1.3668947850886136e-05, "loss": 0.7026, "step": 16106 }, { "epoch": 1.1971014492753622, "grad_norm": 1.9796966304156687, "learning_rate": 1.3668201426024818e-05, "loss": 0.5532, "step": 16107 }, { "epoch": 1.1971757710888147, "grad_norm": 4.902814834372402, "learning_rate": 1.3667454977547812e-05, "loss": 0.6041, "step": 16108 }, { "epoch": 1.1972500929022667, "grad_norm": 1.8301353118953319, "learning_rate": 1.3666708505459923e-05, "loss": 0.6905, "step": 16109 }, { "epoch": 1.1973244147157192, "grad_norm": 2.0482781773892094, "learning_rate": 1.366596200976595e-05, "loss": 0.6445, "step": 16110 }, { "epoch": 1.1973987365291714, "grad_norm": 3.316758351096095, "learning_rate": 1.366521549047071e-05, "loss": 0.6365, "step": 16111 }, { "epoch": 1.1974730583426236, "grad_norm": 1.7429220138741819, "learning_rate": 1.3664468947578997e-05, "loss": 0.6, "step": 16112 }, { "epoch": 1.1975473801560759, "grad_norm": 2.895641370164901, "learning_rate": 1.3663722381095625e-05, "loss": 0.688, "step": 16113 }, { "epoch": 1.197621701969528, "grad_norm": 1.8193274290502695, "learning_rate": 1.3662975791025397e-05, "loss": 0.5179, "step": 16114 }, { "epoch": 1.1976960237829803, "grad_norm": 2.0208542309954534, "learning_rate": 1.3662229177373124e-05, "loss": 0.5581, "step": 16115 }, { "epoch": 1.1977703455964326, "grad_norm": 1.9201181665881983, "learning_rate": 1.3661482540143612e-05, "loss": 0.7197, "step": 16116 }, { "epoch": 1.1978446674098848, "grad_norm": 2.111222185573303, "learning_rate": 1.366073587934166e-05, "loss": 0.6, "step": 16117 }, { "epoch": 1.197918989223337, "grad_norm": 1.848240583589286, "learning_rate": 1.3659989194972081e-05, "loss": 0.7332, "step": 16118 }, { "epoch": 1.1979933110367893, "grad_norm": 1.8523350912980907, "learning_rate": 1.3659242487039684e-05, "loss": 0.5656, "step": 16119 }, { "epoch": 1.1980676328502415, "grad_norm": 2.2683045554809653, "learning_rate": 1.3658495755549272e-05, "loss": 0.792, "step": 16120 }, { "epoch": 1.1981419546636938, "grad_norm": 5.4923324910908935, "learning_rate": 1.3657749000505654e-05, "loss": 0.5758, "step": 16121 }, { "epoch": 1.198216276477146, "grad_norm": 2.6864156331309754, "learning_rate": 1.365700222191364e-05, "loss": 0.4756, "step": 16122 }, { "epoch": 1.1982905982905983, "grad_norm": 2.177368350820902, "learning_rate": 1.3656255419778033e-05, "loss": 0.5628, "step": 16123 }, { "epoch": 1.1983649201040505, "grad_norm": 2.3116616845312223, "learning_rate": 1.365550859410364e-05, "loss": 0.7214, "step": 16124 }, { "epoch": 1.1984392419175027, "grad_norm": 1.8725283664430137, "learning_rate": 1.3654761744895277e-05, "loss": 0.5956, "step": 16125 }, { "epoch": 1.198513563730955, "grad_norm": 1.89890707434271, "learning_rate": 1.3654014872157749e-05, "loss": 0.6124, "step": 16126 }, { "epoch": 1.1985878855444072, "grad_norm": 1.7982052768325203, "learning_rate": 1.365326797589586e-05, "loss": 0.6611, "step": 16127 }, { "epoch": 1.1986622073578594, "grad_norm": 2.4509672207533826, "learning_rate": 1.3652521056114423e-05, "loss": 0.7347, "step": 16128 }, { "epoch": 1.198736529171312, "grad_norm": 2.1223849913289117, "learning_rate": 1.3651774112818244e-05, "loss": 0.6444, "step": 16129 }, { "epoch": 1.198810850984764, "grad_norm": 1.704685827707165, "learning_rate": 1.3651027146012133e-05, "loss": 0.6837, "step": 16130 }, { "epoch": 1.1988851727982164, "grad_norm": 2.073706295990363, "learning_rate": 1.3650280155700899e-05, "loss": 0.6801, "step": 16131 }, { "epoch": 1.1989594946116686, "grad_norm": 2.094644796451872, "learning_rate": 1.364953314188935e-05, "loss": 0.4793, "step": 16132 }, { "epoch": 1.1990338164251209, "grad_norm": 1.755318247605326, "learning_rate": 1.3648786104582295e-05, "loss": 0.6019, "step": 16133 }, { "epoch": 1.199108138238573, "grad_norm": 1.8980645458610588, "learning_rate": 1.3648039043784546e-05, "loss": 0.5971, "step": 16134 }, { "epoch": 1.1991824600520253, "grad_norm": 1.985682002776293, "learning_rate": 1.364729195950091e-05, "loss": 0.7597, "step": 16135 }, { "epoch": 1.1992567818654776, "grad_norm": 1.6994794630542402, "learning_rate": 1.3646544851736197e-05, "loss": 0.5271, "step": 16136 }, { "epoch": 1.1993311036789298, "grad_norm": 3.5363768409277854, "learning_rate": 1.364579772049522e-05, "loss": 0.6806, "step": 16137 }, { "epoch": 1.199405425492382, "grad_norm": 2.0596125061859385, "learning_rate": 1.3645050565782785e-05, "loss": 0.6062, "step": 16138 }, { "epoch": 1.1994797473058343, "grad_norm": 2.123431079839473, "learning_rate": 1.3644303387603704e-05, "loss": 0.6877, "step": 16139 }, { "epoch": 1.1995540691192865, "grad_norm": 2.0488467037004208, "learning_rate": 1.3643556185962786e-05, "loss": 0.6263, "step": 16140 }, { "epoch": 1.1996283909327388, "grad_norm": 2.201977195312835, "learning_rate": 1.3642808960864842e-05, "loss": 0.5467, "step": 16141 }, { "epoch": 1.199702712746191, "grad_norm": 2.0260420151659977, "learning_rate": 1.364206171231468e-05, "loss": 0.6293, "step": 16142 }, { "epoch": 1.1997770345596432, "grad_norm": 2.2367759984993163, "learning_rate": 1.3641314440317119e-05, "loss": 0.5982, "step": 16143 }, { "epoch": 1.1998513563730955, "grad_norm": 1.8808106531224504, "learning_rate": 1.3640567144876967e-05, "loss": 0.518, "step": 16144 }, { "epoch": 1.1999256781865477, "grad_norm": 2.09481059315018, "learning_rate": 1.3639819825999025e-05, "loss": 0.635, "step": 16145 }, { "epoch": 1.2, "grad_norm": 2.174106912861329, "learning_rate": 1.3639072483688117e-05, "loss": 0.5954, "step": 16146 }, { "epoch": 1.2000743218134522, "grad_norm": 1.6636526097378257, "learning_rate": 1.3638325117949044e-05, "loss": 0.5966, "step": 16147 }, { "epoch": 1.2001486436269044, "grad_norm": 2.7511839270150906, "learning_rate": 1.3637577728786628e-05, "loss": 0.7685, "step": 16148 }, { "epoch": 1.2002229654403567, "grad_norm": 3.057162013410161, "learning_rate": 1.3636830316205676e-05, "loss": 0.8386, "step": 16149 }, { "epoch": 1.200297287253809, "grad_norm": 2.0563380486750957, "learning_rate": 1.3636082880210995e-05, "loss": 0.567, "step": 16150 }, { "epoch": 1.2003716090672611, "grad_norm": 2.0758861144094207, "learning_rate": 1.3635335420807404e-05, "loss": 0.5642, "step": 16151 }, { "epoch": 1.2004459308807136, "grad_norm": 1.9580228120236218, "learning_rate": 1.3634587937999708e-05, "loss": 0.7283, "step": 16152 }, { "epoch": 1.2005202526941656, "grad_norm": 1.8530854557987266, "learning_rate": 1.3633840431792727e-05, "loss": 0.6006, "step": 16153 }, { "epoch": 1.200594574507618, "grad_norm": 1.8893352358823852, "learning_rate": 1.363309290219127e-05, "loss": 0.603, "step": 16154 }, { "epoch": 1.2006688963210703, "grad_norm": 2.373170484627989, "learning_rate": 1.3632345349200149e-05, "loss": 0.6815, "step": 16155 }, { "epoch": 1.2007432181345226, "grad_norm": 2.037564164631167, "learning_rate": 1.3631597772824176e-05, "loss": 0.5854, "step": 16156 }, { "epoch": 1.2008175399479748, "grad_norm": 1.7210968150879966, "learning_rate": 1.3630850173068164e-05, "loss": 0.6314, "step": 16157 }, { "epoch": 1.200891861761427, "grad_norm": 1.809142829642874, "learning_rate": 1.363010254993693e-05, "loss": 0.6498, "step": 16158 }, { "epoch": 1.2009661835748793, "grad_norm": 1.615238841086307, "learning_rate": 1.3629354903435283e-05, "loss": 0.4762, "step": 16159 }, { "epoch": 1.2010405053883315, "grad_norm": 1.5012707989570868, "learning_rate": 1.3628607233568039e-05, "loss": 0.4967, "step": 16160 }, { "epoch": 1.2011148272017838, "grad_norm": 1.729371136919457, "learning_rate": 1.3627859540340006e-05, "loss": 0.6396, "step": 16161 }, { "epoch": 1.201189149015236, "grad_norm": 2.0506464070816808, "learning_rate": 1.3627111823756003e-05, "loss": 0.6911, "step": 16162 }, { "epoch": 1.2012634708286882, "grad_norm": 1.74098485664214, "learning_rate": 1.3626364083820843e-05, "loss": 0.4956, "step": 16163 }, { "epoch": 1.2013377926421405, "grad_norm": 2.433385857165797, "learning_rate": 1.3625616320539338e-05, "loss": 0.6489, "step": 16164 }, { "epoch": 1.2014121144555927, "grad_norm": 1.8862321093384926, "learning_rate": 1.3624868533916305e-05, "loss": 0.621, "step": 16165 }, { "epoch": 1.201486436269045, "grad_norm": 1.9624479813842899, "learning_rate": 1.3624120723956558e-05, "loss": 0.6261, "step": 16166 }, { "epoch": 1.2015607580824972, "grad_norm": 1.96622785848256, "learning_rate": 1.3623372890664906e-05, "loss": 0.5827, "step": 16167 }, { "epoch": 1.2016350798959494, "grad_norm": 1.8224644780863932, "learning_rate": 1.3622625034046168e-05, "loss": 0.599, "step": 16168 }, { "epoch": 1.2017094017094017, "grad_norm": 2.2203447393983264, "learning_rate": 1.3621877154105159e-05, "loss": 0.6983, "step": 16169 }, { "epoch": 1.201783723522854, "grad_norm": 1.707359681596448, "learning_rate": 1.3621129250846695e-05, "loss": 0.6032, "step": 16170 }, { "epoch": 1.2018580453363061, "grad_norm": 1.9859203735911664, "learning_rate": 1.3620381324275587e-05, "loss": 0.626, "step": 16171 }, { "epoch": 1.2019323671497584, "grad_norm": 1.9705823343065274, "learning_rate": 1.361963337439665e-05, "loss": 0.6708, "step": 16172 }, { "epoch": 1.2020066889632106, "grad_norm": 1.8937317868325, "learning_rate": 1.3618885401214703e-05, "loss": 0.4473, "step": 16173 }, { "epoch": 1.2020810107766629, "grad_norm": 1.9915441122044768, "learning_rate": 1.3618137404734557e-05, "loss": 0.5391, "step": 16174 }, { "epoch": 1.2021553325901153, "grad_norm": 4.497524697262092, "learning_rate": 1.3617389384961032e-05, "loss": 0.4409, "step": 16175 }, { "epoch": 1.2022296544035673, "grad_norm": 2.0614249624617544, "learning_rate": 1.3616641341898944e-05, "loss": 0.5192, "step": 16176 }, { "epoch": 1.2023039762170198, "grad_norm": 2.105943174902924, "learning_rate": 1.3615893275553105e-05, "loss": 0.6851, "step": 16177 }, { "epoch": 1.202378298030472, "grad_norm": 1.5335788184195824, "learning_rate": 1.3615145185928334e-05, "loss": 0.467, "step": 16178 }, { "epoch": 1.2024526198439243, "grad_norm": 5.384207860271216, "learning_rate": 1.3614397073029442e-05, "loss": 0.5526, "step": 16179 }, { "epoch": 1.2025269416573765, "grad_norm": 2.049421186858725, "learning_rate": 1.361364893686125e-05, "loss": 0.7405, "step": 16180 }, { "epoch": 1.2026012634708287, "grad_norm": 1.666051514639792, "learning_rate": 1.3612900777428577e-05, "loss": 0.5439, "step": 16181 }, { "epoch": 1.202675585284281, "grad_norm": 1.7767162774938638, "learning_rate": 1.3612152594736237e-05, "loss": 0.6473, "step": 16182 }, { "epoch": 1.2027499070977332, "grad_norm": 2.26386445932865, "learning_rate": 1.3611404388789042e-05, "loss": 0.7213, "step": 16183 }, { "epoch": 1.2028242289111855, "grad_norm": 1.7056654814578909, "learning_rate": 1.3610656159591813e-05, "loss": 0.6268, "step": 16184 }, { "epoch": 1.2028985507246377, "grad_norm": 1.9428401244480809, "learning_rate": 1.3609907907149368e-05, "loss": 0.7295, "step": 16185 }, { "epoch": 1.20297287253809, "grad_norm": 1.7428717561334983, "learning_rate": 1.3609159631466523e-05, "loss": 0.4502, "step": 16186 }, { "epoch": 1.2030471943515422, "grad_norm": 1.959764140510402, "learning_rate": 1.3608411332548096e-05, "loss": 0.5994, "step": 16187 }, { "epoch": 1.2031215161649944, "grad_norm": 1.58193214475795, "learning_rate": 1.3607663010398906e-05, "loss": 0.5222, "step": 16188 }, { "epoch": 1.2031958379784466, "grad_norm": 2.1388634657436905, "learning_rate": 1.3606914665023766e-05, "loss": 0.6956, "step": 16189 }, { "epoch": 1.2032701597918989, "grad_norm": 1.9468074068252466, "learning_rate": 1.3606166296427498e-05, "loss": 0.4137, "step": 16190 }, { "epoch": 1.2033444816053511, "grad_norm": 2.307716081902119, "learning_rate": 1.3605417904614918e-05, "loss": 0.7789, "step": 16191 }, { "epoch": 1.2034188034188034, "grad_norm": 2.3707911395604206, "learning_rate": 1.3604669489590845e-05, "loss": 0.5708, "step": 16192 }, { "epoch": 1.2034931252322556, "grad_norm": 1.8116642986434557, "learning_rate": 1.3603921051360095e-05, "loss": 0.5022, "step": 16193 }, { "epoch": 1.2035674470457078, "grad_norm": 2.5163712447058715, "learning_rate": 1.3603172589927491e-05, "loss": 0.6236, "step": 16194 }, { "epoch": 1.20364176885916, "grad_norm": 2.0743660234754984, "learning_rate": 1.3602424105297847e-05, "loss": 0.4789, "step": 16195 }, { "epoch": 1.2037160906726123, "grad_norm": 1.8704376811393937, "learning_rate": 1.3601675597475983e-05, "loss": 0.6242, "step": 16196 }, { "epoch": 1.2037904124860646, "grad_norm": 1.890642188355705, "learning_rate": 1.360092706646672e-05, "loss": 0.6108, "step": 16197 }, { "epoch": 1.203864734299517, "grad_norm": 8.379763909919502, "learning_rate": 1.3600178512274875e-05, "loss": 0.7153, "step": 16198 }, { "epoch": 1.2039390561129693, "grad_norm": 1.5584657530585841, "learning_rate": 1.3599429934905268e-05, "loss": 0.4359, "step": 16199 }, { "epoch": 1.2040133779264215, "grad_norm": 1.8783472957247855, "learning_rate": 1.3598681334362716e-05, "loss": 0.6084, "step": 16200 }, { "epoch": 1.2040876997398737, "grad_norm": 1.7968063404151515, "learning_rate": 1.3597932710652042e-05, "loss": 0.6322, "step": 16201 }, { "epoch": 1.204162021553326, "grad_norm": 1.7187599382438374, "learning_rate": 1.3597184063778061e-05, "loss": 0.5707, "step": 16202 }, { "epoch": 1.2042363433667782, "grad_norm": 1.89203168008121, "learning_rate": 1.35964353937456e-05, "loss": 0.5905, "step": 16203 }, { "epoch": 1.2043106651802304, "grad_norm": 1.7185727192001359, "learning_rate": 1.3595686700559471e-05, "loss": 0.6368, "step": 16204 }, { "epoch": 1.2043849869936827, "grad_norm": 1.920455897549596, "learning_rate": 1.35949379842245e-05, "loss": 0.6013, "step": 16205 }, { "epoch": 1.204459308807135, "grad_norm": 1.857197884236035, "learning_rate": 1.3594189244745506e-05, "loss": 0.5847, "step": 16206 }, { "epoch": 1.2045336306205872, "grad_norm": 2.105887669885162, "learning_rate": 1.3593440482127305e-05, "loss": 0.744, "step": 16207 }, { "epoch": 1.2046079524340394, "grad_norm": 2.371894112030934, "learning_rate": 1.3592691696374722e-05, "loss": 0.5504, "step": 16208 }, { "epoch": 1.2046822742474916, "grad_norm": 1.9709871130023549, "learning_rate": 1.3591942887492575e-05, "loss": 0.5975, "step": 16209 }, { "epoch": 1.2047565960609439, "grad_norm": 1.7661203824789904, "learning_rate": 1.3591194055485686e-05, "loss": 0.6162, "step": 16210 }, { "epoch": 1.2048309178743961, "grad_norm": 6.704603979781124, "learning_rate": 1.3590445200358879e-05, "loss": 0.7527, "step": 16211 }, { "epoch": 1.2049052396878484, "grad_norm": 1.714497721424189, "learning_rate": 1.3589696322116969e-05, "loss": 0.4923, "step": 16212 }, { "epoch": 1.2049795615013006, "grad_norm": 1.995765456861354, "learning_rate": 1.358894742076478e-05, "loss": 0.6553, "step": 16213 }, { "epoch": 1.2050538833147528, "grad_norm": 1.7527934154116727, "learning_rate": 1.3588198496307137e-05, "loss": 0.6298, "step": 16214 }, { "epoch": 1.205128205128205, "grad_norm": 2.113551579024315, "learning_rate": 1.3587449548748857e-05, "loss": 0.6173, "step": 16215 }, { "epoch": 1.2052025269416573, "grad_norm": 1.780945921104105, "learning_rate": 1.3586700578094762e-05, "loss": 0.562, "step": 16216 }, { "epoch": 1.2052768487551095, "grad_norm": 2.0087322137466326, "learning_rate": 1.3585951584349674e-05, "loss": 0.4752, "step": 16217 }, { "epoch": 1.2053511705685618, "grad_norm": 2.069572235488792, "learning_rate": 1.3585202567518416e-05, "loss": 0.5404, "step": 16218 }, { "epoch": 1.2054254923820142, "grad_norm": 2.092120858995239, "learning_rate": 1.3584453527605815e-05, "loss": 0.6928, "step": 16219 }, { "epoch": 1.2054998141954663, "grad_norm": 1.6915086726933983, "learning_rate": 1.358370446461668e-05, "loss": 0.6082, "step": 16220 }, { "epoch": 1.2055741360089187, "grad_norm": 2.1661703292061567, "learning_rate": 1.3582955378555847e-05, "loss": 0.593, "step": 16221 }, { "epoch": 1.205648457822371, "grad_norm": 1.8175715987023613, "learning_rate": 1.3582206269428133e-05, "loss": 0.6409, "step": 16222 }, { "epoch": 1.2057227796358232, "grad_norm": 1.9750737478616285, "learning_rate": 1.3581457137238358e-05, "loss": 0.5892, "step": 16223 }, { "epoch": 1.2057971014492754, "grad_norm": 2.067495709023934, "learning_rate": 1.358070798199135e-05, "loss": 0.6221, "step": 16224 }, { "epoch": 1.2058714232627277, "grad_norm": 1.742133745954074, "learning_rate": 1.357995880369193e-05, "loss": 0.6052, "step": 16225 }, { "epoch": 1.20594574507618, "grad_norm": 1.7006533737677278, "learning_rate": 1.3579209602344919e-05, "loss": 0.556, "step": 16226 }, { "epoch": 1.2060200668896321, "grad_norm": 2.22664920276751, "learning_rate": 1.3578460377955145e-05, "loss": 0.7874, "step": 16227 }, { "epoch": 1.2060943887030844, "grad_norm": 2.1898814817653993, "learning_rate": 1.3577711130527427e-05, "loss": 0.5882, "step": 16228 }, { "epoch": 1.2061687105165366, "grad_norm": 2.235918339369814, "learning_rate": 1.357696186006659e-05, "loss": 0.684, "step": 16229 }, { "epoch": 1.2062430323299889, "grad_norm": 2.0854139640021736, "learning_rate": 1.3576212566577459e-05, "loss": 0.6274, "step": 16230 }, { "epoch": 1.206317354143441, "grad_norm": 1.991358787957704, "learning_rate": 1.3575463250064856e-05, "loss": 0.676, "step": 16231 }, { "epoch": 1.2063916759568933, "grad_norm": 2.0483292961775343, "learning_rate": 1.3574713910533608e-05, "loss": 0.5896, "step": 16232 }, { "epoch": 1.2064659977703456, "grad_norm": 1.564277826961953, "learning_rate": 1.3573964547988537e-05, "loss": 0.4512, "step": 16233 }, { "epoch": 1.2065403195837978, "grad_norm": 1.4280653644933923, "learning_rate": 1.3573215162434465e-05, "loss": 0.4032, "step": 16234 }, { "epoch": 1.20661464139725, "grad_norm": 2.1512496925214726, "learning_rate": 1.357246575387622e-05, "loss": 0.6929, "step": 16235 }, { "epoch": 1.2066889632107023, "grad_norm": 2.3274002884503098, "learning_rate": 1.3571716322318628e-05, "loss": 0.7471, "step": 16236 }, { "epoch": 1.2067632850241545, "grad_norm": 2.0451032411137033, "learning_rate": 1.3570966867766511e-05, "loss": 0.7452, "step": 16237 }, { "epoch": 1.2068376068376068, "grad_norm": 2.0714660254712003, "learning_rate": 1.3570217390224693e-05, "loss": 0.6391, "step": 16238 }, { "epoch": 1.206911928651059, "grad_norm": 1.8141902107332026, "learning_rate": 1.3569467889698004e-05, "loss": 0.6763, "step": 16239 }, { "epoch": 1.2069862504645112, "grad_norm": 2.013877581776403, "learning_rate": 1.3568718366191263e-05, "loss": 0.6503, "step": 16240 }, { "epoch": 1.2070605722779635, "grad_norm": 2.027158124961333, "learning_rate": 1.3567968819709301e-05, "loss": 0.6509, "step": 16241 }, { "epoch": 1.207134894091416, "grad_norm": 2.3735388299591333, "learning_rate": 1.3567219250256938e-05, "loss": 0.7614, "step": 16242 }, { "epoch": 1.207209215904868, "grad_norm": 1.6862705155083537, "learning_rate": 1.3566469657839e-05, "loss": 0.5259, "step": 16243 }, { "epoch": 1.2072835377183204, "grad_norm": 1.9149198134974312, "learning_rate": 1.3565720042460323e-05, "loss": 0.6705, "step": 16244 }, { "epoch": 1.2073578595317727, "grad_norm": 1.8727833677649093, "learning_rate": 1.3564970404125722e-05, "loss": 0.6639, "step": 16245 }, { "epoch": 1.207432181345225, "grad_norm": 1.5251430051360204, "learning_rate": 1.3564220742840023e-05, "loss": 0.557, "step": 16246 }, { "epoch": 1.2075065031586771, "grad_norm": 2.1430108368204634, "learning_rate": 1.356347105860806e-05, "loss": 0.6095, "step": 16247 }, { "epoch": 1.2075808249721294, "grad_norm": 1.7472189779193987, "learning_rate": 1.3562721351434653e-05, "loss": 0.6127, "step": 16248 }, { "epoch": 1.2076551467855816, "grad_norm": 1.8300880099108607, "learning_rate": 1.356197162132463e-05, "loss": 0.7034, "step": 16249 }, { "epoch": 1.2077294685990339, "grad_norm": 2.1916413258525624, "learning_rate": 1.3561221868282823e-05, "loss": 0.5151, "step": 16250 }, { "epoch": 1.207803790412486, "grad_norm": 1.6553659385343884, "learning_rate": 1.356047209231405e-05, "loss": 0.6263, "step": 16251 }, { "epoch": 1.2078781122259383, "grad_norm": 2.736438105547955, "learning_rate": 1.3559722293423145e-05, "loss": 0.5564, "step": 16252 }, { "epoch": 1.2079524340393906, "grad_norm": 2.0005477952300232, "learning_rate": 1.3558972471614933e-05, "loss": 0.792, "step": 16253 }, { "epoch": 1.2080267558528428, "grad_norm": 1.7533975149484493, "learning_rate": 1.3558222626894238e-05, "loss": 0.5251, "step": 16254 }, { "epoch": 1.208101077666295, "grad_norm": 3.267003227037269, "learning_rate": 1.355747275926589e-05, "loss": 0.6599, "step": 16255 }, { "epoch": 1.2081753994797473, "grad_norm": 1.8265848059479783, "learning_rate": 1.355672286873472e-05, "loss": 0.6492, "step": 16256 }, { "epoch": 1.2082497212931995, "grad_norm": 2.6813529218601757, "learning_rate": 1.3555972955305552e-05, "loss": 0.6799, "step": 16257 }, { "epoch": 1.2083240431066518, "grad_norm": 2.0326807801408777, "learning_rate": 1.3555223018983214e-05, "loss": 0.6599, "step": 16258 }, { "epoch": 1.208398364920104, "grad_norm": 1.4528220302810635, "learning_rate": 1.3554473059772535e-05, "loss": 0.408, "step": 16259 }, { "epoch": 1.2084726867335562, "grad_norm": 1.7808347563976732, "learning_rate": 1.3553723077678342e-05, "loss": 0.5928, "step": 16260 }, { "epoch": 1.2085470085470085, "grad_norm": 2.0997733130010134, "learning_rate": 1.3552973072705466e-05, "loss": 0.6634, "step": 16261 }, { "epoch": 1.2086213303604607, "grad_norm": 1.9378298913249399, "learning_rate": 1.3552223044858733e-05, "loss": 0.609, "step": 16262 }, { "epoch": 1.208695652173913, "grad_norm": 2.407439227711591, "learning_rate": 1.355147299414297e-05, "loss": 0.7314, "step": 16263 }, { "epoch": 1.2087699739873652, "grad_norm": 2.1508127063413895, "learning_rate": 1.355072292056301e-05, "loss": 0.6995, "step": 16264 }, { "epoch": 1.2088442958008176, "grad_norm": 1.6308285635816844, "learning_rate": 1.354997282412368e-05, "loss": 0.5515, "step": 16265 }, { "epoch": 1.2089186176142697, "grad_norm": 2.352901997058375, "learning_rate": 1.354922270482981e-05, "loss": 0.7854, "step": 16266 }, { "epoch": 1.2089929394277221, "grad_norm": 2.098070878099297, "learning_rate": 1.3548472562686227e-05, "loss": 0.7338, "step": 16267 }, { "epoch": 1.2090672612411744, "grad_norm": 1.5998925478714547, "learning_rate": 1.3547722397697762e-05, "loss": 0.6081, "step": 16268 }, { "epoch": 1.2091415830546266, "grad_norm": 1.8868115172401894, "learning_rate": 1.3546972209869248e-05, "loss": 0.6135, "step": 16269 }, { "epoch": 1.2092159048680788, "grad_norm": 2.3762143849379003, "learning_rate": 1.3546221999205506e-05, "loss": 0.6386, "step": 16270 }, { "epoch": 1.209290226681531, "grad_norm": 1.990003653438403, "learning_rate": 1.354547176571137e-05, "loss": 0.5529, "step": 16271 }, { "epoch": 1.2093645484949833, "grad_norm": 2.674386374901282, "learning_rate": 1.3544721509391674e-05, "loss": 0.7399, "step": 16272 }, { "epoch": 1.2094388703084356, "grad_norm": 1.9427406496425732, "learning_rate": 1.3543971230251245e-05, "loss": 0.578, "step": 16273 }, { "epoch": 1.2095131921218878, "grad_norm": 1.9326558660266306, "learning_rate": 1.3543220928294914e-05, "loss": 0.6086, "step": 16274 }, { "epoch": 1.20958751393534, "grad_norm": 3.15132935895685, "learning_rate": 1.3542470603527508e-05, "loss": 0.5728, "step": 16275 }, { "epoch": 1.2096618357487923, "grad_norm": 2.4685909947499924, "learning_rate": 1.354172025595386e-05, "loss": 0.609, "step": 16276 }, { "epoch": 1.2097361575622445, "grad_norm": 2.086290413920846, "learning_rate": 1.3540969885578801e-05, "loss": 0.5755, "step": 16277 }, { "epoch": 1.2098104793756967, "grad_norm": 2.2865317713277973, "learning_rate": 1.3540219492407162e-05, "loss": 0.4835, "step": 16278 }, { "epoch": 1.209884801189149, "grad_norm": 1.7793772136697756, "learning_rate": 1.3539469076443775e-05, "loss": 0.589, "step": 16279 }, { "epoch": 1.2099591230026012, "grad_norm": 2.06166896102247, "learning_rate": 1.353871863769347e-05, "loss": 0.712, "step": 16280 }, { "epoch": 1.2100334448160535, "grad_norm": 1.9181311590727794, "learning_rate": 1.3537968176161073e-05, "loss": 0.5186, "step": 16281 }, { "epoch": 1.2101077666295057, "grad_norm": 2.3967340029679645, "learning_rate": 1.3537217691851425e-05, "loss": 0.6086, "step": 16282 }, { "epoch": 1.210182088442958, "grad_norm": 1.7462371953698705, "learning_rate": 1.3536467184769351e-05, "loss": 0.5201, "step": 16283 }, { "epoch": 1.2102564102564102, "grad_norm": 2.1546960235142456, "learning_rate": 1.3535716654919687e-05, "loss": 0.7134, "step": 16284 }, { "epoch": 1.2103307320698624, "grad_norm": 2.282633434717149, "learning_rate": 1.3534966102307264e-05, "loss": 0.6052, "step": 16285 }, { "epoch": 1.2104050538833149, "grad_norm": 2.492366454521277, "learning_rate": 1.3534215526936907e-05, "loss": 0.6668, "step": 16286 }, { "epoch": 1.210479375696767, "grad_norm": 2.351106637973618, "learning_rate": 1.3533464928813455e-05, "loss": 0.5958, "step": 16287 }, { "epoch": 1.2105536975102194, "grad_norm": 1.7309982002264872, "learning_rate": 1.3532714307941742e-05, "loss": 0.56, "step": 16288 }, { "epoch": 1.2106280193236716, "grad_norm": 1.6793147468189231, "learning_rate": 1.3531963664326594e-05, "loss": 0.5026, "step": 16289 }, { "epoch": 1.2107023411371238, "grad_norm": 2.151774398503549, "learning_rate": 1.3531212997972851e-05, "loss": 0.5568, "step": 16290 }, { "epoch": 1.210776662950576, "grad_norm": 2.0914518517773595, "learning_rate": 1.3530462308885341e-05, "loss": 0.708, "step": 16291 }, { "epoch": 1.2108509847640283, "grad_norm": 2.3658998096250223, "learning_rate": 1.3529711597068897e-05, "loss": 0.6214, "step": 16292 }, { "epoch": 1.2109253065774805, "grad_norm": 1.925679883172258, "learning_rate": 1.3528960862528356e-05, "loss": 0.6063, "step": 16293 }, { "epoch": 1.2109996283909328, "grad_norm": 2.104569099041414, "learning_rate": 1.3528210105268543e-05, "loss": 0.6254, "step": 16294 }, { "epoch": 1.211073950204385, "grad_norm": 1.9225052711744997, "learning_rate": 1.35274593252943e-05, "loss": 0.6853, "step": 16295 }, { "epoch": 1.2111482720178373, "grad_norm": 2.0848640159929954, "learning_rate": 1.3526708522610457e-05, "loss": 0.6224, "step": 16296 }, { "epoch": 1.2112225938312895, "grad_norm": 2.0378456906319196, "learning_rate": 1.3525957697221845e-05, "loss": 0.6505, "step": 16297 }, { "epoch": 1.2112969156447417, "grad_norm": 2.2459539718871007, "learning_rate": 1.3525206849133301e-05, "loss": 0.6966, "step": 16298 }, { "epoch": 1.211371237458194, "grad_norm": 1.7094636146212847, "learning_rate": 1.3524455978349657e-05, "loss": 0.4264, "step": 16299 }, { "epoch": 1.2114455592716462, "grad_norm": 1.9183527262519016, "learning_rate": 1.3523705084875752e-05, "loss": 0.588, "step": 16300 }, { "epoch": 1.2115198810850984, "grad_norm": 2.111251683983857, "learning_rate": 1.3522954168716413e-05, "loss": 0.641, "step": 16301 }, { "epoch": 1.2115942028985507, "grad_norm": 2.0862011358291097, "learning_rate": 1.3522203229876482e-05, "loss": 0.5787, "step": 16302 }, { "epoch": 1.211668524712003, "grad_norm": 2.118129847321013, "learning_rate": 1.3521452268360786e-05, "loss": 0.5892, "step": 16303 }, { "epoch": 1.2117428465254552, "grad_norm": 1.7548609249294373, "learning_rate": 1.3520701284174162e-05, "loss": 0.7472, "step": 16304 }, { "epoch": 1.2118171683389074, "grad_norm": 2.073904609481077, "learning_rate": 1.3519950277321447e-05, "loss": 0.7565, "step": 16305 }, { "epoch": 1.2118914901523596, "grad_norm": 1.7998758963642976, "learning_rate": 1.3519199247807478e-05, "loss": 0.5627, "step": 16306 }, { "epoch": 1.2119658119658119, "grad_norm": 1.9230623305375125, "learning_rate": 1.3518448195637084e-05, "loss": 0.6059, "step": 16307 }, { "epoch": 1.2120401337792641, "grad_norm": 2.7002171175716536, "learning_rate": 1.3517697120815104e-05, "loss": 0.8661, "step": 16308 }, { "epoch": 1.2121144555927166, "grad_norm": 1.908526819259929, "learning_rate": 1.3516946023346371e-05, "loss": 0.6903, "step": 16309 }, { "epoch": 1.2121887774061686, "grad_norm": 2.294876707887437, "learning_rate": 1.351619490323572e-05, "loss": 0.6082, "step": 16310 }, { "epoch": 1.212263099219621, "grad_norm": 3.3928003741497275, "learning_rate": 1.3515443760487991e-05, "loss": 0.5749, "step": 16311 }, { "epoch": 1.2123374210330733, "grad_norm": 1.940967055453208, "learning_rate": 1.3514692595108021e-05, "loss": 0.4113, "step": 16312 }, { "epoch": 1.2124117428465255, "grad_norm": 1.722432796917477, "learning_rate": 1.351394140710064e-05, "loss": 0.5358, "step": 16313 }, { "epoch": 1.2124860646599778, "grad_norm": 2.4043023492638804, "learning_rate": 1.3513190196470685e-05, "loss": 0.8219, "step": 16314 }, { "epoch": 1.21256038647343, "grad_norm": 2.3464478640185353, "learning_rate": 1.3512438963222993e-05, "loss": 0.6685, "step": 16315 }, { "epoch": 1.2126347082868822, "grad_norm": 1.688911377563339, "learning_rate": 1.3511687707362402e-05, "loss": 0.4746, "step": 16316 }, { "epoch": 1.2127090301003345, "grad_norm": 1.8038710890751626, "learning_rate": 1.351093642889375e-05, "loss": 0.5858, "step": 16317 }, { "epoch": 1.2127833519137867, "grad_norm": 2.092217037721667, "learning_rate": 1.3510185127821872e-05, "loss": 0.5674, "step": 16318 }, { "epoch": 1.212857673727239, "grad_norm": 1.7521613652058665, "learning_rate": 1.35094338041516e-05, "loss": 0.5996, "step": 16319 }, { "epoch": 1.2129319955406912, "grad_norm": 2.238010963167303, "learning_rate": 1.3508682457887775e-05, "loss": 0.7356, "step": 16320 }, { "epoch": 1.2130063173541434, "grad_norm": 1.9139130974638585, "learning_rate": 1.3507931089035238e-05, "loss": 0.6657, "step": 16321 }, { "epoch": 1.2130806391675957, "grad_norm": 2.0194604251021473, "learning_rate": 1.3507179697598823e-05, "loss": 0.5233, "step": 16322 }, { "epoch": 1.213154960981048, "grad_norm": 1.8507051322795427, "learning_rate": 1.3506428283583366e-05, "loss": 0.6533, "step": 16323 }, { "epoch": 1.2132292827945002, "grad_norm": 2.484079470220246, "learning_rate": 1.3505676846993705e-05, "loss": 0.7391, "step": 16324 }, { "epoch": 1.2133036046079524, "grad_norm": 1.8959866572905855, "learning_rate": 1.3504925387834677e-05, "loss": 0.6641, "step": 16325 }, { "epoch": 1.2133779264214046, "grad_norm": 2.13833925703214, "learning_rate": 1.3504173906111125e-05, "loss": 0.6751, "step": 16326 }, { "epoch": 1.2134522482348569, "grad_norm": 1.6941626233065266, "learning_rate": 1.3503422401827879e-05, "loss": 0.525, "step": 16327 }, { "epoch": 1.213526570048309, "grad_norm": 1.7852427265378528, "learning_rate": 1.3502670874989785e-05, "loss": 0.5331, "step": 16328 }, { "epoch": 1.2136008918617613, "grad_norm": 2.085374429167327, "learning_rate": 1.3501919325601678e-05, "loss": 0.6348, "step": 16329 }, { "epoch": 1.2136752136752136, "grad_norm": 2.129273950920242, "learning_rate": 1.3501167753668392e-05, "loss": 0.7823, "step": 16330 }, { "epoch": 1.2137495354886658, "grad_norm": 2.0958200225567456, "learning_rate": 1.3500416159194771e-05, "loss": 0.6914, "step": 16331 }, { "epoch": 1.2138238573021183, "grad_norm": 2.1631887161834484, "learning_rate": 1.3499664542185653e-05, "loss": 0.6792, "step": 16332 }, { "epoch": 1.2138981791155703, "grad_norm": 2.871497762373687, "learning_rate": 1.3498912902645878e-05, "loss": 0.6725, "step": 16333 }, { "epoch": 1.2139725009290228, "grad_norm": 1.9259139409280737, "learning_rate": 1.349816124058028e-05, "loss": 0.5422, "step": 16334 }, { "epoch": 1.214046822742475, "grad_norm": 2.3256389441284493, "learning_rate": 1.3497409555993706e-05, "loss": 0.5011, "step": 16335 }, { "epoch": 1.2141211445559272, "grad_norm": 2.701199809951276, "learning_rate": 1.3496657848890987e-05, "loss": 0.6734, "step": 16336 }, { "epoch": 1.2141954663693795, "grad_norm": 1.846035547250492, "learning_rate": 1.3495906119276968e-05, "loss": 0.6735, "step": 16337 }, { "epoch": 1.2142697881828317, "grad_norm": 1.8515076804194253, "learning_rate": 1.3495154367156485e-05, "loss": 0.5785, "step": 16338 }, { "epoch": 1.214344109996284, "grad_norm": 1.5847628451313012, "learning_rate": 1.349440259253438e-05, "loss": 0.4849, "step": 16339 }, { "epoch": 1.2144184318097362, "grad_norm": 2.2577560977451276, "learning_rate": 1.3493650795415494e-05, "loss": 0.6997, "step": 16340 }, { "epoch": 1.2144927536231884, "grad_norm": 2.12595721161123, "learning_rate": 1.3492898975804665e-05, "loss": 0.5752, "step": 16341 }, { "epoch": 1.2145670754366407, "grad_norm": 2.470206657524142, "learning_rate": 1.3492147133706732e-05, "loss": 0.6581, "step": 16342 }, { "epoch": 1.214641397250093, "grad_norm": 1.9394362853469973, "learning_rate": 1.3491395269126537e-05, "loss": 0.6692, "step": 16343 }, { "epoch": 1.2147157190635451, "grad_norm": 2.1392152929370423, "learning_rate": 1.3490643382068922e-05, "loss": 0.6682, "step": 16344 }, { "epoch": 1.2147900408769974, "grad_norm": 1.9028957484962148, "learning_rate": 1.3489891472538725e-05, "loss": 0.5854, "step": 16345 }, { "epoch": 1.2148643626904496, "grad_norm": 1.734674620157882, "learning_rate": 1.348913954054079e-05, "loss": 0.5401, "step": 16346 }, { "epoch": 1.2149386845039019, "grad_norm": 2.0211954885282832, "learning_rate": 1.3488387586079952e-05, "loss": 0.619, "step": 16347 }, { "epoch": 1.215013006317354, "grad_norm": 2.2191295352697322, "learning_rate": 1.3487635609161056e-05, "loss": 0.821, "step": 16348 }, { "epoch": 1.2150873281308063, "grad_norm": 1.918550620177681, "learning_rate": 1.3486883609788946e-05, "loss": 0.6921, "step": 16349 }, { "epoch": 1.2151616499442586, "grad_norm": 1.925519712616744, "learning_rate": 1.3486131587968458e-05, "loss": 0.6191, "step": 16350 }, { "epoch": 1.2152359717577108, "grad_norm": 2.2208780379708797, "learning_rate": 1.3485379543704435e-05, "loss": 0.5347, "step": 16351 }, { "epoch": 1.215310293571163, "grad_norm": 4.353067604961424, "learning_rate": 1.3484627477001724e-05, "loss": 0.5163, "step": 16352 }, { "epoch": 1.2153846153846155, "grad_norm": 1.9022036377229485, "learning_rate": 1.3483875387865158e-05, "loss": 0.6353, "step": 16353 }, { "epoch": 1.2154589371980675, "grad_norm": 2.2802444278783827, "learning_rate": 1.3483123276299584e-05, "loss": 0.728, "step": 16354 }, { "epoch": 1.21553325901152, "grad_norm": 2.2509078999059495, "learning_rate": 1.3482371142309842e-05, "loss": 0.6281, "step": 16355 }, { "epoch": 1.2156075808249722, "grad_norm": 1.9553556589584737, "learning_rate": 1.3481618985900778e-05, "loss": 0.5835, "step": 16356 }, { "epoch": 1.2156819026384245, "grad_norm": 1.5730227463519997, "learning_rate": 1.348086680707723e-05, "loss": 0.4811, "step": 16357 }, { "epoch": 1.2157562244518767, "grad_norm": 2.220216473880776, "learning_rate": 1.348011460584404e-05, "loss": 0.5741, "step": 16358 }, { "epoch": 1.215830546265329, "grad_norm": 2.8501615981133646, "learning_rate": 1.3479362382206055e-05, "loss": 0.5335, "step": 16359 }, { "epoch": 1.2159048680787812, "grad_norm": 8.98848656907897, "learning_rate": 1.3478610136168113e-05, "loss": 0.4371, "step": 16360 }, { "epoch": 1.2159791898922334, "grad_norm": 2.3957182989893404, "learning_rate": 1.3477857867735065e-05, "loss": 0.6912, "step": 16361 }, { "epoch": 1.2160535117056857, "grad_norm": 1.9542485284670483, "learning_rate": 1.3477105576911744e-05, "loss": 0.6255, "step": 16362 }, { "epoch": 1.216127833519138, "grad_norm": 2.346340695867661, "learning_rate": 1.3476353263703e-05, "loss": 0.6396, "step": 16363 }, { "epoch": 1.2162021553325901, "grad_norm": 1.777071676326189, "learning_rate": 1.3475600928113672e-05, "loss": 0.5406, "step": 16364 }, { "epoch": 1.2162764771460424, "grad_norm": 1.630723190695026, "learning_rate": 1.3474848570148607e-05, "loss": 0.4566, "step": 16365 }, { "epoch": 1.2163507989594946, "grad_norm": 2.0099780263294527, "learning_rate": 1.3474096189812646e-05, "loss": 0.5373, "step": 16366 }, { "epoch": 1.2164251207729468, "grad_norm": 2.2036693186872456, "learning_rate": 1.3473343787110636e-05, "loss": 0.6538, "step": 16367 }, { "epoch": 1.216499442586399, "grad_norm": 1.9322246198419197, "learning_rate": 1.3472591362047415e-05, "loss": 0.6217, "step": 16368 }, { "epoch": 1.2165737643998513, "grad_norm": 1.9598131770824458, "learning_rate": 1.3471838914627834e-05, "loss": 0.6571, "step": 16369 }, { "epoch": 1.2166480862133036, "grad_norm": 2.024522947320129, "learning_rate": 1.3471086444856734e-05, "loss": 0.6206, "step": 16370 }, { "epoch": 1.2167224080267558, "grad_norm": 2.118662346325073, "learning_rate": 1.3470333952738958e-05, "loss": 0.6338, "step": 16371 }, { "epoch": 1.216796729840208, "grad_norm": 2.097992034844474, "learning_rate": 1.3469581438279355e-05, "loss": 0.6859, "step": 16372 }, { "epoch": 1.2168710516536603, "grad_norm": 2.070125929693627, "learning_rate": 1.3468828901482765e-05, "loss": 0.5773, "step": 16373 }, { "epoch": 1.2169453734671125, "grad_norm": 2.0931902017834996, "learning_rate": 1.3468076342354036e-05, "loss": 0.635, "step": 16374 }, { "epoch": 1.2170196952805648, "grad_norm": 2.439765145477044, "learning_rate": 1.346732376089801e-05, "loss": 0.7564, "step": 16375 }, { "epoch": 1.2170940170940172, "grad_norm": 2.0480486724047817, "learning_rate": 1.3466571157119533e-05, "loss": 0.7511, "step": 16376 }, { "epoch": 1.2171683389074692, "grad_norm": 1.5375980508482177, "learning_rate": 1.3465818531023452e-05, "loss": 0.4808, "step": 16377 }, { "epoch": 1.2172426607209217, "grad_norm": 2.6985337457454492, "learning_rate": 1.346506588261461e-05, "loss": 0.8242, "step": 16378 }, { "epoch": 1.217316982534374, "grad_norm": 1.8135687412442485, "learning_rate": 1.3464313211897853e-05, "loss": 0.624, "step": 16379 }, { "epoch": 1.2173913043478262, "grad_norm": 1.8115771438109023, "learning_rate": 1.346356051887803e-05, "loss": 0.6212, "step": 16380 }, { "epoch": 1.2174656261612784, "grad_norm": 2.2153398963735884, "learning_rate": 1.346280780355998e-05, "loss": 0.7234, "step": 16381 }, { "epoch": 1.2175399479747306, "grad_norm": 1.7589029751553935, "learning_rate": 1.3462055065948557e-05, "loss": 0.4404, "step": 16382 }, { "epoch": 1.2176142697881829, "grad_norm": 1.6042315771201974, "learning_rate": 1.3461302306048603e-05, "loss": 0.5312, "step": 16383 }, { "epoch": 1.2176885916016351, "grad_norm": 1.8588383789908933, "learning_rate": 1.3460549523864962e-05, "loss": 0.6541, "step": 16384 }, { "epoch": 1.2177629134150874, "grad_norm": 1.5085622776087562, "learning_rate": 1.3459796719402482e-05, "loss": 0.5002, "step": 16385 }, { "epoch": 1.2178372352285396, "grad_norm": 1.8653225432301217, "learning_rate": 1.3459043892666012e-05, "loss": 0.6514, "step": 16386 }, { "epoch": 1.2179115570419918, "grad_norm": 2.1286328546179996, "learning_rate": 1.3458291043660398e-05, "loss": 0.6523, "step": 16387 }, { "epoch": 1.217985878855444, "grad_norm": 2.270360076487752, "learning_rate": 1.3457538172390487e-05, "loss": 0.706, "step": 16388 }, { "epoch": 1.2180602006688963, "grad_norm": 12.95775096322029, "learning_rate": 1.3456785278861122e-05, "loss": 0.7689, "step": 16389 }, { "epoch": 1.2181345224823485, "grad_norm": 2.202045168339395, "learning_rate": 1.3456032363077151e-05, "loss": 0.8101, "step": 16390 }, { "epoch": 1.2182088442958008, "grad_norm": 1.80733324062745, "learning_rate": 1.3455279425043426e-05, "loss": 0.4847, "step": 16391 }, { "epoch": 1.218283166109253, "grad_norm": 1.8064656293835293, "learning_rate": 1.345452646476479e-05, "loss": 0.627, "step": 16392 }, { "epoch": 1.2183574879227053, "grad_norm": 1.380339224389882, "learning_rate": 1.345377348224609e-05, "loss": 0.4546, "step": 16393 }, { "epoch": 1.2184318097361575, "grad_norm": 2.0094380232761786, "learning_rate": 1.345302047749218e-05, "loss": 0.8202, "step": 16394 }, { "epoch": 1.2185061315496097, "grad_norm": 2.0223517697129747, "learning_rate": 1.3452267450507902e-05, "loss": 0.6815, "step": 16395 }, { "epoch": 1.218580453363062, "grad_norm": 1.9035322624549031, "learning_rate": 1.3451514401298103e-05, "loss": 0.6024, "step": 16396 }, { "epoch": 1.2186547751765142, "grad_norm": 2.041101514369606, "learning_rate": 1.3450761329867633e-05, "loss": 0.6512, "step": 16397 }, { "epoch": 1.2187290969899665, "grad_norm": 1.8469642413479521, "learning_rate": 1.3450008236221343e-05, "loss": 0.6319, "step": 16398 }, { "epoch": 1.218803418803419, "grad_norm": 2.052459108051449, "learning_rate": 1.344925512036408e-05, "loss": 0.5648, "step": 16399 }, { "epoch": 1.218877740616871, "grad_norm": 1.715590804313043, "learning_rate": 1.344850198230069e-05, "loss": 0.4483, "step": 16400 }, { "epoch": 1.2189520624303234, "grad_norm": 2.7283596218795454, "learning_rate": 1.3447748822036022e-05, "loss": 0.6385, "step": 16401 }, { "epoch": 1.2190263842437756, "grad_norm": 1.4826451327512888, "learning_rate": 1.3446995639574927e-05, "loss": 0.3871, "step": 16402 }, { "epoch": 1.2191007060572279, "grad_norm": 2.0845130100845046, "learning_rate": 1.3446242434922252e-05, "loss": 0.6724, "step": 16403 }, { "epoch": 1.21917502787068, "grad_norm": 1.7585244049174238, "learning_rate": 1.3445489208082848e-05, "loss": 0.4832, "step": 16404 }, { "epoch": 1.2192493496841323, "grad_norm": 2.311310978151267, "learning_rate": 1.3444735959061563e-05, "loss": 0.701, "step": 16405 }, { "epoch": 1.2193236714975846, "grad_norm": 1.9021284819508015, "learning_rate": 1.3443982687863248e-05, "loss": 0.5254, "step": 16406 }, { "epoch": 1.2193979933110368, "grad_norm": 2.721913515866532, "learning_rate": 1.3443229394492749e-05, "loss": 0.682, "step": 16407 }, { "epoch": 1.219472315124489, "grad_norm": 2.2675588593244456, "learning_rate": 1.344247607895492e-05, "loss": 0.6993, "step": 16408 }, { "epoch": 1.2195466369379413, "grad_norm": 2.4873697079227237, "learning_rate": 1.3441722741254606e-05, "loss": 0.5314, "step": 16409 }, { "epoch": 1.2196209587513935, "grad_norm": 2.5400231012856342, "learning_rate": 1.3440969381396662e-05, "loss": 0.7617, "step": 16410 }, { "epoch": 1.2196952805648458, "grad_norm": 2.1516587434292784, "learning_rate": 1.3440215999385933e-05, "loss": 0.6314, "step": 16411 }, { "epoch": 1.219769602378298, "grad_norm": 2.1754548156518156, "learning_rate": 1.3439462595227272e-05, "loss": 0.6445, "step": 16412 }, { "epoch": 1.2198439241917503, "grad_norm": 2.3503270727917003, "learning_rate": 1.3438709168925533e-05, "loss": 0.636, "step": 16413 }, { "epoch": 1.2199182460052025, "grad_norm": 1.946168660516785, "learning_rate": 1.343795572048556e-05, "loss": 0.6292, "step": 16414 }, { "epoch": 1.2199925678186547, "grad_norm": 2.212686246881409, "learning_rate": 1.3437202249912207e-05, "loss": 0.8089, "step": 16415 }, { "epoch": 1.220066889632107, "grad_norm": 1.8077574331689357, "learning_rate": 1.3436448757210325e-05, "loss": 0.4577, "step": 16416 }, { "epoch": 1.2201412114455592, "grad_norm": 2.1910000239386958, "learning_rate": 1.3435695242384763e-05, "loss": 0.6996, "step": 16417 }, { "epoch": 1.2202155332590114, "grad_norm": 2.3683854659374033, "learning_rate": 1.3434941705440375e-05, "loss": 0.6173, "step": 16418 }, { "epoch": 1.2202898550724637, "grad_norm": 1.7720808552914908, "learning_rate": 1.3434188146382008e-05, "loss": 0.5242, "step": 16419 }, { "epoch": 1.2203641768859161, "grad_norm": 1.376515455844212, "learning_rate": 1.3433434565214519e-05, "loss": 0.4498, "step": 16420 }, { "epoch": 1.2204384986993682, "grad_norm": 1.9565644284729902, "learning_rate": 1.3432680961942754e-05, "loss": 0.539, "step": 16421 }, { "epoch": 1.2205128205128206, "grad_norm": 2.102650224938577, "learning_rate": 1.3431927336571567e-05, "loss": 0.573, "step": 16422 }, { "epoch": 1.2205871423262729, "grad_norm": 1.8923550495518549, "learning_rate": 1.343117368910581e-05, "loss": 0.6051, "step": 16423 }, { "epoch": 1.220661464139725, "grad_norm": 1.993063057933318, "learning_rate": 1.3430420019550334e-05, "loss": 0.624, "step": 16424 }, { "epoch": 1.2207357859531773, "grad_norm": 2.818267380187656, "learning_rate": 1.3429666327909994e-05, "loss": 0.7125, "step": 16425 }, { "epoch": 1.2208101077666296, "grad_norm": 1.9061951994756792, "learning_rate": 1.342891261418964e-05, "loss": 0.4977, "step": 16426 }, { "epoch": 1.2208844295800818, "grad_norm": 1.6283297073319163, "learning_rate": 1.3428158878394123e-05, "loss": 0.5486, "step": 16427 }, { "epoch": 1.220958751393534, "grad_norm": 1.8525515153533558, "learning_rate": 1.3427405120528299e-05, "loss": 0.6127, "step": 16428 }, { "epoch": 1.2210330732069863, "grad_norm": 2.2491565687030475, "learning_rate": 1.3426651340597017e-05, "loss": 0.7056, "step": 16429 }, { "epoch": 1.2211073950204385, "grad_norm": 2.0346429347267114, "learning_rate": 1.342589753860513e-05, "loss": 0.7132, "step": 16430 }, { "epoch": 1.2211817168338908, "grad_norm": 3.1267675113917974, "learning_rate": 1.3425143714557498e-05, "loss": 0.5945, "step": 16431 }, { "epoch": 1.221256038647343, "grad_norm": 1.877710373549978, "learning_rate": 1.3424389868458964e-05, "loss": 0.6292, "step": 16432 }, { "epoch": 1.2213303604607952, "grad_norm": 1.478775077404943, "learning_rate": 1.3423636000314386e-05, "loss": 0.5008, "step": 16433 }, { "epoch": 1.2214046822742475, "grad_norm": 1.5945346665101354, "learning_rate": 1.3422882110128619e-05, "loss": 0.545, "step": 16434 }, { "epoch": 1.2214790040876997, "grad_norm": 1.6884858223365897, "learning_rate": 1.3422128197906512e-05, "loss": 0.3799, "step": 16435 }, { "epoch": 1.221553325901152, "grad_norm": 2.2540537187623317, "learning_rate": 1.3421374263652924e-05, "loss": 0.7012, "step": 16436 }, { "epoch": 1.2216276477146042, "grad_norm": 2.078181261807576, "learning_rate": 1.3420620307372703e-05, "loss": 0.5578, "step": 16437 }, { "epoch": 1.2217019695280564, "grad_norm": 2.5230207021169964, "learning_rate": 1.341986632907071e-05, "loss": 0.8301, "step": 16438 }, { "epoch": 1.2217762913415087, "grad_norm": 1.8909624334943063, "learning_rate": 1.341911232875179e-05, "loss": 0.6748, "step": 16439 }, { "epoch": 1.221850613154961, "grad_norm": 2.212325165718112, "learning_rate": 1.3418358306420805e-05, "loss": 0.6575, "step": 16440 }, { "epoch": 1.2219249349684131, "grad_norm": 2.375655505979069, "learning_rate": 1.3417604262082607e-05, "loss": 0.6922, "step": 16441 }, { "epoch": 1.2219992567818654, "grad_norm": 1.9156396201827413, "learning_rate": 1.341685019574205e-05, "loss": 0.6172, "step": 16442 }, { "epoch": 1.2220735785953178, "grad_norm": 1.9758662815109052, "learning_rate": 1.3416096107403989e-05, "loss": 0.6812, "step": 16443 }, { "epoch": 1.2221479004087699, "grad_norm": 1.6700512068626725, "learning_rate": 1.3415341997073278e-05, "loss": 0.5464, "step": 16444 }, { "epoch": 1.2222222222222223, "grad_norm": 2.558194198056532, "learning_rate": 1.3414587864754772e-05, "loss": 0.6722, "step": 16445 }, { "epoch": 1.2222965440356746, "grad_norm": 2.0905252307016498, "learning_rate": 1.3413833710453323e-05, "loss": 0.6507, "step": 16446 }, { "epoch": 1.2223708658491268, "grad_norm": 1.6181814949560123, "learning_rate": 1.3413079534173794e-05, "loss": 0.5375, "step": 16447 }, { "epoch": 1.222445187662579, "grad_norm": 2.4244057419535747, "learning_rate": 1.3412325335921038e-05, "loss": 0.6912, "step": 16448 }, { "epoch": 1.2225195094760313, "grad_norm": 2.4197607288144307, "learning_rate": 1.3411571115699906e-05, "loss": 0.5929, "step": 16449 }, { "epoch": 1.2225938312894835, "grad_norm": 2.1540099371721615, "learning_rate": 1.3410816873515255e-05, "loss": 0.6222, "step": 16450 }, { "epoch": 1.2226681531029358, "grad_norm": 1.7900260899911071, "learning_rate": 1.3410062609371941e-05, "loss": 0.6441, "step": 16451 }, { "epoch": 1.222742474916388, "grad_norm": 2.151881916437099, "learning_rate": 1.3409308323274823e-05, "loss": 0.6006, "step": 16452 }, { "epoch": 1.2228167967298402, "grad_norm": 1.7230664151358532, "learning_rate": 1.3408554015228756e-05, "loss": 0.6053, "step": 16453 }, { "epoch": 1.2228911185432925, "grad_norm": 1.8541551417038908, "learning_rate": 1.3407799685238593e-05, "loss": 0.5021, "step": 16454 }, { "epoch": 1.2229654403567447, "grad_norm": 2.1807902879540118, "learning_rate": 1.3407045333309195e-05, "loss": 0.5857, "step": 16455 }, { "epoch": 1.223039762170197, "grad_norm": 2.583055474990076, "learning_rate": 1.3406290959445414e-05, "loss": 0.6113, "step": 16456 }, { "epoch": 1.2231140839836492, "grad_norm": 2.0570596813757507, "learning_rate": 1.3405536563652106e-05, "loss": 0.6187, "step": 16457 }, { "epoch": 1.2231884057971014, "grad_norm": 3.348573897601332, "learning_rate": 1.3404782145934134e-05, "loss": 0.4694, "step": 16458 }, { "epoch": 1.2232627276105537, "grad_norm": 2.093569880010947, "learning_rate": 1.3404027706296354e-05, "loss": 0.6134, "step": 16459 }, { "epoch": 1.223337049424006, "grad_norm": 1.844433428851614, "learning_rate": 1.3403273244743616e-05, "loss": 0.5493, "step": 16460 }, { "epoch": 1.2234113712374581, "grad_norm": 1.8688183458375716, "learning_rate": 1.340251876128078e-05, "loss": 0.4863, "step": 16461 }, { "epoch": 1.2234856930509104, "grad_norm": 1.812745099049205, "learning_rate": 1.3401764255912707e-05, "loss": 0.5253, "step": 16462 }, { "epoch": 1.2235600148643626, "grad_norm": 2.289843498850814, "learning_rate": 1.340100972864425e-05, "loss": 0.6934, "step": 16463 }, { "epoch": 1.2236343366778148, "grad_norm": 2.2406123275012026, "learning_rate": 1.3400255179480272e-05, "loss": 0.4463, "step": 16464 }, { "epoch": 1.223708658491267, "grad_norm": 2.12347059767062, "learning_rate": 1.3399500608425629e-05, "loss": 0.6325, "step": 16465 }, { "epoch": 1.2237829803047195, "grad_norm": 2.736690831334296, "learning_rate": 1.3398746015485174e-05, "loss": 0.602, "step": 16466 }, { "epoch": 1.2238573021181716, "grad_norm": 1.9909307914392038, "learning_rate": 1.3397991400663768e-05, "loss": 0.4904, "step": 16467 }, { "epoch": 1.223931623931624, "grad_norm": 1.9865656945606187, "learning_rate": 1.339723676396627e-05, "loss": 0.664, "step": 16468 }, { "epoch": 1.2240059457450763, "grad_norm": 2.110974473048803, "learning_rate": 1.3396482105397542e-05, "loss": 0.6863, "step": 16469 }, { "epoch": 1.2240802675585285, "grad_norm": 2.119588455829299, "learning_rate": 1.3395727424962435e-05, "loss": 0.6568, "step": 16470 }, { "epoch": 1.2241545893719807, "grad_norm": 1.968274024386975, "learning_rate": 1.3394972722665812e-05, "loss": 0.7459, "step": 16471 }, { "epoch": 1.224228911185433, "grad_norm": 1.989910901062942, "learning_rate": 1.339421799851253e-05, "loss": 0.7444, "step": 16472 }, { "epoch": 1.2243032329988852, "grad_norm": 1.8917497194255875, "learning_rate": 1.3393463252507445e-05, "loss": 0.6373, "step": 16473 }, { "epoch": 1.2243775548123375, "grad_norm": 2.017453718914244, "learning_rate": 1.3392708484655423e-05, "loss": 0.6477, "step": 16474 }, { "epoch": 1.2244518766257897, "grad_norm": 2.1059338581149736, "learning_rate": 1.3391953694961324e-05, "loss": 0.5702, "step": 16475 }, { "epoch": 1.224526198439242, "grad_norm": 1.9341199870699415, "learning_rate": 1.3391198883429998e-05, "loss": 0.6914, "step": 16476 }, { "epoch": 1.2246005202526942, "grad_norm": 4.623699018340769, "learning_rate": 1.3390444050066312e-05, "loss": 0.5652, "step": 16477 }, { "epoch": 1.2246748420661464, "grad_norm": 2.3349679041150093, "learning_rate": 1.3389689194875122e-05, "loss": 0.7579, "step": 16478 }, { "epoch": 1.2247491638795986, "grad_norm": 1.8929075716349784, "learning_rate": 1.3388934317861288e-05, "loss": 0.5075, "step": 16479 }, { "epoch": 1.2248234856930509, "grad_norm": 2.042280513217223, "learning_rate": 1.3388179419029672e-05, "loss": 0.7972, "step": 16480 }, { "epoch": 1.2248978075065031, "grad_norm": 2.177673938285509, "learning_rate": 1.3387424498385134e-05, "loss": 0.8483, "step": 16481 }, { "epoch": 1.2249721293199554, "grad_norm": 1.919405243431105, "learning_rate": 1.338666955593253e-05, "loss": 0.703, "step": 16482 }, { "epoch": 1.2250464511334076, "grad_norm": 2.2748364678363022, "learning_rate": 1.3385914591676725e-05, "loss": 0.4963, "step": 16483 }, { "epoch": 1.2251207729468598, "grad_norm": 1.5814776755489068, "learning_rate": 1.3385159605622575e-05, "loss": 0.5477, "step": 16484 }, { "epoch": 1.225195094760312, "grad_norm": 1.6749894968150718, "learning_rate": 1.3384404597774945e-05, "loss": 0.4933, "step": 16485 }, { "epoch": 1.2252694165737643, "grad_norm": 1.9834726755596612, "learning_rate": 1.3383649568138695e-05, "loss": 0.727, "step": 16486 }, { "epoch": 1.2253437383872166, "grad_norm": 1.842745540563828, "learning_rate": 1.3382894516718686e-05, "loss": 0.495, "step": 16487 }, { "epoch": 1.2254180602006688, "grad_norm": 1.7543304480025692, "learning_rate": 1.3382139443519774e-05, "loss": 0.5648, "step": 16488 }, { "epoch": 1.2254923820141213, "grad_norm": 2.2094468462768733, "learning_rate": 1.3381384348546824e-05, "loss": 0.5391, "step": 16489 }, { "epoch": 1.2255667038275735, "grad_norm": 1.6809952382586377, "learning_rate": 1.3380629231804696e-05, "loss": 0.5458, "step": 16490 }, { "epoch": 1.2256410256410257, "grad_norm": 1.9814530438321556, "learning_rate": 1.3379874093298256e-05, "loss": 0.6091, "step": 16491 }, { "epoch": 1.225715347454478, "grad_norm": 2.3338590349210415, "learning_rate": 1.337911893303236e-05, "loss": 0.6438, "step": 16492 }, { "epoch": 1.2257896692679302, "grad_norm": 2.230431101164327, "learning_rate": 1.3378363751011874e-05, "loss": 0.6606, "step": 16493 }, { "epoch": 1.2258639910813824, "grad_norm": 4.433961579635494, "learning_rate": 1.3377608547241652e-05, "loss": 0.5244, "step": 16494 }, { "epoch": 1.2259383128948347, "grad_norm": 2.1714473465452304, "learning_rate": 1.3376853321726565e-05, "loss": 0.6081, "step": 16495 }, { "epoch": 1.226012634708287, "grad_norm": 2.3237654233705545, "learning_rate": 1.337609807447147e-05, "loss": 0.5417, "step": 16496 }, { "epoch": 1.2260869565217392, "grad_norm": 1.808005324696986, "learning_rate": 1.3375342805481233e-05, "loss": 0.6001, "step": 16497 }, { "epoch": 1.2261612783351914, "grad_norm": 1.8623130090888667, "learning_rate": 1.3374587514760713e-05, "loss": 0.5683, "step": 16498 }, { "epoch": 1.2262356001486436, "grad_norm": 2.124151909895598, "learning_rate": 1.3373832202314776e-05, "loss": 0.6796, "step": 16499 }, { "epoch": 1.2263099219620959, "grad_norm": 2.004777761731558, "learning_rate": 1.3373076868148278e-05, "loss": 0.6157, "step": 16500 }, { "epoch": 1.226384243775548, "grad_norm": 1.8602406105690823, "learning_rate": 1.3372321512266089e-05, "loss": 0.602, "step": 16501 }, { "epoch": 1.2264585655890003, "grad_norm": 2.051721712254488, "learning_rate": 1.3371566134673068e-05, "loss": 0.5209, "step": 16502 }, { "epoch": 1.2265328874024526, "grad_norm": 1.5682390643465605, "learning_rate": 1.337081073537408e-05, "loss": 0.4091, "step": 16503 }, { "epoch": 1.2266072092159048, "grad_norm": 1.8058263572435806, "learning_rate": 1.337005531437399e-05, "loss": 0.4994, "step": 16504 }, { "epoch": 1.226681531029357, "grad_norm": 1.6130794548706824, "learning_rate": 1.3369299871677652e-05, "loss": 0.5891, "step": 16505 }, { "epoch": 1.2267558528428093, "grad_norm": 2.2185541042957766, "learning_rate": 1.3368544407289941e-05, "loss": 0.7038, "step": 16506 }, { "epoch": 1.2268301746562615, "grad_norm": 1.8703889363389288, "learning_rate": 1.3367788921215711e-05, "loss": 0.6494, "step": 16507 }, { "epoch": 1.2269044964697138, "grad_norm": 1.6801679462618957, "learning_rate": 1.3367033413459837e-05, "loss": 0.4319, "step": 16508 }, { "epoch": 1.226978818283166, "grad_norm": 2.0204158584298852, "learning_rate": 1.3366277884027174e-05, "loss": 0.6446, "step": 16509 }, { "epoch": 1.2270531400966185, "grad_norm": 3.401709693883547, "learning_rate": 1.3365522332922591e-05, "loss": 0.5398, "step": 16510 }, { "epoch": 1.2271274619100705, "grad_norm": 1.9723442099270199, "learning_rate": 1.3364766760150946e-05, "loss": 0.645, "step": 16511 }, { "epoch": 1.227201783723523, "grad_norm": 1.7635961598868377, "learning_rate": 1.3364011165717108e-05, "loss": 0.6608, "step": 16512 }, { "epoch": 1.2272761055369752, "grad_norm": 1.2927279764938298, "learning_rate": 1.3363255549625942e-05, "loss": 0.4164, "step": 16513 }, { "epoch": 1.2273504273504274, "grad_norm": 2.111607453519243, "learning_rate": 1.3362499911882312e-05, "loss": 0.6036, "step": 16514 }, { "epoch": 1.2274247491638797, "grad_norm": 2.1666853076952846, "learning_rate": 1.3361744252491079e-05, "loss": 0.5893, "step": 16515 }, { "epoch": 1.227499070977332, "grad_norm": 1.9795184483039476, "learning_rate": 1.3360988571457114e-05, "loss": 0.8177, "step": 16516 }, { "epoch": 1.2275733927907841, "grad_norm": 1.8233668224855806, "learning_rate": 1.3360232868785277e-05, "loss": 0.4988, "step": 16517 }, { "epoch": 1.2276477146042364, "grad_norm": 2.817713716116998, "learning_rate": 1.3359477144480434e-05, "loss": 0.619, "step": 16518 }, { "epoch": 1.2277220364176886, "grad_norm": 1.9932752641508609, "learning_rate": 1.3358721398547456e-05, "loss": 0.7215, "step": 16519 }, { "epoch": 1.2277963582311409, "grad_norm": 1.7692012241178907, "learning_rate": 1.3357965630991201e-05, "loss": 0.6126, "step": 16520 }, { "epoch": 1.227870680044593, "grad_norm": 2.1609975205884817, "learning_rate": 1.3357209841816538e-05, "loss": 0.7146, "step": 16521 }, { "epoch": 1.2279450018580453, "grad_norm": 2.152669049027302, "learning_rate": 1.335645403102833e-05, "loss": 0.8065, "step": 16522 }, { "epoch": 1.2280193236714976, "grad_norm": 1.4615017207048588, "learning_rate": 1.3355698198631446e-05, "loss": 0.3882, "step": 16523 }, { "epoch": 1.2280936454849498, "grad_norm": 2.030025152319578, "learning_rate": 1.3354942344630755e-05, "loss": 0.5559, "step": 16524 }, { "epoch": 1.228167967298402, "grad_norm": 2.11128071772695, "learning_rate": 1.3354186469031115e-05, "loss": 0.6338, "step": 16525 }, { "epoch": 1.2282422891118543, "grad_norm": 1.8215632916540767, "learning_rate": 1.3353430571837398e-05, "loss": 0.6591, "step": 16526 }, { "epoch": 1.2283166109253065, "grad_norm": 2.356925065980093, "learning_rate": 1.335267465305447e-05, "loss": 0.7746, "step": 16527 }, { "epoch": 1.2283909327387588, "grad_norm": 2.0030525067403406, "learning_rate": 1.3351918712687195e-05, "loss": 0.5663, "step": 16528 }, { "epoch": 1.228465254552211, "grad_norm": 1.6899156156598059, "learning_rate": 1.335116275074044e-05, "loss": 0.5377, "step": 16529 }, { "epoch": 1.2285395763656632, "grad_norm": 1.8031924421017786, "learning_rate": 1.3350406767219077e-05, "loss": 0.5248, "step": 16530 }, { "epoch": 1.2286138981791155, "grad_norm": 2.3732758954474016, "learning_rate": 1.3349650762127966e-05, "loss": 0.7269, "step": 16531 }, { "epoch": 1.2286882199925677, "grad_norm": 1.8120317347952861, "learning_rate": 1.3348894735471976e-05, "loss": 0.6111, "step": 16532 }, { "epoch": 1.2287625418060202, "grad_norm": 2.3289222878211473, "learning_rate": 1.3348138687255978e-05, "loss": 0.6878, "step": 16533 }, { "epoch": 1.2288368636194722, "grad_norm": 2.290663705568959, "learning_rate": 1.3347382617484836e-05, "loss": 0.666, "step": 16534 }, { "epoch": 1.2289111854329247, "grad_norm": 2.9180154435386916, "learning_rate": 1.3346626526163422e-05, "loss": 0.5517, "step": 16535 }, { "epoch": 1.228985507246377, "grad_norm": 2.212750927039968, "learning_rate": 1.3345870413296596e-05, "loss": 0.6556, "step": 16536 }, { "epoch": 1.2290598290598291, "grad_norm": 2.249431480280653, "learning_rate": 1.3345114278889229e-05, "loss": 0.6208, "step": 16537 }, { "epoch": 1.2291341508732814, "grad_norm": 1.9713564289628398, "learning_rate": 1.3344358122946193e-05, "loss": 0.5802, "step": 16538 }, { "epoch": 1.2292084726867336, "grad_norm": 2.0211556781801576, "learning_rate": 1.3343601945472352e-05, "loss": 0.725, "step": 16539 }, { "epoch": 1.2292827945001858, "grad_norm": 2.182615623159169, "learning_rate": 1.3342845746472572e-05, "loss": 0.6103, "step": 16540 }, { "epoch": 1.229357116313638, "grad_norm": 2.507230670646863, "learning_rate": 1.334208952595173e-05, "loss": 0.7854, "step": 16541 }, { "epoch": 1.2294314381270903, "grad_norm": 2.08957129566826, "learning_rate": 1.3341333283914684e-05, "loss": 0.6964, "step": 16542 }, { "epoch": 1.2295057599405426, "grad_norm": 2.203285746705422, "learning_rate": 1.334057702036631e-05, "loss": 0.6789, "step": 16543 }, { "epoch": 1.2295800817539948, "grad_norm": 1.9328270459250612, "learning_rate": 1.3339820735311475e-05, "loss": 0.745, "step": 16544 }, { "epoch": 1.229654403567447, "grad_norm": 2.214941060982986, "learning_rate": 1.3339064428755046e-05, "loss": 0.5204, "step": 16545 }, { "epoch": 1.2297287253808993, "grad_norm": 1.763905943209667, "learning_rate": 1.3338308100701895e-05, "loss": 0.6215, "step": 16546 }, { "epoch": 1.2298030471943515, "grad_norm": 1.8314570217488413, "learning_rate": 1.3337551751156888e-05, "loss": 0.5522, "step": 16547 }, { "epoch": 1.2298773690078038, "grad_norm": 2.134629731970139, "learning_rate": 1.3336795380124896e-05, "loss": 0.6583, "step": 16548 }, { "epoch": 1.229951690821256, "grad_norm": 1.7267828992890633, "learning_rate": 1.333603898761079e-05, "loss": 0.5786, "step": 16549 }, { "epoch": 1.2300260126347082, "grad_norm": 2.3141600412840138, "learning_rate": 1.3335282573619436e-05, "loss": 0.7383, "step": 16550 }, { "epoch": 1.2301003344481605, "grad_norm": 1.754393287596297, "learning_rate": 1.3334526138155707e-05, "loss": 0.5347, "step": 16551 }, { "epoch": 1.2301746562616127, "grad_norm": 2.1094405789891475, "learning_rate": 1.3333769681224475e-05, "loss": 0.6784, "step": 16552 }, { "epoch": 1.230248978075065, "grad_norm": 1.9370455727790636, "learning_rate": 1.3333013202830603e-05, "loss": 0.5726, "step": 16553 }, { "epoch": 1.2303232998885172, "grad_norm": 1.9345943543199027, "learning_rate": 1.3332256702978964e-05, "loss": 0.702, "step": 16554 }, { "epoch": 1.2303976217019694, "grad_norm": 2.070313182516159, "learning_rate": 1.3331500181674433e-05, "loss": 0.3947, "step": 16555 }, { "epoch": 1.2304719435154219, "grad_norm": 1.625270136578325, "learning_rate": 1.3330743638921875e-05, "loss": 0.5973, "step": 16556 }, { "epoch": 1.230546265328874, "grad_norm": 2.2226302492608454, "learning_rate": 1.3329987074726164e-05, "loss": 0.661, "step": 16557 }, { "epoch": 1.2306205871423264, "grad_norm": 3.2867533326605414, "learning_rate": 1.3329230489092165e-05, "loss": 0.6903, "step": 16558 }, { "epoch": 1.2306949089557786, "grad_norm": 1.628115026469743, "learning_rate": 1.3328473882024753e-05, "loss": 0.4796, "step": 16559 }, { "epoch": 1.2307692307692308, "grad_norm": 1.8352093570742278, "learning_rate": 1.33277172535288e-05, "loss": 0.4894, "step": 16560 }, { "epoch": 1.230843552582683, "grad_norm": 1.7582520170619633, "learning_rate": 1.3326960603609178e-05, "loss": 0.5332, "step": 16561 }, { "epoch": 1.2309178743961353, "grad_norm": 2.053207854229028, "learning_rate": 1.3326203932270753e-05, "loss": 0.6603, "step": 16562 }, { "epoch": 1.2309921962095876, "grad_norm": 2.173845735314949, "learning_rate": 1.3325447239518405e-05, "loss": 0.673, "step": 16563 }, { "epoch": 1.2310665180230398, "grad_norm": 1.77619914320722, "learning_rate": 1.3324690525356996e-05, "loss": 0.5165, "step": 16564 }, { "epoch": 1.231140839836492, "grad_norm": 2.018302450964003, "learning_rate": 1.33239337897914e-05, "loss": 0.4652, "step": 16565 }, { "epoch": 1.2312151616499443, "grad_norm": 2.074651729339688, "learning_rate": 1.3323177032826498e-05, "loss": 0.603, "step": 16566 }, { "epoch": 1.2312894834633965, "grad_norm": 1.9156907606019842, "learning_rate": 1.3322420254467149e-05, "loss": 0.5325, "step": 16567 }, { "epoch": 1.2313638052768487, "grad_norm": 1.909453058604608, "learning_rate": 1.3321663454718232e-05, "loss": 0.5645, "step": 16568 }, { "epoch": 1.231438127090301, "grad_norm": 2.5248008500797536, "learning_rate": 1.3320906633584617e-05, "loss": 0.7666, "step": 16569 }, { "epoch": 1.2315124489037532, "grad_norm": 1.8058969785421701, "learning_rate": 1.3320149791071177e-05, "loss": 0.5316, "step": 16570 }, { "epoch": 1.2315867707172055, "grad_norm": 1.8428598988535168, "learning_rate": 1.3319392927182786e-05, "loss": 0.6723, "step": 16571 }, { "epoch": 1.2316610925306577, "grad_norm": 2.090964778040858, "learning_rate": 1.3318636041924318e-05, "loss": 0.626, "step": 16572 }, { "epoch": 1.23173541434411, "grad_norm": 1.9126032360912884, "learning_rate": 1.3317879135300643e-05, "loss": 0.5895, "step": 16573 }, { "epoch": 1.2318097361575622, "grad_norm": 2.0372988349522143, "learning_rate": 1.331712220731663e-05, "loss": 0.7824, "step": 16574 }, { "epoch": 1.2318840579710144, "grad_norm": 1.957444825785782, "learning_rate": 1.3316365257977158e-05, "loss": 0.6206, "step": 16575 }, { "epoch": 1.2319583797844667, "grad_norm": 1.9105242089137018, "learning_rate": 1.3315608287287098e-05, "loss": 0.6102, "step": 16576 }, { "epoch": 1.232032701597919, "grad_norm": 4.7343670293219, "learning_rate": 1.3314851295251326e-05, "loss": 0.6187, "step": 16577 }, { "epoch": 1.2321070234113711, "grad_norm": 1.9092004451188573, "learning_rate": 1.3314094281874711e-05, "loss": 0.5951, "step": 16578 }, { "epoch": 1.2321813452248236, "grad_norm": 2.3587483776919305, "learning_rate": 1.3313337247162135e-05, "loss": 0.6883, "step": 16579 }, { "epoch": 1.2322556670382758, "grad_norm": 3.636662462904055, "learning_rate": 1.331258019111846e-05, "loss": 0.5929, "step": 16580 }, { "epoch": 1.232329988851728, "grad_norm": 1.8492158309155666, "learning_rate": 1.3311823113748566e-05, "loss": 0.6665, "step": 16581 }, { "epoch": 1.2324043106651803, "grad_norm": 2.3343179914880747, "learning_rate": 1.3311066015057326e-05, "loss": 0.6267, "step": 16582 }, { "epoch": 1.2324786324786325, "grad_norm": 1.8649301671002954, "learning_rate": 1.331030889504962e-05, "loss": 0.7313, "step": 16583 }, { "epoch": 1.2325529542920848, "grad_norm": 1.9168962393383258, "learning_rate": 1.3309551753730313e-05, "loss": 0.6607, "step": 16584 }, { "epoch": 1.232627276105537, "grad_norm": 2.252209609710416, "learning_rate": 1.3308794591104284e-05, "loss": 0.5801, "step": 16585 }, { "epoch": 1.2327015979189893, "grad_norm": 1.9266434243422377, "learning_rate": 1.3308037407176407e-05, "loss": 0.6914, "step": 16586 }, { "epoch": 1.2327759197324415, "grad_norm": 1.938138764589418, "learning_rate": 1.3307280201951556e-05, "loss": 0.6582, "step": 16587 }, { "epoch": 1.2328502415458937, "grad_norm": 2.0303790033187052, "learning_rate": 1.3306522975434606e-05, "loss": 0.5606, "step": 16588 }, { "epoch": 1.232924563359346, "grad_norm": 2.1675639562136593, "learning_rate": 1.3305765727630436e-05, "loss": 0.6598, "step": 16589 }, { "epoch": 1.2329988851727982, "grad_norm": 2.4946301017100274, "learning_rate": 1.3305008458543916e-05, "loss": 0.671, "step": 16590 }, { "epoch": 1.2330732069862504, "grad_norm": 2.264431227778841, "learning_rate": 1.3304251168179923e-05, "loss": 0.7506, "step": 16591 }, { "epoch": 1.2331475287997027, "grad_norm": 1.9378514927221213, "learning_rate": 1.330349385654333e-05, "loss": 0.5121, "step": 16592 }, { "epoch": 1.233221850613155, "grad_norm": 2.04066392418637, "learning_rate": 1.3302736523639018e-05, "loss": 0.7407, "step": 16593 }, { "epoch": 1.2332961724266072, "grad_norm": 3.698292085348183, "learning_rate": 1.3301979169471858e-05, "loss": 0.6392, "step": 16594 }, { "epoch": 1.2333704942400594, "grad_norm": 1.8780586035575635, "learning_rate": 1.3301221794046733e-05, "loss": 0.5392, "step": 16595 }, { "epoch": 1.2334448160535116, "grad_norm": 2.2955277389233686, "learning_rate": 1.3300464397368506e-05, "loss": 0.6491, "step": 16596 }, { "epoch": 1.2335191378669639, "grad_norm": 1.9045257553080344, "learning_rate": 1.3299706979442061e-05, "loss": 0.6118, "step": 16597 }, { "epoch": 1.2335934596804161, "grad_norm": 2.1849106013289648, "learning_rate": 1.3298949540272277e-05, "loss": 0.6911, "step": 16598 }, { "epoch": 1.2336677814938684, "grad_norm": 2.2012081412463975, "learning_rate": 1.3298192079864025e-05, "loss": 0.8708, "step": 16599 }, { "epoch": 1.2337421033073208, "grad_norm": 3.4894503156760384, "learning_rate": 1.3297434598222185e-05, "loss": 0.5811, "step": 16600 }, { "epoch": 1.2338164251207728, "grad_norm": 1.7362281197461693, "learning_rate": 1.3296677095351634e-05, "loss": 0.5535, "step": 16601 }, { "epoch": 1.2338907469342253, "grad_norm": 1.565520715328005, "learning_rate": 1.3295919571257244e-05, "loss": 0.5201, "step": 16602 }, { "epoch": 1.2339650687476775, "grad_norm": 1.9115369494972283, "learning_rate": 1.3295162025943893e-05, "loss": 0.6963, "step": 16603 }, { "epoch": 1.2340393905611298, "grad_norm": 2.830106036951887, "learning_rate": 1.3294404459416463e-05, "loss": 0.4618, "step": 16604 }, { "epoch": 1.234113712374582, "grad_norm": 1.8983327362840883, "learning_rate": 1.3293646871679829e-05, "loss": 0.545, "step": 16605 }, { "epoch": 1.2341880341880342, "grad_norm": 5.76582265274454, "learning_rate": 1.3292889262738866e-05, "loss": 0.6362, "step": 16606 }, { "epoch": 1.2342623560014865, "grad_norm": 2.0486390906113456, "learning_rate": 1.3292131632598451e-05, "loss": 0.561, "step": 16607 }, { "epoch": 1.2343366778149387, "grad_norm": 2.2658858449619177, "learning_rate": 1.3291373981263465e-05, "loss": 0.7185, "step": 16608 }, { "epoch": 1.234410999628391, "grad_norm": 1.7968110288760648, "learning_rate": 1.3290616308738781e-05, "loss": 0.4815, "step": 16609 }, { "epoch": 1.2344853214418432, "grad_norm": 2.2904800054242793, "learning_rate": 1.3289858615029283e-05, "loss": 0.672, "step": 16610 }, { "epoch": 1.2345596432552954, "grad_norm": 1.9412034797043196, "learning_rate": 1.328910090013985e-05, "loss": 0.6714, "step": 16611 }, { "epoch": 1.2346339650687477, "grad_norm": 2.0944993500583085, "learning_rate": 1.3288343164075351e-05, "loss": 0.5886, "step": 16612 }, { "epoch": 1.2347082868822, "grad_norm": 2.2537780871620074, "learning_rate": 1.3287585406840669e-05, "loss": 0.7047, "step": 16613 }, { "epoch": 1.2347826086956522, "grad_norm": 2.487551014053071, "learning_rate": 1.3286827628440681e-05, "loss": 0.8104, "step": 16614 }, { "epoch": 1.2348569305091044, "grad_norm": 1.6681371203019055, "learning_rate": 1.328606982888027e-05, "loss": 0.5084, "step": 16615 }, { "epoch": 1.2349312523225566, "grad_norm": 2.409004882954634, "learning_rate": 1.3285312008164314e-05, "loss": 0.688, "step": 16616 }, { "epoch": 1.2350055741360089, "grad_norm": 6.191252235031512, "learning_rate": 1.328455416629769e-05, "loss": 0.5989, "step": 16617 }, { "epoch": 1.235079895949461, "grad_norm": 1.836427161108209, "learning_rate": 1.3283796303285272e-05, "loss": 0.5788, "step": 16618 }, { "epoch": 1.2351542177629133, "grad_norm": 1.9864173225877542, "learning_rate": 1.3283038419131947e-05, "loss": 0.592, "step": 16619 }, { "epoch": 1.2352285395763656, "grad_norm": 1.7294771057861595, "learning_rate": 1.3282280513842588e-05, "loss": 0.4573, "step": 16620 }, { "epoch": 1.2353028613898178, "grad_norm": 1.735286201368624, "learning_rate": 1.328152258742208e-05, "loss": 0.5979, "step": 16621 }, { "epoch": 1.23537718320327, "grad_norm": 2.143488834725349, "learning_rate": 1.32807646398753e-05, "loss": 0.4862, "step": 16622 }, { "epoch": 1.2354515050167225, "grad_norm": 1.9602667448393356, "learning_rate": 1.3280006671207128e-05, "loss": 0.6682, "step": 16623 }, { "epoch": 1.2355258268301745, "grad_norm": 2.1514292364689034, "learning_rate": 1.3279248681422444e-05, "loss": 0.653, "step": 16624 }, { "epoch": 1.235600148643627, "grad_norm": 1.654872894645038, "learning_rate": 1.3278490670526124e-05, "loss": 0.5757, "step": 16625 }, { "epoch": 1.2356744704570792, "grad_norm": 2.2323720316281404, "learning_rate": 1.327773263852305e-05, "loss": 0.6897, "step": 16626 }, { "epoch": 1.2357487922705315, "grad_norm": 1.774443548432857, "learning_rate": 1.3276974585418108e-05, "loss": 0.6221, "step": 16627 }, { "epoch": 1.2358231140839837, "grad_norm": 2.4219864457385407, "learning_rate": 1.3276216511216172e-05, "loss": 0.6372, "step": 16628 }, { "epoch": 1.235897435897436, "grad_norm": 3.3079812925859673, "learning_rate": 1.3275458415922122e-05, "loss": 0.7556, "step": 16629 }, { "epoch": 1.2359717577108882, "grad_norm": 2.075869619439885, "learning_rate": 1.3274700299540842e-05, "loss": 0.6283, "step": 16630 }, { "epoch": 1.2360460795243404, "grad_norm": 2.0271214198594394, "learning_rate": 1.3273942162077212e-05, "loss": 0.6606, "step": 16631 }, { "epoch": 1.2361204013377927, "grad_norm": 1.686207162928969, "learning_rate": 1.327318400353611e-05, "loss": 0.5633, "step": 16632 }, { "epoch": 1.236194723151245, "grad_norm": 2.666254602484141, "learning_rate": 1.3272425823922424e-05, "loss": 0.5958, "step": 16633 }, { "epoch": 1.2362690449646971, "grad_norm": 3.980678318405717, "learning_rate": 1.3271667623241027e-05, "loss": 0.5478, "step": 16634 }, { "epoch": 1.2363433667781494, "grad_norm": 2.0256963315217207, "learning_rate": 1.3270909401496805e-05, "loss": 0.5631, "step": 16635 }, { "epoch": 1.2364176885916016, "grad_norm": 1.780709086683201, "learning_rate": 1.3270151158694634e-05, "loss": 0.6309, "step": 16636 }, { "epoch": 1.2364920104050539, "grad_norm": 2.405439260074424, "learning_rate": 1.3269392894839404e-05, "loss": 0.6891, "step": 16637 }, { "epoch": 1.236566332218506, "grad_norm": 2.029048966011305, "learning_rate": 1.3268634609935991e-05, "loss": 0.5889, "step": 16638 }, { "epoch": 1.2366406540319583, "grad_norm": 2.2366408288456774, "learning_rate": 1.3267876303989277e-05, "loss": 0.6248, "step": 16639 }, { "epoch": 1.2367149758454106, "grad_norm": 1.837744423674971, "learning_rate": 1.3267117977004146e-05, "loss": 0.6061, "step": 16640 }, { "epoch": 1.2367892976588628, "grad_norm": 1.921553318637216, "learning_rate": 1.3266359628985479e-05, "loss": 0.5636, "step": 16641 }, { "epoch": 1.236863619472315, "grad_norm": 1.849225324787873, "learning_rate": 1.3265601259938157e-05, "loss": 0.6301, "step": 16642 }, { "epoch": 1.2369379412857673, "grad_norm": 1.8535135078047045, "learning_rate": 1.3264842869867064e-05, "loss": 0.5256, "step": 16643 }, { "epoch": 1.2370122630992197, "grad_norm": 2.6843213499413534, "learning_rate": 1.3264084458777082e-05, "loss": 0.709, "step": 16644 }, { "epoch": 1.2370865849126718, "grad_norm": 2.423518522983661, "learning_rate": 1.3263326026673096e-05, "loss": 0.5827, "step": 16645 }, { "epoch": 1.2371609067261242, "grad_norm": 1.5957646601891706, "learning_rate": 1.3262567573559984e-05, "loss": 0.5356, "step": 16646 }, { "epoch": 1.2372352285395765, "grad_norm": 2.2802003343811577, "learning_rate": 1.326180909944263e-05, "loss": 0.7575, "step": 16647 }, { "epoch": 1.2373095503530287, "grad_norm": 2.0692575283122516, "learning_rate": 1.326105060432592e-05, "loss": 0.7473, "step": 16648 }, { "epoch": 1.237383872166481, "grad_norm": 2.428885349441806, "learning_rate": 1.3260292088214736e-05, "loss": 0.5206, "step": 16649 }, { "epoch": 1.2374581939799332, "grad_norm": 1.779648729819101, "learning_rate": 1.3259533551113958e-05, "loss": 0.6114, "step": 16650 }, { "epoch": 1.2375325157933854, "grad_norm": 1.4688995452846672, "learning_rate": 1.3258774993028476e-05, "loss": 0.3772, "step": 16651 }, { "epoch": 1.2376068376068377, "grad_norm": 2.1089796221778987, "learning_rate": 1.3258016413963168e-05, "loss": 0.5258, "step": 16652 }, { "epoch": 1.23768115942029, "grad_norm": 2.2846622886370294, "learning_rate": 1.3257257813922917e-05, "loss": 0.6638, "step": 16653 }, { "epoch": 1.2377554812337421, "grad_norm": 2.521323771434171, "learning_rate": 1.325649919291261e-05, "loss": 0.7218, "step": 16654 }, { "epoch": 1.2378298030471944, "grad_norm": 1.8850838624185386, "learning_rate": 1.3255740550937133e-05, "loss": 0.6384, "step": 16655 }, { "epoch": 1.2379041248606466, "grad_norm": 1.691225754775137, "learning_rate": 1.3254981888001365e-05, "loss": 0.5486, "step": 16656 }, { "epoch": 1.2379784466740988, "grad_norm": 1.6007741779191549, "learning_rate": 1.3254223204110196e-05, "loss": 0.5825, "step": 16657 }, { "epoch": 1.238052768487551, "grad_norm": 2.5188705756973486, "learning_rate": 1.3253464499268503e-05, "loss": 0.7459, "step": 16658 }, { "epoch": 1.2381270903010033, "grad_norm": 1.684388652354647, "learning_rate": 1.3252705773481173e-05, "loss": 0.6242, "step": 16659 }, { "epoch": 1.2382014121144556, "grad_norm": 2.253491308868083, "learning_rate": 1.3251947026753098e-05, "loss": 0.69, "step": 16660 }, { "epoch": 1.2382757339279078, "grad_norm": 1.8663668317348114, "learning_rate": 1.3251188259089152e-05, "loss": 0.5739, "step": 16661 }, { "epoch": 1.23835005574136, "grad_norm": 1.8861639960040597, "learning_rate": 1.3250429470494226e-05, "loss": 0.6355, "step": 16662 }, { "epoch": 1.2384243775548123, "grad_norm": 2.233889406571236, "learning_rate": 1.3249670660973205e-05, "loss": 0.7337, "step": 16663 }, { "epoch": 1.2384986993682645, "grad_norm": 1.8449448096923158, "learning_rate": 1.3248911830530968e-05, "loss": 0.6601, "step": 16664 }, { "epoch": 1.2385730211817167, "grad_norm": 2.0881314609242683, "learning_rate": 1.3248152979172408e-05, "loss": 0.6241, "step": 16665 }, { "epoch": 1.238647342995169, "grad_norm": 2.4142683691958844, "learning_rate": 1.3247394106902412e-05, "loss": 0.7267, "step": 16666 }, { "epoch": 1.2387216648086214, "grad_norm": 2.007375459104999, "learning_rate": 1.3246635213725856e-05, "loss": 0.6892, "step": 16667 }, { "epoch": 1.2387959866220735, "grad_norm": 2.4136561657219198, "learning_rate": 1.3245876299647633e-05, "loss": 0.7202, "step": 16668 }, { "epoch": 1.238870308435526, "grad_norm": 2.353039674806357, "learning_rate": 1.3245117364672627e-05, "loss": 0.5241, "step": 16669 }, { "epoch": 1.2389446302489782, "grad_norm": 2.411309318833419, "learning_rate": 1.3244358408805722e-05, "loss": 0.6744, "step": 16670 }, { "epoch": 1.2390189520624304, "grad_norm": 2.0930226242862635, "learning_rate": 1.3243599432051808e-05, "loss": 0.7229, "step": 16671 }, { "epoch": 1.2390932738758826, "grad_norm": 1.8327986651590704, "learning_rate": 1.3242840434415767e-05, "loss": 0.5773, "step": 16672 }, { "epoch": 1.2391675956893349, "grad_norm": 1.6398676162507762, "learning_rate": 1.3242081415902487e-05, "loss": 0.5067, "step": 16673 }, { "epoch": 1.2392419175027871, "grad_norm": 2.1280805303248393, "learning_rate": 1.3241322376516858e-05, "loss": 0.7548, "step": 16674 }, { "epoch": 1.2393162393162394, "grad_norm": 1.9927851006665624, "learning_rate": 1.3240563316263763e-05, "loss": 0.5779, "step": 16675 }, { "epoch": 1.2393905611296916, "grad_norm": 1.92058123072793, "learning_rate": 1.3239804235148089e-05, "loss": 0.5864, "step": 16676 }, { "epoch": 1.2394648829431438, "grad_norm": 2.4345417095446233, "learning_rate": 1.3239045133174724e-05, "loss": 0.6697, "step": 16677 }, { "epoch": 1.239539204756596, "grad_norm": 2.533646223582151, "learning_rate": 1.3238286010348554e-05, "loss": 0.5446, "step": 16678 }, { "epoch": 1.2396135265700483, "grad_norm": 1.6560024955676749, "learning_rate": 1.3237526866674468e-05, "loss": 0.5039, "step": 16679 }, { "epoch": 1.2396878483835005, "grad_norm": 2.3386670490779946, "learning_rate": 1.323676770215735e-05, "loss": 0.6236, "step": 16680 }, { "epoch": 1.2397621701969528, "grad_norm": 2.5960430258401446, "learning_rate": 1.3236008516802088e-05, "loss": 0.7568, "step": 16681 }, { "epoch": 1.239836492010405, "grad_norm": 2.0355521614874856, "learning_rate": 1.3235249310613574e-05, "loss": 0.6898, "step": 16682 }, { "epoch": 1.2399108138238573, "grad_norm": 1.6875061877680497, "learning_rate": 1.3234490083596691e-05, "loss": 0.6761, "step": 16683 }, { "epoch": 1.2399851356373095, "grad_norm": 1.5665668835624498, "learning_rate": 1.323373083575633e-05, "loss": 0.5059, "step": 16684 }, { "epoch": 1.2400594574507617, "grad_norm": 2.4984239409640194, "learning_rate": 1.3232971567097378e-05, "loss": 0.6419, "step": 16685 }, { "epoch": 1.240133779264214, "grad_norm": 2.1699558887534094, "learning_rate": 1.3232212277624723e-05, "loss": 0.6406, "step": 16686 }, { "epoch": 1.2402081010776662, "grad_norm": 2.1096187970895297, "learning_rate": 1.323145296734325e-05, "loss": 0.6656, "step": 16687 }, { "epoch": 1.2402824228911185, "grad_norm": 2.1617053451169257, "learning_rate": 1.3230693636257854e-05, "loss": 0.8224, "step": 16688 }, { "epoch": 1.2403567447045707, "grad_norm": 1.560700267920239, "learning_rate": 1.3229934284373417e-05, "loss": 0.5702, "step": 16689 }, { "epoch": 1.2404310665180232, "grad_norm": 1.9039464628306066, "learning_rate": 1.3229174911694833e-05, "loss": 0.5415, "step": 16690 }, { "epoch": 1.2405053883314752, "grad_norm": 2.2407744309410336, "learning_rate": 1.322841551822699e-05, "loss": 0.7303, "step": 16691 }, { "epoch": 1.2405797101449276, "grad_norm": 2.0918075351995804, "learning_rate": 1.3227656103974772e-05, "loss": 0.6645, "step": 16692 }, { "epoch": 1.2406540319583799, "grad_norm": 1.585793835641208, "learning_rate": 1.3226896668943075e-05, "loss": 0.5069, "step": 16693 }, { "epoch": 1.240728353771832, "grad_norm": 2.3271378943459387, "learning_rate": 1.3226137213136781e-05, "loss": 0.5379, "step": 16694 }, { "epoch": 1.2408026755852843, "grad_norm": 1.967798928092467, "learning_rate": 1.3225377736560783e-05, "loss": 0.7865, "step": 16695 }, { "epoch": 1.2408769973987366, "grad_norm": 2.95026790904174, "learning_rate": 1.3224618239219973e-05, "loss": 0.6096, "step": 16696 }, { "epoch": 1.2409513192121888, "grad_norm": 1.746972599147445, "learning_rate": 1.3223858721119235e-05, "loss": 0.5347, "step": 16697 }, { "epoch": 1.241025641025641, "grad_norm": 1.821693459903783, "learning_rate": 1.3223099182263463e-05, "loss": 0.6743, "step": 16698 }, { "epoch": 1.2410999628390933, "grad_norm": 2.171712439267039, "learning_rate": 1.322233962265755e-05, "loss": 0.5185, "step": 16699 }, { "epoch": 1.2411742846525455, "grad_norm": 2.527230346061272, "learning_rate": 1.3221580042306376e-05, "loss": 0.5673, "step": 16700 }, { "epoch": 1.2412486064659978, "grad_norm": 1.7527705352540723, "learning_rate": 1.322082044121484e-05, "loss": 0.524, "step": 16701 }, { "epoch": 1.24132292827945, "grad_norm": 1.7474329277671825, "learning_rate": 1.322006081938783e-05, "loss": 0.4921, "step": 16702 }, { "epoch": 1.2413972500929022, "grad_norm": 2.2191253102546793, "learning_rate": 1.321930117683023e-05, "loss": 0.6853, "step": 16703 }, { "epoch": 1.2414715719063545, "grad_norm": 2.2950177975520947, "learning_rate": 1.3218541513546943e-05, "loss": 0.6933, "step": 16704 }, { "epoch": 1.2415458937198067, "grad_norm": 2.168314905324473, "learning_rate": 1.3217781829542849e-05, "loss": 0.5032, "step": 16705 }, { "epoch": 1.241620215533259, "grad_norm": 2.1829022185799367, "learning_rate": 1.321702212482284e-05, "loss": 0.7242, "step": 16706 }, { "epoch": 1.2416945373467112, "grad_norm": 1.6227885769199315, "learning_rate": 1.3216262399391813e-05, "loss": 0.5048, "step": 16707 }, { "epoch": 1.2417688591601634, "grad_norm": 2.55798901646505, "learning_rate": 1.3215502653254656e-05, "loss": 0.5986, "step": 16708 }, { "epoch": 1.2418431809736157, "grad_norm": 1.5944165316982377, "learning_rate": 1.3214742886416261e-05, "loss": 0.5365, "step": 16709 }, { "epoch": 1.241917502787068, "grad_norm": 1.9374035730255896, "learning_rate": 1.3213983098881516e-05, "loss": 0.633, "step": 16710 }, { "epoch": 1.2419918246005202, "grad_norm": 1.8318919514490266, "learning_rate": 1.3213223290655313e-05, "loss": 0.5417, "step": 16711 }, { "epoch": 1.2420661464139724, "grad_norm": 2.0398105651450753, "learning_rate": 1.3212463461742547e-05, "loss": 0.6503, "step": 16712 }, { "epoch": 1.2421404682274249, "grad_norm": 1.920197579367573, "learning_rate": 1.3211703612148108e-05, "loss": 0.6386, "step": 16713 }, { "epoch": 1.242214790040877, "grad_norm": 1.8698621678909497, "learning_rate": 1.3210943741876885e-05, "loss": 0.6702, "step": 16714 }, { "epoch": 1.2422891118543293, "grad_norm": 2.639307510509795, "learning_rate": 1.3210183850933777e-05, "loss": 0.5699, "step": 16715 }, { "epoch": 1.2423634336677816, "grad_norm": 2.660256085858365, "learning_rate": 1.3209423939323669e-05, "loss": 0.7258, "step": 16716 }, { "epoch": 1.2424377554812338, "grad_norm": 2.292734708542535, "learning_rate": 1.3208664007051457e-05, "loss": 0.719, "step": 16717 }, { "epoch": 1.242512077294686, "grad_norm": 2.504357357784601, "learning_rate": 1.3207904054122031e-05, "loss": 0.5645, "step": 16718 }, { "epoch": 1.2425863991081383, "grad_norm": 1.8429119711268462, "learning_rate": 1.320714408054029e-05, "loss": 0.3773, "step": 16719 }, { "epoch": 1.2426607209215905, "grad_norm": 2.268053980439754, "learning_rate": 1.320638408631112e-05, "loss": 0.5958, "step": 16720 }, { "epoch": 1.2427350427350428, "grad_norm": 1.7735733471743051, "learning_rate": 1.3205624071439413e-05, "loss": 0.6076, "step": 16721 }, { "epoch": 1.242809364548495, "grad_norm": 1.9332266306855677, "learning_rate": 1.3204864035930065e-05, "loss": 0.5752, "step": 16722 }, { "epoch": 1.2428836863619472, "grad_norm": 2.282103641239492, "learning_rate": 1.320410397978797e-05, "loss": 0.7019, "step": 16723 }, { "epoch": 1.2429580081753995, "grad_norm": 3.5419328019514325, "learning_rate": 1.320334390301802e-05, "loss": 0.509, "step": 16724 }, { "epoch": 1.2430323299888517, "grad_norm": 2.215452565781276, "learning_rate": 1.320258380562511e-05, "loss": 0.672, "step": 16725 }, { "epoch": 1.243106651802304, "grad_norm": 1.9238482715601526, "learning_rate": 1.3201823687614129e-05, "loss": 0.6013, "step": 16726 }, { "epoch": 1.2431809736157562, "grad_norm": 2.374501781786186, "learning_rate": 1.3201063548989971e-05, "loss": 0.6201, "step": 16727 }, { "epoch": 1.2432552954292084, "grad_norm": 1.762610892028537, "learning_rate": 1.3200303389757535e-05, "loss": 0.5985, "step": 16728 }, { "epoch": 1.2433296172426607, "grad_norm": 1.6394915230550446, "learning_rate": 1.319954320992171e-05, "loss": 0.5599, "step": 16729 }, { "epoch": 1.243403939056113, "grad_norm": 1.7746753896739937, "learning_rate": 1.3198783009487394e-05, "loss": 0.5996, "step": 16730 }, { "epoch": 1.2434782608695651, "grad_norm": 2.363091142640402, "learning_rate": 1.3198022788459479e-05, "loss": 0.6913, "step": 16731 }, { "epoch": 1.2435525826830174, "grad_norm": 1.8015761043631535, "learning_rate": 1.3197262546842859e-05, "loss": 0.4895, "step": 16732 }, { "epoch": 1.2436269044964696, "grad_norm": 1.5761723826507277, "learning_rate": 1.3196502284642427e-05, "loss": 0.5662, "step": 16733 }, { "epoch": 1.243701226309922, "grad_norm": 1.8411440688379863, "learning_rate": 1.3195742001863079e-05, "loss": 0.5373, "step": 16734 }, { "epoch": 1.243775548123374, "grad_norm": 2.6555377200706047, "learning_rate": 1.319498169850971e-05, "loss": 0.6051, "step": 16735 }, { "epoch": 1.2438498699368266, "grad_norm": 1.939612701066268, "learning_rate": 1.3194221374587218e-05, "loss": 0.541, "step": 16736 }, { "epoch": 1.2439241917502788, "grad_norm": 1.928737407899018, "learning_rate": 1.3193461030100491e-05, "loss": 0.6991, "step": 16737 }, { "epoch": 1.243998513563731, "grad_norm": 2.0415891187353457, "learning_rate": 1.319270066505443e-05, "loss": 0.6934, "step": 16738 }, { "epoch": 1.2440728353771833, "grad_norm": 1.633832594229096, "learning_rate": 1.3191940279453926e-05, "loss": 0.6057, "step": 16739 }, { "epoch": 1.2441471571906355, "grad_norm": 2.0816229933117327, "learning_rate": 1.3191179873303875e-05, "loss": 0.6929, "step": 16740 }, { "epoch": 1.2442214790040877, "grad_norm": 2.7517696402760334, "learning_rate": 1.3190419446609175e-05, "loss": 0.6114, "step": 16741 }, { "epoch": 1.24429580081754, "grad_norm": 2.19525941907762, "learning_rate": 1.3189658999374722e-05, "loss": 0.5816, "step": 16742 }, { "epoch": 1.2443701226309922, "grad_norm": 1.9080008603614043, "learning_rate": 1.3188898531605407e-05, "loss": 0.5562, "step": 16743 }, { "epoch": 1.2444444444444445, "grad_norm": 2.4771443986317996, "learning_rate": 1.3188138043306129e-05, "loss": 0.6503, "step": 16744 }, { "epoch": 1.2445187662578967, "grad_norm": 1.9137491197613925, "learning_rate": 1.3187377534481785e-05, "loss": 0.6145, "step": 16745 }, { "epoch": 1.244593088071349, "grad_norm": 2.1451685872164297, "learning_rate": 1.3186617005137268e-05, "loss": 0.6, "step": 16746 }, { "epoch": 1.2446674098848012, "grad_norm": 2.7536604629929395, "learning_rate": 1.318585645527748e-05, "loss": 0.818, "step": 16747 }, { "epoch": 1.2447417316982534, "grad_norm": 2.1246939474843747, "learning_rate": 1.318509588490731e-05, "loss": 0.7109, "step": 16748 }, { "epoch": 1.2448160535117057, "grad_norm": 2.1501459729046184, "learning_rate": 1.3184335294031657e-05, "loss": 0.5999, "step": 16749 }, { "epoch": 1.244890375325158, "grad_norm": 1.8159106069723983, "learning_rate": 1.3183574682655422e-05, "loss": 0.616, "step": 16750 }, { "epoch": 1.2449646971386101, "grad_norm": 2.2343463007038635, "learning_rate": 1.3182814050783495e-05, "loss": 0.6175, "step": 16751 }, { "epoch": 1.2450390189520624, "grad_norm": 1.9207637884399726, "learning_rate": 1.318205339842078e-05, "loss": 0.5141, "step": 16752 }, { "epoch": 1.2451133407655146, "grad_norm": 2.230334808280952, "learning_rate": 1.318129272557217e-05, "loss": 0.8557, "step": 16753 }, { "epoch": 1.2451876625789668, "grad_norm": 2.0899213633863742, "learning_rate": 1.318053203224256e-05, "loss": 0.6147, "step": 16754 }, { "epoch": 1.245261984392419, "grad_norm": 1.8186089944342803, "learning_rate": 1.3179771318436849e-05, "loss": 0.5685, "step": 16755 }, { "epoch": 1.2453363062058713, "grad_norm": 1.9540381819373653, "learning_rate": 1.3179010584159937e-05, "loss": 0.3825, "step": 16756 }, { "epoch": 1.2454106280193238, "grad_norm": 2.096079885813771, "learning_rate": 1.3178249829416718e-05, "loss": 0.6645, "step": 16757 }, { "epoch": 1.2454849498327758, "grad_norm": 1.8364051261539098, "learning_rate": 1.3177489054212096e-05, "loss": 0.6399, "step": 16758 }, { "epoch": 1.2455592716462283, "grad_norm": 1.5571983715113187, "learning_rate": 1.3176728258550961e-05, "loss": 0.4832, "step": 16759 }, { "epoch": 1.2456335934596805, "grad_norm": 2.022457561254112, "learning_rate": 1.3175967442438214e-05, "loss": 0.5889, "step": 16760 }, { "epoch": 1.2457079152731327, "grad_norm": 2.36149042186808, "learning_rate": 1.3175206605878754e-05, "loss": 0.7, "step": 16761 }, { "epoch": 1.245782237086585, "grad_norm": 1.6497597608239702, "learning_rate": 1.3174445748877479e-05, "loss": 0.6381, "step": 16762 }, { "epoch": 1.2458565589000372, "grad_norm": 2.361588318352745, "learning_rate": 1.3173684871439287e-05, "loss": 0.6418, "step": 16763 }, { "epoch": 1.2459308807134895, "grad_norm": 1.6351866546306502, "learning_rate": 1.3172923973569078e-05, "loss": 0.4763, "step": 16764 }, { "epoch": 1.2460052025269417, "grad_norm": 1.7644676954184864, "learning_rate": 1.3172163055271747e-05, "loss": 0.49, "step": 16765 }, { "epoch": 1.246079524340394, "grad_norm": 2.261986963829822, "learning_rate": 1.3171402116552194e-05, "loss": 0.6076, "step": 16766 }, { "epoch": 1.2461538461538462, "grad_norm": 1.7499184887259558, "learning_rate": 1.3170641157415318e-05, "loss": 0.6441, "step": 16767 }, { "epoch": 1.2462281679672984, "grad_norm": 1.786396279181817, "learning_rate": 1.3169880177866022e-05, "loss": 0.5454, "step": 16768 }, { "epoch": 1.2463024897807506, "grad_norm": 1.5090710665712856, "learning_rate": 1.31691191779092e-05, "loss": 0.4325, "step": 16769 }, { "epoch": 1.2463768115942029, "grad_norm": 1.9281214862992202, "learning_rate": 1.3168358157549757e-05, "loss": 0.5297, "step": 16770 }, { "epoch": 1.2464511334076551, "grad_norm": 1.9203353048585412, "learning_rate": 1.3167597116792583e-05, "loss": 0.5882, "step": 16771 }, { "epoch": 1.2465254552211074, "grad_norm": 2.1292993755333556, "learning_rate": 1.3166836055642585e-05, "loss": 0.8011, "step": 16772 }, { "epoch": 1.2465997770345596, "grad_norm": 4.54988055264722, "learning_rate": 1.316607497410466e-05, "loss": 0.5172, "step": 16773 }, { "epoch": 1.2466740988480118, "grad_norm": 1.616949443544572, "learning_rate": 1.3165313872183712e-05, "loss": 0.514, "step": 16774 }, { "epoch": 1.246748420661464, "grad_norm": 1.5740339351002464, "learning_rate": 1.3164552749884636e-05, "loss": 0.5023, "step": 16775 }, { "epoch": 1.2468227424749163, "grad_norm": 1.9431880436213924, "learning_rate": 1.3163791607212332e-05, "loss": 0.4623, "step": 16776 }, { "epoch": 1.2468970642883686, "grad_norm": 1.727889220856998, "learning_rate": 1.3163030444171702e-05, "loss": 0.6038, "step": 16777 }, { "epoch": 1.2469713861018208, "grad_norm": 2.239511028882071, "learning_rate": 1.3162269260767646e-05, "loss": 0.592, "step": 16778 }, { "epoch": 1.247045707915273, "grad_norm": 2.251251561488965, "learning_rate": 1.3161508057005066e-05, "loss": 0.6902, "step": 16779 }, { "epoch": 1.2471200297287255, "grad_norm": 2.06118914028043, "learning_rate": 1.3160746832888863e-05, "loss": 0.6432, "step": 16780 }, { "epoch": 1.2471943515421777, "grad_norm": 2.164667158865027, "learning_rate": 1.3159985588423934e-05, "loss": 0.6589, "step": 16781 }, { "epoch": 1.24726867335563, "grad_norm": 2.137928860500757, "learning_rate": 1.3159224323615178e-05, "loss": 0.6703, "step": 16782 }, { "epoch": 1.2473429951690822, "grad_norm": 1.751564695324383, "learning_rate": 1.3158463038467503e-05, "loss": 0.5675, "step": 16783 }, { "epoch": 1.2474173169825344, "grad_norm": 1.8291314071860607, "learning_rate": 1.3157701732985806e-05, "loss": 0.6865, "step": 16784 }, { "epoch": 1.2474916387959867, "grad_norm": 3.398144264339757, "learning_rate": 1.3156940407174991e-05, "loss": 0.7169, "step": 16785 }, { "epoch": 1.247565960609439, "grad_norm": 2.3083147518715106, "learning_rate": 1.3156179061039956e-05, "loss": 0.6288, "step": 16786 }, { "epoch": 1.2476402824228912, "grad_norm": 2.1611157343985385, "learning_rate": 1.3155417694585607e-05, "loss": 0.6564, "step": 16787 }, { "epoch": 1.2477146042363434, "grad_norm": 1.9631547592590577, "learning_rate": 1.3154656307816838e-05, "loss": 0.5907, "step": 16788 }, { "epoch": 1.2477889260497956, "grad_norm": 2.3739451099461615, "learning_rate": 1.3153894900738558e-05, "loss": 0.7039, "step": 16789 }, { "epoch": 1.2478632478632479, "grad_norm": 2.410550110680182, "learning_rate": 1.3153133473355663e-05, "loss": 0.7576, "step": 16790 }, { "epoch": 1.2479375696767, "grad_norm": 1.7044669767299905, "learning_rate": 1.3152372025673064e-05, "loss": 0.5325, "step": 16791 }, { "epoch": 1.2480118914901523, "grad_norm": 1.8493706563493224, "learning_rate": 1.3151610557695653e-05, "loss": 0.5215, "step": 16792 }, { "epoch": 1.2480862133036046, "grad_norm": 1.8809605373615321, "learning_rate": 1.315084906942834e-05, "loss": 0.6437, "step": 16793 }, { "epoch": 1.2481605351170568, "grad_norm": 2.1440004918484985, "learning_rate": 1.3150087560876021e-05, "loss": 0.5224, "step": 16794 }, { "epoch": 1.248234856930509, "grad_norm": 1.555821422396815, "learning_rate": 1.3149326032043601e-05, "loss": 0.4844, "step": 16795 }, { "epoch": 1.2483091787439613, "grad_norm": 1.7519723566609244, "learning_rate": 1.3148564482935985e-05, "loss": 0.655, "step": 16796 }, { "epoch": 1.2483835005574135, "grad_norm": 2.0821651724488537, "learning_rate": 1.3147802913558077e-05, "loss": 0.7097, "step": 16797 }, { "epoch": 1.2484578223708658, "grad_norm": 1.9304916847522284, "learning_rate": 1.3147041323914776e-05, "loss": 0.6757, "step": 16798 }, { "epoch": 1.248532144184318, "grad_norm": 2.457310677971586, "learning_rate": 1.3146279714010984e-05, "loss": 0.8236, "step": 16799 }, { "epoch": 1.2486064659977703, "grad_norm": 2.000523283248764, "learning_rate": 1.3145518083851605e-05, "loss": 0.6752, "step": 16800 }, { "epoch": 1.2486807878112227, "grad_norm": 1.6210766567010342, "learning_rate": 1.3144756433441548e-05, "loss": 0.4414, "step": 16801 }, { "epoch": 1.2487551096246747, "grad_norm": 1.9798253887546509, "learning_rate": 1.314399476278571e-05, "loss": 0.6041, "step": 16802 }, { "epoch": 1.2488294314381272, "grad_norm": 3.3217878243639793, "learning_rate": 1.3143233071888998e-05, "loss": 0.3735, "step": 16803 }, { "epoch": 1.2489037532515794, "grad_norm": 1.8052777983558574, "learning_rate": 1.3142471360756315e-05, "loss": 0.477, "step": 16804 }, { "epoch": 1.2489780750650317, "grad_norm": 1.9902716163616778, "learning_rate": 1.3141709629392562e-05, "loss": 0.6854, "step": 16805 }, { "epoch": 1.249052396878484, "grad_norm": 1.5360291565767616, "learning_rate": 1.3140947877802646e-05, "loss": 0.458, "step": 16806 }, { "epoch": 1.2491267186919361, "grad_norm": 7.810310227013351, "learning_rate": 1.3140186105991472e-05, "loss": 0.7857, "step": 16807 }, { "epoch": 1.2492010405053884, "grad_norm": 2.0082267821196536, "learning_rate": 1.3139424313963942e-05, "loss": 0.5569, "step": 16808 }, { "epoch": 1.2492753623188406, "grad_norm": 1.712409471034576, "learning_rate": 1.3138662501724963e-05, "loss": 0.5014, "step": 16809 }, { "epoch": 1.2493496841322929, "grad_norm": 2.322447953225948, "learning_rate": 1.3137900669279436e-05, "loss": 0.5687, "step": 16810 }, { "epoch": 1.249424005945745, "grad_norm": 1.984552370656784, "learning_rate": 1.3137138816632265e-05, "loss": 0.5381, "step": 16811 }, { "epoch": 1.2494983277591973, "grad_norm": 2.6184427789099574, "learning_rate": 1.3136376943788358e-05, "loss": 0.8122, "step": 16812 }, { "epoch": 1.2495726495726496, "grad_norm": 1.9440780206940376, "learning_rate": 1.3135615050752621e-05, "loss": 0.6334, "step": 16813 }, { "epoch": 1.2496469713861018, "grad_norm": 1.804655153219792, "learning_rate": 1.3134853137529956e-05, "loss": 0.5396, "step": 16814 }, { "epoch": 1.249721293199554, "grad_norm": 1.8479846739276429, "learning_rate": 1.3134091204125273e-05, "loss": 0.7457, "step": 16815 }, { "epoch": 1.2497956150130063, "grad_norm": 2.4271207306924194, "learning_rate": 1.313332925054347e-05, "loss": 0.7132, "step": 16816 }, { "epoch": 1.2498699368264585, "grad_norm": 1.8931228916159848, "learning_rate": 1.3132567276789452e-05, "loss": 0.577, "step": 16817 }, { "epoch": 1.2499442586399108, "grad_norm": 1.8597846870472141, "learning_rate": 1.3131805282868136e-05, "loss": 0.6237, "step": 16818 }, { "epoch": 1.250018580453363, "grad_norm": 1.747367998824769, "learning_rate": 1.3131043268784414e-05, "loss": 0.5693, "step": 16819 }, { "epoch": 1.2500929022668152, "grad_norm": 1.9353330882037691, "learning_rate": 1.3130281234543198e-05, "loss": 0.6158, "step": 16820 }, { "epoch": 1.2501672240802675, "grad_norm": 2.1831479528645907, "learning_rate": 1.3129519180149399e-05, "loss": 0.6488, "step": 16821 }, { "epoch": 1.25024154589372, "grad_norm": 2.0825473686433735, "learning_rate": 1.3128757105607915e-05, "loss": 0.5987, "step": 16822 }, { "epoch": 1.250315867707172, "grad_norm": 1.9610468436750985, "learning_rate": 1.3127995010923654e-05, "loss": 0.4889, "step": 16823 }, { "epoch": 1.2503901895206244, "grad_norm": 2.039844811302692, "learning_rate": 1.3127232896101525e-05, "loss": 0.8012, "step": 16824 }, { "epoch": 1.2504645113340764, "grad_norm": 1.7568339574308673, "learning_rate": 1.3126470761146432e-05, "loss": 0.4489, "step": 16825 }, { "epoch": 1.250538833147529, "grad_norm": 1.8020414580095363, "learning_rate": 1.3125708606063284e-05, "loss": 0.5277, "step": 16826 }, { "epoch": 1.250613154960981, "grad_norm": 1.7592829810065467, "learning_rate": 1.3124946430856984e-05, "loss": 0.6218, "step": 16827 }, { "epoch": 1.2506874767744334, "grad_norm": 2.4672394257706984, "learning_rate": 1.3124184235532443e-05, "loss": 0.6577, "step": 16828 }, { "epoch": 1.2507617985878856, "grad_norm": 1.8198827968248958, "learning_rate": 1.3123422020094565e-05, "loss": 0.6088, "step": 16829 }, { "epoch": 1.2508361204013378, "grad_norm": 1.8038065581598095, "learning_rate": 1.3122659784548257e-05, "loss": 0.6037, "step": 16830 }, { "epoch": 1.25091044221479, "grad_norm": 1.711531583477963, "learning_rate": 1.3121897528898427e-05, "loss": 0.5888, "step": 16831 }, { "epoch": 1.2509847640282423, "grad_norm": 1.9910280591665563, "learning_rate": 1.3121135253149985e-05, "loss": 0.4322, "step": 16832 }, { "epoch": 1.2510590858416946, "grad_norm": 1.9087906404048398, "learning_rate": 1.3120372957307834e-05, "loss": 0.6214, "step": 16833 }, { "epoch": 1.2511334076551468, "grad_norm": 2.1309091812587537, "learning_rate": 1.3119610641376888e-05, "loss": 0.5571, "step": 16834 }, { "epoch": 1.251207729468599, "grad_norm": 2.0097887945782875, "learning_rate": 1.3118848305362047e-05, "loss": 0.6736, "step": 16835 }, { "epoch": 1.2512820512820513, "grad_norm": 1.7603145440854995, "learning_rate": 1.3118085949268223e-05, "loss": 0.6005, "step": 16836 }, { "epoch": 1.2513563730955035, "grad_norm": 1.9362236430133524, "learning_rate": 1.311732357310032e-05, "loss": 0.6685, "step": 16837 }, { "epoch": 1.2514306949089558, "grad_norm": 2.1121556370481103, "learning_rate": 1.3116561176863257e-05, "loss": 0.6134, "step": 16838 }, { "epoch": 1.251505016722408, "grad_norm": 2.192821809635243, "learning_rate": 1.3115798760561929e-05, "loss": 0.7193, "step": 16839 }, { "epoch": 1.2515793385358602, "grad_norm": 2.2364461021408384, "learning_rate": 1.3115036324201253e-05, "loss": 0.6975, "step": 16840 }, { "epoch": 1.2516536603493125, "grad_norm": 1.9839058258357427, "learning_rate": 1.3114273867786134e-05, "loss": 0.6243, "step": 16841 }, { "epoch": 1.2517279821627647, "grad_norm": 1.7793888018826656, "learning_rate": 1.311351139132148e-05, "loss": 0.6157, "step": 16842 }, { "epoch": 1.251802303976217, "grad_norm": 1.9615307191662534, "learning_rate": 1.3112748894812204e-05, "loss": 0.6807, "step": 16843 }, { "epoch": 1.2518766257896692, "grad_norm": 1.9128679714524495, "learning_rate": 1.3111986378263209e-05, "loss": 0.6794, "step": 16844 }, { "epoch": 1.2519509476031216, "grad_norm": 2.2137639267761844, "learning_rate": 1.311122384167941e-05, "loss": 0.4062, "step": 16845 }, { "epoch": 1.2520252694165737, "grad_norm": 1.5488911101233631, "learning_rate": 1.3110461285065712e-05, "loss": 0.4536, "step": 16846 }, { "epoch": 1.2520995912300261, "grad_norm": 2.022351608660354, "learning_rate": 1.3109698708427026e-05, "loss": 0.5493, "step": 16847 }, { "epoch": 1.2521739130434781, "grad_norm": 1.9136892418373972, "learning_rate": 1.310893611176826e-05, "loss": 0.445, "step": 16848 }, { "epoch": 1.2522482348569306, "grad_norm": 1.8139429771954583, "learning_rate": 1.3108173495094328e-05, "loss": 0.5905, "step": 16849 }, { "epoch": 1.2523225566703828, "grad_norm": 1.9287361801076914, "learning_rate": 1.3107410858410133e-05, "loss": 0.5885, "step": 16850 }, { "epoch": 1.252396878483835, "grad_norm": 2.909665341846888, "learning_rate": 1.3106648201720589e-05, "loss": 0.7619, "step": 16851 }, { "epoch": 1.2524712002972873, "grad_norm": 2.1659005367196418, "learning_rate": 1.3105885525030604e-05, "loss": 0.7142, "step": 16852 }, { "epoch": 1.2525455221107396, "grad_norm": 2.0659749462215253, "learning_rate": 1.310512282834509e-05, "loss": 0.5987, "step": 16853 }, { "epoch": 1.2526198439241918, "grad_norm": 2.4818369655412935, "learning_rate": 1.3104360111668956e-05, "loss": 0.6626, "step": 16854 }, { "epoch": 1.252694165737644, "grad_norm": 2.4208784516015642, "learning_rate": 1.3103597375007113e-05, "loss": 0.6778, "step": 16855 }, { "epoch": 1.2527684875510963, "grad_norm": 1.7384324187393976, "learning_rate": 1.3102834618364472e-05, "loss": 0.5592, "step": 16856 }, { "epoch": 1.2528428093645485, "grad_norm": 1.772022864197915, "learning_rate": 1.3102071841745942e-05, "loss": 0.71, "step": 16857 }, { "epoch": 1.2529171311780007, "grad_norm": 2.0936111760025677, "learning_rate": 1.3101309045156433e-05, "loss": 0.7348, "step": 16858 }, { "epoch": 1.252991452991453, "grad_norm": 1.7085671646060976, "learning_rate": 1.3100546228600859e-05, "loss": 0.6064, "step": 16859 }, { "epoch": 1.2530657748049052, "grad_norm": 1.8857983770633842, "learning_rate": 1.3099783392084131e-05, "loss": 0.6315, "step": 16860 }, { "epoch": 1.2531400966183575, "grad_norm": 5.885347389637792, "learning_rate": 1.3099020535611156e-05, "loss": 0.635, "step": 16861 }, { "epoch": 1.2532144184318097, "grad_norm": 2.073240388617156, "learning_rate": 1.3098257659186849e-05, "loss": 0.7476, "step": 16862 }, { "epoch": 1.253288740245262, "grad_norm": 2.223326277126089, "learning_rate": 1.3097494762816119e-05, "loss": 0.7349, "step": 16863 }, { "epoch": 1.2533630620587142, "grad_norm": 2.8342283067623213, "learning_rate": 1.3096731846503876e-05, "loss": 0.5927, "step": 16864 }, { "epoch": 1.2534373838721664, "grad_norm": 2.1341775741997564, "learning_rate": 1.3095968910255036e-05, "loss": 0.5745, "step": 16865 }, { "epoch": 1.2535117056856186, "grad_norm": 1.4597598653934936, "learning_rate": 1.309520595407451e-05, "loss": 0.3953, "step": 16866 }, { "epoch": 1.2535860274990709, "grad_norm": 1.9834494496965975, "learning_rate": 1.309444297796721e-05, "loss": 0.7447, "step": 16867 }, { "epoch": 1.2536603493125233, "grad_norm": 1.9653406668194608, "learning_rate": 1.3093679981938044e-05, "loss": 0.5763, "step": 16868 }, { "epoch": 1.2537346711259754, "grad_norm": 2.2337858105087376, "learning_rate": 1.3092916965991926e-05, "loss": 0.695, "step": 16869 }, { "epoch": 1.2538089929394278, "grad_norm": 1.9215186896543428, "learning_rate": 1.309215393013377e-05, "loss": 0.5136, "step": 16870 }, { "epoch": 1.2538833147528798, "grad_norm": 2.5752174891281814, "learning_rate": 1.309139087436849e-05, "loss": 0.8207, "step": 16871 }, { "epoch": 1.2539576365663323, "grad_norm": 2.2278410363890053, "learning_rate": 1.3090627798700992e-05, "loss": 0.6225, "step": 16872 }, { "epoch": 1.2540319583797845, "grad_norm": 2.7982316705223447, "learning_rate": 1.3089864703136196e-05, "loss": 0.7017, "step": 16873 }, { "epoch": 1.2541062801932368, "grad_norm": 2.2998486967101504, "learning_rate": 1.308910158767901e-05, "loss": 0.6584, "step": 16874 }, { "epoch": 1.254180602006689, "grad_norm": 1.962840394410214, "learning_rate": 1.3088338452334346e-05, "loss": 0.7156, "step": 16875 }, { "epoch": 1.2542549238201413, "grad_norm": 1.6186340713373855, "learning_rate": 1.308757529710712e-05, "loss": 0.6674, "step": 16876 }, { "epoch": 1.2543292456335935, "grad_norm": 2.0877116306779984, "learning_rate": 1.3086812122002248e-05, "loss": 0.5089, "step": 16877 }, { "epoch": 1.2544035674470457, "grad_norm": 1.9257889178567218, "learning_rate": 1.308604892702464e-05, "loss": 0.7551, "step": 16878 }, { "epoch": 1.254477889260498, "grad_norm": 1.7369590690695103, "learning_rate": 1.3085285712179206e-05, "loss": 0.4968, "step": 16879 }, { "epoch": 1.2545522110739502, "grad_norm": 1.7877717290155257, "learning_rate": 1.308452247747086e-05, "loss": 0.5472, "step": 16880 }, { "epoch": 1.2546265328874024, "grad_norm": 2.08442979706865, "learning_rate": 1.3083759222904522e-05, "loss": 0.6589, "step": 16881 }, { "epoch": 1.2547008547008547, "grad_norm": 2.2225641179437594, "learning_rate": 1.30829959484851e-05, "loss": 0.6504, "step": 16882 }, { "epoch": 1.254775176514307, "grad_norm": 1.9560951258738948, "learning_rate": 1.3082232654217512e-05, "loss": 0.5359, "step": 16883 }, { "epoch": 1.2548494983277592, "grad_norm": 1.7600158259164265, "learning_rate": 1.3081469340106675e-05, "loss": 0.5023, "step": 16884 }, { "epoch": 1.2549238201412114, "grad_norm": 2.1051480788693255, "learning_rate": 1.308070600615749e-05, "loss": 0.6004, "step": 16885 }, { "epoch": 1.2549981419546636, "grad_norm": 2.1160437742138605, "learning_rate": 1.3079942652374882e-05, "loss": 0.5801, "step": 16886 }, { "epoch": 1.2550724637681159, "grad_norm": 1.8466997245635488, "learning_rate": 1.3079179278763763e-05, "loss": 0.5396, "step": 16887 }, { "epoch": 1.2551467855815681, "grad_norm": 1.57242427263358, "learning_rate": 1.307841588532905e-05, "loss": 0.4351, "step": 16888 }, { "epoch": 1.2552211073950206, "grad_norm": 2.2217546971587936, "learning_rate": 1.3077652472075655e-05, "loss": 0.6664, "step": 16889 }, { "epoch": 1.2552954292084726, "grad_norm": 2.41932418729834, "learning_rate": 1.3076889039008491e-05, "loss": 0.5765, "step": 16890 }, { "epoch": 1.255369751021925, "grad_norm": 2.3221356343648645, "learning_rate": 1.3076125586132475e-05, "loss": 0.4252, "step": 16891 }, { "epoch": 1.255444072835377, "grad_norm": 1.991902395254698, "learning_rate": 1.3075362113452521e-05, "loss": 0.8032, "step": 16892 }, { "epoch": 1.2555183946488295, "grad_norm": 2.0955522047504975, "learning_rate": 1.3074598620973546e-05, "loss": 0.7104, "step": 16893 }, { "epoch": 1.2555927164622815, "grad_norm": 2.478249623573442, "learning_rate": 1.3073835108700467e-05, "loss": 0.5974, "step": 16894 }, { "epoch": 1.255667038275734, "grad_norm": 1.9335713574127231, "learning_rate": 1.3073071576638197e-05, "loss": 0.6013, "step": 16895 }, { "epoch": 1.2557413600891862, "grad_norm": 1.8809468535057117, "learning_rate": 1.3072308024791652e-05, "loss": 0.6331, "step": 16896 }, { "epoch": 1.2558156819026385, "grad_norm": 1.985110581182371, "learning_rate": 1.3071544453165743e-05, "loss": 0.4281, "step": 16897 }, { "epoch": 1.2558900037160907, "grad_norm": 1.7016765207893754, "learning_rate": 1.3070780861765394e-05, "loss": 0.4803, "step": 16898 }, { "epoch": 1.255964325529543, "grad_norm": 2.124345376379202, "learning_rate": 1.3070017250595517e-05, "loss": 0.7663, "step": 16899 }, { "epoch": 1.2560386473429952, "grad_norm": 1.8070618042038311, "learning_rate": 1.3069253619661027e-05, "loss": 0.666, "step": 16900 }, { "epoch": 1.2561129691564474, "grad_norm": 1.9448741349867067, "learning_rate": 1.306848996896684e-05, "loss": 0.5326, "step": 16901 }, { "epoch": 1.2561872909698997, "grad_norm": 2.0762775528212156, "learning_rate": 1.3067726298517875e-05, "loss": 0.6375, "step": 16902 }, { "epoch": 1.256261612783352, "grad_norm": 2.055939905717602, "learning_rate": 1.3066962608319047e-05, "loss": 0.6045, "step": 16903 }, { "epoch": 1.2563359345968041, "grad_norm": 1.7090974493531135, "learning_rate": 1.3066198898375273e-05, "loss": 0.6539, "step": 16904 }, { "epoch": 1.2564102564102564, "grad_norm": 2.297021805396568, "learning_rate": 1.3065435168691471e-05, "loss": 0.7269, "step": 16905 }, { "epoch": 1.2564845782237086, "grad_norm": 2.5472120789612567, "learning_rate": 1.3064671419272558e-05, "loss": 0.6239, "step": 16906 }, { "epoch": 1.2565589000371609, "grad_norm": 1.8433741416180802, "learning_rate": 1.3063907650123447e-05, "loss": 0.6339, "step": 16907 }, { "epoch": 1.256633221850613, "grad_norm": 1.7627275194014649, "learning_rate": 1.3063143861249057e-05, "loss": 0.4858, "step": 16908 }, { "epoch": 1.2567075436640653, "grad_norm": 1.9577111503063038, "learning_rate": 1.3062380052654305e-05, "loss": 0.696, "step": 16909 }, { "epoch": 1.2567818654775176, "grad_norm": 2.178246970346133, "learning_rate": 1.306161622434411e-05, "loss": 0.6855, "step": 16910 }, { "epoch": 1.2568561872909698, "grad_norm": 2.2673450367924635, "learning_rate": 1.3060852376323392e-05, "loss": 0.5933, "step": 16911 }, { "epoch": 1.2569305091044223, "grad_norm": 2.034749055798891, "learning_rate": 1.3060088508597062e-05, "loss": 0.6552, "step": 16912 }, { "epoch": 1.2570048309178743, "grad_norm": 1.762902215332162, "learning_rate": 1.305932462117004e-05, "loss": 0.5916, "step": 16913 }, { "epoch": 1.2570791527313268, "grad_norm": 2.2068552484391475, "learning_rate": 1.3058560714047246e-05, "loss": 0.6155, "step": 16914 }, { "epoch": 1.2571534745447788, "grad_norm": 1.8782659571247744, "learning_rate": 1.3057796787233596e-05, "loss": 0.4737, "step": 16915 }, { "epoch": 1.2572277963582312, "grad_norm": 2.2999076896310853, "learning_rate": 1.3057032840734012e-05, "loss": 0.6185, "step": 16916 }, { "epoch": 1.2573021181716835, "grad_norm": 2.3364938219644755, "learning_rate": 1.3056268874553407e-05, "loss": 0.7308, "step": 16917 }, { "epoch": 1.2573764399851357, "grad_norm": 1.9493689913175312, "learning_rate": 1.3055504888696699e-05, "loss": 0.6227, "step": 16918 }, { "epoch": 1.257450761798588, "grad_norm": 1.8001097753758923, "learning_rate": 1.3054740883168813e-05, "loss": 0.515, "step": 16919 }, { "epoch": 1.2575250836120402, "grad_norm": 1.8151316421498023, "learning_rate": 1.3053976857974661e-05, "loss": 0.6142, "step": 16920 }, { "epoch": 1.2575994054254924, "grad_norm": 2.1193873428327996, "learning_rate": 1.3053212813119168e-05, "loss": 0.6847, "step": 16921 }, { "epoch": 1.2576737272389447, "grad_norm": 1.6434928140129617, "learning_rate": 1.3052448748607246e-05, "loss": 0.6364, "step": 16922 }, { "epoch": 1.257748049052397, "grad_norm": 2.1789717732594087, "learning_rate": 1.305168466444382e-05, "loss": 0.5404, "step": 16923 }, { "epoch": 1.2578223708658491, "grad_norm": 1.6581530586578623, "learning_rate": 1.3050920560633803e-05, "loss": 0.4794, "step": 16924 }, { "epoch": 1.2578966926793014, "grad_norm": 2.108686901078224, "learning_rate": 1.3050156437182119e-05, "loss": 0.6193, "step": 16925 }, { "epoch": 1.2579710144927536, "grad_norm": 1.8327551155943262, "learning_rate": 1.3049392294093685e-05, "loss": 0.4338, "step": 16926 }, { "epoch": 1.2580453363062059, "grad_norm": 2.371006043424223, "learning_rate": 1.3048628131373427e-05, "loss": 0.6304, "step": 16927 }, { "epoch": 1.258119658119658, "grad_norm": 3.1617698088785273, "learning_rate": 1.3047863949026256e-05, "loss": 0.5178, "step": 16928 }, { "epoch": 1.2581939799331103, "grad_norm": 2.373829787942045, "learning_rate": 1.3047099747057094e-05, "loss": 0.5784, "step": 16929 }, { "epoch": 1.2582683017465626, "grad_norm": 2.6861497270103456, "learning_rate": 1.3046335525470861e-05, "loss": 0.7394, "step": 16930 }, { "epoch": 1.2583426235600148, "grad_norm": 1.8789691108071773, "learning_rate": 1.304557128427248e-05, "loss": 0.5662, "step": 16931 }, { "epoch": 1.258416945373467, "grad_norm": 1.9326606525394772, "learning_rate": 1.304480702346687e-05, "loss": 0.6062, "step": 16932 }, { "epoch": 1.2584912671869193, "grad_norm": 2.1146758589764962, "learning_rate": 1.3044042743058949e-05, "loss": 0.6581, "step": 16933 }, { "epoch": 1.2585655890003715, "grad_norm": 1.789827041087229, "learning_rate": 1.304327844305364e-05, "loss": 0.6315, "step": 16934 }, { "epoch": 1.258639910813824, "grad_norm": 2.2999152469722097, "learning_rate": 1.3042514123455861e-05, "loss": 0.5969, "step": 16935 }, { "epoch": 1.258714232627276, "grad_norm": 1.4443707654486921, "learning_rate": 1.3041749784270531e-05, "loss": 0.483, "step": 16936 }, { "epoch": 1.2587885544407285, "grad_norm": 1.9743308755597226, "learning_rate": 1.3040985425502576e-05, "loss": 0.6173, "step": 16937 }, { "epoch": 1.2588628762541805, "grad_norm": 1.8207999070888308, "learning_rate": 1.3040221047156918e-05, "loss": 0.5492, "step": 16938 }, { "epoch": 1.258937198067633, "grad_norm": 2.3788598736780595, "learning_rate": 1.3039456649238473e-05, "loss": 0.7361, "step": 16939 }, { "epoch": 1.2590115198810852, "grad_norm": 1.793640521933273, "learning_rate": 1.303869223175216e-05, "loss": 0.5897, "step": 16940 }, { "epoch": 1.2590858416945374, "grad_norm": 1.7730739407363403, "learning_rate": 1.3037927794702908e-05, "loss": 0.5101, "step": 16941 }, { "epoch": 1.2591601635079896, "grad_norm": 1.7629760190871846, "learning_rate": 1.3037163338095633e-05, "loss": 0.517, "step": 16942 }, { "epoch": 1.2592344853214419, "grad_norm": 1.944351372094194, "learning_rate": 1.303639886193526e-05, "loss": 0.6749, "step": 16943 }, { "epoch": 1.2593088071348941, "grad_norm": 1.8200657404903997, "learning_rate": 1.3035634366226706e-05, "loss": 0.5647, "step": 16944 }, { "epoch": 1.2593831289483464, "grad_norm": 1.9224473396804374, "learning_rate": 1.3034869850974898e-05, "loss": 0.6096, "step": 16945 }, { "epoch": 1.2594574507617986, "grad_norm": 1.8851603046888166, "learning_rate": 1.3034105316184753e-05, "loss": 0.6627, "step": 16946 }, { "epoch": 1.2595317725752508, "grad_norm": 1.9738184080060337, "learning_rate": 1.3033340761861195e-05, "loss": 0.6092, "step": 16947 }, { "epoch": 1.259606094388703, "grad_norm": 1.823617037190063, "learning_rate": 1.3032576188009148e-05, "loss": 0.616, "step": 16948 }, { "epoch": 1.2596804162021553, "grad_norm": 1.8326685659439552, "learning_rate": 1.3031811594633534e-05, "loss": 0.5315, "step": 16949 }, { "epoch": 1.2597547380156076, "grad_norm": 1.6556695580867273, "learning_rate": 1.3031046981739275e-05, "loss": 0.5186, "step": 16950 }, { "epoch": 1.2598290598290598, "grad_norm": 2.0279930229950285, "learning_rate": 1.3030282349331289e-05, "loss": 0.7206, "step": 16951 }, { "epoch": 1.259903381642512, "grad_norm": 1.8816249395359215, "learning_rate": 1.3029517697414504e-05, "loss": 0.5369, "step": 16952 }, { "epoch": 1.2599777034559643, "grad_norm": 1.7940687877520536, "learning_rate": 1.3028753025993841e-05, "loss": 0.5421, "step": 16953 }, { "epoch": 1.2600520252694165, "grad_norm": 2.393406072548251, "learning_rate": 1.3027988335074225e-05, "loss": 0.7585, "step": 16954 }, { "epoch": 1.2601263470828687, "grad_norm": 2.310992861202494, "learning_rate": 1.3027223624660574e-05, "loss": 0.5758, "step": 16955 }, { "epoch": 1.2602006688963212, "grad_norm": 1.787929599398846, "learning_rate": 1.3026458894757819e-05, "loss": 0.5691, "step": 16956 }, { "epoch": 1.2602749907097732, "grad_norm": 2.118318540268618, "learning_rate": 1.3025694145370876e-05, "loss": 0.5978, "step": 16957 }, { "epoch": 1.2603493125232257, "grad_norm": 1.8116871070227563, "learning_rate": 1.3024929376504668e-05, "loss": 0.6076, "step": 16958 }, { "epoch": 1.2604236343366777, "grad_norm": 2.524505580878734, "learning_rate": 1.3024164588164125e-05, "loss": 0.7297, "step": 16959 }, { "epoch": 1.2604979561501302, "grad_norm": 1.8232490045089793, "learning_rate": 1.3023399780354167e-05, "loss": 0.6148, "step": 16960 }, { "epoch": 1.2605722779635822, "grad_norm": 2.3036482435198677, "learning_rate": 1.3022634953079718e-05, "loss": 0.5394, "step": 16961 }, { "epoch": 1.2606465997770346, "grad_norm": 2.5033448959740694, "learning_rate": 1.3021870106345702e-05, "loss": 0.7545, "step": 16962 }, { "epoch": 1.2607209215904869, "grad_norm": 2.0412832512796437, "learning_rate": 1.3021105240157044e-05, "loss": 0.5643, "step": 16963 }, { "epoch": 1.2607952434039391, "grad_norm": 1.8728576267191746, "learning_rate": 1.3020340354518666e-05, "loss": 0.672, "step": 16964 }, { "epoch": 1.2608695652173914, "grad_norm": 1.7693701743008228, "learning_rate": 1.3019575449435494e-05, "loss": 0.4485, "step": 16965 }, { "epoch": 1.2609438870308436, "grad_norm": 2.0259550644053665, "learning_rate": 1.301881052491245e-05, "loss": 0.6655, "step": 16966 }, { "epoch": 1.2610182088442958, "grad_norm": 1.7637807268790542, "learning_rate": 1.3018045580954467e-05, "loss": 0.5574, "step": 16967 }, { "epoch": 1.261092530657748, "grad_norm": 2.179212614708092, "learning_rate": 1.3017280617566455e-05, "loss": 0.5913, "step": 16968 }, { "epoch": 1.2611668524712003, "grad_norm": 2.069458955659331, "learning_rate": 1.301651563475335e-05, "loss": 0.6408, "step": 16969 }, { "epoch": 1.2612411742846525, "grad_norm": 2.516939558138658, "learning_rate": 1.3015750632520074e-05, "loss": 0.5396, "step": 16970 }, { "epoch": 1.2613154960981048, "grad_norm": 2.3208925143055303, "learning_rate": 1.3014985610871553e-05, "loss": 0.7126, "step": 16971 }, { "epoch": 1.261389817911557, "grad_norm": 2.2707986594359415, "learning_rate": 1.301422056981271e-05, "loss": 0.6856, "step": 16972 }, { "epoch": 1.2614641397250093, "grad_norm": 2.104851446481307, "learning_rate": 1.3013455509348472e-05, "loss": 0.6605, "step": 16973 }, { "epoch": 1.2615384615384615, "grad_norm": 2.182795230885632, "learning_rate": 1.3012690429483762e-05, "loss": 0.7758, "step": 16974 }, { "epoch": 1.2616127833519137, "grad_norm": 1.875380775471637, "learning_rate": 1.3011925330223508e-05, "loss": 0.592, "step": 16975 }, { "epoch": 1.261687105165366, "grad_norm": 2.112396941355466, "learning_rate": 1.3011160211572637e-05, "loss": 0.5969, "step": 16976 }, { "epoch": 1.2617614269788182, "grad_norm": 1.6683937903583164, "learning_rate": 1.3010395073536071e-05, "loss": 0.4434, "step": 16977 }, { "epoch": 1.2618357487922705, "grad_norm": 1.928229744202799, "learning_rate": 1.3009629916118738e-05, "loss": 0.5396, "step": 16978 }, { "epoch": 1.261910070605723, "grad_norm": 2.3638790702719104, "learning_rate": 1.3008864739325564e-05, "loss": 0.6471, "step": 16979 }, { "epoch": 1.261984392419175, "grad_norm": 1.9353510258284758, "learning_rate": 1.3008099543161474e-05, "loss": 0.5538, "step": 16980 }, { "epoch": 1.2620587142326274, "grad_norm": 1.6816031335119719, "learning_rate": 1.3007334327631398e-05, "loss": 0.6218, "step": 16981 }, { "epoch": 1.2621330360460794, "grad_norm": 1.7649474283598647, "learning_rate": 1.3006569092740257e-05, "loss": 0.6257, "step": 16982 }, { "epoch": 1.2622073578595319, "grad_norm": 1.6805276131510076, "learning_rate": 1.3005803838492981e-05, "loss": 0.6131, "step": 16983 }, { "epoch": 1.262281679672984, "grad_norm": 2.211018721359527, "learning_rate": 1.3005038564894496e-05, "loss": 0.6712, "step": 16984 }, { "epoch": 1.2623560014864363, "grad_norm": 1.7557392768908826, "learning_rate": 1.3004273271949728e-05, "loss": 0.5658, "step": 16985 }, { "epoch": 1.2624303232998886, "grad_norm": 1.650023022193967, "learning_rate": 1.3003507959663604e-05, "loss": 0.4946, "step": 16986 }, { "epoch": 1.2625046451133408, "grad_norm": 1.7388199854985293, "learning_rate": 1.3002742628041054e-05, "loss": 0.5731, "step": 16987 }, { "epoch": 1.262578966926793, "grad_norm": 2.3291167758945113, "learning_rate": 1.3001977277087002e-05, "loss": 0.6299, "step": 16988 }, { "epoch": 1.2626532887402453, "grad_norm": 1.8370158926533995, "learning_rate": 1.3001211906806372e-05, "loss": 0.6794, "step": 16989 }, { "epoch": 1.2627276105536975, "grad_norm": 1.502028534416054, "learning_rate": 1.3000446517204102e-05, "loss": 0.4614, "step": 16990 }, { "epoch": 1.2628019323671498, "grad_norm": 1.907903171168254, "learning_rate": 1.2999681108285107e-05, "loss": 0.5935, "step": 16991 }, { "epoch": 1.262876254180602, "grad_norm": 1.83077206831585, "learning_rate": 1.2998915680054326e-05, "loss": 0.5154, "step": 16992 }, { "epoch": 1.2629505759940542, "grad_norm": 2.0414905367032095, "learning_rate": 1.2998150232516677e-05, "loss": 0.5985, "step": 16993 }, { "epoch": 1.2630248978075065, "grad_norm": 2.3383508138973035, "learning_rate": 1.2997384765677094e-05, "loss": 0.6183, "step": 16994 }, { "epoch": 1.2630992196209587, "grad_norm": 2.80685199192177, "learning_rate": 1.2996619279540504e-05, "loss": 0.7864, "step": 16995 }, { "epoch": 1.263173541434411, "grad_norm": 2.4384076646879387, "learning_rate": 1.2995853774111835e-05, "loss": 0.7841, "step": 16996 }, { "epoch": 1.2632478632478632, "grad_norm": 1.7176356552057743, "learning_rate": 1.2995088249396013e-05, "loss": 0.5524, "step": 16997 }, { "epoch": 1.2633221850613154, "grad_norm": 1.6078286989571142, "learning_rate": 1.2994322705397973e-05, "loss": 0.5659, "step": 16998 }, { "epoch": 1.2633965068747677, "grad_norm": 1.8836399999959956, "learning_rate": 1.2993557142122633e-05, "loss": 0.6143, "step": 16999 }, { "epoch": 1.26347082868822, "grad_norm": 1.9849175900853318, "learning_rate": 1.299279155957493e-05, "loss": 0.6518, "step": 17000 }, { "epoch": 1.2635451505016722, "grad_norm": 1.8337713976545411, "learning_rate": 1.2992025957759791e-05, "loss": 0.5345, "step": 17001 }, { "epoch": 1.2636194723151246, "grad_norm": 2.229381555218958, "learning_rate": 1.2991260336682142e-05, "loss": 0.7309, "step": 17002 }, { "epoch": 1.2636937941285766, "grad_norm": 2.16346607101071, "learning_rate": 1.2990494696346917e-05, "loss": 0.7122, "step": 17003 }, { "epoch": 1.263768115942029, "grad_norm": 2.170910882654876, "learning_rate": 1.2989729036759037e-05, "loss": 0.8214, "step": 17004 }, { "epoch": 1.263842437755481, "grad_norm": 2.1158455910240908, "learning_rate": 1.2988963357923441e-05, "loss": 0.7586, "step": 17005 }, { "epoch": 1.2639167595689336, "grad_norm": 1.9404214339096553, "learning_rate": 1.2988197659845051e-05, "loss": 0.6018, "step": 17006 }, { "epoch": 1.2639910813823858, "grad_norm": 2.136463195955195, "learning_rate": 1.2987431942528804e-05, "loss": 0.58, "step": 17007 }, { "epoch": 1.264065403195838, "grad_norm": 1.7792189708318804, "learning_rate": 1.2986666205979621e-05, "loss": 0.5176, "step": 17008 }, { "epoch": 1.2641397250092903, "grad_norm": 2.300151401822266, "learning_rate": 1.298590045020244e-05, "loss": 0.5728, "step": 17009 }, { "epoch": 1.2642140468227425, "grad_norm": 1.4519430868329042, "learning_rate": 1.2985134675202182e-05, "loss": 0.4507, "step": 17010 }, { "epoch": 1.2642883686361948, "grad_norm": 1.9889682200447034, "learning_rate": 1.2984368880983784e-05, "loss": 0.5502, "step": 17011 }, { "epoch": 1.264362690449647, "grad_norm": 2.1790909837449064, "learning_rate": 1.2983603067552172e-05, "loss": 0.7657, "step": 17012 }, { "epoch": 1.2644370122630992, "grad_norm": 1.6714010974200915, "learning_rate": 1.2982837234912284e-05, "loss": 0.5853, "step": 17013 }, { "epoch": 1.2645113340765515, "grad_norm": 1.8319516673639797, "learning_rate": 1.298207138306904e-05, "loss": 0.4441, "step": 17014 }, { "epoch": 1.2645856558900037, "grad_norm": 1.7440522752087773, "learning_rate": 1.2981305512027377e-05, "loss": 0.5321, "step": 17015 }, { "epoch": 1.264659977703456, "grad_norm": 1.991337335225885, "learning_rate": 1.2980539621792223e-05, "loss": 0.7175, "step": 17016 }, { "epoch": 1.2647342995169082, "grad_norm": 1.7069149866591398, "learning_rate": 1.2979773712368509e-05, "loss": 0.5295, "step": 17017 }, { "epoch": 1.2648086213303604, "grad_norm": 2.372997149702921, "learning_rate": 1.2979007783761168e-05, "loss": 0.637, "step": 17018 }, { "epoch": 1.2648829431438127, "grad_norm": 1.8018949947476268, "learning_rate": 1.2978241835975128e-05, "loss": 0.5488, "step": 17019 }, { "epoch": 1.264957264957265, "grad_norm": 1.8027514407782288, "learning_rate": 1.2977475869015323e-05, "loss": 0.529, "step": 17020 }, { "epoch": 1.2650315867707171, "grad_norm": 2.0029030926749987, "learning_rate": 1.2976709882886682e-05, "loss": 0.6142, "step": 17021 }, { "epoch": 1.2651059085841694, "grad_norm": 1.8148307907578505, "learning_rate": 1.2975943877594136e-05, "loss": 0.5702, "step": 17022 }, { "epoch": 1.2651802303976216, "grad_norm": 2.057652924080185, "learning_rate": 1.2975177853142619e-05, "loss": 0.6501, "step": 17023 }, { "epoch": 1.2652545522110739, "grad_norm": 3.87885017821228, "learning_rate": 1.2974411809537064e-05, "loss": 0.6642, "step": 17024 }, { "epoch": 1.2653288740245263, "grad_norm": 2.1893346912400715, "learning_rate": 1.2973645746782397e-05, "loss": 0.5757, "step": 17025 }, { "epoch": 1.2654031958379783, "grad_norm": 1.9858877458349198, "learning_rate": 1.2972879664883553e-05, "loss": 0.7534, "step": 17026 }, { "epoch": 1.2654775176514308, "grad_norm": 2.42954987827796, "learning_rate": 1.2972113563845463e-05, "loss": 0.5683, "step": 17027 }, { "epoch": 1.2655518394648828, "grad_norm": 1.9596891630183693, "learning_rate": 1.2971347443673062e-05, "loss": 0.5137, "step": 17028 }, { "epoch": 1.2656261612783353, "grad_norm": 2.0673321607240505, "learning_rate": 1.297058130437128e-05, "loss": 0.6879, "step": 17029 }, { "epoch": 1.2657004830917875, "grad_norm": 1.5713172024242499, "learning_rate": 1.2969815145945051e-05, "loss": 0.4038, "step": 17030 }, { "epoch": 1.2657748049052397, "grad_norm": 1.9710794183051537, "learning_rate": 1.2969048968399304e-05, "loss": 0.5314, "step": 17031 }, { "epoch": 1.265849126718692, "grad_norm": 2.441590740470327, "learning_rate": 1.2968282771738974e-05, "loss": 0.5323, "step": 17032 }, { "epoch": 1.2659234485321442, "grad_norm": 2.637116663725807, "learning_rate": 1.2967516555968994e-05, "loss": 0.628, "step": 17033 }, { "epoch": 1.2659977703455965, "grad_norm": 1.7920566361408785, "learning_rate": 1.2966750321094294e-05, "loss": 0.6279, "step": 17034 }, { "epoch": 1.2660720921590487, "grad_norm": 2.025621784945116, "learning_rate": 1.2965984067119814e-05, "loss": 0.6707, "step": 17035 }, { "epoch": 1.266146413972501, "grad_norm": 1.3808856791396302, "learning_rate": 1.2965217794050481e-05, "loss": 0.3294, "step": 17036 }, { "epoch": 1.2662207357859532, "grad_norm": 1.657912936347115, "learning_rate": 1.2964451501891228e-05, "loss": 0.4851, "step": 17037 }, { "epoch": 1.2662950575994054, "grad_norm": 1.997915108615198, "learning_rate": 1.296368519064699e-05, "loss": 0.5683, "step": 17038 }, { "epoch": 1.2663693794128577, "grad_norm": 1.9783807053154792, "learning_rate": 1.2962918860322701e-05, "loss": 0.5486, "step": 17039 }, { "epoch": 1.26644370122631, "grad_norm": 1.8676133277984317, "learning_rate": 1.2962152510923294e-05, "loss": 0.5646, "step": 17040 }, { "epoch": 1.2665180230397621, "grad_norm": 2.088805211110191, "learning_rate": 1.2961386142453707e-05, "loss": 0.557, "step": 17041 }, { "epoch": 1.2665923448532144, "grad_norm": 1.7119076549993937, "learning_rate": 1.2960619754918866e-05, "loss": 0.615, "step": 17042 }, { "epoch": 1.2666666666666666, "grad_norm": 2.006412527582443, "learning_rate": 1.2959853348323708e-05, "loss": 0.5518, "step": 17043 }, { "epoch": 1.2667409884801188, "grad_norm": 1.9038555514083073, "learning_rate": 1.2959086922673167e-05, "loss": 0.6251, "step": 17044 }, { "epoch": 1.266815310293571, "grad_norm": 1.892625540938382, "learning_rate": 1.295832047797218e-05, "loss": 0.6257, "step": 17045 }, { "epoch": 1.2668896321070235, "grad_norm": 1.651069110509382, "learning_rate": 1.295755401422568e-05, "loss": 0.486, "step": 17046 }, { "epoch": 1.2669639539204756, "grad_norm": 1.905758010148214, "learning_rate": 1.2956787531438599e-05, "loss": 0.6932, "step": 17047 }, { "epoch": 1.267038275733928, "grad_norm": 1.9392548772878797, "learning_rate": 1.2956021029615875e-05, "loss": 0.5957, "step": 17048 }, { "epoch": 1.26711259754738, "grad_norm": 2.0766154265193224, "learning_rate": 1.2955254508762439e-05, "loss": 0.6172, "step": 17049 }, { "epoch": 1.2671869193608325, "grad_norm": 1.9616413025375659, "learning_rate": 1.2954487968883226e-05, "loss": 0.6733, "step": 17050 }, { "epoch": 1.2672612411742845, "grad_norm": 2.1031508574700517, "learning_rate": 1.2953721409983176e-05, "loss": 0.723, "step": 17051 }, { "epoch": 1.267335562987737, "grad_norm": 1.7114647443061897, "learning_rate": 1.2952954832067222e-05, "loss": 0.4755, "step": 17052 }, { "epoch": 1.2674098848011892, "grad_norm": 1.6173408630531934, "learning_rate": 1.2952188235140297e-05, "loss": 0.5949, "step": 17053 }, { "epoch": 1.2674842066146415, "grad_norm": 1.9931961140176504, "learning_rate": 1.2951421619207336e-05, "loss": 0.7493, "step": 17054 }, { "epoch": 1.2675585284280937, "grad_norm": 1.92095678035638, "learning_rate": 1.2950654984273275e-05, "loss": 0.6748, "step": 17055 }, { "epoch": 1.267632850241546, "grad_norm": 2.098987607466418, "learning_rate": 1.294988833034305e-05, "loss": 0.6421, "step": 17056 }, { "epoch": 1.2677071720549982, "grad_norm": 1.9187947832603427, "learning_rate": 1.29491216574216e-05, "loss": 0.5014, "step": 17057 }, { "epoch": 1.2677814938684504, "grad_norm": 2.1461132911699328, "learning_rate": 1.2948354965513858e-05, "loss": 0.604, "step": 17058 }, { "epoch": 1.2678558156819026, "grad_norm": 2.349013351086558, "learning_rate": 1.2947588254624757e-05, "loss": 0.7228, "step": 17059 }, { "epoch": 1.2679301374953549, "grad_norm": 2.391970540116957, "learning_rate": 1.2946821524759234e-05, "loss": 0.6272, "step": 17060 }, { "epoch": 1.2680044593088071, "grad_norm": 1.8473117478154566, "learning_rate": 1.294605477592223e-05, "loss": 0.5296, "step": 17061 }, { "epoch": 1.2680787811222594, "grad_norm": 2.168194779839909, "learning_rate": 1.2945288008118677e-05, "loss": 0.6086, "step": 17062 }, { "epoch": 1.2681531029357116, "grad_norm": 1.8777228694000105, "learning_rate": 1.2944521221353514e-05, "loss": 0.6918, "step": 17063 }, { "epoch": 1.2682274247491638, "grad_norm": 1.9101478862455783, "learning_rate": 1.2943754415631678e-05, "loss": 0.5845, "step": 17064 }, { "epoch": 1.268301746562616, "grad_norm": 2.2487900176609066, "learning_rate": 1.29429875909581e-05, "loss": 0.7624, "step": 17065 }, { "epoch": 1.2683760683760683, "grad_norm": 1.9304150415182881, "learning_rate": 1.2942220747337721e-05, "loss": 0.5012, "step": 17066 }, { "epoch": 1.2684503901895205, "grad_norm": 2.0719131451558837, "learning_rate": 1.294145388477548e-05, "loss": 0.6328, "step": 17067 }, { "epoch": 1.2685247120029728, "grad_norm": 2.2401506083510476, "learning_rate": 1.2940687003276309e-05, "loss": 0.6775, "step": 17068 }, { "epoch": 1.2685990338164252, "grad_norm": 2.6585848236401, "learning_rate": 1.2939920102845152e-05, "loss": 0.6828, "step": 17069 }, { "epoch": 1.2686733556298773, "grad_norm": 1.8322930334798861, "learning_rate": 1.2939153183486936e-05, "loss": 0.5354, "step": 17070 }, { "epoch": 1.2687476774433297, "grad_norm": 1.8215444263311473, "learning_rate": 1.2938386245206607e-05, "loss": 0.553, "step": 17071 }, { "epoch": 1.2688219992567817, "grad_norm": 2.078228386291301, "learning_rate": 1.29376192880091e-05, "loss": 0.8269, "step": 17072 }, { "epoch": 1.2688963210702342, "grad_norm": 2.1149016773825293, "learning_rate": 1.2936852311899351e-05, "loss": 0.655, "step": 17073 }, { "epoch": 1.2689706428836864, "grad_norm": 1.9992371085989424, "learning_rate": 1.29360853168823e-05, "loss": 0.6759, "step": 17074 }, { "epoch": 1.2690449646971387, "grad_norm": 1.9153999995913857, "learning_rate": 1.2935318302962888e-05, "loss": 0.5601, "step": 17075 }, { "epoch": 1.269119286510591, "grad_norm": 1.9489211948800889, "learning_rate": 1.2934551270146043e-05, "loss": 0.6979, "step": 17076 }, { "epoch": 1.2691936083240432, "grad_norm": 1.803481545100553, "learning_rate": 1.2933784218436711e-05, "loss": 0.588, "step": 17077 }, { "epoch": 1.2692679301374954, "grad_norm": 1.678824061025296, "learning_rate": 1.2933017147839827e-05, "loss": 0.5411, "step": 17078 }, { "epoch": 1.2693422519509476, "grad_norm": 2.411167605280933, "learning_rate": 1.2932250058360336e-05, "loss": 0.6876, "step": 17079 }, { "epoch": 1.2694165737643999, "grad_norm": 2.0827283670723014, "learning_rate": 1.2931482950003169e-05, "loss": 0.6213, "step": 17080 }, { "epoch": 1.269490895577852, "grad_norm": 1.6270546799719805, "learning_rate": 1.2930715822773265e-05, "loss": 0.5982, "step": 17081 }, { "epoch": 1.2695652173913043, "grad_norm": 2.038582566387698, "learning_rate": 1.2929948676675566e-05, "loss": 0.6681, "step": 17082 }, { "epoch": 1.2696395392047566, "grad_norm": 1.9581640725791094, "learning_rate": 1.2929181511715007e-05, "loss": 0.7134, "step": 17083 }, { "epoch": 1.2697138610182088, "grad_norm": 1.6911159901955808, "learning_rate": 1.2928414327896532e-05, "loss": 0.5463, "step": 17084 }, { "epoch": 1.269788182831661, "grad_norm": 1.6630718902458768, "learning_rate": 1.2927647125225078e-05, "loss": 0.4976, "step": 17085 }, { "epoch": 1.2698625046451133, "grad_norm": 2.0256195474670116, "learning_rate": 1.2926879903705585e-05, "loss": 0.59, "step": 17086 }, { "epoch": 1.2699368264585655, "grad_norm": 2.309125989805286, "learning_rate": 1.2926112663342986e-05, "loss": 0.5592, "step": 17087 }, { "epoch": 1.2700111482720178, "grad_norm": 1.882945532353322, "learning_rate": 1.292534540414223e-05, "loss": 0.6581, "step": 17088 }, { "epoch": 1.27008547008547, "grad_norm": 2.105184662595043, "learning_rate": 1.2924578126108247e-05, "loss": 0.6367, "step": 17089 }, { "epoch": 1.2701597918989223, "grad_norm": 2.0091808885536, "learning_rate": 1.2923810829245986e-05, "loss": 0.7024, "step": 17090 }, { "epoch": 1.2702341137123745, "grad_norm": 1.9399229174049737, "learning_rate": 1.2923043513560383e-05, "loss": 0.422, "step": 17091 }, { "epoch": 1.270308435525827, "grad_norm": 1.9259983706043873, "learning_rate": 1.2922276179056376e-05, "loss": 0.5237, "step": 17092 }, { "epoch": 1.270382757339279, "grad_norm": 1.7285307147117626, "learning_rate": 1.2921508825738907e-05, "loss": 0.4806, "step": 17093 }, { "epoch": 1.2704570791527314, "grad_norm": 2.3584969223078884, "learning_rate": 1.2920741453612915e-05, "loss": 0.7534, "step": 17094 }, { "epoch": 1.2705314009661834, "grad_norm": 3.591749574024225, "learning_rate": 1.2919974062683342e-05, "loss": 0.6204, "step": 17095 }, { "epoch": 1.270605722779636, "grad_norm": 2.2578453526300986, "learning_rate": 1.2919206652955127e-05, "loss": 0.7031, "step": 17096 }, { "epoch": 1.2706800445930881, "grad_norm": 1.569889979233597, "learning_rate": 1.2918439224433212e-05, "loss": 0.4714, "step": 17097 }, { "epoch": 1.2707543664065404, "grad_norm": 1.9327422857196277, "learning_rate": 1.2917671777122536e-05, "loss": 0.5932, "step": 17098 }, { "epoch": 1.2708286882199926, "grad_norm": 1.6543519751767568, "learning_rate": 1.291690431102804e-05, "loss": 0.5356, "step": 17099 }, { "epoch": 1.2709030100334449, "grad_norm": 1.7232068237280231, "learning_rate": 1.2916136826154664e-05, "loss": 0.5069, "step": 17100 }, { "epoch": 1.270977331846897, "grad_norm": 2.104263744959198, "learning_rate": 1.2915369322507354e-05, "loss": 0.7445, "step": 17101 }, { "epoch": 1.2710516536603493, "grad_norm": 2.282658395353141, "learning_rate": 1.2914601800091045e-05, "loss": 0.6866, "step": 17102 }, { "epoch": 1.2711259754738016, "grad_norm": 1.9967245148388744, "learning_rate": 1.2913834258910682e-05, "loss": 0.671, "step": 17103 }, { "epoch": 1.2712002972872538, "grad_norm": 5.813539974365423, "learning_rate": 1.2913066698971206e-05, "loss": 0.6557, "step": 17104 }, { "epoch": 1.271274619100706, "grad_norm": 2.134138149223682, "learning_rate": 1.2912299120277554e-05, "loss": 0.7555, "step": 17105 }, { "epoch": 1.2713489409141583, "grad_norm": 2.406098234166103, "learning_rate": 1.2911531522834678e-05, "loss": 0.5506, "step": 17106 }, { "epoch": 1.2714232627276105, "grad_norm": 1.5678310474124697, "learning_rate": 1.2910763906647506e-05, "loss": 0.4852, "step": 17107 }, { "epoch": 1.2714975845410628, "grad_norm": 2.299026589325486, "learning_rate": 1.290999627172099e-05, "loss": 0.7266, "step": 17108 }, { "epoch": 1.271571906354515, "grad_norm": 1.6061994144572427, "learning_rate": 1.290922861806007e-05, "loss": 0.4298, "step": 17109 }, { "epoch": 1.2716462281679672, "grad_norm": 1.7699894526834123, "learning_rate": 1.2908460945669686e-05, "loss": 0.5746, "step": 17110 }, { "epoch": 1.2717205499814195, "grad_norm": 2.0062644439849957, "learning_rate": 1.2907693254554779e-05, "loss": 0.5435, "step": 17111 }, { "epoch": 1.2717948717948717, "grad_norm": 2.19885570188375, "learning_rate": 1.2906925544720299e-05, "loss": 0.6255, "step": 17112 }, { "epoch": 1.2718691936083242, "grad_norm": 1.6075196753355614, "learning_rate": 1.2906157816171179e-05, "loss": 0.4906, "step": 17113 }, { "epoch": 1.2719435154217762, "grad_norm": 2.260673197481365, "learning_rate": 1.2905390068912369e-05, "loss": 0.5983, "step": 17114 }, { "epoch": 1.2720178372352287, "grad_norm": 1.7932978103744848, "learning_rate": 1.2904622302948803e-05, "loss": 0.5651, "step": 17115 }, { "epoch": 1.2720921590486807, "grad_norm": 1.8190725992505672, "learning_rate": 1.2903854518285435e-05, "loss": 0.4979, "step": 17116 }, { "epoch": 1.2721664808621331, "grad_norm": 1.8740411751738142, "learning_rate": 1.2903086714927202e-05, "loss": 0.5498, "step": 17117 }, { "epoch": 1.2722408026755851, "grad_norm": 2.390460297292138, "learning_rate": 1.2902318892879044e-05, "loss": 0.5674, "step": 17118 }, { "epoch": 1.2723151244890376, "grad_norm": 2.0621967322135437, "learning_rate": 1.290155105214591e-05, "loss": 0.4555, "step": 17119 }, { "epoch": 1.2723894463024898, "grad_norm": 1.7129449106495367, "learning_rate": 1.290078319273274e-05, "loss": 0.581, "step": 17120 }, { "epoch": 1.272463768115942, "grad_norm": 1.8019611772167332, "learning_rate": 1.2900015314644476e-05, "loss": 0.4393, "step": 17121 }, { "epoch": 1.2725380899293943, "grad_norm": 2.1583925900456364, "learning_rate": 1.2899247417886066e-05, "loss": 0.7786, "step": 17122 }, { "epoch": 1.2726124117428466, "grad_norm": 1.5817666480118957, "learning_rate": 1.2898479502462453e-05, "loss": 0.562, "step": 17123 }, { "epoch": 1.2726867335562988, "grad_norm": 2.5036226437494573, "learning_rate": 1.289771156837858e-05, "loss": 0.5385, "step": 17124 }, { "epoch": 1.272761055369751, "grad_norm": 1.523934487781984, "learning_rate": 1.2896943615639385e-05, "loss": 0.4461, "step": 17125 }, { "epoch": 1.2728353771832033, "grad_norm": 2.014957587979407, "learning_rate": 1.2896175644249823e-05, "loss": 0.6749, "step": 17126 }, { "epoch": 1.2729096989966555, "grad_norm": 1.8134282208544683, "learning_rate": 1.2895407654214829e-05, "loss": 0.5814, "step": 17127 }, { "epoch": 1.2729840208101078, "grad_norm": 4.048787834996781, "learning_rate": 1.2894639645539352e-05, "loss": 0.4602, "step": 17128 }, { "epoch": 1.27305834262356, "grad_norm": 1.73040286656364, "learning_rate": 1.2893871618228336e-05, "loss": 0.6581, "step": 17129 }, { "epoch": 1.2731326644370122, "grad_norm": 1.819214333072639, "learning_rate": 1.2893103572286723e-05, "loss": 0.599, "step": 17130 }, { "epoch": 1.2732069862504645, "grad_norm": 1.9151655070876972, "learning_rate": 1.289233550771946e-05, "loss": 0.5711, "step": 17131 }, { "epoch": 1.2732813080639167, "grad_norm": 1.7375621933152838, "learning_rate": 1.289156742453149e-05, "loss": 0.6431, "step": 17132 }, { "epoch": 1.273355629877369, "grad_norm": 1.97036500312309, "learning_rate": 1.289079932272776e-05, "loss": 0.5965, "step": 17133 }, { "epoch": 1.2734299516908212, "grad_norm": 2.0516748533843203, "learning_rate": 1.2890031202313216e-05, "loss": 0.5147, "step": 17134 }, { "epoch": 1.2735042735042734, "grad_norm": 2.54110329698396, "learning_rate": 1.2889263063292798e-05, "loss": 0.6956, "step": 17135 }, { "epoch": 1.2735785953177259, "grad_norm": 2.4483434888203264, "learning_rate": 1.2888494905671457e-05, "loss": 0.6966, "step": 17136 }, { "epoch": 1.273652917131178, "grad_norm": 1.9483070614480735, "learning_rate": 1.2887726729454133e-05, "loss": 0.459, "step": 17137 }, { "epoch": 1.2737272389446304, "grad_norm": 2.271749235887857, "learning_rate": 1.2886958534645776e-05, "loss": 0.6887, "step": 17138 }, { "epoch": 1.2738015607580824, "grad_norm": 1.8707108152737149, "learning_rate": 1.2886190321251331e-05, "loss": 0.6086, "step": 17139 }, { "epoch": 1.2738758825715348, "grad_norm": 2.443537822414778, "learning_rate": 1.2885422089275738e-05, "loss": 0.6641, "step": 17140 }, { "epoch": 1.273950204384987, "grad_norm": 2.0925293920818877, "learning_rate": 1.2884653838723949e-05, "loss": 0.7127, "step": 17141 }, { "epoch": 1.2740245261984393, "grad_norm": 2.000241622242915, "learning_rate": 1.288388556960091e-05, "loss": 0.5633, "step": 17142 }, { "epoch": 1.2740988480118915, "grad_norm": 2.080826179097594, "learning_rate": 1.2883117281911564e-05, "loss": 0.7375, "step": 17143 }, { "epoch": 1.2741731698253438, "grad_norm": 1.8863821082230685, "learning_rate": 1.2882348975660858e-05, "loss": 0.5802, "step": 17144 }, { "epoch": 1.274247491638796, "grad_norm": 1.9825079615284789, "learning_rate": 1.2881580650853743e-05, "loss": 0.5942, "step": 17145 }, { "epoch": 1.2743218134522483, "grad_norm": 3.1977666872969963, "learning_rate": 1.2880812307495157e-05, "loss": 0.5707, "step": 17146 }, { "epoch": 1.2743961352657005, "grad_norm": 1.8581579676548974, "learning_rate": 1.288004394559005e-05, "loss": 0.7277, "step": 17147 }, { "epoch": 1.2744704570791527, "grad_norm": 2.085470190413634, "learning_rate": 1.2879275565143373e-05, "loss": 0.8175, "step": 17148 }, { "epoch": 1.274544778892605, "grad_norm": 1.7151377374100498, "learning_rate": 1.2878507166160066e-05, "loss": 0.6514, "step": 17149 }, { "epoch": 1.2746191007060572, "grad_norm": 1.862441378326192, "learning_rate": 1.2877738748645084e-05, "loss": 0.6272, "step": 17150 }, { "epoch": 1.2746934225195095, "grad_norm": 2.0483405951121094, "learning_rate": 1.2876970312603362e-05, "loss": 0.6453, "step": 17151 }, { "epoch": 1.2747677443329617, "grad_norm": 2.0606390698304438, "learning_rate": 1.287620185803986e-05, "loss": 0.6994, "step": 17152 }, { "epoch": 1.274842066146414, "grad_norm": 1.8611962678482754, "learning_rate": 1.2875433384959518e-05, "loss": 0.6847, "step": 17153 }, { "epoch": 1.2749163879598662, "grad_norm": 1.6287735288817438, "learning_rate": 1.2874664893367286e-05, "loss": 0.6265, "step": 17154 }, { "epoch": 1.2749907097733184, "grad_norm": 2.419961985812623, "learning_rate": 1.287389638326811e-05, "loss": 0.6432, "step": 17155 }, { "epoch": 1.2750650315867706, "grad_norm": 1.9317014182023526, "learning_rate": 1.2873127854666938e-05, "loss": 0.594, "step": 17156 }, { "epoch": 1.2751393534002229, "grad_norm": 1.7285605827973074, "learning_rate": 1.2872359307568719e-05, "loss": 0.6176, "step": 17157 }, { "epoch": 1.2752136752136751, "grad_norm": 1.8684578989613831, "learning_rate": 1.2871590741978397e-05, "loss": 0.708, "step": 17158 }, { "epoch": 1.2752879970271276, "grad_norm": 2.19062826438456, "learning_rate": 1.2870822157900927e-05, "loss": 0.7743, "step": 17159 }, { "epoch": 1.2753623188405796, "grad_norm": 2.221855130354088, "learning_rate": 1.2870053555341248e-05, "loss": 0.8014, "step": 17160 }, { "epoch": 1.275436640654032, "grad_norm": 2.953412438155578, "learning_rate": 1.2869284934304317e-05, "loss": 0.8311, "step": 17161 }, { "epoch": 1.275510962467484, "grad_norm": 2.0542253851829506, "learning_rate": 1.2868516294795077e-05, "loss": 0.7277, "step": 17162 }, { "epoch": 1.2755852842809365, "grad_norm": 1.7244511030367136, "learning_rate": 1.2867747636818478e-05, "loss": 0.5779, "step": 17163 }, { "epoch": 1.2756596060943888, "grad_norm": 1.8155464635189904, "learning_rate": 1.2866978960379468e-05, "loss": 0.6162, "step": 17164 }, { "epoch": 1.275733927907841, "grad_norm": 1.8676878399059933, "learning_rate": 1.2866210265482997e-05, "loss": 0.5978, "step": 17165 }, { "epoch": 1.2758082497212933, "grad_norm": 2.0064688681522385, "learning_rate": 1.2865441552134013e-05, "loss": 0.6688, "step": 17166 }, { "epoch": 1.2758825715347455, "grad_norm": 2.1366087405322554, "learning_rate": 1.2864672820337465e-05, "loss": 0.6446, "step": 17167 }, { "epoch": 1.2759568933481977, "grad_norm": 1.6208555640292117, "learning_rate": 1.28639040700983e-05, "loss": 0.5422, "step": 17168 }, { "epoch": 1.27603121516165, "grad_norm": 1.94662983597026, "learning_rate": 1.2863135301421471e-05, "loss": 0.6471, "step": 17169 }, { "epoch": 1.2761055369751022, "grad_norm": 2.101000785679075, "learning_rate": 1.2862366514311925e-05, "loss": 0.4592, "step": 17170 }, { "epoch": 1.2761798587885544, "grad_norm": 1.85021023392946, "learning_rate": 1.2861597708774616e-05, "loss": 0.5739, "step": 17171 }, { "epoch": 1.2762541806020067, "grad_norm": 2.0255378913047988, "learning_rate": 1.2860828884814485e-05, "loss": 0.7218, "step": 17172 }, { "epoch": 1.276328502415459, "grad_norm": 2.6009512824342513, "learning_rate": 1.2860060042436486e-05, "loss": 0.6903, "step": 17173 }, { "epoch": 1.2764028242289112, "grad_norm": 2.512530852831985, "learning_rate": 1.285929118164557e-05, "loss": 0.5706, "step": 17174 }, { "epoch": 1.2764771460423634, "grad_norm": 1.715916037773148, "learning_rate": 1.2858522302446686e-05, "loss": 0.5292, "step": 17175 }, { "epoch": 1.2765514678558156, "grad_norm": 1.731630376758079, "learning_rate": 1.2857753404844783e-05, "loss": 0.5145, "step": 17176 }, { "epoch": 1.2766257896692679, "grad_norm": 2.0189691309055258, "learning_rate": 1.2856984488844813e-05, "loss": 0.5826, "step": 17177 }, { "epoch": 1.2767001114827201, "grad_norm": 1.8533074777134666, "learning_rate": 1.2856215554451723e-05, "loss": 0.6203, "step": 17178 }, { "epoch": 1.2767744332961724, "grad_norm": 1.7746858119396667, "learning_rate": 1.2855446601670467e-05, "loss": 0.573, "step": 17179 }, { "epoch": 1.2768487551096248, "grad_norm": 1.9149285239845322, "learning_rate": 1.2854677630505994e-05, "loss": 0.6451, "step": 17180 }, { "epoch": 1.2769230769230768, "grad_norm": 2.105146833861668, "learning_rate": 1.2853908640963253e-05, "loss": 0.5898, "step": 17181 }, { "epoch": 1.2769973987365293, "grad_norm": 1.969810178616149, "learning_rate": 1.2853139633047197e-05, "loss": 0.7074, "step": 17182 }, { "epoch": 1.2770717205499813, "grad_norm": 2.3941710024143057, "learning_rate": 1.2852370606762779e-05, "loss": 0.5031, "step": 17183 }, { "epoch": 1.2771460423634338, "grad_norm": 2.7302753406623963, "learning_rate": 1.2851601562114943e-05, "loss": 0.5861, "step": 17184 }, { "epoch": 1.2772203641768858, "grad_norm": 2.2305071081431658, "learning_rate": 1.2850832499108645e-05, "loss": 0.824, "step": 17185 }, { "epoch": 1.2772946859903382, "grad_norm": 2.0743473610196745, "learning_rate": 1.2850063417748834e-05, "loss": 0.6873, "step": 17186 }, { "epoch": 1.2773690078037905, "grad_norm": 2.436428025814957, "learning_rate": 1.2849294318040464e-05, "loss": 0.5606, "step": 17187 }, { "epoch": 1.2774433296172427, "grad_norm": 1.9073522183571066, "learning_rate": 1.2848525199988488e-05, "loss": 0.687, "step": 17188 }, { "epoch": 1.277517651430695, "grad_norm": 1.8761627582120872, "learning_rate": 1.2847756063597853e-05, "loss": 0.4911, "step": 17189 }, { "epoch": 1.2775919732441472, "grad_norm": 1.9193934171518756, "learning_rate": 1.284698690887351e-05, "loss": 0.5166, "step": 17190 }, { "epoch": 1.2776662950575994, "grad_norm": 1.839754737569044, "learning_rate": 1.2846217735820414e-05, "loss": 0.6646, "step": 17191 }, { "epoch": 1.2777406168710517, "grad_norm": 1.7187746319979942, "learning_rate": 1.2845448544443516e-05, "loss": 0.5239, "step": 17192 }, { "epoch": 1.277814938684504, "grad_norm": 1.8143016981517255, "learning_rate": 1.284467933474777e-05, "loss": 0.4263, "step": 17193 }, { "epoch": 1.2778892604979561, "grad_norm": 1.9449183746984287, "learning_rate": 1.2843910106738125e-05, "loss": 0.5694, "step": 17194 }, { "epoch": 1.2779635823114084, "grad_norm": 1.710039911908036, "learning_rate": 1.2843140860419532e-05, "loss": 0.5364, "step": 17195 }, { "epoch": 1.2780379041248606, "grad_norm": 1.585812651824814, "learning_rate": 1.2842371595796946e-05, "loss": 0.532, "step": 17196 }, { "epoch": 1.2781122259383129, "grad_norm": 4.847441430230268, "learning_rate": 1.2841602312875319e-05, "loss": 0.7023, "step": 17197 }, { "epoch": 1.278186547751765, "grad_norm": 2.110900483775865, "learning_rate": 1.2840833011659606e-05, "loss": 0.7549, "step": 17198 }, { "epoch": 1.2782608695652173, "grad_norm": 2.133825060461099, "learning_rate": 1.2840063692154756e-05, "loss": 0.6051, "step": 17199 }, { "epoch": 1.2783351913786696, "grad_norm": 2.0495221725127344, "learning_rate": 1.2839294354365725e-05, "loss": 0.6377, "step": 17200 }, { "epoch": 1.2784095131921218, "grad_norm": 1.6315595261781184, "learning_rate": 1.2838524998297463e-05, "loss": 0.4414, "step": 17201 }, { "epoch": 1.278483835005574, "grad_norm": 1.9635602099472518, "learning_rate": 1.2837755623954924e-05, "loss": 0.6092, "step": 17202 }, { "epoch": 1.2785581568190265, "grad_norm": 2.3695167190492747, "learning_rate": 1.283698623134306e-05, "loss": 0.7277, "step": 17203 }, { "epoch": 1.2786324786324785, "grad_norm": 1.9104626627816412, "learning_rate": 1.2836216820466829e-05, "loss": 0.466, "step": 17204 }, { "epoch": 1.278706800445931, "grad_norm": 2.9222289035186337, "learning_rate": 1.2835447391331181e-05, "loss": 0.6082, "step": 17205 }, { "epoch": 1.278781122259383, "grad_norm": 3.3535785495627013, "learning_rate": 1.283467794394107e-05, "loss": 0.6207, "step": 17206 }, { "epoch": 1.2788554440728355, "grad_norm": 2.2327174804639367, "learning_rate": 1.2833908478301446e-05, "loss": 0.7594, "step": 17207 }, { "epoch": 1.2789297658862877, "grad_norm": 2.1021685627670883, "learning_rate": 1.283313899441727e-05, "loss": 0.6946, "step": 17208 }, { "epoch": 1.27900408769974, "grad_norm": 4.650027210038424, "learning_rate": 1.2832369492293488e-05, "loss": 0.6726, "step": 17209 }, { "epoch": 1.2790784095131922, "grad_norm": 2.094589603309314, "learning_rate": 1.2831599971935062e-05, "loss": 0.7328, "step": 17210 }, { "epoch": 1.2791527313266444, "grad_norm": 1.8515068029700636, "learning_rate": 1.2830830433346943e-05, "loss": 0.407, "step": 17211 }, { "epoch": 1.2792270531400967, "grad_norm": 1.964876884509382, "learning_rate": 1.2830060876534083e-05, "loss": 0.5913, "step": 17212 }, { "epoch": 1.279301374953549, "grad_norm": 5.448323936325669, "learning_rate": 1.2829291301501437e-05, "loss": 0.6478, "step": 17213 }, { "epoch": 1.2793756967670011, "grad_norm": 1.8286239076010553, "learning_rate": 1.2828521708253958e-05, "loss": 0.5526, "step": 17214 }, { "epoch": 1.2794500185804534, "grad_norm": 4.567130283612456, "learning_rate": 1.282775209679661e-05, "loss": 0.6916, "step": 17215 }, { "epoch": 1.2795243403939056, "grad_norm": 2.303646210087747, "learning_rate": 1.2826982467134337e-05, "loss": 0.6973, "step": 17216 }, { "epoch": 1.2795986622073579, "grad_norm": 2.02858987370753, "learning_rate": 1.2826212819272097e-05, "loss": 0.6627, "step": 17217 }, { "epoch": 1.27967298402081, "grad_norm": 2.518788985932869, "learning_rate": 1.2825443153214845e-05, "loss": 0.5398, "step": 17218 }, { "epoch": 1.2797473058342623, "grad_norm": 1.6869254418092912, "learning_rate": 1.2824673468967537e-05, "loss": 0.5514, "step": 17219 }, { "epoch": 1.2798216276477146, "grad_norm": 2.390199663423852, "learning_rate": 1.2823903766535126e-05, "loss": 0.7841, "step": 17220 }, { "epoch": 1.2798959494611668, "grad_norm": 1.717536257612305, "learning_rate": 1.2823134045922575e-05, "loss": 0.5691, "step": 17221 }, { "epoch": 1.279970271274619, "grad_norm": 1.5702640021656071, "learning_rate": 1.2822364307134829e-05, "loss": 0.3758, "step": 17222 }, { "epoch": 1.2800445930880713, "grad_norm": 3.055132985155794, "learning_rate": 1.2821594550176849e-05, "loss": 0.49, "step": 17223 }, { "epoch": 1.2801189149015235, "grad_norm": 1.9830022864610843, "learning_rate": 1.2820824775053588e-05, "loss": 0.4815, "step": 17224 }, { "epoch": 1.2801932367149758, "grad_norm": 1.8252677126235992, "learning_rate": 1.2820054981770004e-05, "loss": 0.7082, "step": 17225 }, { "epoch": 1.2802675585284282, "grad_norm": 2.4882214777473743, "learning_rate": 1.2819285170331055e-05, "loss": 0.7653, "step": 17226 }, { "epoch": 1.2803418803418802, "grad_norm": 2.030657076805471, "learning_rate": 1.2818515340741695e-05, "loss": 0.7874, "step": 17227 }, { "epoch": 1.2804162021553327, "grad_norm": 2.0886966981962614, "learning_rate": 1.2817745493006876e-05, "loss": 0.6982, "step": 17228 }, { "epoch": 1.2804905239687847, "grad_norm": 2.215137394490169, "learning_rate": 1.2816975627131559e-05, "loss": 0.7323, "step": 17229 }, { "epoch": 1.2805648457822372, "grad_norm": 2.189043574089472, "learning_rate": 1.2816205743120698e-05, "loss": 0.578, "step": 17230 }, { "epoch": 1.2806391675956894, "grad_norm": 1.6877786370160783, "learning_rate": 1.2815435840979252e-05, "loss": 0.5994, "step": 17231 }, { "epoch": 1.2807134894091416, "grad_norm": 2.300795742870568, "learning_rate": 1.2814665920712175e-05, "loss": 0.7439, "step": 17232 }, { "epoch": 1.2807878112225939, "grad_norm": 1.7859900004106903, "learning_rate": 1.2813895982324427e-05, "loss": 0.7714, "step": 17233 }, { "epoch": 1.2808621330360461, "grad_norm": 2.708662772724336, "learning_rate": 1.2813126025820961e-05, "loss": 0.7124, "step": 17234 }, { "epoch": 1.2809364548494984, "grad_norm": 1.978957856470598, "learning_rate": 1.2812356051206734e-05, "loss": 0.599, "step": 17235 }, { "epoch": 1.2810107766629506, "grad_norm": 3.235258307723656, "learning_rate": 1.2811586058486705e-05, "loss": 0.5822, "step": 17236 }, { "epoch": 1.2810850984764028, "grad_norm": 2.143745836120355, "learning_rate": 1.2810816047665835e-05, "loss": 0.7315, "step": 17237 }, { "epoch": 1.281159420289855, "grad_norm": 1.84719440818288, "learning_rate": 1.2810046018749071e-05, "loss": 0.6671, "step": 17238 }, { "epoch": 1.2812337421033073, "grad_norm": 2.0520395108619924, "learning_rate": 1.2809275971741384e-05, "loss": 0.509, "step": 17239 }, { "epoch": 1.2813080639167596, "grad_norm": 1.8717734595047695, "learning_rate": 1.2808505906647719e-05, "loss": 0.6942, "step": 17240 }, { "epoch": 1.2813823857302118, "grad_norm": 2.151296339607342, "learning_rate": 1.2807735823473038e-05, "loss": 0.7455, "step": 17241 }, { "epoch": 1.281456707543664, "grad_norm": 1.8858001874824961, "learning_rate": 1.2806965722222302e-05, "loss": 0.6853, "step": 17242 }, { "epoch": 1.2815310293571163, "grad_norm": 2.4104226830957964, "learning_rate": 1.2806195602900467e-05, "loss": 0.7018, "step": 17243 }, { "epoch": 1.2816053511705685, "grad_norm": 2.160824943568155, "learning_rate": 1.2805425465512489e-05, "loss": 0.7303, "step": 17244 }, { "epoch": 1.2816796729840207, "grad_norm": 2.6732019231533077, "learning_rate": 1.2804655310063325e-05, "loss": 0.7504, "step": 17245 }, { "epoch": 1.281753994797473, "grad_norm": 1.9453434017028077, "learning_rate": 1.2803885136557939e-05, "loss": 0.6264, "step": 17246 }, { "epoch": 1.2818283166109254, "grad_norm": 2.254035325218137, "learning_rate": 1.2803114945001282e-05, "loss": 0.654, "step": 17247 }, { "epoch": 1.2819026384243775, "grad_norm": 1.9355296436801885, "learning_rate": 1.2802344735398323e-05, "loss": 0.6842, "step": 17248 }, { "epoch": 1.28197696023783, "grad_norm": 1.8526296041167885, "learning_rate": 1.2801574507754009e-05, "loss": 0.4534, "step": 17249 }, { "epoch": 1.282051282051282, "grad_norm": 2.112203801186295, "learning_rate": 1.2800804262073306e-05, "loss": 0.6, "step": 17250 }, { "epoch": 1.2821256038647344, "grad_norm": 4.212402939444101, "learning_rate": 1.2800033998361168e-05, "loss": 0.5798, "step": 17251 }, { "epoch": 1.2821999256781864, "grad_norm": 2.0933219136283387, "learning_rate": 1.2799263716622557e-05, "loss": 0.7104, "step": 17252 }, { "epoch": 1.2822742474916389, "grad_norm": 1.8740378857434847, "learning_rate": 1.2798493416862434e-05, "loss": 0.5549, "step": 17253 }, { "epoch": 1.2823485693050911, "grad_norm": 1.9980241074597112, "learning_rate": 1.2797723099085752e-05, "loss": 0.5338, "step": 17254 }, { "epoch": 1.2824228911185434, "grad_norm": 1.8872312310515174, "learning_rate": 1.2796952763297479e-05, "loss": 0.6346, "step": 17255 }, { "epoch": 1.2824972129319956, "grad_norm": 1.9590705194139577, "learning_rate": 1.2796182409502565e-05, "loss": 0.6181, "step": 17256 }, { "epoch": 1.2825715347454478, "grad_norm": 2.3260513857547167, "learning_rate": 1.2795412037705972e-05, "loss": 0.7477, "step": 17257 }, { "epoch": 1.2826458565589, "grad_norm": 1.6102707331573247, "learning_rate": 1.2794641647912664e-05, "loss": 0.4607, "step": 17258 }, { "epoch": 1.2827201783723523, "grad_norm": 3.8153864079106774, "learning_rate": 1.27938712401276e-05, "loss": 0.546, "step": 17259 }, { "epoch": 1.2827945001858045, "grad_norm": 2.7282011634086754, "learning_rate": 1.2793100814355734e-05, "loss": 0.7456, "step": 17260 }, { "epoch": 1.2828688219992568, "grad_norm": 2.3404583845951934, "learning_rate": 1.2792330370602032e-05, "loss": 0.6433, "step": 17261 }, { "epoch": 1.282943143812709, "grad_norm": 1.5203439856991159, "learning_rate": 1.279155990887145e-05, "loss": 0.3775, "step": 17262 }, { "epoch": 1.2830174656261613, "grad_norm": 1.7026294121631738, "learning_rate": 1.2790789429168954e-05, "loss": 0.5796, "step": 17263 }, { "epoch": 1.2830917874396135, "grad_norm": 2.002763001557838, "learning_rate": 1.27900189314995e-05, "loss": 0.7359, "step": 17264 }, { "epoch": 1.2831661092530657, "grad_norm": 1.8161301632637128, "learning_rate": 1.2789248415868045e-05, "loss": 0.6062, "step": 17265 }, { "epoch": 1.283240431066518, "grad_norm": 1.8352035239008688, "learning_rate": 1.2788477882279555e-05, "loss": 0.5227, "step": 17266 }, { "epoch": 1.2833147528799702, "grad_norm": 2.239219212022413, "learning_rate": 1.2787707330738992e-05, "loss": 0.6986, "step": 17267 }, { "epoch": 1.2833890746934224, "grad_norm": 1.9448824221881278, "learning_rate": 1.2786936761251311e-05, "loss": 0.7202, "step": 17268 }, { "epoch": 1.2834633965068747, "grad_norm": 1.8257459276827626, "learning_rate": 1.2786166173821474e-05, "loss": 0.519, "step": 17269 }, { "epoch": 1.2835377183203271, "grad_norm": 1.8690925509130576, "learning_rate": 1.2785395568454449e-05, "loss": 0.5493, "step": 17270 }, { "epoch": 1.2836120401337792, "grad_norm": 1.8042723133932805, "learning_rate": 1.2784624945155189e-05, "loss": 0.6246, "step": 17271 }, { "epoch": 1.2836863619472316, "grad_norm": 2.0137013304606763, "learning_rate": 1.278385430392866e-05, "loss": 0.6714, "step": 17272 }, { "epoch": 1.2837606837606836, "grad_norm": 1.5222765431406855, "learning_rate": 1.2783083644779818e-05, "loss": 0.5448, "step": 17273 }, { "epoch": 1.283835005574136, "grad_norm": 2.1523463356102326, "learning_rate": 1.2782312967713628e-05, "loss": 0.5236, "step": 17274 }, { "epoch": 1.2839093273875883, "grad_norm": 2.02153287239101, "learning_rate": 1.2781542272735054e-05, "loss": 0.7238, "step": 17275 }, { "epoch": 1.2839836492010406, "grad_norm": 2.393118027012902, "learning_rate": 1.2780771559849053e-05, "loss": 0.5416, "step": 17276 }, { "epoch": 1.2840579710144928, "grad_norm": 1.8623205404917544, "learning_rate": 1.278000082906059e-05, "loss": 0.5618, "step": 17277 }, { "epoch": 1.284132292827945, "grad_norm": 1.934362635105061, "learning_rate": 1.2779230080374628e-05, "loss": 0.7075, "step": 17278 }, { "epoch": 1.2842066146413973, "grad_norm": 2.1178954833316714, "learning_rate": 1.2778459313796123e-05, "loss": 0.6426, "step": 17279 }, { "epoch": 1.2842809364548495, "grad_norm": 1.824216555441818, "learning_rate": 1.2777688529330044e-05, "loss": 0.5109, "step": 17280 }, { "epoch": 1.2843552582683018, "grad_norm": 1.6415400851674407, "learning_rate": 1.2776917726981348e-05, "loss": 0.5015, "step": 17281 }, { "epoch": 1.284429580081754, "grad_norm": 1.9966034599229647, "learning_rate": 1.2776146906755003e-05, "loss": 0.6265, "step": 17282 }, { "epoch": 1.2845039018952062, "grad_norm": 2.6405717055024285, "learning_rate": 1.2775376068655965e-05, "loss": 0.6711, "step": 17283 }, { "epoch": 1.2845782237086585, "grad_norm": 2.5755768907714063, "learning_rate": 1.2774605212689202e-05, "loss": 0.6597, "step": 17284 }, { "epoch": 1.2846525455221107, "grad_norm": 2.155830463889748, "learning_rate": 1.2773834338859672e-05, "loss": 0.6683, "step": 17285 }, { "epoch": 1.284726867335563, "grad_norm": 2.0139403887157874, "learning_rate": 1.2773063447172344e-05, "loss": 0.6897, "step": 17286 }, { "epoch": 1.2848011891490152, "grad_norm": 2.1291508724848622, "learning_rate": 1.2772292537632175e-05, "loss": 0.6979, "step": 17287 }, { "epoch": 1.2848755109624674, "grad_norm": 2.0661175431706846, "learning_rate": 1.277152161024413e-05, "loss": 0.633, "step": 17288 }, { "epoch": 1.2849498327759197, "grad_norm": 7.425942166692407, "learning_rate": 1.2770750665013176e-05, "loss": 0.607, "step": 17289 }, { "epoch": 1.285024154589372, "grad_norm": 3.6563617493008214, "learning_rate": 1.2769979701944271e-05, "loss": 0.5013, "step": 17290 }, { "epoch": 1.2850984764028242, "grad_norm": 1.8870679859674857, "learning_rate": 1.2769208721042378e-05, "loss": 0.517, "step": 17291 }, { "epoch": 1.2851727982162764, "grad_norm": 2.0516484079621633, "learning_rate": 1.2768437722312468e-05, "loss": 0.6036, "step": 17292 }, { "epoch": 1.2852471200297289, "grad_norm": 1.704670846054794, "learning_rate": 1.2767666705759497e-05, "loss": 0.5989, "step": 17293 }, { "epoch": 1.2853214418431809, "grad_norm": 2.0402942014798957, "learning_rate": 1.2766895671388429e-05, "loss": 0.6401, "step": 17294 }, { "epoch": 1.2853957636566333, "grad_norm": 2.550944687886588, "learning_rate": 1.2766124619204234e-05, "loss": 0.8049, "step": 17295 }, { "epoch": 1.2854700854700853, "grad_norm": 1.6393249268525352, "learning_rate": 1.276535354921187e-05, "loss": 0.5581, "step": 17296 }, { "epoch": 1.2855444072835378, "grad_norm": 1.8682385091565683, "learning_rate": 1.2764582461416306e-05, "loss": 0.518, "step": 17297 }, { "epoch": 1.28561872909699, "grad_norm": 1.7052164956972986, "learning_rate": 1.27638113558225e-05, "loss": 0.4984, "step": 17298 }, { "epoch": 1.2856930509104423, "grad_norm": 2.1528774332764695, "learning_rate": 1.276304023243542e-05, "loss": 0.6509, "step": 17299 }, { "epoch": 1.2857673727238945, "grad_norm": 1.358111361741693, "learning_rate": 1.2762269091260031e-05, "loss": 0.4523, "step": 17300 }, { "epoch": 1.2858416945373468, "grad_norm": 2.1725644433287448, "learning_rate": 1.27614979323013e-05, "loss": 0.7365, "step": 17301 }, { "epoch": 1.285916016350799, "grad_norm": 2.017983194315488, "learning_rate": 1.2760726755564186e-05, "loss": 0.5365, "step": 17302 }, { "epoch": 1.2859903381642512, "grad_norm": 3.23383079455568, "learning_rate": 1.2759955561053657e-05, "loss": 0.6951, "step": 17303 }, { "epoch": 1.2860646599777035, "grad_norm": 1.9927917312678696, "learning_rate": 1.2759184348774675e-05, "loss": 0.6201, "step": 17304 }, { "epoch": 1.2861389817911557, "grad_norm": 2.744521821551544, "learning_rate": 1.2758413118732208e-05, "loss": 0.6605, "step": 17305 }, { "epoch": 1.286213303604608, "grad_norm": 2.0480959889299477, "learning_rate": 1.2757641870931225e-05, "loss": 0.5328, "step": 17306 }, { "epoch": 1.2862876254180602, "grad_norm": 2.2853463341223295, "learning_rate": 1.2756870605376683e-05, "loss": 0.6986, "step": 17307 }, { "epoch": 1.2863619472315124, "grad_norm": 1.8427514606140938, "learning_rate": 1.2756099322073552e-05, "loss": 0.5841, "step": 17308 }, { "epoch": 1.2864362690449647, "grad_norm": 2.5141399243553706, "learning_rate": 1.2755328021026796e-05, "loss": 0.6014, "step": 17309 }, { "epoch": 1.286510590858417, "grad_norm": 2.268522918562054, "learning_rate": 1.275455670224138e-05, "loss": 0.6115, "step": 17310 }, { "epoch": 1.2865849126718691, "grad_norm": 2.4534185898987344, "learning_rate": 1.2753785365722271e-05, "loss": 0.6831, "step": 17311 }, { "epoch": 1.2866592344853214, "grad_norm": 1.9171819690985359, "learning_rate": 1.2753014011474438e-05, "loss": 0.6789, "step": 17312 }, { "epoch": 1.2867335562987736, "grad_norm": 1.9569338438607766, "learning_rate": 1.2752242639502841e-05, "loss": 0.6184, "step": 17313 }, { "epoch": 1.2868078781122259, "grad_norm": 1.6711426605268414, "learning_rate": 1.2751471249812451e-05, "loss": 0.4419, "step": 17314 }, { "epoch": 1.286882199925678, "grad_norm": 1.8460122374224888, "learning_rate": 1.2750699842408227e-05, "loss": 0.6623, "step": 17315 }, { "epoch": 1.2869565217391306, "grad_norm": 2.262947449132167, "learning_rate": 1.2749928417295143e-05, "loss": 0.4826, "step": 17316 }, { "epoch": 1.2870308435525826, "grad_norm": 2.0595449789062106, "learning_rate": 1.2749156974478163e-05, "loss": 0.6094, "step": 17317 }, { "epoch": 1.287105165366035, "grad_norm": 1.771342040178509, "learning_rate": 1.2748385513962254e-05, "loss": 0.4863, "step": 17318 }, { "epoch": 1.287179487179487, "grad_norm": 1.6953871089251105, "learning_rate": 1.2747614035752381e-05, "loss": 0.3761, "step": 17319 }, { "epoch": 1.2872538089929395, "grad_norm": 1.884075071398918, "learning_rate": 1.274684253985351e-05, "loss": 0.6366, "step": 17320 }, { "epoch": 1.2873281308063917, "grad_norm": 2.113524005669132, "learning_rate": 1.274607102627061e-05, "loss": 0.6577, "step": 17321 }, { "epoch": 1.287402452619844, "grad_norm": 2.068812573825664, "learning_rate": 1.2745299495008645e-05, "loss": 0.6167, "step": 17322 }, { "epoch": 1.2874767744332962, "grad_norm": 1.7060237327185515, "learning_rate": 1.2744527946072588e-05, "loss": 0.4574, "step": 17323 }, { "epoch": 1.2875510962467485, "grad_norm": 2.234266878978787, "learning_rate": 1.27437563794674e-05, "loss": 0.6757, "step": 17324 }, { "epoch": 1.2876254180602007, "grad_norm": 1.6685938572367827, "learning_rate": 1.2742984795198054e-05, "loss": 0.4698, "step": 17325 }, { "epoch": 1.287699739873653, "grad_norm": 2.4538762182539045, "learning_rate": 1.2742213193269512e-05, "loss": 0.7228, "step": 17326 }, { "epoch": 1.2877740616871052, "grad_norm": 2.0019069417373383, "learning_rate": 1.2741441573686743e-05, "loss": 0.6934, "step": 17327 }, { "epoch": 1.2878483835005574, "grad_norm": 2.0335017372799697, "learning_rate": 1.2740669936454715e-05, "loss": 0.5833, "step": 17328 }, { "epoch": 1.2879227053140097, "grad_norm": 2.4529884740074728, "learning_rate": 1.27398982815784e-05, "loss": 0.5882, "step": 17329 }, { "epoch": 1.287997027127462, "grad_norm": 1.8735013972232661, "learning_rate": 1.273912660906276e-05, "loss": 0.7607, "step": 17330 }, { "epoch": 1.2880713489409141, "grad_norm": 2.0509991288374425, "learning_rate": 1.2738354918912764e-05, "loss": 0.6081, "step": 17331 }, { "epoch": 1.2881456707543664, "grad_norm": 2.045511423519154, "learning_rate": 1.2737583211133379e-05, "loss": 0.6198, "step": 17332 }, { "epoch": 1.2882199925678186, "grad_norm": 2.161744461105068, "learning_rate": 1.2736811485729578e-05, "loss": 0.6192, "step": 17333 }, { "epoch": 1.2882943143812708, "grad_norm": 1.9712304533960003, "learning_rate": 1.2736039742706328e-05, "loss": 0.6393, "step": 17334 }, { "epoch": 1.288368636194723, "grad_norm": 2.5669587023927685, "learning_rate": 1.2735267982068593e-05, "loss": 0.5933, "step": 17335 }, { "epoch": 1.2884429580081753, "grad_norm": 1.6786793752147973, "learning_rate": 1.2734496203821349e-05, "loss": 0.5586, "step": 17336 }, { "epoch": 1.2885172798216278, "grad_norm": 2.12124488754468, "learning_rate": 1.2733724407969557e-05, "loss": 0.5569, "step": 17337 }, { "epoch": 1.2885916016350798, "grad_norm": 1.9005097103639135, "learning_rate": 1.2732952594518189e-05, "loss": 0.7123, "step": 17338 }, { "epoch": 1.2886659234485323, "grad_norm": 2.039211859451047, "learning_rate": 1.2732180763472212e-05, "loss": 0.5887, "step": 17339 }, { "epoch": 1.2887402452619843, "grad_norm": 2.116091553509938, "learning_rate": 1.2731408914836603e-05, "loss": 0.6339, "step": 17340 }, { "epoch": 1.2888145670754367, "grad_norm": 1.9558158856063925, "learning_rate": 1.2730637048616323e-05, "loss": 0.6227, "step": 17341 }, { "epoch": 1.2888888888888888, "grad_norm": 2.1128969203968464, "learning_rate": 1.2729865164816343e-05, "loss": 0.6526, "step": 17342 }, { "epoch": 1.2889632107023412, "grad_norm": 2.215894804130231, "learning_rate": 1.272909326344163e-05, "loss": 0.7259, "step": 17343 }, { "epoch": 1.2890375325157934, "grad_norm": 1.8318230126451238, "learning_rate": 1.2728321344497157e-05, "loss": 0.6345, "step": 17344 }, { "epoch": 1.2891118543292457, "grad_norm": 1.9429333566449163, "learning_rate": 1.2727549407987894e-05, "loss": 0.5514, "step": 17345 }, { "epoch": 1.289186176142698, "grad_norm": 1.7597524115398022, "learning_rate": 1.272677745391881e-05, "loss": 0.6037, "step": 17346 }, { "epoch": 1.2892604979561502, "grad_norm": 1.830267402220519, "learning_rate": 1.2726005482294875e-05, "loss": 0.6652, "step": 17347 }, { "epoch": 1.2893348197696024, "grad_norm": 2.1219723419128615, "learning_rate": 1.2725233493121057e-05, "loss": 0.7376, "step": 17348 }, { "epoch": 1.2894091415830546, "grad_norm": 2.199661520774674, "learning_rate": 1.2724461486402324e-05, "loss": 0.6874, "step": 17349 }, { "epoch": 1.2894834633965069, "grad_norm": 1.8431150311862974, "learning_rate": 1.272368946214365e-05, "loss": 0.6793, "step": 17350 }, { "epoch": 1.2895577852099591, "grad_norm": 2.242997475499767, "learning_rate": 1.272291742035001e-05, "loss": 0.4795, "step": 17351 }, { "epoch": 1.2896321070234114, "grad_norm": 1.505680174895119, "learning_rate": 1.2722145361026365e-05, "loss": 0.5252, "step": 17352 }, { "epoch": 1.2897064288368636, "grad_norm": 2.543074086943388, "learning_rate": 1.2721373284177691e-05, "loss": 0.6365, "step": 17353 }, { "epoch": 1.2897807506503158, "grad_norm": 2.2221882278900877, "learning_rate": 1.2720601189808956e-05, "loss": 0.6912, "step": 17354 }, { "epoch": 1.289855072463768, "grad_norm": 3.816615484673508, "learning_rate": 1.271982907792513e-05, "loss": 0.6437, "step": 17355 }, { "epoch": 1.2899293942772203, "grad_norm": 2.177113795843478, "learning_rate": 1.271905694853119e-05, "loss": 0.6207, "step": 17356 }, { "epoch": 1.2900037160906725, "grad_norm": 2.0095205940640346, "learning_rate": 1.27182848016321e-05, "loss": 0.6851, "step": 17357 }, { "epoch": 1.2900780379041248, "grad_norm": 2.1001233358170457, "learning_rate": 1.2717512637232831e-05, "loss": 0.7402, "step": 17358 }, { "epoch": 1.290152359717577, "grad_norm": 2.869205694518289, "learning_rate": 1.2716740455338359e-05, "loss": 0.7128, "step": 17359 }, { "epoch": 1.2902266815310295, "grad_norm": 2.250326954654428, "learning_rate": 1.2715968255953652e-05, "loss": 0.7497, "step": 17360 }, { "epoch": 1.2903010033444815, "grad_norm": 1.8760487039558353, "learning_rate": 1.2715196039083683e-05, "loss": 0.6178, "step": 17361 }, { "epoch": 1.290375325157934, "grad_norm": 1.9685676233186067, "learning_rate": 1.2714423804733424e-05, "loss": 0.5776, "step": 17362 }, { "epoch": 1.290449646971386, "grad_norm": 2.067301019681093, "learning_rate": 1.2713651552907847e-05, "loss": 0.5832, "step": 17363 }, { "epoch": 1.2905239687848384, "grad_norm": 2.5586807917717658, "learning_rate": 1.2712879283611917e-05, "loss": 0.8392, "step": 17364 }, { "epoch": 1.2905982905982907, "grad_norm": 1.473081660880893, "learning_rate": 1.2712106996850614e-05, "loss": 0.3374, "step": 17365 }, { "epoch": 1.290672612411743, "grad_norm": 1.9935328728580208, "learning_rate": 1.2711334692628903e-05, "loss": 0.7208, "step": 17366 }, { "epoch": 1.2907469342251952, "grad_norm": 2.3460186211312597, "learning_rate": 1.2710562370951765e-05, "loss": 0.6494, "step": 17367 }, { "epoch": 1.2908212560386474, "grad_norm": 1.7077668528614784, "learning_rate": 1.2709790031824167e-05, "loss": 0.5473, "step": 17368 }, { "epoch": 1.2908955778520996, "grad_norm": 2.2316981227716646, "learning_rate": 1.2709017675251079e-05, "loss": 0.6938, "step": 17369 }, { "epoch": 1.2909698996655519, "grad_norm": 1.9921431999929253, "learning_rate": 1.2708245301237474e-05, "loss": 0.6794, "step": 17370 }, { "epoch": 1.291044221479004, "grad_norm": 5.258332157547114, "learning_rate": 1.2707472909788327e-05, "loss": 0.6615, "step": 17371 }, { "epoch": 1.2911185432924563, "grad_norm": 2.0333884831686375, "learning_rate": 1.2706700500908613e-05, "loss": 0.4638, "step": 17372 }, { "epoch": 1.2911928651059086, "grad_norm": 1.7936410910407412, "learning_rate": 1.2705928074603299e-05, "loss": 0.5353, "step": 17373 }, { "epoch": 1.2912671869193608, "grad_norm": 1.8890082461705557, "learning_rate": 1.2705155630877366e-05, "loss": 0.6775, "step": 17374 }, { "epoch": 1.291341508732813, "grad_norm": 1.8123578753480227, "learning_rate": 1.2704383169735775e-05, "loss": 0.6419, "step": 17375 }, { "epoch": 1.2914158305462653, "grad_norm": 1.7768514706082466, "learning_rate": 1.2703610691183505e-05, "loss": 0.5867, "step": 17376 }, { "epoch": 1.2914901523597175, "grad_norm": 1.7967451227448574, "learning_rate": 1.2702838195225533e-05, "loss": 0.6542, "step": 17377 }, { "epoch": 1.2915644741731698, "grad_norm": 2.2752508365023383, "learning_rate": 1.270206568186683e-05, "loss": 0.6715, "step": 17378 }, { "epoch": 1.291638795986622, "grad_norm": 1.9977026018932984, "learning_rate": 1.2701293151112366e-05, "loss": 0.6177, "step": 17379 }, { "epoch": 1.2917131178000743, "grad_norm": 1.9453745559547682, "learning_rate": 1.2700520602967117e-05, "loss": 0.6607, "step": 17380 }, { "epoch": 1.2917874396135265, "grad_norm": 2.250371746373688, "learning_rate": 1.2699748037436057e-05, "loss": 0.652, "step": 17381 }, { "epoch": 1.2918617614269787, "grad_norm": 2.0343361726172695, "learning_rate": 1.2698975454524158e-05, "loss": 0.6183, "step": 17382 }, { "epoch": 1.2919360832404312, "grad_norm": 2.403108669811588, "learning_rate": 1.2698202854236395e-05, "loss": 0.6818, "step": 17383 }, { "epoch": 1.2920104050538832, "grad_norm": 1.9551982060268347, "learning_rate": 1.2697430236577744e-05, "loss": 0.5935, "step": 17384 }, { "epoch": 1.2920847268673357, "grad_norm": 2.2799898553553, "learning_rate": 1.2696657601553177e-05, "loss": 0.7002, "step": 17385 }, { "epoch": 1.2921590486807877, "grad_norm": 1.8294022430657204, "learning_rate": 1.2695884949167668e-05, "loss": 0.6679, "step": 17386 }, { "epoch": 1.2922333704942401, "grad_norm": 2.124560749567435, "learning_rate": 1.2695112279426189e-05, "loss": 0.6551, "step": 17387 }, { "epoch": 1.2923076923076924, "grad_norm": 1.9765800621888114, "learning_rate": 1.2694339592333718e-05, "loss": 0.6382, "step": 17388 }, { "epoch": 1.2923820141211446, "grad_norm": 2.3154748145366972, "learning_rate": 1.269356688789523e-05, "loss": 0.7269, "step": 17389 }, { "epoch": 1.2924563359345969, "grad_norm": 2.386342996249899, "learning_rate": 1.2692794166115699e-05, "loss": 0.7283, "step": 17390 }, { "epoch": 1.292530657748049, "grad_norm": 1.9376911460265789, "learning_rate": 1.2692021427000099e-05, "loss": 0.6378, "step": 17391 }, { "epoch": 1.2926049795615013, "grad_norm": 1.885333580130741, "learning_rate": 1.2691248670553402e-05, "loss": 0.6315, "step": 17392 }, { "epoch": 1.2926793013749536, "grad_norm": 2.7423840155467696, "learning_rate": 1.2690475896780585e-05, "loss": 0.6563, "step": 17393 }, { "epoch": 1.2927536231884058, "grad_norm": 2.8955725493720177, "learning_rate": 1.2689703105686626e-05, "loss": 0.6919, "step": 17394 }, { "epoch": 1.292827945001858, "grad_norm": 1.610163193061111, "learning_rate": 1.2688930297276498e-05, "loss": 0.5639, "step": 17395 }, { "epoch": 1.2929022668153103, "grad_norm": 1.82086607609784, "learning_rate": 1.2688157471555175e-05, "loss": 0.5435, "step": 17396 }, { "epoch": 1.2929765886287625, "grad_norm": 1.7686009846339457, "learning_rate": 1.2687384628527637e-05, "loss": 0.557, "step": 17397 }, { "epoch": 1.2930509104422148, "grad_norm": 2.1857115730061745, "learning_rate": 1.2686611768198854e-05, "loss": 0.7021, "step": 17398 }, { "epoch": 1.293125232255667, "grad_norm": 1.6442552253054086, "learning_rate": 1.26858388905738e-05, "loss": 0.5386, "step": 17399 }, { "epoch": 1.2931995540691192, "grad_norm": 1.971199392336009, "learning_rate": 1.2685065995657459e-05, "loss": 0.6081, "step": 17400 }, { "epoch": 1.2932738758825715, "grad_norm": 1.7724606142540964, "learning_rate": 1.2684293083454801e-05, "loss": 0.5148, "step": 17401 }, { "epoch": 1.2933481976960237, "grad_norm": 1.7158806963122106, "learning_rate": 1.2683520153970808e-05, "loss": 0.5762, "step": 17402 }, { "epoch": 1.293422519509476, "grad_norm": 2.6695731128774165, "learning_rate": 1.2682747207210446e-05, "loss": 0.6658, "step": 17403 }, { "epoch": 1.2934968413229284, "grad_norm": 2.0625018196050986, "learning_rate": 1.2681974243178696e-05, "loss": 0.6694, "step": 17404 }, { "epoch": 1.2935711631363804, "grad_norm": 2.676449238040676, "learning_rate": 1.2681201261880538e-05, "loss": 0.741, "step": 17405 }, { "epoch": 1.293645484949833, "grad_norm": 2.564139702476337, "learning_rate": 1.2680428263320945e-05, "loss": 0.6259, "step": 17406 }, { "epoch": 1.293719806763285, "grad_norm": 2.0447084403594924, "learning_rate": 1.2679655247504893e-05, "loss": 0.6255, "step": 17407 }, { "epoch": 1.2937941285767374, "grad_norm": 2.2051712029425374, "learning_rate": 1.267888221443736e-05, "loss": 0.5594, "step": 17408 }, { "epoch": 1.2938684503901894, "grad_norm": 2.375058115117947, "learning_rate": 1.2678109164123322e-05, "loss": 0.7037, "step": 17409 }, { "epoch": 1.2939427722036418, "grad_norm": 1.9913355508342783, "learning_rate": 1.2677336096567756e-05, "loss": 0.6791, "step": 17410 }, { "epoch": 1.294017094017094, "grad_norm": 1.839545014689123, "learning_rate": 1.2676563011775642e-05, "loss": 0.5649, "step": 17411 }, { "epoch": 1.2940914158305463, "grad_norm": 1.756953418084469, "learning_rate": 1.2675789909751952e-05, "loss": 0.5531, "step": 17412 }, { "epoch": 1.2941657376439986, "grad_norm": 5.484799458574944, "learning_rate": 1.2675016790501662e-05, "loss": 0.5869, "step": 17413 }, { "epoch": 1.2942400594574508, "grad_norm": 1.9543581665805891, "learning_rate": 1.2674243654029759e-05, "loss": 0.6463, "step": 17414 }, { "epoch": 1.294314381270903, "grad_norm": 2.0911694274593113, "learning_rate": 1.267347050034121e-05, "loss": 0.7588, "step": 17415 }, { "epoch": 1.2943887030843553, "grad_norm": 2.129148204844778, "learning_rate": 1.2672697329440996e-05, "loss": 0.634, "step": 17416 }, { "epoch": 1.2944630248978075, "grad_norm": 1.825320139350089, "learning_rate": 1.26719241413341e-05, "loss": 0.5728, "step": 17417 }, { "epoch": 1.2945373467112598, "grad_norm": 1.8307018700208593, "learning_rate": 1.267115093602549e-05, "loss": 0.4758, "step": 17418 }, { "epoch": 1.294611668524712, "grad_norm": 3.7509998883773528, "learning_rate": 1.2670377713520152e-05, "loss": 0.5431, "step": 17419 }, { "epoch": 1.2946859903381642, "grad_norm": 2.217618062635019, "learning_rate": 1.266960447382306e-05, "loss": 0.5225, "step": 17420 }, { "epoch": 1.2947603121516165, "grad_norm": 2.291504746442983, "learning_rate": 1.266883121693919e-05, "loss": 0.8402, "step": 17421 }, { "epoch": 1.2948346339650687, "grad_norm": 1.7966494369269925, "learning_rate": 1.2668057942873528e-05, "loss": 0.5207, "step": 17422 }, { "epoch": 1.294908955778521, "grad_norm": 1.801024862342769, "learning_rate": 1.2667284651631045e-05, "loss": 0.6661, "step": 17423 }, { "epoch": 1.2949832775919732, "grad_norm": 2.6464908536767555, "learning_rate": 1.266651134321672e-05, "loss": 0.7091, "step": 17424 }, { "epoch": 1.2950575994054254, "grad_norm": 1.6215707757203985, "learning_rate": 1.2665738017635536e-05, "loss": 0.5165, "step": 17425 }, { "epoch": 1.2951319212188777, "grad_norm": 1.7970167372765011, "learning_rate": 1.2664964674892469e-05, "loss": 0.6041, "step": 17426 }, { "epoch": 1.2952062430323301, "grad_norm": 1.6512188727627382, "learning_rate": 1.2664191314992495e-05, "loss": 0.4601, "step": 17427 }, { "epoch": 1.2952805648457821, "grad_norm": 2.3599387182422165, "learning_rate": 1.26634179379406e-05, "loss": 0.7241, "step": 17428 }, { "epoch": 1.2953548866592346, "grad_norm": 2.1966573157775517, "learning_rate": 1.2662644543741754e-05, "loss": 0.7085, "step": 17429 }, { "epoch": 1.2954292084726866, "grad_norm": 2.509690223217405, "learning_rate": 1.2661871132400942e-05, "loss": 0.6805, "step": 17430 }, { "epoch": 1.295503530286139, "grad_norm": 2.493108975180097, "learning_rate": 1.2661097703923144e-05, "loss": 0.6015, "step": 17431 }, { "epoch": 1.2955778520995913, "grad_norm": 2.4071538995479984, "learning_rate": 1.2660324258313335e-05, "loss": 0.67, "step": 17432 }, { "epoch": 1.2956521739130435, "grad_norm": 3.0033628706663635, "learning_rate": 1.2659550795576498e-05, "loss": 0.8014, "step": 17433 }, { "epoch": 1.2957264957264958, "grad_norm": 2.1021263379207262, "learning_rate": 1.2658777315717608e-05, "loss": 0.7129, "step": 17434 }, { "epoch": 1.295800817539948, "grad_norm": 2.728348677012704, "learning_rate": 1.2658003818741648e-05, "loss": 0.7081, "step": 17435 }, { "epoch": 1.2958751393534003, "grad_norm": 1.9979359823259508, "learning_rate": 1.26572303046536e-05, "loss": 0.6521, "step": 17436 }, { "epoch": 1.2959494611668525, "grad_norm": 2.0604879410867545, "learning_rate": 1.265645677345844e-05, "loss": 0.6702, "step": 17437 }, { "epoch": 1.2960237829803047, "grad_norm": 2.3495771648595523, "learning_rate": 1.2655683225161149e-05, "loss": 0.6205, "step": 17438 }, { "epoch": 1.296098104793757, "grad_norm": 1.8623226970532583, "learning_rate": 1.2654909659766708e-05, "loss": 0.6724, "step": 17439 }, { "epoch": 1.2961724266072092, "grad_norm": 1.8502991528383952, "learning_rate": 1.2654136077280094e-05, "loss": 0.6025, "step": 17440 }, { "epoch": 1.2962467484206615, "grad_norm": 1.5849552606424309, "learning_rate": 1.265336247770629e-05, "loss": 0.527, "step": 17441 }, { "epoch": 1.2963210702341137, "grad_norm": 1.6109788755961183, "learning_rate": 1.2652588861050279e-05, "loss": 0.514, "step": 17442 }, { "epoch": 1.296395392047566, "grad_norm": 1.8645880472485767, "learning_rate": 1.2651815227317037e-05, "loss": 0.5621, "step": 17443 }, { "epoch": 1.2964697138610182, "grad_norm": 1.7753233208292893, "learning_rate": 1.2651041576511547e-05, "loss": 0.5053, "step": 17444 }, { "epoch": 1.2965440356744704, "grad_norm": 1.9319077104512417, "learning_rate": 1.2650267908638786e-05, "loss": 0.6473, "step": 17445 }, { "epoch": 1.2966183574879226, "grad_norm": 2.2105648950736776, "learning_rate": 1.264949422370374e-05, "loss": 0.6354, "step": 17446 }, { "epoch": 1.2966926793013749, "grad_norm": 2.4377140276557685, "learning_rate": 1.2648720521711386e-05, "loss": 0.7409, "step": 17447 }, { "epoch": 1.2967670011148271, "grad_norm": 1.8613793940965553, "learning_rate": 1.264794680266671e-05, "loss": 0.4971, "step": 17448 }, { "epoch": 1.2968413229282794, "grad_norm": 1.988141202310372, "learning_rate": 1.2647173066574687e-05, "loss": 0.5739, "step": 17449 }, { "epoch": 1.2969156447417318, "grad_norm": 1.9512197495799313, "learning_rate": 1.2646399313440302e-05, "loss": 0.6045, "step": 17450 }, { "epoch": 1.2969899665551838, "grad_norm": 2.2866988558605863, "learning_rate": 1.2645625543268536e-05, "loss": 0.7731, "step": 17451 }, { "epoch": 1.2970642883686363, "grad_norm": 1.862916263919555, "learning_rate": 1.264485175606437e-05, "loss": 0.5657, "step": 17452 }, { "epoch": 1.2971386101820883, "grad_norm": 1.8353195584564788, "learning_rate": 1.2644077951832785e-05, "loss": 0.5666, "step": 17453 }, { "epoch": 1.2972129319955408, "grad_norm": 2.3776715432095625, "learning_rate": 1.2643304130578762e-05, "loss": 0.7331, "step": 17454 }, { "epoch": 1.297287253808993, "grad_norm": 2.1636310914425785, "learning_rate": 1.2642530292307287e-05, "loss": 0.6333, "step": 17455 }, { "epoch": 1.2973615756224453, "grad_norm": 2.104879126040508, "learning_rate": 1.2641756437023338e-05, "loss": 0.4477, "step": 17456 }, { "epoch": 1.2974358974358975, "grad_norm": 2.092140443081643, "learning_rate": 1.2640982564731896e-05, "loss": 0.5144, "step": 17457 }, { "epoch": 1.2975102192493497, "grad_norm": 1.929165443793558, "learning_rate": 1.2640208675437946e-05, "loss": 0.5927, "step": 17458 }, { "epoch": 1.297584541062802, "grad_norm": 2.355057600539385, "learning_rate": 1.2639434769146473e-05, "loss": 0.642, "step": 17459 }, { "epoch": 1.2976588628762542, "grad_norm": 3.827295745857643, "learning_rate": 1.2638660845862453e-05, "loss": 0.6333, "step": 17460 }, { "epoch": 1.2977331846897064, "grad_norm": 1.910378541587913, "learning_rate": 1.2637886905590872e-05, "loss": 0.4938, "step": 17461 }, { "epoch": 1.2978075065031587, "grad_norm": 5.782511539194943, "learning_rate": 1.2637112948336711e-05, "loss": 0.6432, "step": 17462 }, { "epoch": 1.297881828316611, "grad_norm": 1.4658088801294684, "learning_rate": 1.2636338974104956e-05, "loss": 0.442, "step": 17463 }, { "epoch": 1.2979561501300632, "grad_norm": 2.3165920262601607, "learning_rate": 1.2635564982900587e-05, "loss": 0.6967, "step": 17464 }, { "epoch": 1.2980304719435154, "grad_norm": 1.7498822316373779, "learning_rate": 1.2634790974728586e-05, "loss": 0.4568, "step": 17465 }, { "epoch": 1.2981047937569676, "grad_norm": 1.99991747233233, "learning_rate": 1.263401694959394e-05, "loss": 0.6862, "step": 17466 }, { "epoch": 1.2981791155704199, "grad_norm": 1.6362927347305574, "learning_rate": 1.2633242907501626e-05, "loss": 0.507, "step": 17467 }, { "epoch": 1.298253437383872, "grad_norm": 2.5605110585529984, "learning_rate": 1.2632468848456632e-05, "loss": 0.6789, "step": 17468 }, { "epoch": 1.2983277591973243, "grad_norm": 2.3792977229228764, "learning_rate": 1.2631694772463938e-05, "loss": 0.7085, "step": 17469 }, { "epoch": 1.2984020810107766, "grad_norm": 1.8201052877481558, "learning_rate": 1.2630920679528534e-05, "loss": 0.6498, "step": 17470 }, { "epoch": 1.298476402824229, "grad_norm": 2.2514384686717666, "learning_rate": 1.2630146569655397e-05, "loss": 0.6418, "step": 17471 }, { "epoch": 1.298550724637681, "grad_norm": 1.9143068026095704, "learning_rate": 1.2629372442849514e-05, "loss": 0.5858, "step": 17472 }, { "epoch": 1.2986250464511335, "grad_norm": 2.4374645395379, "learning_rate": 1.2628598299115866e-05, "loss": 0.627, "step": 17473 }, { "epoch": 1.2986993682645855, "grad_norm": 1.9880297244003666, "learning_rate": 1.262782413845944e-05, "loss": 0.6541, "step": 17474 }, { "epoch": 1.298773690078038, "grad_norm": 1.8855201988618475, "learning_rate": 1.2627049960885214e-05, "loss": 0.5279, "step": 17475 }, { "epoch": 1.29884801189149, "grad_norm": 2.5985311584840614, "learning_rate": 1.2626275766398184e-05, "loss": 0.5315, "step": 17476 }, { "epoch": 1.2989223337049425, "grad_norm": 1.7153969127145516, "learning_rate": 1.2625501555003323e-05, "loss": 0.545, "step": 17477 }, { "epoch": 1.2989966555183947, "grad_norm": 2.482541205233763, "learning_rate": 1.2624727326705619e-05, "loss": 0.6164, "step": 17478 }, { "epoch": 1.299070977331847, "grad_norm": 1.846732933391758, "learning_rate": 1.2623953081510056e-05, "loss": 0.4898, "step": 17479 }, { "epoch": 1.2991452991452992, "grad_norm": 1.623477000810114, "learning_rate": 1.262317881942162e-05, "loss": 0.4974, "step": 17480 }, { "epoch": 1.2992196209587514, "grad_norm": 1.8458149542325522, "learning_rate": 1.2622404540445297e-05, "loss": 0.6225, "step": 17481 }, { "epoch": 1.2992939427722037, "grad_norm": 2.2210732014175605, "learning_rate": 1.262163024458607e-05, "loss": 0.7273, "step": 17482 }, { "epoch": 1.299368264585656, "grad_norm": 1.7995606692064845, "learning_rate": 1.262085593184892e-05, "loss": 0.6071, "step": 17483 }, { "epoch": 1.2994425863991081, "grad_norm": 2.3894962713438304, "learning_rate": 1.2620081602238836e-05, "loss": 0.7348, "step": 17484 }, { "epoch": 1.2995169082125604, "grad_norm": 1.788406475699262, "learning_rate": 1.2619307255760803e-05, "loss": 0.6387, "step": 17485 }, { "epoch": 1.2995912300260126, "grad_norm": 2.7293528014299464, "learning_rate": 1.2618532892419805e-05, "loss": 0.6535, "step": 17486 }, { "epoch": 1.2996655518394649, "grad_norm": 1.969417525090122, "learning_rate": 1.261775851222083e-05, "loss": 0.6092, "step": 17487 }, { "epoch": 1.299739873652917, "grad_norm": 1.9002466815100503, "learning_rate": 1.2616984115168862e-05, "loss": 0.6008, "step": 17488 }, { "epoch": 1.2998141954663693, "grad_norm": 1.983302045880537, "learning_rate": 1.2616209701268885e-05, "loss": 0.6579, "step": 17489 }, { "epoch": 1.2998885172798216, "grad_norm": 1.7123425129782404, "learning_rate": 1.2615435270525884e-05, "loss": 0.5571, "step": 17490 }, { "epoch": 1.2999628390932738, "grad_norm": 1.9263128722732659, "learning_rate": 1.2614660822944848e-05, "loss": 0.4828, "step": 17491 }, { "epoch": 1.300037160906726, "grad_norm": 1.9360444139105633, "learning_rate": 1.2613886358530762e-05, "loss": 0.5745, "step": 17492 }, { "epoch": 1.3001114827201783, "grad_norm": 1.7194510986809852, "learning_rate": 1.2613111877288613e-05, "loss": 0.6144, "step": 17493 }, { "epoch": 1.3001858045336308, "grad_norm": 2.5342095747236004, "learning_rate": 1.2612337379223383e-05, "loss": 0.8078, "step": 17494 }, { "epoch": 1.3002601263470828, "grad_norm": 2.2312184728650535, "learning_rate": 1.2611562864340059e-05, "loss": 0.655, "step": 17495 }, { "epoch": 1.3003344481605352, "grad_norm": 2.094595838377213, "learning_rate": 1.2610788332643628e-05, "loss": 0.6919, "step": 17496 }, { "epoch": 1.3004087699739872, "grad_norm": 1.898391116919323, "learning_rate": 1.2610013784139079e-05, "loss": 0.4781, "step": 17497 }, { "epoch": 1.3004830917874397, "grad_norm": 1.983264604367706, "learning_rate": 1.26092392188314e-05, "loss": 0.6205, "step": 17498 }, { "epoch": 1.300557413600892, "grad_norm": 1.8700298219722327, "learning_rate": 1.2608464636725572e-05, "loss": 0.613, "step": 17499 }, { "epoch": 1.3006317354143442, "grad_norm": 2.1295494004556597, "learning_rate": 1.2607690037826581e-05, "loss": 0.6634, "step": 17500 }, { "epoch": 1.3007060572277964, "grad_norm": 2.229429157009684, "learning_rate": 1.2606915422139419e-05, "loss": 0.6201, "step": 17501 }, { "epoch": 1.3007803790412487, "grad_norm": 2.0102625606538824, "learning_rate": 1.260614078966907e-05, "loss": 0.5595, "step": 17502 }, { "epoch": 1.300854700854701, "grad_norm": 1.7683442751524925, "learning_rate": 1.2605366140420526e-05, "loss": 0.5612, "step": 17503 }, { "epoch": 1.3009290226681531, "grad_norm": 1.8911914074796559, "learning_rate": 1.260459147439877e-05, "loss": 0.5973, "step": 17504 }, { "epoch": 1.3010033444816054, "grad_norm": 2.1397349713460496, "learning_rate": 1.2603816791608783e-05, "loss": 0.6179, "step": 17505 }, { "epoch": 1.3010776662950576, "grad_norm": 2.4565669392659544, "learning_rate": 1.260304209205556e-05, "loss": 0.6323, "step": 17506 }, { "epoch": 1.3011519881085098, "grad_norm": 1.8882616473084424, "learning_rate": 1.2602267375744087e-05, "loss": 0.5938, "step": 17507 }, { "epoch": 1.301226309921962, "grad_norm": 1.8438502569742496, "learning_rate": 1.2601492642679353e-05, "loss": 0.63, "step": 17508 }, { "epoch": 1.3013006317354143, "grad_norm": 1.600785310428403, "learning_rate": 1.2600717892866348e-05, "loss": 0.5104, "step": 17509 }, { "epoch": 1.3013749535488666, "grad_norm": 1.9088994710582106, "learning_rate": 1.2599943126310053e-05, "loss": 0.5966, "step": 17510 }, { "epoch": 1.3014492753623188, "grad_norm": 1.7264945987736786, "learning_rate": 1.2599168343015457e-05, "loss": 0.4409, "step": 17511 }, { "epoch": 1.301523597175771, "grad_norm": 2.1648577002518685, "learning_rate": 1.2598393542987552e-05, "loss": 0.73, "step": 17512 }, { "epoch": 1.3015979189892233, "grad_norm": 2.0568852316867052, "learning_rate": 1.2597618726231325e-05, "loss": 0.6225, "step": 17513 }, { "epoch": 1.3016722408026755, "grad_norm": 2.046143206628586, "learning_rate": 1.2596843892751763e-05, "loss": 0.4862, "step": 17514 }, { "epoch": 1.3017465626161278, "grad_norm": 1.3878619053678674, "learning_rate": 1.2596069042553856e-05, "loss": 0.5158, "step": 17515 }, { "epoch": 1.30182088442958, "grad_norm": 2.118332049740073, "learning_rate": 1.2595294175642588e-05, "loss": 0.6755, "step": 17516 }, { "epoch": 1.3018952062430325, "grad_norm": 1.7789258164985424, "learning_rate": 1.2594519292022952e-05, "loss": 0.5883, "step": 17517 }, { "epoch": 1.3019695280564845, "grad_norm": 1.835214013850445, "learning_rate": 1.2593744391699936e-05, "loss": 0.4693, "step": 17518 }, { "epoch": 1.302043849869937, "grad_norm": 2.16375573718314, "learning_rate": 1.2592969474678527e-05, "loss": 0.6463, "step": 17519 }, { "epoch": 1.302118171683389, "grad_norm": 2.56265180875287, "learning_rate": 1.2592194540963716e-05, "loss": 0.8312, "step": 17520 }, { "epoch": 1.3021924934968414, "grad_norm": 2.642525549900171, "learning_rate": 1.2591419590560491e-05, "loss": 0.6637, "step": 17521 }, { "epoch": 1.3022668153102936, "grad_norm": 1.8904838439082459, "learning_rate": 1.2590644623473842e-05, "loss": 0.5906, "step": 17522 }, { "epoch": 1.3023411371237459, "grad_norm": 1.9469233681785056, "learning_rate": 1.2589869639708757e-05, "loss": 0.6195, "step": 17523 }, { "epoch": 1.3024154589371981, "grad_norm": 2.468698505340956, "learning_rate": 1.2589094639270222e-05, "loss": 0.7171, "step": 17524 }, { "epoch": 1.3024897807506504, "grad_norm": 2.3125728571867015, "learning_rate": 1.2588319622163235e-05, "loss": 0.6196, "step": 17525 }, { "epoch": 1.3025641025641026, "grad_norm": 1.980487235144742, "learning_rate": 1.258754458839278e-05, "loss": 0.5623, "step": 17526 }, { "epoch": 1.3026384243775548, "grad_norm": 1.9350125723550282, "learning_rate": 1.2586769537963845e-05, "loss": 0.5574, "step": 17527 }, { "epoch": 1.302712746191007, "grad_norm": 1.8793611598582767, "learning_rate": 1.2585994470881422e-05, "loss": 0.6532, "step": 17528 }, { "epoch": 1.3027870680044593, "grad_norm": 1.649208073582012, "learning_rate": 1.2585219387150503e-05, "loss": 0.6639, "step": 17529 }, { "epoch": 1.3028613898179116, "grad_norm": 1.7939170144603687, "learning_rate": 1.2584444286776072e-05, "loss": 0.5669, "step": 17530 }, { "epoch": 1.3029357116313638, "grad_norm": 1.7919426556725757, "learning_rate": 1.2583669169763126e-05, "loss": 0.5397, "step": 17531 }, { "epoch": 1.303010033444816, "grad_norm": 1.9399599996348205, "learning_rate": 1.2582894036116653e-05, "loss": 0.6907, "step": 17532 }, { "epoch": 1.3030843552582683, "grad_norm": 1.9137234293533312, "learning_rate": 1.2582118885841639e-05, "loss": 0.5805, "step": 17533 }, { "epoch": 1.3031586770717205, "grad_norm": 2.3521999765469035, "learning_rate": 1.258134371894308e-05, "loss": 0.7596, "step": 17534 }, { "epoch": 1.3032329988851727, "grad_norm": 1.9818736673589148, "learning_rate": 1.258056853542596e-05, "loss": 0.671, "step": 17535 }, { "epoch": 1.303307320698625, "grad_norm": 2.051078586312827, "learning_rate": 1.2579793335295279e-05, "loss": 0.4675, "step": 17536 }, { "epoch": 1.3033816425120772, "grad_norm": 1.750372380148293, "learning_rate": 1.2579018118556019e-05, "loss": 0.4834, "step": 17537 }, { "epoch": 1.3034559643255297, "grad_norm": 1.8439142974059652, "learning_rate": 1.2578242885213177e-05, "loss": 0.5101, "step": 17538 }, { "epoch": 1.3035302861389817, "grad_norm": 1.6566273123960187, "learning_rate": 1.2577467635271738e-05, "loss": 0.5429, "step": 17539 }, { "epoch": 1.3036046079524342, "grad_norm": 2.3179707382990045, "learning_rate": 1.2576692368736696e-05, "loss": 0.7295, "step": 17540 }, { "epoch": 1.3036789297658862, "grad_norm": 1.855632915105546, "learning_rate": 1.2575917085613043e-05, "loss": 0.6186, "step": 17541 }, { "epoch": 1.3037532515793386, "grad_norm": 1.6713027851287812, "learning_rate": 1.257514178590577e-05, "loss": 0.5425, "step": 17542 }, { "epoch": 1.3038275733927907, "grad_norm": 1.8752135346210559, "learning_rate": 1.257436646961987e-05, "loss": 0.6302, "step": 17543 }, { "epoch": 1.3039018952062431, "grad_norm": 2.1671394762843184, "learning_rate": 1.257359113676033e-05, "loss": 0.675, "step": 17544 }, { "epoch": 1.3039762170196953, "grad_norm": 2.1396747083641783, "learning_rate": 1.2572815787332143e-05, "loss": 0.7466, "step": 17545 }, { "epoch": 1.3040505388331476, "grad_norm": 1.560473593318721, "learning_rate": 1.25720404213403e-05, "loss": 0.4962, "step": 17546 }, { "epoch": 1.3041248606465998, "grad_norm": 1.9708024757472677, "learning_rate": 1.25712650387898e-05, "loss": 0.4705, "step": 17547 }, { "epoch": 1.304199182460052, "grad_norm": 2.1902443194511054, "learning_rate": 1.2570489639685625e-05, "loss": 0.6924, "step": 17548 }, { "epoch": 1.3042735042735043, "grad_norm": 2.1323728529709305, "learning_rate": 1.2569714224032772e-05, "loss": 0.6843, "step": 17549 }, { "epoch": 1.3043478260869565, "grad_norm": 1.7244004061849048, "learning_rate": 1.2568938791836231e-05, "loss": 0.6, "step": 17550 }, { "epoch": 1.3044221479004088, "grad_norm": 2.1051171802220354, "learning_rate": 1.2568163343100997e-05, "loss": 0.53, "step": 17551 }, { "epoch": 1.304496469713861, "grad_norm": 2.099381124834, "learning_rate": 1.2567387877832058e-05, "loss": 0.5418, "step": 17552 }, { "epoch": 1.3045707915273133, "grad_norm": 1.477375767365279, "learning_rate": 1.2566612396034412e-05, "loss": 0.5122, "step": 17553 }, { "epoch": 1.3046451133407655, "grad_norm": 2.1697228481623343, "learning_rate": 1.2565836897713047e-05, "loss": 0.712, "step": 17554 }, { "epoch": 1.3047194351542177, "grad_norm": 1.8686076268073983, "learning_rate": 1.2565061382872959e-05, "loss": 0.589, "step": 17555 }, { "epoch": 1.30479375696767, "grad_norm": 1.9395552415045043, "learning_rate": 1.2564285851519139e-05, "loss": 0.619, "step": 17556 }, { "epoch": 1.3048680787811222, "grad_norm": 2.2290281443742312, "learning_rate": 1.2563510303656577e-05, "loss": 0.8203, "step": 17557 }, { "epoch": 1.3049424005945744, "grad_norm": 1.7746510911768059, "learning_rate": 1.256273473929027e-05, "loss": 0.5505, "step": 17558 }, { "epoch": 1.3050167224080267, "grad_norm": 2.0356122214751533, "learning_rate": 1.256195915842521e-05, "loss": 0.6305, "step": 17559 }, { "epoch": 1.305091044221479, "grad_norm": 1.8346362611199358, "learning_rate": 1.256118356106639e-05, "loss": 0.6266, "step": 17560 }, { "epoch": 1.3051653660349314, "grad_norm": 1.6970819196848859, "learning_rate": 1.2560407947218802e-05, "loss": 0.6382, "step": 17561 }, { "epoch": 1.3052396878483834, "grad_norm": 1.714313350681054, "learning_rate": 1.255963231688744e-05, "loss": 0.654, "step": 17562 }, { "epoch": 1.3053140096618359, "grad_norm": 1.9939357356595715, "learning_rate": 1.2558856670077298e-05, "loss": 0.7193, "step": 17563 }, { "epoch": 1.3053883314752879, "grad_norm": 1.6078167442011913, "learning_rate": 1.2558081006793373e-05, "loss": 0.5625, "step": 17564 }, { "epoch": 1.3054626532887403, "grad_norm": 1.6613675182304315, "learning_rate": 1.2557305327040652e-05, "loss": 0.5531, "step": 17565 }, { "epoch": 1.3055369751021926, "grad_norm": 2.1957055274036708, "learning_rate": 1.2556529630824135e-05, "loss": 0.5424, "step": 17566 }, { "epoch": 1.3056112969156448, "grad_norm": 2.182636192318815, "learning_rate": 1.255575391814881e-05, "loss": 0.6774, "step": 17567 }, { "epoch": 1.305685618729097, "grad_norm": 2.851850439385478, "learning_rate": 1.2554978189019674e-05, "loss": 0.6715, "step": 17568 }, { "epoch": 1.3057599405425493, "grad_norm": 1.939274658423624, "learning_rate": 1.255420244344172e-05, "loss": 0.5444, "step": 17569 }, { "epoch": 1.3058342623560015, "grad_norm": 2.2711695840747845, "learning_rate": 1.2553426681419948e-05, "loss": 0.6116, "step": 17570 }, { "epoch": 1.3059085841694538, "grad_norm": 1.7813281106668732, "learning_rate": 1.255265090295934e-05, "loss": 0.5349, "step": 17571 }, { "epoch": 1.305982905982906, "grad_norm": 2.0716452851749456, "learning_rate": 1.2551875108064906e-05, "loss": 0.5567, "step": 17572 }, { "epoch": 1.3060572277963582, "grad_norm": 2.2899255612824945, "learning_rate": 1.2551099296741626e-05, "loss": 0.5902, "step": 17573 }, { "epoch": 1.3061315496098105, "grad_norm": 1.737558665491824, "learning_rate": 1.2550323468994505e-05, "loss": 0.5153, "step": 17574 }, { "epoch": 1.3062058714232627, "grad_norm": 2.3312562448771037, "learning_rate": 1.2549547624828534e-05, "loss": 0.7038, "step": 17575 }, { "epoch": 1.306280193236715, "grad_norm": 2.4222758346314635, "learning_rate": 1.2548771764248704e-05, "loss": 0.7775, "step": 17576 }, { "epoch": 1.3063545150501672, "grad_norm": 2.169564051373742, "learning_rate": 1.2547995887260018e-05, "loss": 0.6713, "step": 17577 }, { "epoch": 1.3064288368636194, "grad_norm": 1.9540253622254504, "learning_rate": 1.2547219993867463e-05, "loss": 0.7079, "step": 17578 }, { "epoch": 1.3065031586770717, "grad_norm": 1.6726462110261697, "learning_rate": 1.2546444084076039e-05, "loss": 0.5572, "step": 17579 }, { "epoch": 1.306577480490524, "grad_norm": 1.8944218537284447, "learning_rate": 1.2545668157890743e-05, "loss": 0.6855, "step": 17580 }, { "epoch": 1.3066518023039762, "grad_norm": 2.1455453581981754, "learning_rate": 1.2544892215316564e-05, "loss": 0.5911, "step": 17581 }, { "epoch": 1.3067261241174284, "grad_norm": 1.7726746240565625, "learning_rate": 1.25441162563585e-05, "loss": 0.6008, "step": 17582 }, { "epoch": 1.3068004459308806, "grad_norm": 1.8714457721564888, "learning_rate": 1.2543340281021552e-05, "loss": 0.6105, "step": 17583 }, { "epoch": 1.306874767744333, "grad_norm": 2.0550399442785077, "learning_rate": 1.2542564289310707e-05, "loss": 0.6054, "step": 17584 }, { "epoch": 1.306949089557785, "grad_norm": 3.070023839516583, "learning_rate": 1.2541788281230969e-05, "loss": 0.5608, "step": 17585 }, { "epoch": 1.3070234113712376, "grad_norm": 1.6630014685345458, "learning_rate": 1.2541012256787328e-05, "loss": 0.5075, "step": 17586 }, { "epoch": 1.3070977331846896, "grad_norm": 1.8914922141494563, "learning_rate": 1.254023621598478e-05, "loss": 0.5726, "step": 17587 }, { "epoch": 1.307172054998142, "grad_norm": 2.3499074309079244, "learning_rate": 1.2539460158828325e-05, "loss": 0.5233, "step": 17588 }, { "epoch": 1.3072463768115943, "grad_norm": 3.5380497769308894, "learning_rate": 1.2538684085322959e-05, "loss": 0.6108, "step": 17589 }, { "epoch": 1.3073206986250465, "grad_norm": 1.961152792233862, "learning_rate": 1.2537907995473672e-05, "loss": 0.6392, "step": 17590 }, { "epoch": 1.3073950204384988, "grad_norm": 1.6542180189856563, "learning_rate": 1.253713188928547e-05, "loss": 0.5503, "step": 17591 }, { "epoch": 1.307469342251951, "grad_norm": 2.171836633824723, "learning_rate": 1.2536355766763342e-05, "loss": 0.5549, "step": 17592 }, { "epoch": 1.3075436640654032, "grad_norm": 1.6622851130738496, "learning_rate": 1.2535579627912286e-05, "loss": 0.6134, "step": 17593 }, { "epoch": 1.3076179858788555, "grad_norm": 2.052566941037006, "learning_rate": 1.2534803472737303e-05, "loss": 0.6796, "step": 17594 }, { "epoch": 1.3076923076923077, "grad_norm": 1.9808546774119515, "learning_rate": 1.2534027301243383e-05, "loss": 0.7322, "step": 17595 }, { "epoch": 1.30776662950576, "grad_norm": 1.7425756667604824, "learning_rate": 1.253325111343553e-05, "loss": 0.5049, "step": 17596 }, { "epoch": 1.3078409513192122, "grad_norm": 2.1021767547482995, "learning_rate": 1.2532474909318737e-05, "loss": 0.703, "step": 17597 }, { "epoch": 1.3079152731326644, "grad_norm": 2.5399138972533524, "learning_rate": 1.2531698688898e-05, "loss": 0.8373, "step": 17598 }, { "epoch": 1.3079895949461167, "grad_norm": 1.6115612340688297, "learning_rate": 1.253092245217832e-05, "loss": 0.5707, "step": 17599 }, { "epoch": 1.308063916759569, "grad_norm": 2.010260737312175, "learning_rate": 1.2530146199164693e-05, "loss": 0.7241, "step": 17600 }, { "epoch": 1.3081382385730211, "grad_norm": 1.8373811847927102, "learning_rate": 1.2529369929862115e-05, "loss": 0.4772, "step": 17601 }, { "epoch": 1.3082125603864734, "grad_norm": 1.7351757312941185, "learning_rate": 1.2528593644275586e-05, "loss": 0.4442, "step": 17602 }, { "epoch": 1.3082868821999256, "grad_norm": 1.9246683570421954, "learning_rate": 1.25278173424101e-05, "loss": 0.5352, "step": 17603 }, { "epoch": 1.3083612040133779, "grad_norm": 1.5418849028393817, "learning_rate": 1.2527041024270658e-05, "loss": 0.3357, "step": 17604 }, { "epoch": 1.30843552582683, "grad_norm": 1.7882137388103898, "learning_rate": 1.2526264689862257e-05, "loss": 0.6194, "step": 17605 }, { "epoch": 1.3085098476402823, "grad_norm": 2.011283426387512, "learning_rate": 1.2525488339189895e-05, "loss": 0.729, "step": 17606 }, { "epoch": 1.3085841694537348, "grad_norm": 2.215487151404899, "learning_rate": 1.252471197225857e-05, "loss": 0.595, "step": 17607 }, { "epoch": 1.3086584912671868, "grad_norm": 1.9680908720120447, "learning_rate": 1.2523935589073284e-05, "loss": 0.7476, "step": 17608 }, { "epoch": 1.3087328130806393, "grad_norm": 1.9028255784586867, "learning_rate": 1.2523159189639027e-05, "loss": 0.6563, "step": 17609 }, { "epoch": 1.3088071348940913, "grad_norm": 2.276748156895078, "learning_rate": 1.2522382773960802e-05, "loss": 0.6844, "step": 17610 }, { "epoch": 1.3088814567075437, "grad_norm": 1.6718860815163465, "learning_rate": 1.252160634204361e-05, "loss": 0.5823, "step": 17611 }, { "epoch": 1.308955778520996, "grad_norm": 2.167646440428672, "learning_rate": 1.2520829893892446e-05, "loss": 0.7035, "step": 17612 }, { "epoch": 1.3090301003344482, "grad_norm": 2.266672041207962, "learning_rate": 1.252005342951231e-05, "loss": 0.6333, "step": 17613 }, { "epoch": 1.3091044221479005, "grad_norm": 2.0741607997481917, "learning_rate": 1.2519276948908204e-05, "loss": 0.6906, "step": 17614 }, { "epoch": 1.3091787439613527, "grad_norm": 1.575825956278592, "learning_rate": 1.2518500452085118e-05, "loss": 0.5634, "step": 17615 }, { "epoch": 1.309253065774805, "grad_norm": 1.7818703266843905, "learning_rate": 1.2517723939048059e-05, "loss": 0.558, "step": 17616 }, { "epoch": 1.3093273875882572, "grad_norm": 2.420614843628054, "learning_rate": 1.2516947409802025e-05, "loss": 0.5625, "step": 17617 }, { "epoch": 1.3094017094017094, "grad_norm": 1.6980016415716235, "learning_rate": 1.2516170864352015e-05, "loss": 0.6576, "step": 17618 }, { "epoch": 1.3094760312151617, "grad_norm": 1.8761130534988928, "learning_rate": 1.2515394302703025e-05, "loss": 0.5587, "step": 17619 }, { "epoch": 1.309550353028614, "grad_norm": 1.6946925872395828, "learning_rate": 1.251461772486006e-05, "loss": 0.4595, "step": 17620 }, { "epoch": 1.3096246748420661, "grad_norm": 2.159910124139017, "learning_rate": 1.2513841130828112e-05, "loss": 0.5623, "step": 17621 }, { "epoch": 1.3096989966555184, "grad_norm": 2.1909367273307763, "learning_rate": 1.2513064520612188e-05, "loss": 0.7346, "step": 17622 }, { "epoch": 1.3097733184689706, "grad_norm": 1.6161038586121106, "learning_rate": 1.2512287894217285e-05, "loss": 0.5215, "step": 17623 }, { "epoch": 1.3098476402824228, "grad_norm": 2.0021727372348113, "learning_rate": 1.2511511251648405e-05, "loss": 0.7113, "step": 17624 }, { "epoch": 1.309921962095875, "grad_norm": 1.7870639878994967, "learning_rate": 1.2510734592910542e-05, "loss": 0.5332, "step": 17625 }, { "epoch": 1.3099962839093273, "grad_norm": 2.0669031821966066, "learning_rate": 1.2509957918008702e-05, "loss": 0.6971, "step": 17626 }, { "epoch": 1.3100706057227796, "grad_norm": 2.1115947225599747, "learning_rate": 1.2509181226947882e-05, "loss": 0.6823, "step": 17627 }, { "epoch": 1.310144927536232, "grad_norm": 1.6688172237589116, "learning_rate": 1.2508404519733084e-05, "loss": 0.4926, "step": 17628 }, { "epoch": 1.310219249349684, "grad_norm": 1.4882583516878505, "learning_rate": 1.250762779636931e-05, "loss": 0.4736, "step": 17629 }, { "epoch": 1.3102935711631365, "grad_norm": 1.7827868475540154, "learning_rate": 1.2506851056861557e-05, "loss": 0.6719, "step": 17630 }, { "epoch": 1.3103678929765885, "grad_norm": 1.5471285793044622, "learning_rate": 1.2506074301214826e-05, "loss": 0.4847, "step": 17631 }, { "epoch": 1.310442214790041, "grad_norm": 2.2015437660092974, "learning_rate": 1.2505297529434117e-05, "loss": 0.6542, "step": 17632 }, { "epoch": 1.310516536603493, "grad_norm": 1.7314696338712923, "learning_rate": 1.2504520741524435e-05, "loss": 0.6165, "step": 17633 }, { "epoch": 1.3105908584169454, "grad_norm": 2.3540253429251132, "learning_rate": 1.2503743937490778e-05, "loss": 0.6357, "step": 17634 }, { "epoch": 1.3106651802303977, "grad_norm": 1.7918222038078404, "learning_rate": 1.250296711733815e-05, "loss": 0.6024, "step": 17635 }, { "epoch": 1.31073950204385, "grad_norm": 2.941655507876958, "learning_rate": 1.2502190281071544e-05, "loss": 0.6992, "step": 17636 }, { "epoch": 1.3108138238573022, "grad_norm": 2.120567591493748, "learning_rate": 1.2501413428695969e-05, "loss": 0.5991, "step": 17637 }, { "epoch": 1.3108881456707544, "grad_norm": 5.7004058262783275, "learning_rate": 1.2500636560216426e-05, "loss": 0.5361, "step": 17638 }, { "epoch": 1.3109624674842066, "grad_norm": 2.114342272645451, "learning_rate": 1.2499859675637916e-05, "loss": 0.6484, "step": 17639 }, { "epoch": 1.3110367892976589, "grad_norm": 1.7082207308219914, "learning_rate": 1.2499082774965437e-05, "loss": 0.6117, "step": 17640 }, { "epoch": 1.3111111111111111, "grad_norm": 1.6249072471105677, "learning_rate": 1.2498305858203991e-05, "loss": 0.5648, "step": 17641 }, { "epoch": 1.3111854329245634, "grad_norm": 2.2302522447142756, "learning_rate": 1.2497528925358582e-05, "loss": 0.6752, "step": 17642 }, { "epoch": 1.3112597547380156, "grad_norm": 2.025037223399432, "learning_rate": 1.2496751976434211e-05, "loss": 0.5875, "step": 17643 }, { "epoch": 1.3113340765514678, "grad_norm": 2.6676015894563694, "learning_rate": 1.2495975011435882e-05, "loss": 0.7748, "step": 17644 }, { "epoch": 1.31140839836492, "grad_norm": 2.52268035763302, "learning_rate": 1.2495198030368596e-05, "loss": 0.6287, "step": 17645 }, { "epoch": 1.3114827201783723, "grad_norm": 1.8418211925899497, "learning_rate": 1.2494421033237355e-05, "loss": 0.3897, "step": 17646 }, { "epoch": 1.3115570419918245, "grad_norm": 2.2517902972303463, "learning_rate": 1.2493644020047158e-05, "loss": 0.6455, "step": 17647 }, { "epoch": 1.3116313638052768, "grad_norm": 1.811411450784367, "learning_rate": 1.2492866990803013e-05, "loss": 0.5168, "step": 17648 }, { "epoch": 1.311705685618729, "grad_norm": 2.4585281360820597, "learning_rate": 1.2492089945509916e-05, "loss": 0.6331, "step": 17649 }, { "epoch": 1.3117800074321813, "grad_norm": 2.2290101748212674, "learning_rate": 1.2491312884172877e-05, "loss": 0.5593, "step": 17650 }, { "epoch": 1.3118543292456337, "grad_norm": 1.9775504953976608, "learning_rate": 1.2490535806796896e-05, "loss": 0.5517, "step": 17651 }, { "epoch": 1.3119286510590857, "grad_norm": 2.064636797541324, "learning_rate": 1.248975871338697e-05, "loss": 0.6, "step": 17652 }, { "epoch": 1.3120029728725382, "grad_norm": 1.5324606136706191, "learning_rate": 1.2488981603948106e-05, "loss": 0.4081, "step": 17653 }, { "epoch": 1.3120772946859902, "grad_norm": 1.9080018323221097, "learning_rate": 1.2488204478485311e-05, "loss": 0.6401, "step": 17654 }, { "epoch": 1.3121516164994427, "grad_norm": 1.788789638745955, "learning_rate": 1.2487427337003583e-05, "loss": 0.6185, "step": 17655 }, { "epoch": 1.312225938312895, "grad_norm": 1.8978805008786603, "learning_rate": 1.2486650179507928e-05, "loss": 0.6735, "step": 17656 }, { "epoch": 1.3123002601263472, "grad_norm": 2.3280909838978454, "learning_rate": 1.248587300600335e-05, "loss": 0.7225, "step": 17657 }, { "epoch": 1.3123745819397994, "grad_norm": 2.2704663330215054, "learning_rate": 1.2485095816494848e-05, "loss": 0.5921, "step": 17658 }, { "epoch": 1.3124489037532516, "grad_norm": 1.532253379690264, "learning_rate": 1.2484318610987427e-05, "loss": 0.3996, "step": 17659 }, { "epoch": 1.3125232255667039, "grad_norm": 1.554958289769786, "learning_rate": 1.2483541389486094e-05, "loss": 0.5049, "step": 17660 }, { "epoch": 1.312597547380156, "grad_norm": 2.1058638658123696, "learning_rate": 1.248276415199585e-05, "loss": 0.6272, "step": 17661 }, { "epoch": 1.3126718691936083, "grad_norm": 1.9485701353461033, "learning_rate": 1.24819868985217e-05, "loss": 0.5615, "step": 17662 }, { "epoch": 1.3127461910070606, "grad_norm": 1.988733113478386, "learning_rate": 1.2481209629068647e-05, "loss": 0.567, "step": 17663 }, { "epoch": 1.3128205128205128, "grad_norm": 1.884217463248092, "learning_rate": 1.2480432343641693e-05, "loss": 0.5627, "step": 17664 }, { "epoch": 1.312894834633965, "grad_norm": 2.1351896006982587, "learning_rate": 1.2479655042245845e-05, "loss": 0.6369, "step": 17665 }, { "epoch": 1.3129691564474173, "grad_norm": 1.587616050822647, "learning_rate": 1.2478877724886104e-05, "loss": 0.4933, "step": 17666 }, { "epoch": 1.3130434782608695, "grad_norm": 1.8294313249210679, "learning_rate": 1.2478100391567482e-05, "loss": 0.5645, "step": 17667 }, { "epoch": 1.3131178000743218, "grad_norm": 2.1038209471162883, "learning_rate": 1.2477323042294977e-05, "loss": 0.6166, "step": 17668 }, { "epoch": 1.313192121887774, "grad_norm": 1.8574478974032889, "learning_rate": 1.2476545677073592e-05, "loss": 0.433, "step": 17669 }, { "epoch": 1.3132664437012262, "grad_norm": 1.7883657486831834, "learning_rate": 1.2475768295908337e-05, "loss": 0.5454, "step": 17670 }, { "epoch": 1.3133407655146785, "grad_norm": 1.8459293159143044, "learning_rate": 1.2474990898804214e-05, "loss": 0.6184, "step": 17671 }, { "epoch": 1.3134150873281307, "grad_norm": 2.0302434695096987, "learning_rate": 1.2474213485766226e-05, "loss": 0.5453, "step": 17672 }, { "epoch": 1.313489409141583, "grad_norm": 1.960476475900854, "learning_rate": 1.2473436056799383e-05, "loss": 0.719, "step": 17673 }, { "epoch": 1.3135637309550354, "grad_norm": 1.9280366538773521, "learning_rate": 1.2472658611908685e-05, "loss": 0.6685, "step": 17674 }, { "epoch": 1.3136380527684874, "grad_norm": 2.1864725438111114, "learning_rate": 1.247188115109914e-05, "loss": 0.633, "step": 17675 }, { "epoch": 1.31371237458194, "grad_norm": 1.9017738214169058, "learning_rate": 1.247110367437575e-05, "loss": 0.6519, "step": 17676 }, { "epoch": 1.313786696395392, "grad_norm": 2.114681111011688, "learning_rate": 1.2470326181743524e-05, "loss": 0.5823, "step": 17677 }, { "epoch": 1.3138610182088444, "grad_norm": 1.6358056712561073, "learning_rate": 1.246954867320747e-05, "loss": 0.5689, "step": 17678 }, { "epoch": 1.3139353400222966, "grad_norm": 2.293660316028991, "learning_rate": 1.2468771148772586e-05, "loss": 0.8189, "step": 17679 }, { "epoch": 1.3140096618357489, "grad_norm": 2.1636894617572016, "learning_rate": 1.2467993608443882e-05, "loss": 0.7132, "step": 17680 }, { "epoch": 1.314083983649201, "grad_norm": 1.8184974669708724, "learning_rate": 1.2467216052226363e-05, "loss": 0.5902, "step": 17681 }, { "epoch": 1.3141583054626533, "grad_norm": 2.050815965419113, "learning_rate": 1.2466438480125034e-05, "loss": 0.6597, "step": 17682 }, { "epoch": 1.3142326272761056, "grad_norm": 1.9073403997102367, "learning_rate": 1.2465660892144906e-05, "loss": 0.6959, "step": 17683 }, { "epoch": 1.3143069490895578, "grad_norm": 1.859024156819681, "learning_rate": 1.2464883288290977e-05, "loss": 0.5869, "step": 17684 }, { "epoch": 1.31438127090301, "grad_norm": 1.6322264380227594, "learning_rate": 1.246410566856826e-05, "loss": 0.5104, "step": 17685 }, { "epoch": 1.3144555927164623, "grad_norm": 1.6553503647958046, "learning_rate": 1.2463328032981757e-05, "loss": 0.5149, "step": 17686 }, { "epoch": 1.3145299145299145, "grad_norm": 1.8995870530374057, "learning_rate": 1.2462550381536475e-05, "loss": 0.5693, "step": 17687 }, { "epoch": 1.3146042363433668, "grad_norm": 1.7508519078378084, "learning_rate": 1.2461772714237424e-05, "loss": 0.5453, "step": 17688 }, { "epoch": 1.314678558156819, "grad_norm": 2.382105938157514, "learning_rate": 1.2460995031089607e-05, "loss": 0.6636, "step": 17689 }, { "epoch": 1.3147528799702712, "grad_norm": 2.1305553552385614, "learning_rate": 1.2460217332098032e-05, "loss": 0.6983, "step": 17690 }, { "epoch": 1.3148272017837235, "grad_norm": 2.7663651905649593, "learning_rate": 1.2459439617267703e-05, "loss": 0.6039, "step": 17691 }, { "epoch": 1.3149015235971757, "grad_norm": 2.138628722279551, "learning_rate": 1.2458661886603631e-05, "loss": 0.6562, "step": 17692 }, { "epoch": 1.314975845410628, "grad_norm": 2.1408851493499785, "learning_rate": 1.2457884140110821e-05, "loss": 0.6761, "step": 17693 }, { "epoch": 1.3150501672240802, "grad_norm": 1.477018649824422, "learning_rate": 1.2457106377794283e-05, "loss": 0.4193, "step": 17694 }, { "epoch": 1.3151244890375327, "grad_norm": 2.124862849075026, "learning_rate": 1.245632859965902e-05, "loss": 0.5992, "step": 17695 }, { "epoch": 1.3151988108509847, "grad_norm": 2.2188105658190493, "learning_rate": 1.2455550805710041e-05, "loss": 0.5804, "step": 17696 }, { "epoch": 1.3152731326644371, "grad_norm": 1.8963252864366291, "learning_rate": 1.245477299595235e-05, "loss": 0.6409, "step": 17697 }, { "epoch": 1.3153474544778891, "grad_norm": 3.375389796820321, "learning_rate": 1.2453995170390963e-05, "loss": 0.6371, "step": 17698 }, { "epoch": 1.3154217762913416, "grad_norm": 1.865800916731262, "learning_rate": 1.2453217329030878e-05, "loss": 0.644, "step": 17699 }, { "epoch": 1.3154960981047936, "grad_norm": 2.0941790561286493, "learning_rate": 1.2452439471877111e-05, "loss": 0.7902, "step": 17700 }, { "epoch": 1.315570419918246, "grad_norm": 2.5722112348895556, "learning_rate": 1.2451661598934664e-05, "loss": 0.7883, "step": 17701 }, { "epoch": 1.3156447417316983, "grad_norm": 1.9904397605893898, "learning_rate": 1.2450883710208547e-05, "loss": 0.6237, "step": 17702 }, { "epoch": 1.3157190635451506, "grad_norm": 1.7527993670332964, "learning_rate": 1.2450105805703769e-05, "loss": 0.6584, "step": 17703 }, { "epoch": 1.3157933853586028, "grad_norm": 2.612357999738549, "learning_rate": 1.2449327885425336e-05, "loss": 0.6916, "step": 17704 }, { "epoch": 1.315867707172055, "grad_norm": 1.868090989536448, "learning_rate": 1.2448549949378256e-05, "loss": 0.7105, "step": 17705 }, { "epoch": 1.3159420289855073, "grad_norm": 1.4967238559154006, "learning_rate": 1.244777199756754e-05, "loss": 0.4862, "step": 17706 }, { "epoch": 1.3160163507989595, "grad_norm": 1.610356559827077, "learning_rate": 1.2446994029998197e-05, "loss": 0.5023, "step": 17707 }, { "epoch": 1.3160906726124117, "grad_norm": 1.89643662979529, "learning_rate": 1.2446216046675229e-05, "loss": 0.6519, "step": 17708 }, { "epoch": 1.316164994425864, "grad_norm": 2.1238360369963125, "learning_rate": 1.2445438047603648e-05, "loss": 0.6022, "step": 17709 }, { "epoch": 1.3162393162393162, "grad_norm": 1.5918774965308977, "learning_rate": 1.2444660032788468e-05, "loss": 0.4365, "step": 17710 }, { "epoch": 1.3163136380527685, "grad_norm": 1.7993601346035308, "learning_rate": 1.2443882002234693e-05, "loss": 0.4678, "step": 17711 }, { "epoch": 1.3163879598662207, "grad_norm": 2.1992122662709366, "learning_rate": 1.2443103955947331e-05, "loss": 0.5871, "step": 17712 }, { "epoch": 1.316462281679673, "grad_norm": 1.5938627461996369, "learning_rate": 1.2442325893931392e-05, "loss": 0.4046, "step": 17713 }, { "epoch": 1.3165366034931252, "grad_norm": 2.1143358643222507, "learning_rate": 1.2441547816191888e-05, "loss": 0.716, "step": 17714 }, { "epoch": 1.3166109253065774, "grad_norm": 2.0870073039262, "learning_rate": 1.2440769722733823e-05, "loss": 0.7199, "step": 17715 }, { "epoch": 1.3166852471200297, "grad_norm": 1.5179003393575632, "learning_rate": 1.2439991613562212e-05, "loss": 0.5493, "step": 17716 }, { "epoch": 1.316759568933482, "grad_norm": 2.81133093680412, "learning_rate": 1.243921348868206e-05, "loss": 0.7133, "step": 17717 }, { "epoch": 1.3168338907469344, "grad_norm": 1.6533043471186584, "learning_rate": 1.2438435348098376e-05, "loss": 0.5758, "step": 17718 }, { "epoch": 1.3169082125603864, "grad_norm": 1.7576103698088672, "learning_rate": 1.2437657191816178e-05, "loss": 0.5378, "step": 17719 }, { "epoch": 1.3169825343738388, "grad_norm": 1.9179968481799339, "learning_rate": 1.2436879019840465e-05, "loss": 0.6018, "step": 17720 }, { "epoch": 1.3170568561872908, "grad_norm": 2.1552386739787934, "learning_rate": 1.243610083217625e-05, "loss": 0.6648, "step": 17721 }, { "epoch": 1.3171311780007433, "grad_norm": 2.063309874787098, "learning_rate": 1.2435322628828547e-05, "loss": 0.4628, "step": 17722 }, { "epoch": 1.3172054998141955, "grad_norm": 1.8461178448713695, "learning_rate": 1.2434544409802363e-05, "loss": 0.5345, "step": 17723 }, { "epoch": 1.3172798216276478, "grad_norm": 2.0125096379356626, "learning_rate": 1.2433766175102708e-05, "loss": 0.6887, "step": 17724 }, { "epoch": 1.3173541434411, "grad_norm": 1.8229197329765296, "learning_rate": 1.2432987924734592e-05, "loss": 0.4844, "step": 17725 }, { "epoch": 1.3174284652545523, "grad_norm": 2.068175255596628, "learning_rate": 1.2432209658703026e-05, "loss": 0.5531, "step": 17726 }, { "epoch": 1.3175027870680045, "grad_norm": 2.0388552369045545, "learning_rate": 1.243143137701302e-05, "loss": 0.5732, "step": 17727 }, { "epoch": 1.3175771088814567, "grad_norm": 2.0129631778423103, "learning_rate": 1.2430653079669587e-05, "loss": 0.7767, "step": 17728 }, { "epoch": 1.317651430694909, "grad_norm": 1.987291505100822, "learning_rate": 1.2429874766677733e-05, "loss": 0.622, "step": 17729 }, { "epoch": 1.3177257525083612, "grad_norm": 1.8890974095507165, "learning_rate": 1.2429096438042474e-05, "loss": 0.5762, "step": 17730 }, { "epoch": 1.3178000743218135, "grad_norm": 2.270512558257695, "learning_rate": 1.2428318093768816e-05, "loss": 0.6921, "step": 17731 }, { "epoch": 1.3178743961352657, "grad_norm": 1.5600703692733031, "learning_rate": 1.2427539733861774e-05, "loss": 0.3736, "step": 17732 }, { "epoch": 1.317948717948718, "grad_norm": 1.811979851527645, "learning_rate": 1.2426761358326356e-05, "loss": 0.4096, "step": 17733 }, { "epoch": 1.3180230397621702, "grad_norm": 2.005568206971156, "learning_rate": 1.2425982967167574e-05, "loss": 0.6646, "step": 17734 }, { "epoch": 1.3180973615756224, "grad_norm": 1.9189838183079684, "learning_rate": 1.242520456039044e-05, "loss": 0.6403, "step": 17735 }, { "epoch": 1.3181716833890746, "grad_norm": 2.373585818209044, "learning_rate": 1.2424426137999965e-05, "loss": 0.7231, "step": 17736 }, { "epoch": 1.3182460052025269, "grad_norm": 2.428798422831773, "learning_rate": 1.242364770000116e-05, "loss": 0.7756, "step": 17737 }, { "epoch": 1.3183203270159791, "grad_norm": 1.756181255088982, "learning_rate": 1.2422869246399038e-05, "loss": 0.5849, "step": 17738 }, { "epoch": 1.3183946488294314, "grad_norm": 1.6796857910014074, "learning_rate": 1.2422090777198608e-05, "loss": 0.5375, "step": 17739 }, { "epoch": 1.3184689706428836, "grad_norm": 15.744581338515117, "learning_rate": 1.2421312292404883e-05, "loss": 0.6845, "step": 17740 }, { "epoch": 1.318543292456336, "grad_norm": 1.9743924441191192, "learning_rate": 1.2420533792022878e-05, "loss": 0.6043, "step": 17741 }, { "epoch": 1.318617614269788, "grad_norm": 2.534349616015023, "learning_rate": 1.2419755276057598e-05, "loss": 0.5234, "step": 17742 }, { "epoch": 1.3186919360832405, "grad_norm": 2.402531252521633, "learning_rate": 1.2418976744514065e-05, "loss": 0.5729, "step": 17743 }, { "epoch": 1.3187662578966926, "grad_norm": 2.1360077323636912, "learning_rate": 1.241819819739728e-05, "loss": 0.655, "step": 17744 }, { "epoch": 1.318840579710145, "grad_norm": 1.9808541676464795, "learning_rate": 1.241741963471226e-05, "loss": 0.5272, "step": 17745 }, { "epoch": 1.3189149015235972, "grad_norm": 1.938983752454906, "learning_rate": 1.2416641056464019e-05, "loss": 0.6189, "step": 17746 }, { "epoch": 1.3189892233370495, "grad_norm": 1.8580505861351677, "learning_rate": 1.2415862462657572e-05, "loss": 0.5163, "step": 17747 }, { "epoch": 1.3190635451505017, "grad_norm": 2.1420539148325566, "learning_rate": 1.2415083853297922e-05, "loss": 0.5657, "step": 17748 }, { "epoch": 1.319137866963954, "grad_norm": 1.4278914002809147, "learning_rate": 1.241430522839009e-05, "loss": 0.3874, "step": 17749 }, { "epoch": 1.3192121887774062, "grad_norm": 2.5841834798073924, "learning_rate": 1.2413526587939086e-05, "loss": 0.6934, "step": 17750 }, { "epoch": 1.3192865105908584, "grad_norm": 1.9956765749852228, "learning_rate": 1.2412747931949924e-05, "loss": 0.5012, "step": 17751 }, { "epoch": 1.3193608324043107, "grad_norm": 2.229626009343762, "learning_rate": 1.2411969260427614e-05, "loss": 0.5489, "step": 17752 }, { "epoch": 1.319435154217763, "grad_norm": 1.9564710192540675, "learning_rate": 1.2411190573377175e-05, "loss": 0.6397, "step": 17753 }, { "epoch": 1.3195094760312152, "grad_norm": 2.5233027033896267, "learning_rate": 1.2410411870803613e-05, "loss": 0.6981, "step": 17754 }, { "epoch": 1.3195837978446674, "grad_norm": 2.35422663839025, "learning_rate": 1.2409633152711947e-05, "loss": 0.6106, "step": 17755 }, { "epoch": 1.3196581196581196, "grad_norm": 2.284282824655367, "learning_rate": 1.2408854419107184e-05, "loss": 0.6194, "step": 17756 }, { "epoch": 1.3197324414715719, "grad_norm": 2.2587850959569313, "learning_rate": 1.2408075669994341e-05, "loss": 0.5591, "step": 17757 }, { "epoch": 1.319806763285024, "grad_norm": 1.8586774002937423, "learning_rate": 1.2407296905378436e-05, "loss": 0.5765, "step": 17758 }, { "epoch": 1.3198810850984763, "grad_norm": 2.118079302081274, "learning_rate": 1.2406518125264475e-05, "loss": 0.7064, "step": 17759 }, { "epoch": 1.3199554069119286, "grad_norm": 2.455065727600928, "learning_rate": 1.2405739329657478e-05, "loss": 0.6252, "step": 17760 }, { "epoch": 1.3200297287253808, "grad_norm": 1.936025302396046, "learning_rate": 1.2404960518562455e-05, "loss": 0.6495, "step": 17761 }, { "epoch": 1.3201040505388333, "grad_norm": 1.4962646740495056, "learning_rate": 1.2404181691984419e-05, "loss": 0.491, "step": 17762 }, { "epoch": 1.3201783723522853, "grad_norm": 1.658978290379535, "learning_rate": 1.2403402849928388e-05, "loss": 0.4454, "step": 17763 }, { "epoch": 1.3202526941657378, "grad_norm": 2.0118337641355617, "learning_rate": 1.2402623992399375e-05, "loss": 0.6824, "step": 17764 }, { "epoch": 1.3203270159791898, "grad_norm": 2.0668559396124664, "learning_rate": 1.2401845119402391e-05, "loss": 0.5736, "step": 17765 }, { "epoch": 1.3204013377926422, "grad_norm": 2.2023019371733006, "learning_rate": 1.2401066230942453e-05, "loss": 0.7288, "step": 17766 }, { "epoch": 1.3204756596060943, "grad_norm": 1.729674169505831, "learning_rate": 1.2400287327024576e-05, "loss": 0.5768, "step": 17767 }, { "epoch": 1.3205499814195467, "grad_norm": 2.0139091523738997, "learning_rate": 1.2399508407653773e-05, "loss": 0.6791, "step": 17768 }, { "epoch": 1.320624303232999, "grad_norm": 3.2331087675620447, "learning_rate": 1.2398729472835063e-05, "loss": 0.7782, "step": 17769 }, { "epoch": 1.3206986250464512, "grad_norm": 3.4130052028610067, "learning_rate": 1.2397950522573452e-05, "loss": 0.6797, "step": 17770 }, { "epoch": 1.3207729468599034, "grad_norm": 2.145414782441493, "learning_rate": 1.2397171556873964e-05, "loss": 0.5894, "step": 17771 }, { "epoch": 1.3208472686733557, "grad_norm": 3.2428582419291234, "learning_rate": 1.2396392575741606e-05, "loss": 0.4435, "step": 17772 }, { "epoch": 1.320921590486808, "grad_norm": 1.9796072124696997, "learning_rate": 1.23956135791814e-05, "loss": 0.5888, "step": 17773 }, { "epoch": 1.3209959123002601, "grad_norm": 1.6627144825404316, "learning_rate": 1.2394834567198356e-05, "loss": 0.5464, "step": 17774 }, { "epoch": 1.3210702341137124, "grad_norm": 2.194640286348812, "learning_rate": 1.2394055539797494e-05, "loss": 0.6925, "step": 17775 }, { "epoch": 1.3211445559271646, "grad_norm": 1.737286185415176, "learning_rate": 1.2393276496983825e-05, "loss": 0.61, "step": 17776 }, { "epoch": 1.3212188777406169, "grad_norm": 1.6474710451826353, "learning_rate": 1.2392497438762368e-05, "loss": 0.5248, "step": 17777 }, { "epoch": 1.321293199554069, "grad_norm": 2.0639184456788464, "learning_rate": 1.2391718365138132e-05, "loss": 0.677, "step": 17778 }, { "epoch": 1.3213675213675213, "grad_norm": 1.7018219411610351, "learning_rate": 1.239093927611614e-05, "loss": 0.6502, "step": 17779 }, { "epoch": 1.3214418431809736, "grad_norm": 2.399092191158439, "learning_rate": 1.2390160171701404e-05, "loss": 0.6125, "step": 17780 }, { "epoch": 1.3215161649944258, "grad_norm": 2.1012630808897033, "learning_rate": 1.2389381051898944e-05, "loss": 0.6312, "step": 17781 }, { "epoch": 1.321590486807878, "grad_norm": 1.830049563816863, "learning_rate": 1.2388601916713772e-05, "loss": 0.6132, "step": 17782 }, { "epoch": 1.3216648086213303, "grad_norm": 1.8641890097333904, "learning_rate": 1.2387822766150901e-05, "loss": 0.6596, "step": 17783 }, { "epoch": 1.3217391304347825, "grad_norm": 1.9594611158988875, "learning_rate": 1.2387043600215354e-05, "loss": 0.8038, "step": 17784 }, { "epoch": 1.321813452248235, "grad_norm": 1.9594136238281052, "learning_rate": 1.2386264418912142e-05, "loss": 0.6204, "step": 17785 }, { "epoch": 1.321887774061687, "grad_norm": 2.0859781583153194, "learning_rate": 1.2385485222246288e-05, "loss": 0.4283, "step": 17786 }, { "epoch": 1.3219620958751395, "grad_norm": 1.8598702589393865, "learning_rate": 1.23847060102228e-05, "loss": 0.6656, "step": 17787 }, { "epoch": 1.3220364176885915, "grad_norm": 1.8493123547856498, "learning_rate": 1.2383926782846701e-05, "loss": 0.5591, "step": 17788 }, { "epoch": 1.322110739502044, "grad_norm": 1.8317832658681825, "learning_rate": 1.2383147540123002e-05, "loss": 0.6847, "step": 17789 }, { "epoch": 1.3221850613154962, "grad_norm": 2.093122351881208, "learning_rate": 1.2382368282056723e-05, "loss": 0.5679, "step": 17790 }, { "epoch": 1.3222593831289484, "grad_norm": 2.4033615375399746, "learning_rate": 1.238158900865288e-05, "loss": 0.626, "step": 17791 }, { "epoch": 1.3223337049424007, "grad_norm": 1.698043702986231, "learning_rate": 1.2380809719916493e-05, "loss": 0.4534, "step": 17792 }, { "epoch": 1.322408026755853, "grad_norm": 1.9830797366755957, "learning_rate": 1.2380030415852578e-05, "loss": 0.5553, "step": 17793 }, { "epoch": 1.3224823485693051, "grad_norm": 1.9370552081347971, "learning_rate": 1.2379251096466147e-05, "loss": 0.7144, "step": 17794 }, { "epoch": 1.3225566703827574, "grad_norm": 1.819758472327607, "learning_rate": 1.2378471761762223e-05, "loss": 0.5626, "step": 17795 }, { "epoch": 1.3226309921962096, "grad_norm": 1.6641015411695674, "learning_rate": 1.2377692411745819e-05, "loss": 0.6101, "step": 17796 }, { "epoch": 1.3227053140096618, "grad_norm": 1.9933197866434023, "learning_rate": 1.2376913046421958e-05, "loss": 0.6836, "step": 17797 }, { "epoch": 1.322779635823114, "grad_norm": 1.9101760150395644, "learning_rate": 1.2376133665795656e-05, "loss": 0.6456, "step": 17798 }, { "epoch": 1.3228539576365663, "grad_norm": 2.063192578755513, "learning_rate": 1.2375354269871921e-05, "loss": 0.6919, "step": 17799 }, { "epoch": 1.3229282794500186, "grad_norm": 2.3982591817072354, "learning_rate": 1.2374574858655783e-05, "loss": 0.7581, "step": 17800 }, { "epoch": 1.3230026012634708, "grad_norm": 1.9051958250628445, "learning_rate": 1.2373795432152255e-05, "loss": 0.5773, "step": 17801 }, { "epoch": 1.323076923076923, "grad_norm": 2.19250959826796, "learning_rate": 1.2373015990366354e-05, "loss": 0.74, "step": 17802 }, { "epoch": 1.3231512448903753, "grad_norm": 1.816943854976443, "learning_rate": 1.23722365333031e-05, "loss": 0.5663, "step": 17803 }, { "epoch": 1.3232255667038275, "grad_norm": 1.9857875062083292, "learning_rate": 1.2371457060967513e-05, "loss": 0.6081, "step": 17804 }, { "epoch": 1.3232998885172798, "grad_norm": 1.718157330920403, "learning_rate": 1.2370677573364607e-05, "loss": 0.6077, "step": 17805 }, { "epoch": 1.323374210330732, "grad_norm": 2.4695544329037467, "learning_rate": 1.23698980704994e-05, "loss": 0.574, "step": 17806 }, { "epoch": 1.3234485321441842, "grad_norm": 2.3512463012965443, "learning_rate": 1.2369118552376911e-05, "loss": 0.7444, "step": 17807 }, { "epoch": 1.3235228539576367, "grad_norm": 3.857904841893964, "learning_rate": 1.2368339019002162e-05, "loss": 0.453, "step": 17808 }, { "epoch": 1.3235971757710887, "grad_norm": 2.270208075347377, "learning_rate": 1.236755947038017e-05, "loss": 0.6475, "step": 17809 }, { "epoch": 1.3236714975845412, "grad_norm": 1.939151574771747, "learning_rate": 1.2366779906515954e-05, "loss": 0.531, "step": 17810 }, { "epoch": 1.3237458193979932, "grad_norm": 2.3686509641740483, "learning_rate": 1.2366000327414528e-05, "loss": 0.8337, "step": 17811 }, { "epoch": 1.3238201412114456, "grad_norm": 1.9623695415091253, "learning_rate": 1.2365220733080917e-05, "loss": 0.7173, "step": 17812 }, { "epoch": 1.3238944630248979, "grad_norm": 1.5191435170028726, "learning_rate": 1.2364441123520135e-05, "loss": 0.4575, "step": 17813 }, { "epoch": 1.3239687848383501, "grad_norm": 1.7930550549344935, "learning_rate": 1.2363661498737207e-05, "loss": 0.693, "step": 17814 }, { "epoch": 1.3240431066518024, "grad_norm": 2.1108034545853904, "learning_rate": 1.2362881858737148e-05, "loss": 0.6024, "step": 17815 }, { "epoch": 1.3241174284652546, "grad_norm": 2.6357494769389223, "learning_rate": 1.2362102203524977e-05, "loss": 0.8002, "step": 17816 }, { "epoch": 1.3241917502787068, "grad_norm": 2.6352106216892275, "learning_rate": 1.2361322533105715e-05, "loss": 0.723, "step": 17817 }, { "epoch": 1.324266072092159, "grad_norm": 1.557359178975735, "learning_rate": 1.236054284748438e-05, "loss": 0.5597, "step": 17818 }, { "epoch": 1.3243403939056113, "grad_norm": 1.4351644765724119, "learning_rate": 1.2359763146665998e-05, "loss": 0.4439, "step": 17819 }, { "epoch": 1.3244147157190636, "grad_norm": 2.1388539704393414, "learning_rate": 1.235898343065558e-05, "loss": 0.6575, "step": 17820 }, { "epoch": 1.3244890375325158, "grad_norm": 1.8887436893463603, "learning_rate": 1.2358203699458147e-05, "loss": 0.5005, "step": 17821 }, { "epoch": 1.324563359345968, "grad_norm": 1.7109332861216673, "learning_rate": 1.2357423953078722e-05, "loss": 0.6087, "step": 17822 }, { "epoch": 1.3246376811594203, "grad_norm": 1.7764258858165316, "learning_rate": 1.2356644191522325e-05, "loss": 0.5747, "step": 17823 }, { "epoch": 1.3247120029728725, "grad_norm": 1.8941579255954817, "learning_rate": 1.2355864414793972e-05, "loss": 0.5479, "step": 17824 }, { "epoch": 1.3247863247863247, "grad_norm": 1.946367594550936, "learning_rate": 1.235508462289869e-05, "loss": 0.654, "step": 17825 }, { "epoch": 1.324860646599777, "grad_norm": 1.9452712720185683, "learning_rate": 1.2354304815841497e-05, "loss": 0.5082, "step": 17826 }, { "epoch": 1.3249349684132292, "grad_norm": 1.5669569650569317, "learning_rate": 1.2353524993627407e-05, "loss": 0.4568, "step": 17827 }, { "epoch": 1.3250092902266815, "grad_norm": 2.2545810330225886, "learning_rate": 1.2352745156261445e-05, "loss": 0.6488, "step": 17828 }, { "epoch": 1.325083612040134, "grad_norm": 2.571794690052946, "learning_rate": 1.2351965303748633e-05, "loss": 0.5725, "step": 17829 }, { "epoch": 1.325157933853586, "grad_norm": 2.250107667800606, "learning_rate": 1.2351185436093993e-05, "loss": 0.7325, "step": 17830 }, { "epoch": 1.3252322556670384, "grad_norm": 2.051559087940375, "learning_rate": 1.235040555330254e-05, "loss": 0.5707, "step": 17831 }, { "epoch": 1.3253065774804904, "grad_norm": 1.5986735654903823, "learning_rate": 1.23496256553793e-05, "loss": 0.4913, "step": 17832 }, { "epoch": 1.3253808992939429, "grad_norm": 1.6817392922162449, "learning_rate": 1.234884574232929e-05, "loss": 0.5554, "step": 17833 }, { "epoch": 1.3254552211073949, "grad_norm": 1.7406228798490067, "learning_rate": 1.2348065814157532e-05, "loss": 0.6842, "step": 17834 }, { "epoch": 1.3255295429208473, "grad_norm": 2.1538367253783206, "learning_rate": 1.2347285870869051e-05, "loss": 0.5555, "step": 17835 }, { "epoch": 1.3256038647342996, "grad_norm": 1.5914634409838553, "learning_rate": 1.2346505912468865e-05, "loss": 0.4438, "step": 17836 }, { "epoch": 1.3256781865477518, "grad_norm": 2.048515013814649, "learning_rate": 1.2345725938961997e-05, "loss": 0.5887, "step": 17837 }, { "epoch": 1.325752508361204, "grad_norm": 1.9637702276863864, "learning_rate": 1.2344945950353465e-05, "loss": 0.7574, "step": 17838 }, { "epoch": 1.3258268301746563, "grad_norm": 2.332392304611186, "learning_rate": 1.2344165946648291e-05, "loss": 0.5954, "step": 17839 }, { "epoch": 1.3259011519881085, "grad_norm": 1.8480166965268376, "learning_rate": 1.2343385927851502e-05, "loss": 0.7114, "step": 17840 }, { "epoch": 1.3259754738015608, "grad_norm": 1.7957246514441905, "learning_rate": 1.2342605893968113e-05, "loss": 0.5069, "step": 17841 }, { "epoch": 1.326049795615013, "grad_norm": 1.837569253025514, "learning_rate": 1.234182584500315e-05, "loss": 0.5061, "step": 17842 }, { "epoch": 1.3261241174284653, "grad_norm": 1.8540169059986906, "learning_rate": 1.2341045780961635e-05, "loss": 0.5583, "step": 17843 }, { "epoch": 1.3261984392419175, "grad_norm": 1.8072162463899712, "learning_rate": 1.2340265701848588e-05, "loss": 0.6497, "step": 17844 }, { "epoch": 1.3262727610553697, "grad_norm": 1.7775670153619818, "learning_rate": 1.233948560766903e-05, "loss": 0.4558, "step": 17845 }, { "epoch": 1.326347082868822, "grad_norm": 1.7937211109156563, "learning_rate": 1.2338705498427986e-05, "loss": 0.5521, "step": 17846 }, { "epoch": 1.3264214046822742, "grad_norm": 2.058927168892869, "learning_rate": 1.233792537413048e-05, "loss": 0.5728, "step": 17847 }, { "epoch": 1.3264957264957264, "grad_norm": 1.793932038504939, "learning_rate": 1.2337145234781526e-05, "loss": 0.575, "step": 17848 }, { "epoch": 1.3265700483091787, "grad_norm": 2.111634239305626, "learning_rate": 1.233636508038616e-05, "loss": 0.731, "step": 17849 }, { "epoch": 1.326644370122631, "grad_norm": 2.3892681166054257, "learning_rate": 1.233558491094939e-05, "loss": 0.6965, "step": 17850 }, { "epoch": 1.3267186919360832, "grad_norm": 1.7754431317760773, "learning_rate": 1.2334804726476249e-05, "loss": 0.5998, "step": 17851 }, { "epoch": 1.3267930137495356, "grad_norm": 2.2493111048511465, "learning_rate": 1.2334024526971755e-05, "loss": 0.7612, "step": 17852 }, { "epoch": 1.3268673355629876, "grad_norm": 2.0252404578091245, "learning_rate": 1.2333244312440933e-05, "loss": 0.6515, "step": 17853 }, { "epoch": 1.32694165737644, "grad_norm": 1.8311679157263383, "learning_rate": 1.2332464082888804e-05, "loss": 0.578, "step": 17854 }, { "epoch": 1.3270159791898921, "grad_norm": 1.7194762316019114, "learning_rate": 1.2331683838320395e-05, "loss": 0.5079, "step": 17855 }, { "epoch": 1.3270903010033446, "grad_norm": 2.816244822361549, "learning_rate": 1.2330903578740724e-05, "loss": 0.7059, "step": 17856 }, { "epoch": 1.3271646228167968, "grad_norm": 1.9291487818195652, "learning_rate": 1.2330123304154815e-05, "loss": 0.5983, "step": 17857 }, { "epoch": 1.327238944630249, "grad_norm": 2.12121233581988, "learning_rate": 1.2329343014567696e-05, "loss": 0.7527, "step": 17858 }, { "epoch": 1.3273132664437013, "grad_norm": 2.0782308663127487, "learning_rate": 1.2328562709984387e-05, "loss": 0.5803, "step": 17859 }, { "epoch": 1.3273875882571535, "grad_norm": 2.329970925566555, "learning_rate": 1.2327782390409913e-05, "loss": 0.7613, "step": 17860 }, { "epoch": 1.3274619100706058, "grad_norm": 1.8480392126385325, "learning_rate": 1.2327002055849295e-05, "loss": 0.5818, "step": 17861 }, { "epoch": 1.327536231884058, "grad_norm": 2.305919220430487, "learning_rate": 1.2326221706307557e-05, "loss": 0.6693, "step": 17862 }, { "epoch": 1.3276105536975102, "grad_norm": 1.8750282929643636, "learning_rate": 1.2325441341789728e-05, "loss": 0.598, "step": 17863 }, { "epoch": 1.3276848755109625, "grad_norm": 1.8139745259088493, "learning_rate": 1.2324660962300828e-05, "loss": 0.6559, "step": 17864 }, { "epoch": 1.3277591973244147, "grad_norm": 1.6855398919458497, "learning_rate": 1.2323880567845878e-05, "loss": 0.5177, "step": 17865 }, { "epoch": 1.327833519137867, "grad_norm": 1.7910459624738497, "learning_rate": 1.2323100158429909e-05, "loss": 0.5617, "step": 17866 }, { "epoch": 1.3279078409513192, "grad_norm": 1.9445248853173203, "learning_rate": 1.232231973405794e-05, "loss": 0.5852, "step": 17867 }, { "epoch": 1.3279821627647714, "grad_norm": 2.163592863072096, "learning_rate": 1.2321539294734996e-05, "loss": 0.73, "step": 17868 }, { "epoch": 1.3280564845782237, "grad_norm": 2.0853109915620482, "learning_rate": 1.2320758840466105e-05, "loss": 0.6569, "step": 17869 }, { "epoch": 1.328130806391676, "grad_norm": 1.956715344455239, "learning_rate": 1.2319978371256288e-05, "loss": 0.6817, "step": 17870 }, { "epoch": 1.3282051282051281, "grad_norm": 1.6765377450982788, "learning_rate": 1.2319197887110573e-05, "loss": 0.5408, "step": 17871 }, { "epoch": 1.3282794500185804, "grad_norm": 1.5241237773367908, "learning_rate": 1.2318417388033976e-05, "loss": 0.4231, "step": 17872 }, { "epoch": 1.3283537718320326, "grad_norm": 1.8554066426034217, "learning_rate": 1.2317636874031534e-05, "loss": 0.581, "step": 17873 }, { "epoch": 1.3284280936454849, "grad_norm": 1.9852919805499962, "learning_rate": 1.2316856345108264e-05, "loss": 0.5624, "step": 17874 }, { "epoch": 1.3285024154589373, "grad_norm": 2.27294521790135, "learning_rate": 1.2316075801269192e-05, "loss": 0.7039, "step": 17875 }, { "epoch": 1.3285767372723893, "grad_norm": 1.7496278181028841, "learning_rate": 1.2315295242519345e-05, "loss": 0.5768, "step": 17876 }, { "epoch": 1.3286510590858418, "grad_norm": 1.7525110795160779, "learning_rate": 1.2314514668863748e-05, "loss": 0.6053, "step": 17877 }, { "epoch": 1.3287253808992938, "grad_norm": 2.064469756797214, "learning_rate": 1.2313734080307424e-05, "loss": 0.6175, "step": 17878 }, { "epoch": 1.3287997027127463, "grad_norm": 2.0411813209198337, "learning_rate": 1.2312953476855402e-05, "loss": 0.5485, "step": 17879 }, { "epoch": 1.3288740245261985, "grad_norm": 2.2080555762986087, "learning_rate": 1.2312172858512706e-05, "loss": 0.7127, "step": 17880 }, { "epoch": 1.3289483463396508, "grad_norm": 1.8464936059087218, "learning_rate": 1.231139222528436e-05, "loss": 0.5171, "step": 17881 }, { "epoch": 1.329022668153103, "grad_norm": 1.5667407643555185, "learning_rate": 1.2310611577175391e-05, "loss": 0.4563, "step": 17882 }, { "epoch": 1.3290969899665552, "grad_norm": 1.7610455634357691, "learning_rate": 1.2309830914190824e-05, "loss": 0.5788, "step": 17883 }, { "epoch": 1.3291713117800075, "grad_norm": 1.6243737794286484, "learning_rate": 1.2309050236335684e-05, "loss": 0.5162, "step": 17884 }, { "epoch": 1.3292456335934597, "grad_norm": 1.647264091786942, "learning_rate": 1.2308269543615e-05, "loss": 0.532, "step": 17885 }, { "epoch": 1.329319955406912, "grad_norm": 1.9691764437240904, "learning_rate": 1.2307488836033798e-05, "loss": 0.6022, "step": 17886 }, { "epoch": 1.3293942772203642, "grad_norm": 2.502904957663287, "learning_rate": 1.2306708113597102e-05, "loss": 0.7053, "step": 17887 }, { "epoch": 1.3294685990338164, "grad_norm": 2.224532588625975, "learning_rate": 1.2305927376309939e-05, "loss": 0.6378, "step": 17888 }, { "epoch": 1.3295429208472687, "grad_norm": 2.0192937183514776, "learning_rate": 1.2305146624177331e-05, "loss": 0.6034, "step": 17889 }, { "epoch": 1.329617242660721, "grad_norm": 1.9506419225996832, "learning_rate": 1.2304365857204315e-05, "loss": 0.5627, "step": 17890 }, { "epoch": 1.3296915644741731, "grad_norm": 1.9587145206788865, "learning_rate": 1.2303585075395908e-05, "loss": 0.7578, "step": 17891 }, { "epoch": 1.3297658862876254, "grad_norm": 1.7479027972058143, "learning_rate": 1.2302804278757136e-05, "loss": 0.6482, "step": 17892 }, { "epoch": 1.3298402081010776, "grad_norm": 1.700935640801433, "learning_rate": 1.2302023467293033e-05, "loss": 0.6648, "step": 17893 }, { "epoch": 1.3299145299145299, "grad_norm": 1.7445225457878686, "learning_rate": 1.2301242641008625e-05, "loss": 0.5609, "step": 17894 }, { "epoch": 1.329988851727982, "grad_norm": 1.8023946729510383, "learning_rate": 1.2300461799908934e-05, "loss": 0.6217, "step": 17895 }, { "epoch": 1.3300631735414343, "grad_norm": 1.5264101572950628, "learning_rate": 1.229968094399899e-05, "loss": 0.4216, "step": 17896 }, { "epoch": 1.3301374953548866, "grad_norm": 1.8406128780093785, "learning_rate": 1.2298900073283817e-05, "loss": 0.6726, "step": 17897 }, { "epoch": 1.330211817168339, "grad_norm": 2.3024485104612418, "learning_rate": 1.2298119187768446e-05, "loss": 0.7811, "step": 17898 }, { "epoch": 1.330286138981791, "grad_norm": 1.9789906672011375, "learning_rate": 1.2297338287457906e-05, "loss": 0.6438, "step": 17899 }, { "epoch": 1.3303604607952435, "grad_norm": 1.8476512031494774, "learning_rate": 1.2296557372357216e-05, "loss": 0.6016, "step": 17900 }, { "epoch": 1.3304347826086955, "grad_norm": 2.15914225614718, "learning_rate": 1.2295776442471414e-05, "loss": 0.6737, "step": 17901 }, { "epoch": 1.330509104422148, "grad_norm": 1.902650063073285, "learning_rate": 1.2294995497805518e-05, "loss": 0.5468, "step": 17902 }, { "epoch": 1.3305834262356002, "grad_norm": 1.8213302321922744, "learning_rate": 1.2294214538364561e-05, "loss": 0.5925, "step": 17903 }, { "epoch": 1.3306577480490525, "grad_norm": 1.8393247762918752, "learning_rate": 1.2293433564153569e-05, "loss": 0.5674, "step": 17904 }, { "epoch": 1.3307320698625047, "grad_norm": 1.5633853068618224, "learning_rate": 1.2292652575177574e-05, "loss": 0.5148, "step": 17905 }, { "epoch": 1.330806391675957, "grad_norm": 2.080234767883349, "learning_rate": 1.2291871571441598e-05, "loss": 0.6404, "step": 17906 }, { "epoch": 1.3308807134894092, "grad_norm": 2.313098843759154, "learning_rate": 1.2291090552950675e-05, "loss": 0.7408, "step": 17907 }, { "epoch": 1.3309550353028614, "grad_norm": 2.1126623126537383, "learning_rate": 1.2290309519709827e-05, "loss": 0.6697, "step": 17908 }, { "epoch": 1.3310293571163136, "grad_norm": 1.7023973745142529, "learning_rate": 1.2289528471724086e-05, "loss": 0.4699, "step": 17909 }, { "epoch": 1.3311036789297659, "grad_norm": 2.5144391033077422, "learning_rate": 1.2288747408998478e-05, "loss": 0.8103, "step": 17910 }, { "epoch": 1.3311780007432181, "grad_norm": 1.92443790691049, "learning_rate": 1.2287966331538035e-05, "loss": 0.665, "step": 17911 }, { "epoch": 1.3312523225566704, "grad_norm": 2.3579624561205996, "learning_rate": 1.2287185239347786e-05, "loss": 0.6157, "step": 17912 }, { "epoch": 1.3313266443701226, "grad_norm": 1.856779822027922, "learning_rate": 1.2286404132432752e-05, "loss": 0.5992, "step": 17913 }, { "epoch": 1.3314009661835748, "grad_norm": 2.070518414477655, "learning_rate": 1.2285623010797967e-05, "loss": 0.7456, "step": 17914 }, { "epoch": 1.331475287997027, "grad_norm": 1.8407303642468913, "learning_rate": 1.2284841874448461e-05, "loss": 0.5814, "step": 17915 }, { "epoch": 1.3315496098104793, "grad_norm": 2.1898524564671886, "learning_rate": 1.2284060723389261e-05, "loss": 0.5923, "step": 17916 }, { "epoch": 1.3316239316239316, "grad_norm": 2.2796399376526946, "learning_rate": 1.2283279557625398e-05, "loss": 0.6814, "step": 17917 }, { "epoch": 1.3316982534373838, "grad_norm": 1.928911920667105, "learning_rate": 1.22824983771619e-05, "loss": 0.6196, "step": 17918 }, { "epoch": 1.3317725752508363, "grad_norm": 2.037010253478488, "learning_rate": 1.2281717182003793e-05, "loss": 0.6098, "step": 17919 }, { "epoch": 1.3318468970642883, "grad_norm": 1.9414399994692788, "learning_rate": 1.2280935972156112e-05, "loss": 0.6045, "step": 17920 }, { "epoch": 1.3319212188777407, "grad_norm": 2.3300174463162207, "learning_rate": 1.2280154747623882e-05, "loss": 0.7359, "step": 17921 }, { "epoch": 1.3319955406911927, "grad_norm": 1.8916616030026256, "learning_rate": 1.2279373508412135e-05, "loss": 0.5685, "step": 17922 }, { "epoch": 1.3320698625046452, "grad_norm": 2.0312355200229253, "learning_rate": 1.2278592254525902e-05, "loss": 0.7475, "step": 17923 }, { "epoch": 1.3321441843180972, "grad_norm": 2.0549956777742415, "learning_rate": 1.2277810985970207e-05, "loss": 0.6317, "step": 17924 }, { "epoch": 1.3322185061315497, "grad_norm": 2.0021783785324625, "learning_rate": 1.2277029702750082e-05, "loss": 0.6476, "step": 17925 }, { "epoch": 1.332292827945002, "grad_norm": 1.8553753978880592, "learning_rate": 1.2276248404870559e-05, "loss": 0.5526, "step": 17926 }, { "epoch": 1.3323671497584542, "grad_norm": 1.732805444800904, "learning_rate": 1.2275467092336669e-05, "loss": 0.5885, "step": 17927 }, { "epoch": 1.3324414715719064, "grad_norm": 1.6342060084906473, "learning_rate": 1.2274685765153439e-05, "loss": 0.5188, "step": 17928 }, { "epoch": 1.3325157933853586, "grad_norm": 2.5602496705182647, "learning_rate": 1.22739044233259e-05, "loss": 0.7829, "step": 17929 }, { "epoch": 1.3325901151988109, "grad_norm": 2.1570694661048266, "learning_rate": 1.2273123066859081e-05, "loss": 0.6274, "step": 17930 }, { "epoch": 1.3326644370122631, "grad_norm": 2.0627771672764292, "learning_rate": 1.2272341695758014e-05, "loss": 0.6889, "step": 17931 }, { "epoch": 1.3327387588257154, "grad_norm": 1.8444468550098565, "learning_rate": 1.2271560310027728e-05, "loss": 0.6235, "step": 17932 }, { "epoch": 1.3328130806391676, "grad_norm": 1.7745830331690065, "learning_rate": 1.2270778909673257e-05, "loss": 0.5584, "step": 17933 }, { "epoch": 1.3328874024526198, "grad_norm": 7.319639125923168, "learning_rate": 1.226999749469963e-05, "loss": 0.7258, "step": 17934 }, { "epoch": 1.332961724266072, "grad_norm": 1.6480128199209902, "learning_rate": 1.2269216065111875e-05, "loss": 0.5613, "step": 17935 }, { "epoch": 1.3330360460795243, "grad_norm": 1.525341702687197, "learning_rate": 1.2268434620915024e-05, "loss": 0.4889, "step": 17936 }, { "epoch": 1.3331103678929765, "grad_norm": 1.6574763361375786, "learning_rate": 1.2267653162114108e-05, "loss": 0.6179, "step": 17937 }, { "epoch": 1.3331846897064288, "grad_norm": 2.39296942000113, "learning_rate": 1.2266871688714161e-05, "loss": 0.725, "step": 17938 }, { "epoch": 1.333259011519881, "grad_norm": 2.339055431893704, "learning_rate": 1.226609020072021e-05, "loss": 0.5484, "step": 17939 }, { "epoch": 1.3333333333333333, "grad_norm": 1.8494276996859274, "learning_rate": 1.2265308698137292e-05, "loss": 0.4788, "step": 17940 }, { "epoch": 1.3334076551467855, "grad_norm": 1.7481399916043527, "learning_rate": 1.2264527180970429e-05, "loss": 0.5731, "step": 17941 }, { "epoch": 1.333481976960238, "grad_norm": 2.2012617258367255, "learning_rate": 1.226374564922466e-05, "loss": 0.8261, "step": 17942 }, { "epoch": 1.33355629877369, "grad_norm": 2.057819466701668, "learning_rate": 1.2262964102905012e-05, "loss": 0.6377, "step": 17943 }, { "epoch": 1.3336306205871424, "grad_norm": 2.056892970503496, "learning_rate": 1.226218254201652e-05, "loss": 0.7229, "step": 17944 }, { "epoch": 1.3337049424005945, "grad_norm": 1.556601198281004, "learning_rate": 1.2261400966564217e-05, "loss": 0.5469, "step": 17945 }, { "epoch": 1.333779264214047, "grad_norm": 2.0881459374939495, "learning_rate": 1.2260619376553126e-05, "loss": 0.7097, "step": 17946 }, { "epoch": 1.3338535860274991, "grad_norm": 1.782065813601214, "learning_rate": 1.2259837771988288e-05, "loss": 0.6096, "step": 17947 }, { "epoch": 1.3339279078409514, "grad_norm": 1.5445463473735348, "learning_rate": 1.2259056152874731e-05, "loss": 0.5297, "step": 17948 }, { "epoch": 1.3340022296544036, "grad_norm": 1.796482280615415, "learning_rate": 1.2258274519217488e-05, "loss": 0.7044, "step": 17949 }, { "epoch": 1.3340765514678559, "grad_norm": 1.9850090760405217, "learning_rate": 1.2257492871021592e-05, "loss": 0.5132, "step": 17950 }, { "epoch": 1.334150873281308, "grad_norm": 2.15740788247575, "learning_rate": 1.2256711208292074e-05, "loss": 0.7633, "step": 17951 }, { "epoch": 1.3342251950947603, "grad_norm": 2.1341339226730245, "learning_rate": 1.2255929531033966e-05, "loss": 0.6562, "step": 17952 }, { "epoch": 1.3342995169082126, "grad_norm": 2.057244816464653, "learning_rate": 1.2255147839252297e-05, "loss": 0.5638, "step": 17953 }, { "epoch": 1.3343738387216648, "grad_norm": 2.1267896338656898, "learning_rate": 1.2254366132952107e-05, "loss": 0.5992, "step": 17954 }, { "epoch": 1.334448160535117, "grad_norm": 1.9528102210432927, "learning_rate": 1.2253584412138424e-05, "loss": 0.7156, "step": 17955 }, { "epoch": 1.3345224823485693, "grad_norm": 2.268565003338866, "learning_rate": 1.2252802676816282e-05, "loss": 0.6135, "step": 17956 }, { "epoch": 1.3345968041620215, "grad_norm": 2.130502017121741, "learning_rate": 1.2252020926990713e-05, "loss": 0.6514, "step": 17957 }, { "epoch": 1.3346711259754738, "grad_norm": 2.2628476774216133, "learning_rate": 1.225123916266675e-05, "loss": 0.5247, "step": 17958 }, { "epoch": 1.334745447788926, "grad_norm": 2.0961476398079992, "learning_rate": 1.2250457383849424e-05, "loss": 0.6749, "step": 17959 }, { "epoch": 1.3348197696023782, "grad_norm": 1.6749905883074974, "learning_rate": 1.2249675590543772e-05, "loss": 0.507, "step": 17960 }, { "epoch": 1.3348940914158305, "grad_norm": 1.7561326479297155, "learning_rate": 1.2248893782754827e-05, "loss": 0.5606, "step": 17961 }, { "epoch": 1.3349684132292827, "grad_norm": 1.6316944798087833, "learning_rate": 1.224811196048762e-05, "loss": 0.5201, "step": 17962 }, { "epoch": 1.335042735042735, "grad_norm": 1.8202430327377448, "learning_rate": 1.2247330123747183e-05, "loss": 0.5706, "step": 17963 }, { "epoch": 1.3351170568561872, "grad_norm": 1.673438261369136, "learning_rate": 1.224654827253855e-05, "loss": 0.6206, "step": 17964 }, { "epoch": 1.3351913786696397, "grad_norm": 2.022045359382554, "learning_rate": 1.2245766406866758e-05, "loss": 0.5992, "step": 17965 }, { "epoch": 1.3352657004830917, "grad_norm": 2.314866382976891, "learning_rate": 1.224498452673684e-05, "loss": 0.5035, "step": 17966 }, { "epoch": 1.3353400222965441, "grad_norm": 2.295435806127948, "learning_rate": 1.2244202632153827e-05, "loss": 0.6204, "step": 17967 }, { "epoch": 1.3354143441099962, "grad_norm": 1.8859966738971354, "learning_rate": 1.2243420723122752e-05, "loss": 0.6677, "step": 17968 }, { "epoch": 1.3354886659234486, "grad_norm": 2.275407114704111, "learning_rate": 1.2242638799648651e-05, "loss": 0.7196, "step": 17969 }, { "epoch": 1.3355629877369009, "grad_norm": 1.6334745721705313, "learning_rate": 1.2241856861736558e-05, "loss": 0.6144, "step": 17970 }, { "epoch": 1.335637309550353, "grad_norm": 1.87595684718529, "learning_rate": 1.2241074909391509e-05, "loss": 0.7386, "step": 17971 }, { "epoch": 1.3357116313638053, "grad_norm": 2.3268566357109974, "learning_rate": 1.2240292942618535e-05, "loss": 0.4075, "step": 17972 }, { "epoch": 1.3357859531772576, "grad_norm": 1.6212551321496893, "learning_rate": 1.2239510961422671e-05, "loss": 0.5131, "step": 17973 }, { "epoch": 1.3358602749907098, "grad_norm": 2.0648647325994944, "learning_rate": 1.223872896580895e-05, "loss": 0.6299, "step": 17974 }, { "epoch": 1.335934596804162, "grad_norm": 2.274039612920514, "learning_rate": 1.2237946955782409e-05, "loss": 0.6516, "step": 17975 }, { "epoch": 1.3360089186176143, "grad_norm": 1.8059391715490614, "learning_rate": 1.2237164931348083e-05, "loss": 0.5707, "step": 17976 }, { "epoch": 1.3360832404310665, "grad_norm": 2.013817267825353, "learning_rate": 1.2236382892511004e-05, "loss": 0.7047, "step": 17977 }, { "epoch": 1.3361575622445188, "grad_norm": 1.6199505959267766, "learning_rate": 1.2235600839276211e-05, "loss": 0.4121, "step": 17978 }, { "epoch": 1.336231884057971, "grad_norm": 1.715882575355741, "learning_rate": 1.2234818771648734e-05, "loss": 0.6126, "step": 17979 }, { "epoch": 1.3363062058714232, "grad_norm": 2.3215089832897786, "learning_rate": 1.2234036689633606e-05, "loss": 0.5692, "step": 17980 }, { "epoch": 1.3363805276848755, "grad_norm": 1.8123669691142787, "learning_rate": 1.2233254593235868e-05, "loss": 0.5628, "step": 17981 }, { "epoch": 1.3364548494983277, "grad_norm": 1.93019519882745, "learning_rate": 1.2232472482460553e-05, "loss": 0.6141, "step": 17982 }, { "epoch": 1.33652917131178, "grad_norm": 1.9798344790946827, "learning_rate": 1.2231690357312698e-05, "loss": 0.6268, "step": 17983 }, { "epoch": 1.3366034931252322, "grad_norm": 1.6653514749499763, "learning_rate": 1.2230908217797336e-05, "loss": 0.5047, "step": 17984 }, { "epoch": 1.3366778149386844, "grad_norm": 2.174708751174067, "learning_rate": 1.2230126063919501e-05, "loss": 0.6216, "step": 17985 }, { "epoch": 1.3367521367521369, "grad_norm": 1.901505925608107, "learning_rate": 1.2229343895684229e-05, "loss": 0.6297, "step": 17986 }, { "epoch": 1.336826458565589, "grad_norm": 1.8821910876830126, "learning_rate": 1.2228561713096559e-05, "loss": 0.4561, "step": 17987 }, { "epoch": 1.3369007803790414, "grad_norm": 1.9232682052796657, "learning_rate": 1.2227779516161525e-05, "loss": 0.7257, "step": 17988 }, { "epoch": 1.3369751021924934, "grad_norm": 2.1627146146910015, "learning_rate": 1.2226997304884158e-05, "loss": 0.6591, "step": 17989 }, { "epoch": 1.3370494240059458, "grad_norm": 1.8156686789532812, "learning_rate": 1.2226215079269501e-05, "loss": 0.7191, "step": 17990 }, { "epoch": 1.3371237458193979, "grad_norm": 1.9017361921324927, "learning_rate": 1.2225432839322587e-05, "loss": 0.6362, "step": 17991 }, { "epoch": 1.3371980676328503, "grad_norm": 1.7404809620492712, "learning_rate": 1.222465058504845e-05, "loss": 0.5801, "step": 17992 }, { "epoch": 1.3372723894463026, "grad_norm": 2.3115838875502566, "learning_rate": 1.2223868316452128e-05, "loss": 0.7525, "step": 17993 }, { "epoch": 1.3373467112597548, "grad_norm": 2.154992680445741, "learning_rate": 1.222308603353866e-05, "loss": 0.6938, "step": 17994 }, { "epoch": 1.337421033073207, "grad_norm": 2.1359816931535325, "learning_rate": 1.222230373631308e-05, "loss": 0.6003, "step": 17995 }, { "epoch": 1.3374953548866593, "grad_norm": 3.1901260955732247, "learning_rate": 1.222152142478042e-05, "loss": 0.6127, "step": 17996 }, { "epoch": 1.3375696767001115, "grad_norm": 1.9442762835947187, "learning_rate": 1.222073909894572e-05, "loss": 0.7023, "step": 17997 }, { "epoch": 1.3376439985135637, "grad_norm": 1.7873322305194062, "learning_rate": 1.2219956758814018e-05, "loss": 0.5321, "step": 17998 }, { "epoch": 1.337718320327016, "grad_norm": 1.9060507768450816, "learning_rate": 1.2219174404390353e-05, "loss": 0.5534, "step": 17999 }, { "epoch": 1.3377926421404682, "grad_norm": 1.602034257761185, "learning_rate": 1.2218392035679753e-05, "loss": 0.4199, "step": 18000 }, { "epoch": 1.3378669639539205, "grad_norm": 3.1229651127646063, "learning_rate": 1.2217609652687266e-05, "loss": 0.7202, "step": 18001 }, { "epoch": 1.3379412857673727, "grad_norm": 2.1600178161941974, "learning_rate": 1.2216827255417918e-05, "loss": 0.7272, "step": 18002 }, { "epoch": 1.338015607580825, "grad_norm": 1.7302822177780672, "learning_rate": 1.2216044843876753e-05, "loss": 0.5862, "step": 18003 }, { "epoch": 1.3380899293942772, "grad_norm": 2.28593068514324, "learning_rate": 1.2215262418068807e-05, "loss": 0.4831, "step": 18004 }, { "epoch": 1.3381642512077294, "grad_norm": 2.134940514668832, "learning_rate": 1.2214479977999117e-05, "loss": 0.6064, "step": 18005 }, { "epoch": 1.3382385730211817, "grad_norm": 2.196046411558486, "learning_rate": 1.2213697523672717e-05, "loss": 0.7974, "step": 18006 }, { "epoch": 1.338312894834634, "grad_norm": 1.6769606774625818, "learning_rate": 1.2212915055094649e-05, "loss": 0.4692, "step": 18007 }, { "epoch": 1.3383872166480861, "grad_norm": 3.0251700852807373, "learning_rate": 1.221213257226995e-05, "loss": 0.7676, "step": 18008 }, { "epoch": 1.3384615384615386, "grad_norm": 1.7049496294717172, "learning_rate": 1.2211350075203654e-05, "loss": 0.5267, "step": 18009 }, { "epoch": 1.3385358602749906, "grad_norm": 2.0565848792068393, "learning_rate": 1.2210567563900802e-05, "loss": 0.744, "step": 18010 }, { "epoch": 1.338610182088443, "grad_norm": 1.7051531497587618, "learning_rate": 1.220978503836643e-05, "loss": 0.6399, "step": 18011 }, { "epoch": 1.338684503901895, "grad_norm": 1.769053743214221, "learning_rate": 1.2209002498605578e-05, "loss": 0.6066, "step": 18012 }, { "epoch": 1.3387588257153475, "grad_norm": 1.693982557868142, "learning_rate": 1.220821994462328e-05, "loss": 0.5049, "step": 18013 }, { "epoch": 1.3388331475287998, "grad_norm": 2.6309355125534233, "learning_rate": 1.220743737642458e-05, "loss": 0.7328, "step": 18014 }, { "epoch": 1.338907469342252, "grad_norm": 1.8051506173540492, "learning_rate": 1.2206654794014513e-05, "loss": 0.5596, "step": 18015 }, { "epoch": 1.3389817911557043, "grad_norm": 2.354772636378858, "learning_rate": 1.2205872197398112e-05, "loss": 0.5748, "step": 18016 }, { "epoch": 1.3390561129691565, "grad_norm": 2.116042222002164, "learning_rate": 1.2205089586580424e-05, "loss": 0.6278, "step": 18017 }, { "epoch": 1.3391304347826087, "grad_norm": 2.285113014830501, "learning_rate": 1.2204306961566483e-05, "loss": 0.7536, "step": 18018 }, { "epoch": 1.339204756596061, "grad_norm": 2.0483773678640973, "learning_rate": 1.2203524322361326e-05, "loss": 0.5134, "step": 18019 }, { "epoch": 1.3392790784095132, "grad_norm": 1.6572424430937016, "learning_rate": 1.2202741668969996e-05, "loss": 0.5394, "step": 18020 }, { "epoch": 1.3393534002229655, "grad_norm": 2.1704061710705402, "learning_rate": 1.220195900139753e-05, "loss": 0.5615, "step": 18021 }, { "epoch": 1.3394277220364177, "grad_norm": 1.8451278149035206, "learning_rate": 1.2201176319648966e-05, "loss": 0.6431, "step": 18022 }, { "epoch": 1.33950204384987, "grad_norm": 2.1832121374477054, "learning_rate": 1.2200393623729341e-05, "loss": 0.7236, "step": 18023 }, { "epoch": 1.3395763656633222, "grad_norm": 2.0583238464279923, "learning_rate": 1.2199610913643698e-05, "loss": 0.5607, "step": 18024 }, { "epoch": 1.3396506874767744, "grad_norm": 1.5689460506901651, "learning_rate": 1.2198828189397074e-05, "loss": 0.4409, "step": 18025 }, { "epoch": 1.3397250092902266, "grad_norm": 1.9598308770489725, "learning_rate": 1.219804545099451e-05, "loss": 0.6959, "step": 18026 }, { "epoch": 1.3397993311036789, "grad_norm": 2.0788671572959005, "learning_rate": 1.2197262698441039e-05, "loss": 0.8338, "step": 18027 }, { "epoch": 1.3398736529171311, "grad_norm": 1.7264204042647198, "learning_rate": 1.2196479931741705e-05, "loss": 0.5256, "step": 18028 }, { "epoch": 1.3399479747305834, "grad_norm": 2.3442984792067727, "learning_rate": 1.2195697150901551e-05, "loss": 0.6862, "step": 18029 }, { "epoch": 1.3400222965440356, "grad_norm": 1.7888006538549384, "learning_rate": 1.2194914355925608e-05, "loss": 0.5949, "step": 18030 }, { "epoch": 1.3400966183574878, "grad_norm": 2.166317229365296, "learning_rate": 1.2194131546818923e-05, "loss": 0.4848, "step": 18031 }, { "epoch": 1.3401709401709403, "grad_norm": 2.2469393320558546, "learning_rate": 1.2193348723586535e-05, "loss": 0.7056, "step": 18032 }, { "epoch": 1.3402452619843923, "grad_norm": 3.0211373135202737, "learning_rate": 1.2192565886233481e-05, "loss": 0.5389, "step": 18033 }, { "epoch": 1.3403195837978448, "grad_norm": 3.0230786630015998, "learning_rate": 1.21917830347648e-05, "loss": 0.7725, "step": 18034 }, { "epoch": 1.3403939056112968, "grad_norm": 2.0873596375398926, "learning_rate": 1.2191000169185533e-05, "loss": 0.7108, "step": 18035 }, { "epoch": 1.3404682274247492, "grad_norm": 2.018223403834628, "learning_rate": 1.219021728950072e-05, "loss": 0.5626, "step": 18036 }, { "epoch": 1.3405425492382015, "grad_norm": 1.4893127109851287, "learning_rate": 1.2189434395715404e-05, "loss": 0.5402, "step": 18037 }, { "epoch": 1.3406168710516537, "grad_norm": 1.7663316301914471, "learning_rate": 1.2188651487834621e-05, "loss": 0.518, "step": 18038 }, { "epoch": 1.340691192865106, "grad_norm": 2.137480312507982, "learning_rate": 1.2187868565863415e-05, "loss": 0.5686, "step": 18039 }, { "epoch": 1.3407655146785582, "grad_norm": 1.8144974527730302, "learning_rate": 1.2187085629806823e-05, "loss": 0.5459, "step": 18040 }, { "epoch": 1.3408398364920104, "grad_norm": 2.061401272344995, "learning_rate": 1.218630267966989e-05, "loss": 0.6794, "step": 18041 }, { "epoch": 1.3409141583054627, "grad_norm": 1.941493552610365, "learning_rate": 1.2185519715457652e-05, "loss": 0.6115, "step": 18042 }, { "epoch": 1.340988480118915, "grad_norm": 2.140301642826903, "learning_rate": 1.2184736737175151e-05, "loss": 0.6219, "step": 18043 }, { "epoch": 1.3410628019323672, "grad_norm": 1.7588493697933973, "learning_rate": 1.2183953744827428e-05, "loss": 0.5171, "step": 18044 }, { "epoch": 1.3411371237458194, "grad_norm": 1.7552364488793502, "learning_rate": 1.2183170738419525e-05, "loss": 0.5935, "step": 18045 }, { "epoch": 1.3412114455592716, "grad_norm": 1.832293744526566, "learning_rate": 1.2182387717956483e-05, "loss": 0.5784, "step": 18046 }, { "epoch": 1.3412857673727239, "grad_norm": 2.026540063643667, "learning_rate": 1.218160468344334e-05, "loss": 0.609, "step": 18047 }, { "epoch": 1.341360089186176, "grad_norm": 2.3428295087193085, "learning_rate": 1.2180821634885142e-05, "loss": 0.6603, "step": 18048 }, { "epoch": 1.3414344109996283, "grad_norm": 1.980457606462183, "learning_rate": 1.2180038572286926e-05, "loss": 0.4699, "step": 18049 }, { "epoch": 1.3415087328130806, "grad_norm": 2.1221618410762013, "learning_rate": 1.2179255495653732e-05, "loss": 0.5648, "step": 18050 }, { "epoch": 1.3415830546265328, "grad_norm": 1.6424468260013234, "learning_rate": 1.2178472404990607e-05, "loss": 0.5703, "step": 18051 }, { "epoch": 1.341657376439985, "grad_norm": 1.365702294398192, "learning_rate": 1.217768930030259e-05, "loss": 0.3758, "step": 18052 }, { "epoch": 1.3417316982534375, "grad_norm": 1.9944612783560753, "learning_rate": 1.2176906181594721e-05, "loss": 0.6596, "step": 18053 }, { "epoch": 1.3418060200668895, "grad_norm": 1.704277735354983, "learning_rate": 1.2176123048872046e-05, "loss": 0.5511, "step": 18054 }, { "epoch": 1.341880341880342, "grad_norm": 1.9156896127284093, "learning_rate": 1.21753399021396e-05, "loss": 0.6184, "step": 18055 }, { "epoch": 1.341954663693794, "grad_norm": 2.306999342594035, "learning_rate": 1.217455674140243e-05, "loss": 0.693, "step": 18056 }, { "epoch": 1.3420289855072465, "grad_norm": 2.212019440646046, "learning_rate": 1.2173773566665575e-05, "loss": 0.6683, "step": 18057 }, { "epoch": 1.3421033073206985, "grad_norm": 1.821316667525535, "learning_rate": 1.2172990377934081e-05, "loss": 0.5925, "step": 18058 }, { "epoch": 1.342177629134151, "grad_norm": 1.66360022486026, "learning_rate": 1.217220717521299e-05, "loss": 0.5013, "step": 18059 }, { "epoch": 1.3422519509476032, "grad_norm": 1.9239234266042997, "learning_rate": 1.2171423958507336e-05, "loss": 0.554, "step": 18060 }, { "epoch": 1.3423262727610554, "grad_norm": 1.785447493191116, "learning_rate": 1.217064072782217e-05, "loss": 0.5339, "step": 18061 }, { "epoch": 1.3424005945745077, "grad_norm": 1.92406038416948, "learning_rate": 1.216985748316253e-05, "loss": 0.6616, "step": 18062 }, { "epoch": 1.34247491638796, "grad_norm": 2.2741141656326977, "learning_rate": 1.2169074224533463e-05, "loss": 0.7279, "step": 18063 }, { "epoch": 1.3425492382014121, "grad_norm": 2.3766975736907314, "learning_rate": 1.2168290951940007e-05, "loss": 0.846, "step": 18064 }, { "epoch": 1.3426235600148644, "grad_norm": 2.085305119306576, "learning_rate": 1.2167507665387209e-05, "loss": 0.661, "step": 18065 }, { "epoch": 1.3426978818283166, "grad_norm": 1.8121811705703392, "learning_rate": 1.2166724364880105e-05, "loss": 0.5354, "step": 18066 }, { "epoch": 1.3427722036417689, "grad_norm": 2.5306034815075615, "learning_rate": 1.2165941050423741e-05, "loss": 0.6787, "step": 18067 }, { "epoch": 1.342846525455221, "grad_norm": 1.6284043590125887, "learning_rate": 1.2165157722023164e-05, "loss": 0.4559, "step": 18068 }, { "epoch": 1.3429208472686733, "grad_norm": 2.335491644740255, "learning_rate": 1.2164374379683417e-05, "loss": 0.8174, "step": 18069 }, { "epoch": 1.3429951690821256, "grad_norm": 1.9670873176128236, "learning_rate": 1.2163591023409538e-05, "loss": 0.7144, "step": 18070 }, { "epoch": 1.3430694908955778, "grad_norm": 3.060367113438602, "learning_rate": 1.216280765320657e-05, "loss": 0.5095, "step": 18071 }, { "epoch": 1.34314381270903, "grad_norm": 2.2051265294491844, "learning_rate": 1.2162024269079557e-05, "loss": 0.742, "step": 18072 }, { "epoch": 1.3432181345224823, "grad_norm": 1.8515813895813984, "learning_rate": 1.2161240871033545e-05, "loss": 0.53, "step": 18073 }, { "epoch": 1.3432924563359345, "grad_norm": 1.8131785148390644, "learning_rate": 1.216045745907358e-05, "loss": 0.5871, "step": 18074 }, { "epoch": 1.3433667781493868, "grad_norm": 1.8183934915326772, "learning_rate": 1.2159674033204698e-05, "loss": 0.6485, "step": 18075 }, { "epoch": 1.3434410999628392, "grad_norm": 1.7286340888951919, "learning_rate": 1.2158890593431951e-05, "loss": 0.5657, "step": 18076 }, { "epoch": 1.3435154217762912, "grad_norm": 1.8251509233776577, "learning_rate": 1.2158107139760374e-05, "loss": 0.5791, "step": 18077 }, { "epoch": 1.3435897435897437, "grad_norm": 1.7450807950606435, "learning_rate": 1.2157323672195016e-05, "loss": 0.6342, "step": 18078 }, { "epoch": 1.3436640654031957, "grad_norm": 1.7826379534136645, "learning_rate": 1.2156540190740922e-05, "loss": 0.5147, "step": 18079 }, { "epoch": 1.3437383872166482, "grad_norm": 1.9566449939463126, "learning_rate": 1.2155756695403134e-05, "loss": 0.5902, "step": 18080 }, { "epoch": 1.3438127090301004, "grad_norm": 2.581818740648478, "learning_rate": 1.2154973186186696e-05, "loss": 0.5877, "step": 18081 }, { "epoch": 1.3438870308435527, "grad_norm": 1.8351562609883858, "learning_rate": 1.2154189663096652e-05, "loss": 0.6282, "step": 18082 }, { "epoch": 1.343961352657005, "grad_norm": 3.4757061250054724, "learning_rate": 1.2153406126138045e-05, "loss": 0.6531, "step": 18083 }, { "epoch": 1.3440356744704571, "grad_norm": 2.2160026605096013, "learning_rate": 1.2152622575315923e-05, "loss": 0.6716, "step": 18084 }, { "epoch": 1.3441099962839094, "grad_norm": 2.050994147903521, "learning_rate": 1.2151839010635326e-05, "loss": 0.5225, "step": 18085 }, { "epoch": 1.3441843180973616, "grad_norm": 2.001989279251258, "learning_rate": 1.2151055432101307e-05, "loss": 0.5806, "step": 18086 }, { "epoch": 1.3442586399108138, "grad_norm": 2.070630994242044, "learning_rate": 1.2150271839718901e-05, "loss": 0.5526, "step": 18087 }, { "epoch": 1.344332961724266, "grad_norm": 1.7578430318537823, "learning_rate": 1.2149488233493156e-05, "loss": 0.5849, "step": 18088 }, { "epoch": 1.3444072835377183, "grad_norm": 1.9640347843281272, "learning_rate": 1.2148704613429116e-05, "loss": 0.6963, "step": 18089 }, { "epoch": 1.3444816053511706, "grad_norm": 1.795454740362758, "learning_rate": 1.2147920979531828e-05, "loss": 0.5766, "step": 18090 }, { "epoch": 1.3445559271646228, "grad_norm": 1.8568868907082274, "learning_rate": 1.214713733180634e-05, "loss": 0.585, "step": 18091 }, { "epoch": 1.344630248978075, "grad_norm": 1.7557428634754264, "learning_rate": 1.214635367025769e-05, "loss": 0.5865, "step": 18092 }, { "epoch": 1.3447045707915273, "grad_norm": 2.128465549225506, "learning_rate": 1.2145569994890928e-05, "loss": 0.6395, "step": 18093 }, { "epoch": 1.3447788926049795, "grad_norm": 1.9410437028582062, "learning_rate": 1.2144786305711097e-05, "loss": 0.6349, "step": 18094 }, { "epoch": 1.3448532144184318, "grad_norm": 1.7642108410914468, "learning_rate": 1.214400260272324e-05, "loss": 0.6016, "step": 18095 }, { "epoch": 1.344927536231884, "grad_norm": 2.421417949749374, "learning_rate": 1.2143218885932409e-05, "loss": 0.7082, "step": 18096 }, { "epoch": 1.3450018580453362, "grad_norm": 2.073100846245179, "learning_rate": 1.2142435155343645e-05, "loss": 0.5993, "step": 18097 }, { "epoch": 1.3450761798587885, "grad_norm": 2.1167532699649065, "learning_rate": 1.2141651410961997e-05, "loss": 0.6799, "step": 18098 }, { "epoch": 1.345150501672241, "grad_norm": 2.1826716489552385, "learning_rate": 1.2140867652792504e-05, "loss": 0.6364, "step": 18099 }, { "epoch": 1.345224823485693, "grad_norm": 2.1432507172026636, "learning_rate": 1.2140083880840219e-05, "loss": 0.6723, "step": 18100 }, { "epoch": 1.3452991452991454, "grad_norm": 1.7450496571395704, "learning_rate": 1.2139300095110183e-05, "loss": 0.6043, "step": 18101 }, { "epoch": 1.3453734671125974, "grad_norm": 2.242448257114602, "learning_rate": 1.2138516295607447e-05, "loss": 0.5077, "step": 18102 }, { "epoch": 1.3454477889260499, "grad_norm": 1.5772688878755456, "learning_rate": 1.2137732482337055e-05, "loss": 0.5258, "step": 18103 }, { "epoch": 1.3455221107395021, "grad_norm": 1.5374260018338082, "learning_rate": 1.2136948655304048e-05, "loss": 0.3757, "step": 18104 }, { "epoch": 1.3455964325529544, "grad_norm": 1.98642176097545, "learning_rate": 1.2136164814513478e-05, "loss": 0.5915, "step": 18105 }, { "epoch": 1.3456707543664066, "grad_norm": 2.176740209406285, "learning_rate": 1.2135380959970389e-05, "loss": 0.8015, "step": 18106 }, { "epoch": 1.3457450761798588, "grad_norm": 1.9467859214088068, "learning_rate": 1.2134597091679826e-05, "loss": 0.532, "step": 18107 }, { "epoch": 1.345819397993311, "grad_norm": 1.5825724731451538, "learning_rate": 1.2133813209646842e-05, "loss": 0.4124, "step": 18108 }, { "epoch": 1.3458937198067633, "grad_norm": 1.9802060167806865, "learning_rate": 1.2133029313876481e-05, "loss": 0.6922, "step": 18109 }, { "epoch": 1.3459680416202155, "grad_norm": 2.0585580714567837, "learning_rate": 1.2132245404373783e-05, "loss": 0.6226, "step": 18110 }, { "epoch": 1.3460423634336678, "grad_norm": 1.8547368994426945, "learning_rate": 1.21314614811438e-05, "loss": 0.5835, "step": 18111 }, { "epoch": 1.34611668524712, "grad_norm": 1.5413787193336892, "learning_rate": 1.213067754419158e-05, "loss": 0.5208, "step": 18112 }, { "epoch": 1.3461910070605723, "grad_norm": 2.280631714184549, "learning_rate": 1.2129893593522171e-05, "loss": 0.4539, "step": 18113 }, { "epoch": 1.3462653288740245, "grad_norm": 1.4518443584691965, "learning_rate": 1.2129109629140617e-05, "loss": 0.3499, "step": 18114 }, { "epoch": 1.3463396506874767, "grad_norm": 1.7841612243417069, "learning_rate": 1.2128325651051962e-05, "loss": 0.4647, "step": 18115 }, { "epoch": 1.346413972500929, "grad_norm": 3.792759847565334, "learning_rate": 1.2127541659261258e-05, "loss": 0.5597, "step": 18116 }, { "epoch": 1.3464882943143812, "grad_norm": 2.3458646869402107, "learning_rate": 1.212675765377355e-05, "loss": 0.781, "step": 18117 }, { "epoch": 1.3465626161278335, "grad_norm": 2.262774668924325, "learning_rate": 1.2125973634593889e-05, "loss": 0.6625, "step": 18118 }, { "epoch": 1.3466369379412857, "grad_norm": 1.836439912829473, "learning_rate": 1.212518960172732e-05, "loss": 0.6288, "step": 18119 }, { "epoch": 1.346711259754738, "grad_norm": 1.7124167254238303, "learning_rate": 1.2124405555178893e-05, "loss": 0.4857, "step": 18120 }, { "epoch": 1.3467855815681902, "grad_norm": 2.2436733505646096, "learning_rate": 1.212362149495365e-05, "loss": 0.7048, "step": 18121 }, { "epoch": 1.3468599033816426, "grad_norm": 1.9859252231353217, "learning_rate": 1.212283742105664e-05, "loss": 0.6467, "step": 18122 }, { "epoch": 1.3469342251950946, "grad_norm": 1.8251309947384167, "learning_rate": 1.2122053333492916e-05, "loss": 0.4388, "step": 18123 }, { "epoch": 1.347008547008547, "grad_norm": 2.1819225300104765, "learning_rate": 1.2121269232267522e-05, "loss": 0.8189, "step": 18124 }, { "epoch": 1.3470828688219991, "grad_norm": 2.9408741888402354, "learning_rate": 1.212048511738551e-05, "loss": 0.7199, "step": 18125 }, { "epoch": 1.3471571906354516, "grad_norm": 2.433215399815598, "learning_rate": 1.2119700988851921e-05, "loss": 0.6953, "step": 18126 }, { "epoch": 1.3472315124489038, "grad_norm": 2.0523093811011694, "learning_rate": 1.2118916846671807e-05, "loss": 0.6419, "step": 18127 }, { "epoch": 1.347305834262356, "grad_norm": 2.779195348611421, "learning_rate": 1.2118132690850215e-05, "loss": 0.647, "step": 18128 }, { "epoch": 1.3473801560758083, "grad_norm": 1.8353292367866165, "learning_rate": 1.2117348521392196e-05, "loss": 0.6941, "step": 18129 }, { "epoch": 1.3474544778892605, "grad_norm": 2.4929204297564933, "learning_rate": 1.21165643383028e-05, "loss": 0.7119, "step": 18130 }, { "epoch": 1.3475287997027128, "grad_norm": 1.6579010736878015, "learning_rate": 1.211578014158707e-05, "loss": 0.4948, "step": 18131 }, { "epoch": 1.347603121516165, "grad_norm": 2.490848868188515, "learning_rate": 1.2114995931250058e-05, "loss": 0.8343, "step": 18132 }, { "epoch": 1.3476774433296173, "grad_norm": 2.31180617327833, "learning_rate": 1.211421170729681e-05, "loss": 0.648, "step": 18133 }, { "epoch": 1.3477517651430695, "grad_norm": 1.9185138756870985, "learning_rate": 1.2113427469732377e-05, "loss": 0.7447, "step": 18134 }, { "epoch": 1.3478260869565217, "grad_norm": 1.859836222066686, "learning_rate": 1.211264321856181e-05, "loss": 0.6326, "step": 18135 }, { "epoch": 1.347900408769974, "grad_norm": 2.0758648884372204, "learning_rate": 1.2111858953790154e-05, "loss": 0.6838, "step": 18136 }, { "epoch": 1.3479747305834262, "grad_norm": 2.2092768587410996, "learning_rate": 1.211107467542246e-05, "loss": 0.665, "step": 18137 }, { "epoch": 1.3480490523968784, "grad_norm": 1.9387055134980584, "learning_rate": 1.2110290383463776e-05, "loss": 0.7214, "step": 18138 }, { "epoch": 1.3481233742103307, "grad_norm": 2.2075693113828474, "learning_rate": 1.2109506077919153e-05, "loss": 0.7137, "step": 18139 }, { "epoch": 1.348197696023783, "grad_norm": 2.046086420193489, "learning_rate": 1.210872175879364e-05, "loss": 0.6133, "step": 18140 }, { "epoch": 1.3482720178372352, "grad_norm": 2.555931039747613, "learning_rate": 1.2107937426092285e-05, "loss": 0.531, "step": 18141 }, { "epoch": 1.3483463396506874, "grad_norm": 1.9708567475591747, "learning_rate": 1.210715307982014e-05, "loss": 0.6544, "step": 18142 }, { "epoch": 1.3484206614641399, "grad_norm": 2.0388058489897323, "learning_rate": 1.210636871998225e-05, "loss": 0.7373, "step": 18143 }, { "epoch": 1.3484949832775919, "grad_norm": 2.2039080472686727, "learning_rate": 1.2105584346583667e-05, "loss": 0.6132, "step": 18144 }, { "epoch": 1.3485693050910443, "grad_norm": 2.2931085078609152, "learning_rate": 1.210479995962944e-05, "loss": 0.6545, "step": 18145 }, { "epoch": 1.3486436269044964, "grad_norm": 2.615812254906173, "learning_rate": 1.2104015559124626e-05, "loss": 0.6218, "step": 18146 }, { "epoch": 1.3487179487179488, "grad_norm": 1.6414270005957339, "learning_rate": 1.2103231145074266e-05, "loss": 0.6189, "step": 18147 }, { "epoch": 1.3487922705314008, "grad_norm": 2.5992319400555863, "learning_rate": 1.2102446717483414e-05, "loss": 0.734, "step": 18148 }, { "epoch": 1.3488665923448533, "grad_norm": 2.1728295465568492, "learning_rate": 1.2101662276357116e-05, "loss": 0.6264, "step": 18149 }, { "epoch": 1.3489409141583055, "grad_norm": 2.121055533023744, "learning_rate": 1.2100877821700425e-05, "loss": 0.697, "step": 18150 }, { "epoch": 1.3490152359717578, "grad_norm": 2.161586467790212, "learning_rate": 1.2100093353518396e-05, "loss": 0.6754, "step": 18151 }, { "epoch": 1.34908955778521, "grad_norm": 1.9877773224684876, "learning_rate": 1.209930887181607e-05, "loss": 0.6497, "step": 18152 }, { "epoch": 1.3491638795986622, "grad_norm": 2.45980856387068, "learning_rate": 1.2098524376598506e-05, "loss": 0.8833, "step": 18153 }, { "epoch": 1.3492382014121145, "grad_norm": 2.1623257545308023, "learning_rate": 1.209773986787075e-05, "loss": 0.764, "step": 18154 }, { "epoch": 1.3493125232255667, "grad_norm": 2.5079951236687665, "learning_rate": 1.2096955345637854e-05, "loss": 0.467, "step": 18155 }, { "epoch": 1.349386845039019, "grad_norm": 2.2162049185154635, "learning_rate": 1.2096170809904868e-05, "loss": 0.6167, "step": 18156 }, { "epoch": 1.3494611668524712, "grad_norm": 1.9174578221223042, "learning_rate": 1.2095386260676842e-05, "loss": 0.6159, "step": 18157 }, { "epoch": 1.3495354886659234, "grad_norm": 2.4784357268494728, "learning_rate": 1.209460169795883e-05, "loss": 0.6955, "step": 18158 }, { "epoch": 1.3496098104793757, "grad_norm": 1.7932598766847625, "learning_rate": 1.209381712175588e-05, "loss": 0.5525, "step": 18159 }, { "epoch": 1.349684132292828, "grad_norm": 1.800268789395469, "learning_rate": 1.2093032532073043e-05, "loss": 0.4646, "step": 18160 }, { "epoch": 1.3497584541062801, "grad_norm": 2.2578127759359345, "learning_rate": 1.2092247928915373e-05, "loss": 0.5663, "step": 18161 }, { "epoch": 1.3498327759197324, "grad_norm": 1.7306341092208941, "learning_rate": 1.2091463312287918e-05, "loss": 0.5315, "step": 18162 }, { "epoch": 1.3499070977331846, "grad_norm": 1.9128991998113956, "learning_rate": 1.209067868219573e-05, "loss": 0.6016, "step": 18163 }, { "epoch": 1.3499814195466369, "grad_norm": 1.368118907883754, "learning_rate": 1.2089894038643863e-05, "loss": 0.4645, "step": 18164 }, { "epoch": 1.350055741360089, "grad_norm": 2.2925550226030684, "learning_rate": 1.2089109381637368e-05, "loss": 0.7287, "step": 18165 }, { "epoch": 1.3501300631735416, "grad_norm": 1.9434519317157142, "learning_rate": 1.2088324711181292e-05, "loss": 0.6112, "step": 18166 }, { "epoch": 1.3502043849869936, "grad_norm": 1.4666428064703074, "learning_rate": 1.2087540027280688e-05, "loss": 0.6335, "step": 18167 }, { "epoch": 1.350278706800446, "grad_norm": 2.0991118415574714, "learning_rate": 1.2086755329940617e-05, "loss": 0.6187, "step": 18168 }, { "epoch": 1.350353028613898, "grad_norm": 2.044393818801157, "learning_rate": 1.2085970619166116e-05, "loss": 0.7223, "step": 18169 }, { "epoch": 1.3504273504273505, "grad_norm": 2.2439517530792243, "learning_rate": 1.2085185894962249e-05, "loss": 0.6977, "step": 18170 }, { "epoch": 1.3505016722408028, "grad_norm": 2.3083462328411555, "learning_rate": 1.2084401157334063e-05, "loss": 0.735, "step": 18171 }, { "epoch": 1.350575994054255, "grad_norm": 2.119765993267095, "learning_rate": 1.2083616406286607e-05, "loss": 0.6522, "step": 18172 }, { "epoch": 1.3506503158677072, "grad_norm": 1.7975323233634486, "learning_rate": 1.2082831641824942e-05, "loss": 0.5548, "step": 18173 }, { "epoch": 1.3507246376811595, "grad_norm": 2.044802648213556, "learning_rate": 1.2082046863954109e-05, "loss": 0.5465, "step": 18174 }, { "epoch": 1.3507989594946117, "grad_norm": 1.8481183029616093, "learning_rate": 1.2081262072679169e-05, "loss": 0.5296, "step": 18175 }, { "epoch": 1.350873281308064, "grad_norm": 2.054026863064331, "learning_rate": 1.2080477268005173e-05, "loss": 0.6692, "step": 18176 }, { "epoch": 1.3509476031215162, "grad_norm": 1.7169440661358262, "learning_rate": 1.207969244993717e-05, "loss": 0.5518, "step": 18177 }, { "epoch": 1.3510219249349684, "grad_norm": 2.6011135384472177, "learning_rate": 1.2078907618480215e-05, "loss": 0.6303, "step": 18178 }, { "epoch": 1.3510962467484207, "grad_norm": 1.7433493983814219, "learning_rate": 1.2078122773639364e-05, "loss": 0.4426, "step": 18179 }, { "epoch": 1.351170568561873, "grad_norm": 1.7841349103587625, "learning_rate": 1.2077337915419665e-05, "loss": 0.5525, "step": 18180 }, { "epoch": 1.3512448903753251, "grad_norm": 1.777198482060785, "learning_rate": 1.207655304382617e-05, "loss": 0.6272, "step": 18181 }, { "epoch": 1.3513192121887774, "grad_norm": 3.2553615677649557, "learning_rate": 1.2075768158863936e-05, "loss": 0.7051, "step": 18182 }, { "epoch": 1.3513935340022296, "grad_norm": 1.9056639181545456, "learning_rate": 1.2074983260538016e-05, "loss": 0.7117, "step": 18183 }, { "epoch": 1.3514678558156819, "grad_norm": 2.2452357771974403, "learning_rate": 1.2074198348853458e-05, "loss": 0.6609, "step": 18184 }, { "epoch": 1.351542177629134, "grad_norm": 2.247836352623611, "learning_rate": 1.2073413423815321e-05, "loss": 0.6216, "step": 18185 }, { "epoch": 1.3516164994425863, "grad_norm": 2.285017942768586, "learning_rate": 1.2072628485428655e-05, "loss": 0.7231, "step": 18186 }, { "epoch": 1.3516908212560386, "grad_norm": 2.1715787011502594, "learning_rate": 1.2071843533698515e-05, "loss": 0.69, "step": 18187 }, { "epoch": 1.3517651430694908, "grad_norm": 1.9749073473446292, "learning_rate": 1.2071058568629952e-05, "loss": 0.7144, "step": 18188 }, { "epoch": 1.3518394648829433, "grad_norm": 2.2638059639831827, "learning_rate": 1.2070273590228023e-05, "loss": 0.6806, "step": 18189 }, { "epoch": 1.3519137866963953, "grad_norm": 2.504927185026794, "learning_rate": 1.206948859849778e-05, "loss": 0.8132, "step": 18190 }, { "epoch": 1.3519881085098477, "grad_norm": 2.1485020255276965, "learning_rate": 1.2068703593444278e-05, "loss": 0.7817, "step": 18191 }, { "epoch": 1.3520624303232998, "grad_norm": 1.644682027143965, "learning_rate": 1.2067918575072566e-05, "loss": 0.5984, "step": 18192 }, { "epoch": 1.3521367521367522, "grad_norm": 1.6524116030085712, "learning_rate": 1.2067133543387706e-05, "loss": 0.5754, "step": 18193 }, { "epoch": 1.3522110739502045, "grad_norm": 2.2101520373206145, "learning_rate": 1.2066348498394745e-05, "loss": 0.4997, "step": 18194 }, { "epoch": 1.3522853957636567, "grad_norm": 1.9462480630816315, "learning_rate": 1.206556344009874e-05, "loss": 0.6108, "step": 18195 }, { "epoch": 1.352359717577109, "grad_norm": 1.6013719734920453, "learning_rate": 1.2064778368504743e-05, "loss": 0.5358, "step": 18196 }, { "epoch": 1.3524340393905612, "grad_norm": 1.9972579105613437, "learning_rate": 1.2063993283617813e-05, "loss": 0.6466, "step": 18197 }, { "epoch": 1.3525083612040134, "grad_norm": 1.8042751858786732, "learning_rate": 1.2063208185442999e-05, "loss": 0.5868, "step": 18198 }, { "epoch": 1.3525826830174656, "grad_norm": 2.154142676151263, "learning_rate": 1.206242307398536e-05, "loss": 0.668, "step": 18199 }, { "epoch": 1.3526570048309179, "grad_norm": 2.0579979623800493, "learning_rate": 1.2061637949249945e-05, "loss": 0.7223, "step": 18200 }, { "epoch": 1.3527313266443701, "grad_norm": 3.8255363387635932, "learning_rate": 1.2060852811241814e-05, "loss": 0.7086, "step": 18201 }, { "epoch": 1.3528056484578224, "grad_norm": 1.8390017663756575, "learning_rate": 1.2060067659966019e-05, "loss": 0.6071, "step": 18202 }, { "epoch": 1.3528799702712746, "grad_norm": 2.024330520292883, "learning_rate": 1.2059282495427613e-05, "loss": 0.7692, "step": 18203 }, { "epoch": 1.3529542920847268, "grad_norm": 2.5238569871744785, "learning_rate": 1.2058497317631658e-05, "loss": 0.5048, "step": 18204 }, { "epoch": 1.353028613898179, "grad_norm": 1.8300002171249832, "learning_rate": 1.2057712126583202e-05, "loss": 0.5814, "step": 18205 }, { "epoch": 1.3531029357116313, "grad_norm": 1.7379879565176082, "learning_rate": 1.2056926922287302e-05, "loss": 0.4996, "step": 18206 }, { "epoch": 1.3531772575250836, "grad_norm": 2.3070554487496886, "learning_rate": 1.2056141704749014e-05, "loss": 0.5945, "step": 18207 }, { "epoch": 1.3532515793385358, "grad_norm": 2.0087863537867023, "learning_rate": 1.2055356473973389e-05, "loss": 0.6314, "step": 18208 }, { "epoch": 1.353325901151988, "grad_norm": 1.8093204438822652, "learning_rate": 1.2054571229965488e-05, "loss": 0.6746, "step": 18209 }, { "epoch": 1.3534002229654405, "grad_norm": 2.316632583210745, "learning_rate": 1.2053785972730365e-05, "loss": 0.61, "step": 18210 }, { "epoch": 1.3534745447788925, "grad_norm": 1.9377855963707022, "learning_rate": 1.2053000702273075e-05, "loss": 0.5693, "step": 18211 }, { "epoch": 1.353548866592345, "grad_norm": 1.8295540150610943, "learning_rate": 1.2052215418598672e-05, "loss": 0.5128, "step": 18212 }, { "epoch": 1.353623188405797, "grad_norm": 2.179092395529163, "learning_rate": 1.205143012171221e-05, "loss": 0.6725, "step": 18213 }, { "epoch": 1.3536975102192494, "grad_norm": 1.8575548738634258, "learning_rate": 1.205064481161875e-05, "loss": 0.5961, "step": 18214 }, { "epoch": 1.3537718320327015, "grad_norm": 2.301988523973216, "learning_rate": 1.2049859488323343e-05, "loss": 0.5775, "step": 18215 }, { "epoch": 1.353846153846154, "grad_norm": 1.9100348471655955, "learning_rate": 1.204907415183105e-05, "loss": 0.4631, "step": 18216 }, { "epoch": 1.3539204756596062, "grad_norm": 1.9220946685415432, "learning_rate": 1.2048288802146923e-05, "loss": 0.6808, "step": 18217 }, { "epoch": 1.3539947974730584, "grad_norm": 2.0543697576740048, "learning_rate": 1.2047503439276019e-05, "loss": 0.6187, "step": 18218 }, { "epoch": 1.3540691192865106, "grad_norm": 2.0728660019148726, "learning_rate": 1.2046718063223393e-05, "loss": 0.6497, "step": 18219 }, { "epoch": 1.3541434410999629, "grad_norm": 1.701882328093592, "learning_rate": 1.2045932673994103e-05, "loss": 0.5066, "step": 18220 }, { "epoch": 1.3542177629134151, "grad_norm": 1.7185554188725871, "learning_rate": 1.2045147271593204e-05, "loss": 0.5238, "step": 18221 }, { "epoch": 1.3542920847268674, "grad_norm": 2.152885586822804, "learning_rate": 1.2044361856025755e-05, "loss": 0.7189, "step": 18222 }, { "epoch": 1.3543664065403196, "grad_norm": 2.0170250027557564, "learning_rate": 1.2043576427296807e-05, "loss": 0.758, "step": 18223 }, { "epoch": 1.3544407283537718, "grad_norm": 2.3482829963230176, "learning_rate": 1.2042790985411424e-05, "loss": 0.6649, "step": 18224 }, { "epoch": 1.354515050167224, "grad_norm": 2.123441411587582, "learning_rate": 1.2042005530374655e-05, "loss": 0.5299, "step": 18225 }, { "epoch": 1.3545893719806763, "grad_norm": 2.211755057809048, "learning_rate": 1.2041220062191562e-05, "loss": 0.5262, "step": 18226 }, { "epoch": 1.3546636937941285, "grad_norm": 1.650887759874664, "learning_rate": 1.20404345808672e-05, "loss": 0.5976, "step": 18227 }, { "epoch": 1.3547380156075808, "grad_norm": 8.671434608577355, "learning_rate": 1.2039649086406629e-05, "loss": 0.6513, "step": 18228 }, { "epoch": 1.354812337421033, "grad_norm": 1.8068227183420122, "learning_rate": 1.2038863578814899e-05, "loss": 0.634, "step": 18229 }, { "epoch": 1.3548866592344853, "grad_norm": 2.132630419098759, "learning_rate": 1.203807805809707e-05, "loss": 0.6493, "step": 18230 }, { "epoch": 1.3549609810479375, "grad_norm": 2.03665847246479, "learning_rate": 1.2037292524258202e-05, "loss": 0.6984, "step": 18231 }, { "epoch": 1.3550353028613897, "grad_norm": 1.7067968009401995, "learning_rate": 1.203650697730335e-05, "loss": 0.532, "step": 18232 }, { "epoch": 1.3551096246748422, "grad_norm": 1.9407207131740498, "learning_rate": 1.2035721417237574e-05, "loss": 0.7004, "step": 18233 }, { "epoch": 1.3551839464882942, "grad_norm": 1.577272485372809, "learning_rate": 1.203493584406593e-05, "loss": 0.4614, "step": 18234 }, { "epoch": 1.3552582683017467, "grad_norm": 1.9850577580144175, "learning_rate": 1.203415025779347e-05, "loss": 0.605, "step": 18235 }, { "epoch": 1.3553325901151987, "grad_norm": 2.0795941648022116, "learning_rate": 1.203336465842526e-05, "loss": 0.7516, "step": 18236 }, { "epoch": 1.3554069119286511, "grad_norm": 2.438384639951838, "learning_rate": 1.203257904596635e-05, "loss": 0.5421, "step": 18237 }, { "epoch": 1.3554812337421034, "grad_norm": 1.8011489777894862, "learning_rate": 1.2031793420421806e-05, "loss": 0.5959, "step": 18238 }, { "epoch": 1.3555555555555556, "grad_norm": 2.199235363898535, "learning_rate": 1.203100778179668e-05, "loss": 0.5636, "step": 18239 }, { "epoch": 1.3556298773690079, "grad_norm": 1.7628634085333168, "learning_rate": 1.203022213009603e-05, "loss": 0.6395, "step": 18240 }, { "epoch": 1.35570419918246, "grad_norm": 2.362536445131774, "learning_rate": 1.2029436465324916e-05, "loss": 0.6654, "step": 18241 }, { "epoch": 1.3557785209959123, "grad_norm": 2.402950859732324, "learning_rate": 1.2028650787488393e-05, "loss": 0.6636, "step": 18242 }, { "epoch": 1.3558528428093646, "grad_norm": 1.9073004033490637, "learning_rate": 1.2027865096591524e-05, "loss": 0.663, "step": 18243 }, { "epoch": 1.3559271646228168, "grad_norm": 2.2478531716307906, "learning_rate": 1.2027079392639364e-05, "loss": 0.7123, "step": 18244 }, { "epoch": 1.356001486436269, "grad_norm": 1.9622142551926824, "learning_rate": 1.2026293675636975e-05, "loss": 0.6283, "step": 18245 }, { "epoch": 1.3560758082497213, "grad_norm": 2.090607522862363, "learning_rate": 1.202550794558941e-05, "loss": 0.6786, "step": 18246 }, { "epoch": 1.3561501300631735, "grad_norm": 2.169448068385148, "learning_rate": 1.2024722202501729e-05, "loss": 0.8201, "step": 18247 }, { "epoch": 1.3562244518766258, "grad_norm": 1.9015611859737485, "learning_rate": 1.2023936446378993e-05, "loss": 0.5548, "step": 18248 }, { "epoch": 1.356298773690078, "grad_norm": 2.0940548472307716, "learning_rate": 1.2023150677226259e-05, "loss": 0.6585, "step": 18249 }, { "epoch": 1.3563730955035302, "grad_norm": 1.9622075278957118, "learning_rate": 1.2022364895048585e-05, "loss": 0.5705, "step": 18250 }, { "epoch": 1.3564474173169825, "grad_norm": 2.1899424719893763, "learning_rate": 1.202157909985103e-05, "loss": 0.6924, "step": 18251 }, { "epoch": 1.3565217391304347, "grad_norm": 2.1569786687352357, "learning_rate": 1.2020793291638656e-05, "loss": 0.5436, "step": 18252 }, { "epoch": 1.356596060943887, "grad_norm": 1.743701457015037, "learning_rate": 1.2020007470416518e-05, "loss": 0.5371, "step": 18253 }, { "epoch": 1.3566703827573392, "grad_norm": 2.322195882809838, "learning_rate": 1.2019221636189675e-05, "loss": 0.6039, "step": 18254 }, { "epoch": 1.3567447045707914, "grad_norm": 2.6579328410751075, "learning_rate": 1.2018435788963191e-05, "loss": 0.6168, "step": 18255 }, { "epoch": 1.356819026384244, "grad_norm": 2.22903148476514, "learning_rate": 1.2017649928742123e-05, "loss": 0.6623, "step": 18256 }, { "epoch": 1.356893348197696, "grad_norm": 1.9849565200042445, "learning_rate": 1.2016864055531527e-05, "loss": 0.5788, "step": 18257 }, { "epoch": 1.3569676700111484, "grad_norm": 1.7334988033225895, "learning_rate": 1.2016078169336465e-05, "loss": 0.4914, "step": 18258 }, { "epoch": 1.3570419918246004, "grad_norm": 2.0722561697098585, "learning_rate": 1.2015292270161995e-05, "loss": 0.7223, "step": 18259 }, { "epoch": 1.3571163136380529, "grad_norm": 2.0051942161359415, "learning_rate": 1.2014506358013183e-05, "loss": 0.7023, "step": 18260 }, { "epoch": 1.357190635451505, "grad_norm": 2.095431975076447, "learning_rate": 1.2013720432895081e-05, "loss": 0.7311, "step": 18261 }, { "epoch": 1.3572649572649573, "grad_norm": 1.7640464978226693, "learning_rate": 1.201293449481275e-05, "loss": 0.6688, "step": 18262 }, { "epoch": 1.3573392790784096, "grad_norm": 1.9377132212293116, "learning_rate": 1.2012148543771252e-05, "loss": 0.6697, "step": 18263 }, { "epoch": 1.3574136008918618, "grad_norm": 1.3580027405474073, "learning_rate": 1.2011362579775646e-05, "loss": 0.4125, "step": 18264 }, { "epoch": 1.357487922705314, "grad_norm": 2.850912433580749, "learning_rate": 1.2010576602830993e-05, "loss": 0.5728, "step": 18265 }, { "epoch": 1.3575622445187663, "grad_norm": 1.6875880752316457, "learning_rate": 1.2009790612942349e-05, "loss": 0.5705, "step": 18266 }, { "epoch": 1.3576365663322185, "grad_norm": 2.621745255276379, "learning_rate": 1.200900461011478e-05, "loss": 0.712, "step": 18267 }, { "epoch": 1.3577108881456708, "grad_norm": 2.5121111607436153, "learning_rate": 1.2008218594353345e-05, "loss": 0.6784, "step": 18268 }, { "epoch": 1.357785209959123, "grad_norm": 2.0711264130924176, "learning_rate": 1.20074325656631e-05, "loss": 0.5591, "step": 18269 }, { "epoch": 1.3578595317725752, "grad_norm": 2.419049314138467, "learning_rate": 1.2006646524049109e-05, "loss": 0.6541, "step": 18270 }, { "epoch": 1.3579338535860275, "grad_norm": 2.03491761996387, "learning_rate": 1.2005860469516433e-05, "loss": 0.6512, "step": 18271 }, { "epoch": 1.3580081753994797, "grad_norm": 2.1377412858384406, "learning_rate": 1.2005074402070132e-05, "loss": 0.5623, "step": 18272 }, { "epoch": 1.358082497212932, "grad_norm": 2.027584797258771, "learning_rate": 1.2004288321715263e-05, "loss": 0.5683, "step": 18273 }, { "epoch": 1.3581568190263842, "grad_norm": 1.7040468383150373, "learning_rate": 1.200350222845689e-05, "loss": 0.5988, "step": 18274 }, { "epoch": 1.3582311408398364, "grad_norm": 1.8379921471198375, "learning_rate": 1.2002716122300073e-05, "loss": 0.7207, "step": 18275 }, { "epoch": 1.3583054626532887, "grad_norm": 2.227621718794247, "learning_rate": 1.2001930003249877e-05, "loss": 0.4459, "step": 18276 }, { "epoch": 1.3583797844667411, "grad_norm": 1.9752101418827899, "learning_rate": 1.2001143871311358e-05, "loss": 0.5897, "step": 18277 }, { "epoch": 1.3584541062801931, "grad_norm": 2.3828797315764327, "learning_rate": 1.200035772648958e-05, "loss": 0.6171, "step": 18278 }, { "epoch": 1.3585284280936456, "grad_norm": 2.263838344087824, "learning_rate": 1.19995715687896e-05, "loss": 0.5178, "step": 18279 }, { "epoch": 1.3586027499070976, "grad_norm": 2.295969170463442, "learning_rate": 1.1998785398216484e-05, "loss": 0.7056, "step": 18280 }, { "epoch": 1.35867707172055, "grad_norm": 1.8482474691315591, "learning_rate": 1.1997999214775288e-05, "loss": 0.6216, "step": 18281 }, { "epoch": 1.358751393534002, "grad_norm": 2.793229912893043, "learning_rate": 1.1997213018471081e-05, "loss": 0.6035, "step": 18282 }, { "epoch": 1.3588257153474546, "grad_norm": 2.072513084474932, "learning_rate": 1.199642680930892e-05, "loss": 0.6283, "step": 18283 }, { "epoch": 1.3589000371609068, "grad_norm": 2.3351496399170535, "learning_rate": 1.1995640587293864e-05, "loss": 0.8236, "step": 18284 }, { "epoch": 1.358974358974359, "grad_norm": 1.494301268853125, "learning_rate": 1.1994854352430979e-05, "loss": 0.4927, "step": 18285 }, { "epoch": 1.3590486807878113, "grad_norm": 2.2747905089275533, "learning_rate": 1.1994068104725322e-05, "loss": 0.6767, "step": 18286 }, { "epoch": 1.3591230026012635, "grad_norm": 2.1922480212828472, "learning_rate": 1.1993281844181964e-05, "loss": 0.6621, "step": 18287 }, { "epoch": 1.3591973244147157, "grad_norm": 2.1859280038539763, "learning_rate": 1.1992495570805956e-05, "loss": 0.4192, "step": 18288 }, { "epoch": 1.359271646228168, "grad_norm": 2.210199011293457, "learning_rate": 1.199170928460237e-05, "loss": 0.771, "step": 18289 }, { "epoch": 1.3593459680416202, "grad_norm": 1.683474550843613, "learning_rate": 1.1990922985576257e-05, "loss": 0.4587, "step": 18290 }, { "epoch": 1.3594202898550725, "grad_norm": 2.646335900050428, "learning_rate": 1.1990136673732689e-05, "loss": 0.7468, "step": 18291 }, { "epoch": 1.3594946116685247, "grad_norm": 2.0453294698215654, "learning_rate": 1.198935034907672e-05, "loss": 0.5232, "step": 18292 }, { "epoch": 1.359568933481977, "grad_norm": 1.9226450734945497, "learning_rate": 1.1988564011613422e-05, "loss": 0.4619, "step": 18293 }, { "epoch": 1.3596432552954292, "grad_norm": 2.0535919044111925, "learning_rate": 1.1987777661347851e-05, "loss": 0.6576, "step": 18294 }, { "epoch": 1.3597175771088814, "grad_norm": 1.8693058808781253, "learning_rate": 1.1986991298285069e-05, "loss": 0.6153, "step": 18295 }, { "epoch": 1.3597918989223337, "grad_norm": 2.000864421500412, "learning_rate": 1.198620492243014e-05, "loss": 0.5186, "step": 18296 }, { "epoch": 1.359866220735786, "grad_norm": 2.1849047464675047, "learning_rate": 1.1985418533788127e-05, "loss": 0.607, "step": 18297 }, { "epoch": 1.3599405425492381, "grad_norm": 2.1319303008033472, "learning_rate": 1.1984632132364093e-05, "loss": 0.6662, "step": 18298 }, { "epoch": 1.3600148643626904, "grad_norm": 2.4793564065022395, "learning_rate": 1.19838457181631e-05, "loss": 0.6851, "step": 18299 }, { "epoch": 1.3600891861761428, "grad_norm": 2.2847980612863545, "learning_rate": 1.1983059291190213e-05, "loss": 0.482, "step": 18300 }, { "epoch": 1.3601635079895948, "grad_norm": 1.6408037949402436, "learning_rate": 1.1982272851450492e-05, "loss": 0.4985, "step": 18301 }, { "epoch": 1.3602378298030473, "grad_norm": 2.007881314110752, "learning_rate": 1.1981486398948998e-05, "loss": 0.634, "step": 18302 }, { "epoch": 1.3603121516164993, "grad_norm": 1.9666915436396224, "learning_rate": 1.19806999336908e-05, "loss": 0.7291, "step": 18303 }, { "epoch": 1.3603864734299518, "grad_norm": 2.1934855651754237, "learning_rate": 1.197991345568096e-05, "loss": 0.6637, "step": 18304 }, { "epoch": 1.360460795243404, "grad_norm": 1.943395996375521, "learning_rate": 1.1979126964924538e-05, "loss": 0.6212, "step": 18305 }, { "epoch": 1.3605351170568563, "grad_norm": 1.7284029174306137, "learning_rate": 1.19783404614266e-05, "loss": 0.4304, "step": 18306 }, { "epoch": 1.3606094388703085, "grad_norm": 1.740015040676697, "learning_rate": 1.1977553945192209e-05, "loss": 0.624, "step": 18307 }, { "epoch": 1.3606837606837607, "grad_norm": 2.198085897929362, "learning_rate": 1.1976767416226427e-05, "loss": 0.7261, "step": 18308 }, { "epoch": 1.360758082497213, "grad_norm": 1.9666483922205724, "learning_rate": 1.197598087453432e-05, "loss": 0.5582, "step": 18309 }, { "epoch": 1.3608324043106652, "grad_norm": 1.8715162107068313, "learning_rate": 1.1975194320120951e-05, "loss": 0.5727, "step": 18310 }, { "epoch": 1.3609067261241174, "grad_norm": 2.248659663090049, "learning_rate": 1.1974407752991381e-05, "loss": 0.6485, "step": 18311 }, { "epoch": 1.3609810479375697, "grad_norm": 1.8055101563304583, "learning_rate": 1.1973621173150683e-05, "loss": 0.6717, "step": 18312 }, { "epoch": 1.361055369751022, "grad_norm": 2.2246162476987754, "learning_rate": 1.1972834580603907e-05, "loss": 0.6602, "step": 18313 }, { "epoch": 1.3611296915644742, "grad_norm": 1.9288343622534367, "learning_rate": 1.1972047975356126e-05, "loss": 0.5666, "step": 18314 }, { "epoch": 1.3612040133779264, "grad_norm": 1.767832097639354, "learning_rate": 1.1971261357412405e-05, "loss": 0.5238, "step": 18315 }, { "epoch": 1.3612783351913786, "grad_norm": 1.6465582242676133, "learning_rate": 1.1970474726777803e-05, "loss": 0.4368, "step": 18316 }, { "epoch": 1.3613526570048309, "grad_norm": 1.8093996515048887, "learning_rate": 1.196968808345739e-05, "loss": 0.5, "step": 18317 }, { "epoch": 1.3614269788182831, "grad_norm": 2.2206967823007924, "learning_rate": 1.1968901427456225e-05, "loss": 0.7341, "step": 18318 }, { "epoch": 1.3615013006317354, "grad_norm": 2.571291246946495, "learning_rate": 1.1968114758779375e-05, "loss": 0.5268, "step": 18319 }, { "epoch": 1.3615756224451876, "grad_norm": 1.6637000723667428, "learning_rate": 1.1967328077431906e-05, "loss": 0.575, "step": 18320 }, { "epoch": 1.3616499442586398, "grad_norm": 1.8741686184564827, "learning_rate": 1.1966541383418878e-05, "loss": 0.6213, "step": 18321 }, { "epoch": 1.361724266072092, "grad_norm": 2.1310014781848237, "learning_rate": 1.1965754676745358e-05, "loss": 0.6234, "step": 18322 }, { "epoch": 1.3617985878855445, "grad_norm": 2.022495604939163, "learning_rate": 1.1964967957416415e-05, "loss": 0.5367, "step": 18323 }, { "epoch": 1.3618729096989965, "grad_norm": 2.5087607214037857, "learning_rate": 1.1964181225437109e-05, "loss": 0.6518, "step": 18324 }, { "epoch": 1.361947231512449, "grad_norm": 1.672974442794849, "learning_rate": 1.1963394480812504e-05, "loss": 0.5421, "step": 18325 }, { "epoch": 1.362021553325901, "grad_norm": 1.6063817372380433, "learning_rate": 1.196260772354767e-05, "loss": 0.5362, "step": 18326 }, { "epoch": 1.3620958751393535, "grad_norm": 1.9623733075407355, "learning_rate": 1.1961820953647669e-05, "loss": 0.5261, "step": 18327 }, { "epoch": 1.3621701969528057, "grad_norm": 1.374701943334954, "learning_rate": 1.1961034171117565e-05, "loss": 0.4604, "step": 18328 }, { "epoch": 1.362244518766258, "grad_norm": 1.7344308641592865, "learning_rate": 1.1960247375962426e-05, "loss": 0.5836, "step": 18329 }, { "epoch": 1.3623188405797102, "grad_norm": 2.1589046149452655, "learning_rate": 1.1959460568187316e-05, "loss": 0.723, "step": 18330 }, { "epoch": 1.3623931623931624, "grad_norm": 2.2003469090300625, "learning_rate": 1.19586737477973e-05, "loss": 0.6833, "step": 18331 }, { "epoch": 1.3624674842066147, "grad_norm": 1.6547748374978395, "learning_rate": 1.1957886914797446e-05, "loss": 0.5177, "step": 18332 }, { "epoch": 1.362541806020067, "grad_norm": 1.9470727486269153, "learning_rate": 1.1957100069192813e-05, "loss": 0.5965, "step": 18333 }, { "epoch": 1.3626161278335192, "grad_norm": 2.0953508221815085, "learning_rate": 1.1956313210988477e-05, "loss": 0.6676, "step": 18334 }, { "epoch": 1.3626904496469714, "grad_norm": 1.8841002528873763, "learning_rate": 1.1955526340189494e-05, "loss": 0.6347, "step": 18335 }, { "epoch": 1.3627647714604236, "grad_norm": 1.9674429705973713, "learning_rate": 1.1954739456800934e-05, "loss": 0.5537, "step": 18336 }, { "epoch": 1.3628390932738759, "grad_norm": 1.557835326420361, "learning_rate": 1.1953952560827866e-05, "loss": 0.6002, "step": 18337 }, { "epoch": 1.362913415087328, "grad_norm": 1.8588282417509354, "learning_rate": 1.195316565227535e-05, "loss": 0.53, "step": 18338 }, { "epoch": 1.3629877369007803, "grad_norm": 1.7745828717954273, "learning_rate": 1.1952378731148453e-05, "loss": 0.5333, "step": 18339 }, { "epoch": 1.3630620587142326, "grad_norm": 1.832820119901529, "learning_rate": 1.195159179745225e-05, "loss": 0.5389, "step": 18340 }, { "epoch": 1.3631363805276848, "grad_norm": 2.1393625126984808, "learning_rate": 1.1950804851191793e-05, "loss": 0.6404, "step": 18341 }, { "epoch": 1.363210702341137, "grad_norm": 2.303253095444152, "learning_rate": 1.1950017892372162e-05, "loss": 0.6732, "step": 18342 }, { "epoch": 1.3632850241545893, "grad_norm": 1.6785502283474054, "learning_rate": 1.1949230920998412e-05, "loss": 0.6263, "step": 18343 }, { "epoch": 1.3633593459680418, "grad_norm": 1.9211517218757093, "learning_rate": 1.1948443937075614e-05, "loss": 0.6421, "step": 18344 }, { "epoch": 1.3634336677814938, "grad_norm": 2.314362766069196, "learning_rate": 1.1947656940608836e-05, "loss": 0.7856, "step": 18345 }, { "epoch": 1.3635079895949462, "grad_norm": 2.096491780624701, "learning_rate": 1.1946869931603147e-05, "loss": 0.6452, "step": 18346 }, { "epoch": 1.3635823114083983, "grad_norm": 1.8012954091467035, "learning_rate": 1.1946082910063606e-05, "loss": 0.4953, "step": 18347 }, { "epoch": 1.3636566332218507, "grad_norm": 2.174877670665937, "learning_rate": 1.194529587599529e-05, "loss": 0.6489, "step": 18348 }, { "epoch": 1.3637309550353027, "grad_norm": 1.5591022336613936, "learning_rate": 1.1944508829403255e-05, "loss": 0.5301, "step": 18349 }, { "epoch": 1.3638052768487552, "grad_norm": 2.5705509752771545, "learning_rate": 1.1943721770292572e-05, "loss": 0.6817, "step": 18350 }, { "epoch": 1.3638795986622074, "grad_norm": 1.5673413191891667, "learning_rate": 1.1942934698668313e-05, "loss": 0.4081, "step": 18351 }, { "epoch": 1.3639539204756597, "grad_norm": 1.856108588596058, "learning_rate": 1.1942147614535538e-05, "loss": 0.4936, "step": 18352 }, { "epoch": 1.364028242289112, "grad_norm": 1.4227158399755473, "learning_rate": 1.1941360517899322e-05, "loss": 0.3753, "step": 18353 }, { "epoch": 1.3641025641025641, "grad_norm": 1.900984580860185, "learning_rate": 1.1940573408764724e-05, "loss": 0.6039, "step": 18354 }, { "epoch": 1.3641768859160164, "grad_norm": 2.038533575209563, "learning_rate": 1.1939786287136814e-05, "loss": 0.6215, "step": 18355 }, { "epoch": 1.3642512077294686, "grad_norm": 1.9676728686822247, "learning_rate": 1.1938999153020664e-05, "loss": 0.5697, "step": 18356 }, { "epoch": 1.3643255295429209, "grad_norm": 2.35898204592892, "learning_rate": 1.1938212006421335e-05, "loss": 0.7102, "step": 18357 }, { "epoch": 1.364399851356373, "grad_norm": 1.9679544750397069, "learning_rate": 1.1937424847343899e-05, "loss": 0.7211, "step": 18358 }, { "epoch": 1.3644741731698253, "grad_norm": 2.293685102457143, "learning_rate": 1.1936637675793424e-05, "loss": 0.5846, "step": 18359 }, { "epoch": 1.3645484949832776, "grad_norm": 2.5251019654371096, "learning_rate": 1.1935850491774975e-05, "loss": 0.7257, "step": 18360 }, { "epoch": 1.3646228167967298, "grad_norm": 2.0964157905298615, "learning_rate": 1.1935063295293618e-05, "loss": 0.6924, "step": 18361 }, { "epoch": 1.364697138610182, "grad_norm": 2.2041800305725747, "learning_rate": 1.1934276086354426e-05, "loss": 0.6607, "step": 18362 }, { "epoch": 1.3647714604236343, "grad_norm": 1.8778987092945811, "learning_rate": 1.1933488864962469e-05, "loss": 0.589, "step": 18363 }, { "epoch": 1.3648457822370865, "grad_norm": 2.0042743586554166, "learning_rate": 1.1932701631122807e-05, "loss": 0.4757, "step": 18364 }, { "epoch": 1.3649201040505388, "grad_norm": 2.2952650632844858, "learning_rate": 1.1931914384840512e-05, "loss": 0.7137, "step": 18365 }, { "epoch": 1.364994425863991, "grad_norm": 2.1258442149176555, "learning_rate": 1.1931127126120652e-05, "loss": 0.6895, "step": 18366 }, { "epoch": 1.3650687476774435, "grad_norm": 2.164299080106142, "learning_rate": 1.1930339854968296e-05, "loss": 0.7629, "step": 18367 }, { "epoch": 1.3651430694908955, "grad_norm": 2.1280984441780233, "learning_rate": 1.1929552571388516e-05, "loss": 0.6394, "step": 18368 }, { "epoch": 1.365217391304348, "grad_norm": 1.884176760132445, "learning_rate": 1.1928765275386372e-05, "loss": 0.6052, "step": 18369 }, { "epoch": 1.3652917131178, "grad_norm": 3.3282411951140554, "learning_rate": 1.1927977966966941e-05, "loss": 0.7516, "step": 18370 }, { "epoch": 1.3653660349312524, "grad_norm": 1.8104767956503751, "learning_rate": 1.1927190646135285e-05, "loss": 0.5628, "step": 18371 }, { "epoch": 1.3654403567447047, "grad_norm": 2.0036352614427377, "learning_rate": 1.1926403312896476e-05, "loss": 0.7049, "step": 18372 }, { "epoch": 1.365514678558157, "grad_norm": 1.6433870967081023, "learning_rate": 1.1925615967255582e-05, "loss": 0.4665, "step": 18373 }, { "epoch": 1.3655890003716091, "grad_norm": 2.1134972775283107, "learning_rate": 1.1924828609217673e-05, "loss": 0.6927, "step": 18374 }, { "epoch": 1.3656633221850614, "grad_norm": 1.8056866270440424, "learning_rate": 1.192404123878782e-05, "loss": 0.6061, "step": 18375 }, { "epoch": 1.3657376439985136, "grad_norm": 1.69644374293804, "learning_rate": 1.1923253855971086e-05, "loss": 0.5247, "step": 18376 }, { "epoch": 1.3658119658119658, "grad_norm": 1.6314636669673463, "learning_rate": 1.1922466460772544e-05, "loss": 0.5549, "step": 18377 }, { "epoch": 1.365886287625418, "grad_norm": 2.0818863430971914, "learning_rate": 1.1921679053197261e-05, "loss": 0.738, "step": 18378 }, { "epoch": 1.3659606094388703, "grad_norm": 1.5460099747979041, "learning_rate": 1.1920891633250309e-05, "loss": 0.5277, "step": 18379 }, { "epoch": 1.3660349312523226, "grad_norm": 1.882304507949575, "learning_rate": 1.192010420093676e-05, "loss": 0.6127, "step": 18380 }, { "epoch": 1.3661092530657748, "grad_norm": 1.766862784400129, "learning_rate": 1.1919316756261678e-05, "loss": 0.5932, "step": 18381 }, { "epoch": 1.366183574879227, "grad_norm": 1.927919394591342, "learning_rate": 1.1918529299230132e-05, "loss": 0.6138, "step": 18382 }, { "epoch": 1.3662578966926793, "grad_norm": 2.1568409101890427, "learning_rate": 1.1917741829847194e-05, "loss": 0.5596, "step": 18383 }, { "epoch": 1.3663322185061315, "grad_norm": 1.7464148846467642, "learning_rate": 1.1916954348117935e-05, "loss": 0.4567, "step": 18384 }, { "epoch": 1.3664065403195838, "grad_norm": 1.9708062973469473, "learning_rate": 1.1916166854047423e-05, "loss": 0.5501, "step": 18385 }, { "epoch": 1.366480862133036, "grad_norm": 2.5460774150719865, "learning_rate": 1.1915379347640731e-05, "loss": 0.5651, "step": 18386 }, { "epoch": 1.3665551839464882, "grad_norm": 1.859767066647156, "learning_rate": 1.1914591828902922e-05, "loss": 0.5558, "step": 18387 }, { "epoch": 1.3666295057599405, "grad_norm": 2.155534719848829, "learning_rate": 1.1913804297839072e-05, "loss": 0.7304, "step": 18388 }, { "epoch": 1.3667038275733927, "grad_norm": 1.8197899922589658, "learning_rate": 1.1913016754454248e-05, "loss": 0.5839, "step": 18389 }, { "epoch": 1.3667781493868452, "grad_norm": 1.5284626678275917, "learning_rate": 1.1912229198753522e-05, "loss": 0.4605, "step": 18390 }, { "epoch": 1.3668524712002972, "grad_norm": 2.040222960373252, "learning_rate": 1.1911441630741967e-05, "loss": 0.6161, "step": 18391 }, { "epoch": 1.3669267930137496, "grad_norm": 2.5562622624507667, "learning_rate": 1.1910654050424646e-05, "loss": 0.6372, "step": 18392 }, { "epoch": 1.3670011148272017, "grad_norm": 2.3738966157034644, "learning_rate": 1.1909866457806634e-05, "loss": 0.7519, "step": 18393 }, { "epoch": 1.3670754366406541, "grad_norm": 2.005357033237539, "learning_rate": 1.1909078852893e-05, "loss": 0.6441, "step": 18394 }, { "epoch": 1.3671497584541064, "grad_norm": 2.0202614379877097, "learning_rate": 1.1908291235688816e-05, "loss": 0.6837, "step": 18395 }, { "epoch": 1.3672240802675586, "grad_norm": 1.484141868875497, "learning_rate": 1.1907503606199153e-05, "loss": 0.4409, "step": 18396 }, { "epoch": 1.3672984020810108, "grad_norm": 2.197171642832511, "learning_rate": 1.1906715964429083e-05, "loss": 0.5863, "step": 18397 }, { "epoch": 1.367372723894463, "grad_norm": 2.3614852758079827, "learning_rate": 1.190592831038367e-05, "loss": 0.6814, "step": 18398 }, { "epoch": 1.3674470457079153, "grad_norm": 1.907033900238669, "learning_rate": 1.1905140644067992e-05, "loss": 0.645, "step": 18399 }, { "epoch": 1.3675213675213675, "grad_norm": 2.6384046666948695, "learning_rate": 1.1904352965487115e-05, "loss": 0.6388, "step": 18400 }, { "epoch": 1.3675956893348198, "grad_norm": 1.9306690167455123, "learning_rate": 1.1903565274646117e-05, "loss": 0.6053, "step": 18401 }, { "epoch": 1.367670011148272, "grad_norm": 1.9672463577829928, "learning_rate": 1.1902777571550061e-05, "loss": 0.6289, "step": 18402 }, { "epoch": 1.3677443329617243, "grad_norm": 2.4045068533267515, "learning_rate": 1.190198985620402e-05, "loss": 0.7691, "step": 18403 }, { "epoch": 1.3678186547751765, "grad_norm": 1.6930372302589256, "learning_rate": 1.1901202128613072e-05, "loss": 0.5218, "step": 18404 }, { "epoch": 1.3678929765886287, "grad_norm": 1.9814450936049632, "learning_rate": 1.190041438878228e-05, "loss": 0.7232, "step": 18405 }, { "epoch": 1.367967298402081, "grad_norm": 1.536118576473724, "learning_rate": 1.1899626636716716e-05, "loss": 0.4706, "step": 18406 }, { "epoch": 1.3680416202155332, "grad_norm": 2.327230502195956, "learning_rate": 1.1898838872421461e-05, "loss": 0.7373, "step": 18407 }, { "epoch": 1.3681159420289855, "grad_norm": 2.1347113601578527, "learning_rate": 1.1898051095901577e-05, "loss": 0.6361, "step": 18408 }, { "epoch": 1.3681902638424377, "grad_norm": 12.97939076233136, "learning_rate": 1.1897263307162139e-05, "loss": 0.5463, "step": 18409 }, { "epoch": 1.36826458565589, "grad_norm": 1.6694345468050231, "learning_rate": 1.1896475506208216e-05, "loss": 0.3996, "step": 18410 }, { "epoch": 1.3683389074693422, "grad_norm": 1.7819851845618946, "learning_rate": 1.1895687693044881e-05, "loss": 0.4826, "step": 18411 }, { "epoch": 1.3684132292827944, "grad_norm": 2.128409240093979, "learning_rate": 1.1894899867677213e-05, "loss": 0.7278, "step": 18412 }, { "epoch": 1.3684875510962469, "grad_norm": 2.42321126227747, "learning_rate": 1.1894112030110274e-05, "loss": 0.7516, "step": 18413 }, { "epoch": 1.3685618729096989, "grad_norm": 1.7190469177902192, "learning_rate": 1.1893324180349141e-05, "loss": 0.4632, "step": 18414 }, { "epoch": 1.3686361947231513, "grad_norm": 2.227748716090402, "learning_rate": 1.1892536318398884e-05, "loss": 0.6236, "step": 18415 }, { "epoch": 1.3687105165366034, "grad_norm": 1.7016124824980128, "learning_rate": 1.1891748444264577e-05, "loss": 0.5562, "step": 18416 }, { "epoch": 1.3687848383500558, "grad_norm": 2.5109514555021826, "learning_rate": 1.189096055795129e-05, "loss": 0.7052, "step": 18417 }, { "epoch": 1.368859160163508, "grad_norm": 1.8045047212286878, "learning_rate": 1.1890172659464101e-05, "loss": 0.5304, "step": 18418 }, { "epoch": 1.3689334819769603, "grad_norm": 2.713290264937443, "learning_rate": 1.1889384748808077e-05, "loss": 0.6863, "step": 18419 }, { "epoch": 1.3690078037904125, "grad_norm": 2.063256424869953, "learning_rate": 1.188859682598829e-05, "loss": 0.6655, "step": 18420 }, { "epoch": 1.3690821256038648, "grad_norm": 2.1750283184434407, "learning_rate": 1.1887808891009816e-05, "loss": 0.6217, "step": 18421 }, { "epoch": 1.369156447417317, "grad_norm": 1.899272445077197, "learning_rate": 1.1887020943877725e-05, "loss": 0.6781, "step": 18422 }, { "epoch": 1.3692307692307693, "grad_norm": 1.6552241768673521, "learning_rate": 1.1886232984597092e-05, "loss": 0.4863, "step": 18423 }, { "epoch": 1.3693050910442215, "grad_norm": 1.8046085309211675, "learning_rate": 1.188544501317299e-05, "loss": 0.5745, "step": 18424 }, { "epoch": 1.3693794128576737, "grad_norm": 2.0181968380692488, "learning_rate": 1.1884657029610492e-05, "loss": 0.6801, "step": 18425 }, { "epoch": 1.369453734671126, "grad_norm": 2.2274550155090114, "learning_rate": 1.1883869033914666e-05, "loss": 0.652, "step": 18426 }, { "epoch": 1.3695280564845782, "grad_norm": 2.07328282148909, "learning_rate": 1.1883081026090591e-05, "loss": 0.6595, "step": 18427 }, { "epoch": 1.3696023782980304, "grad_norm": 1.9935301318628045, "learning_rate": 1.1882293006143337e-05, "loss": 0.4779, "step": 18428 }, { "epoch": 1.3696767001114827, "grad_norm": 2.4093324919791206, "learning_rate": 1.188150497407798e-05, "loss": 0.7272, "step": 18429 }, { "epoch": 1.369751021924935, "grad_norm": 2.2667785626514254, "learning_rate": 1.1880716929899592e-05, "loss": 0.6134, "step": 18430 }, { "epoch": 1.3698253437383872, "grad_norm": 1.7794002606682329, "learning_rate": 1.1879928873613244e-05, "loss": 0.5349, "step": 18431 }, { "epoch": 1.3698996655518394, "grad_norm": 1.5755888080513318, "learning_rate": 1.1879140805224011e-05, "loss": 0.5086, "step": 18432 }, { "epoch": 1.3699739873652916, "grad_norm": 1.9166787057622976, "learning_rate": 1.1878352724736968e-05, "loss": 0.5947, "step": 18433 }, { "epoch": 1.370048309178744, "grad_norm": 1.7348013525572266, "learning_rate": 1.187756463215719e-05, "loss": 0.5743, "step": 18434 }, { "epoch": 1.3701226309921961, "grad_norm": 1.9078136756712174, "learning_rate": 1.1876776527489745e-05, "loss": 0.4691, "step": 18435 }, { "epoch": 1.3701969528056486, "grad_norm": 2.3758119726885725, "learning_rate": 1.1875988410739712e-05, "loss": 0.5664, "step": 18436 }, { "epoch": 1.3702712746191006, "grad_norm": 1.5669037079611692, "learning_rate": 1.1875200281912163e-05, "loss": 0.4798, "step": 18437 }, { "epoch": 1.370345596432553, "grad_norm": 2.171182177465097, "learning_rate": 1.1874412141012169e-05, "loss": 0.6987, "step": 18438 }, { "epoch": 1.370419918246005, "grad_norm": 2.325376959804584, "learning_rate": 1.187362398804481e-05, "loss": 0.7594, "step": 18439 }, { "epoch": 1.3704942400594575, "grad_norm": 2.018630199700823, "learning_rate": 1.1872835823015154e-05, "loss": 0.4633, "step": 18440 }, { "epoch": 1.3705685618729098, "grad_norm": 1.9233839816348184, "learning_rate": 1.187204764592828e-05, "loss": 0.573, "step": 18441 }, { "epoch": 1.370642883686362, "grad_norm": 2.637124454482101, "learning_rate": 1.1871259456789262e-05, "loss": 0.5829, "step": 18442 }, { "epoch": 1.3707172054998142, "grad_norm": 1.767334342658066, "learning_rate": 1.1870471255603171e-05, "loss": 0.5437, "step": 18443 }, { "epoch": 1.3707915273132665, "grad_norm": 2.3526982047466816, "learning_rate": 1.1869683042375082e-05, "loss": 0.5998, "step": 18444 }, { "epoch": 1.3708658491267187, "grad_norm": 1.872666107068018, "learning_rate": 1.1868894817110073e-05, "loss": 0.7165, "step": 18445 }, { "epoch": 1.370940170940171, "grad_norm": 2.0020754271015604, "learning_rate": 1.1868106579813214e-05, "loss": 0.6265, "step": 18446 }, { "epoch": 1.3710144927536232, "grad_norm": 1.7095138633801539, "learning_rate": 1.1867318330489586e-05, "loss": 0.5703, "step": 18447 }, { "epoch": 1.3710888145670754, "grad_norm": 1.8516768680972766, "learning_rate": 1.1866530069144253e-05, "loss": 0.667, "step": 18448 }, { "epoch": 1.3711631363805277, "grad_norm": 1.6893972734547928, "learning_rate": 1.18657417957823e-05, "loss": 0.5729, "step": 18449 }, { "epoch": 1.37123745819398, "grad_norm": 2.4758263605486186, "learning_rate": 1.1864953510408797e-05, "loss": 0.6755, "step": 18450 }, { "epoch": 1.3713117800074321, "grad_norm": 1.699338029259057, "learning_rate": 1.1864165213028822e-05, "loss": 0.4998, "step": 18451 }, { "epoch": 1.3713861018208844, "grad_norm": 1.8242550199726892, "learning_rate": 1.1863376903647445e-05, "loss": 0.5214, "step": 18452 }, { "epoch": 1.3714604236343366, "grad_norm": 2.3878203885474636, "learning_rate": 1.1862588582269748e-05, "loss": 0.6839, "step": 18453 }, { "epoch": 1.3715347454477889, "grad_norm": 1.7361887430612037, "learning_rate": 1.1861800248900798e-05, "loss": 0.5092, "step": 18454 }, { "epoch": 1.371609067261241, "grad_norm": 2.1702149177326087, "learning_rate": 1.1861011903545678e-05, "loss": 0.4216, "step": 18455 }, { "epoch": 1.3716833890746933, "grad_norm": 1.6760469114735406, "learning_rate": 1.186022354620946e-05, "loss": 0.5422, "step": 18456 }, { "epoch": 1.3717577108881458, "grad_norm": 2.046325566647295, "learning_rate": 1.1859435176897218e-05, "loss": 0.6228, "step": 18457 }, { "epoch": 1.3718320327015978, "grad_norm": 1.9555783695411055, "learning_rate": 1.1858646795614028e-05, "loss": 0.6333, "step": 18458 }, { "epoch": 1.3719063545150503, "grad_norm": 2.3280183284842706, "learning_rate": 1.1857858402364967e-05, "loss": 0.8243, "step": 18459 }, { "epoch": 1.3719806763285023, "grad_norm": 3.1331228133322457, "learning_rate": 1.1857069997155111e-05, "loss": 0.5856, "step": 18460 }, { "epoch": 1.3720549981419548, "grad_norm": 2.505762126659971, "learning_rate": 1.1856281579989532e-05, "loss": 0.5652, "step": 18461 }, { "epoch": 1.372129319955407, "grad_norm": 2.4815048525340497, "learning_rate": 1.1855493150873313e-05, "loss": 0.6915, "step": 18462 }, { "epoch": 1.3722036417688592, "grad_norm": 1.9730836994052177, "learning_rate": 1.1854704709811522e-05, "loss": 0.6006, "step": 18463 }, { "epoch": 1.3722779635823115, "grad_norm": 1.9454175434647325, "learning_rate": 1.1853916256809242e-05, "loss": 0.552, "step": 18464 }, { "epoch": 1.3723522853957637, "grad_norm": 1.9083502702789321, "learning_rate": 1.1853127791871542e-05, "loss": 0.5283, "step": 18465 }, { "epoch": 1.372426607209216, "grad_norm": 2.2533270446590987, "learning_rate": 1.1852339315003501e-05, "loss": 0.6107, "step": 18466 }, { "epoch": 1.3725009290226682, "grad_norm": 2.088223255067384, "learning_rate": 1.1851550826210196e-05, "loss": 0.507, "step": 18467 }, { "epoch": 1.3725752508361204, "grad_norm": 2.0241874849325447, "learning_rate": 1.1850762325496702e-05, "loss": 0.5551, "step": 18468 }, { "epoch": 1.3726495726495727, "grad_norm": 2.249784787805785, "learning_rate": 1.1849973812868098e-05, "loss": 0.8085, "step": 18469 }, { "epoch": 1.372723894463025, "grad_norm": 2.5882684152348134, "learning_rate": 1.1849185288329462e-05, "loss": 0.5052, "step": 18470 }, { "epoch": 1.3727982162764771, "grad_norm": 2.159735672757681, "learning_rate": 1.184839675188586e-05, "loss": 0.8006, "step": 18471 }, { "epoch": 1.3728725380899294, "grad_norm": 1.5878147130525744, "learning_rate": 1.1847608203542378e-05, "loss": 0.5573, "step": 18472 }, { "epoch": 1.3729468599033816, "grad_norm": 1.6983892732830044, "learning_rate": 1.1846819643304093e-05, "loss": 0.6296, "step": 18473 }, { "epoch": 1.3730211817168338, "grad_norm": 1.688419592671559, "learning_rate": 1.1846031071176076e-05, "loss": 0.6639, "step": 18474 }, { "epoch": 1.373095503530286, "grad_norm": 1.9346019711364704, "learning_rate": 1.1845242487163406e-05, "loss": 0.6221, "step": 18475 }, { "epoch": 1.3731698253437383, "grad_norm": 1.9387440142358419, "learning_rate": 1.1844453891271165e-05, "loss": 0.641, "step": 18476 }, { "epoch": 1.3732441471571906, "grad_norm": 1.6192765009946104, "learning_rate": 1.1843665283504422e-05, "loss": 0.5304, "step": 18477 }, { "epoch": 1.3733184689706428, "grad_norm": 1.816342976210905, "learning_rate": 1.1842876663868262e-05, "loss": 0.5456, "step": 18478 }, { "epoch": 1.373392790784095, "grad_norm": 2.069419056169906, "learning_rate": 1.1842088032367753e-05, "loss": 0.6836, "step": 18479 }, { "epoch": 1.3734671125975475, "grad_norm": 4.982207865082381, "learning_rate": 1.1841299389007977e-05, "loss": 0.4907, "step": 18480 }, { "epoch": 1.3735414344109995, "grad_norm": 2.541746963306197, "learning_rate": 1.1840510733794012e-05, "loss": 0.6869, "step": 18481 }, { "epoch": 1.373615756224452, "grad_norm": 2.0480151653391068, "learning_rate": 1.1839722066730934e-05, "loss": 0.6076, "step": 18482 }, { "epoch": 1.373690078037904, "grad_norm": 1.9527322073020479, "learning_rate": 1.1838933387823821e-05, "loss": 0.5674, "step": 18483 }, { "epoch": 1.3737643998513565, "grad_norm": 1.9413266006965662, "learning_rate": 1.1838144697077751e-05, "loss": 0.5765, "step": 18484 }, { "epoch": 1.3738387216648087, "grad_norm": 1.6859514060718626, "learning_rate": 1.1837355994497802e-05, "loss": 0.5659, "step": 18485 }, { "epoch": 1.373913043478261, "grad_norm": 1.7471396084965092, "learning_rate": 1.1836567280089046e-05, "loss": 0.5536, "step": 18486 }, { "epoch": 1.3739873652917132, "grad_norm": 1.8592651875984778, "learning_rate": 1.183577855385657e-05, "loss": 0.6477, "step": 18487 }, { "epoch": 1.3740616871051654, "grad_norm": 2.1187704711932698, "learning_rate": 1.1834989815805444e-05, "loss": 0.5402, "step": 18488 }, { "epoch": 1.3741360089186176, "grad_norm": 1.6393176845224111, "learning_rate": 1.1834201065940751e-05, "loss": 0.5044, "step": 18489 }, { "epoch": 1.3742103307320699, "grad_norm": 2.070510762929868, "learning_rate": 1.1833412304267564e-05, "loss": 0.5963, "step": 18490 }, { "epoch": 1.3742846525455221, "grad_norm": 2.4085470604629675, "learning_rate": 1.1832623530790966e-05, "loss": 0.4691, "step": 18491 }, { "epoch": 1.3743589743589744, "grad_norm": 1.7339948594943164, "learning_rate": 1.1831834745516034e-05, "loss": 0.5786, "step": 18492 }, { "epoch": 1.3744332961724266, "grad_norm": 2.446540747225588, "learning_rate": 1.1831045948447842e-05, "loss": 0.7456, "step": 18493 }, { "epoch": 1.3745076179858788, "grad_norm": 1.9349490889858, "learning_rate": 1.183025713959147e-05, "loss": 0.4698, "step": 18494 }, { "epoch": 1.374581939799331, "grad_norm": 2.299096028952037, "learning_rate": 1.1829468318952003e-05, "loss": 0.7008, "step": 18495 }, { "epoch": 1.3746562616127833, "grad_norm": 1.7289995181868987, "learning_rate": 1.1828679486534509e-05, "loss": 0.6056, "step": 18496 }, { "epoch": 1.3747305834262356, "grad_norm": 2.0133989215244266, "learning_rate": 1.1827890642344071e-05, "loss": 0.542, "step": 18497 }, { "epoch": 1.3748049052396878, "grad_norm": 1.7069988905638092, "learning_rate": 1.1827101786385771e-05, "loss": 0.4951, "step": 18498 }, { "epoch": 1.37487922705314, "grad_norm": 2.2144999718738343, "learning_rate": 1.1826312918664682e-05, "loss": 0.7893, "step": 18499 }, { "epoch": 1.3749535488665923, "grad_norm": 2.253912358005339, "learning_rate": 1.1825524039185887e-05, "loss": 0.6963, "step": 18500 }, { "epoch": 1.3750278706800447, "grad_norm": 2.4520950247848257, "learning_rate": 1.1824735147954463e-05, "loss": 0.7229, "step": 18501 }, { "epoch": 1.3751021924934967, "grad_norm": 1.8231694755023051, "learning_rate": 1.1823946244975486e-05, "loss": 0.5641, "step": 18502 }, { "epoch": 1.3751765143069492, "grad_norm": 1.8723422605300777, "learning_rate": 1.1823157330254041e-05, "loss": 0.6588, "step": 18503 }, { "epoch": 1.3752508361204012, "grad_norm": 1.3898387131732852, "learning_rate": 1.1822368403795203e-05, "loss": 0.467, "step": 18504 }, { "epoch": 1.3753251579338537, "grad_norm": 1.7253843549414576, "learning_rate": 1.182157946560405e-05, "loss": 0.5541, "step": 18505 }, { "epoch": 1.3753994797473057, "grad_norm": 1.8607865483055401, "learning_rate": 1.1820790515685667e-05, "loss": 0.6578, "step": 18506 }, { "epoch": 1.3754738015607582, "grad_norm": 1.8101665580104616, "learning_rate": 1.1820001554045125e-05, "loss": 0.5233, "step": 18507 }, { "epoch": 1.3755481233742104, "grad_norm": 1.6864336756807803, "learning_rate": 1.1819212580687508e-05, "loss": 0.4883, "step": 18508 }, { "epoch": 1.3756224451876626, "grad_norm": 1.6827518483392956, "learning_rate": 1.1818423595617896e-05, "loss": 0.5425, "step": 18509 }, { "epoch": 1.3756967670011149, "grad_norm": 2.1602141651538, "learning_rate": 1.1817634598841368e-05, "loss": 0.6217, "step": 18510 }, { "epoch": 1.3757710888145671, "grad_norm": 1.4937731912199288, "learning_rate": 1.1816845590363002e-05, "loss": 0.4259, "step": 18511 }, { "epoch": 1.3758454106280193, "grad_norm": 1.8093258136480754, "learning_rate": 1.1816056570187877e-05, "loss": 0.5635, "step": 18512 }, { "epoch": 1.3759197324414716, "grad_norm": 2.160332615900119, "learning_rate": 1.1815267538321073e-05, "loss": 0.6774, "step": 18513 }, { "epoch": 1.3759940542549238, "grad_norm": 1.7988139141213613, "learning_rate": 1.1814478494767674e-05, "loss": 0.6103, "step": 18514 }, { "epoch": 1.376068376068376, "grad_norm": 2.710284104922889, "learning_rate": 1.1813689439532755e-05, "loss": 0.6562, "step": 18515 }, { "epoch": 1.3761426978818283, "grad_norm": 1.9871751338708208, "learning_rate": 1.1812900372621398e-05, "loss": 0.6526, "step": 18516 }, { "epoch": 1.3762170196952805, "grad_norm": 1.7976617190437467, "learning_rate": 1.1812111294038684e-05, "loss": 0.5128, "step": 18517 }, { "epoch": 1.3762913415087328, "grad_norm": 1.9327562060361463, "learning_rate": 1.1811322203789688e-05, "loss": 0.4949, "step": 18518 }, { "epoch": 1.376365663322185, "grad_norm": 1.8193915802991956, "learning_rate": 1.1810533101879495e-05, "loss": 0.5659, "step": 18519 }, { "epoch": 1.3764399851356373, "grad_norm": 1.904473774257441, "learning_rate": 1.1809743988313185e-05, "loss": 0.6405, "step": 18520 }, { "epoch": 1.3765143069490895, "grad_norm": 1.7846249793778997, "learning_rate": 1.1808954863095837e-05, "loss": 0.5401, "step": 18521 }, { "epoch": 1.3765886287625417, "grad_norm": 1.699188603800798, "learning_rate": 1.1808165726232533e-05, "loss": 0.5369, "step": 18522 }, { "epoch": 1.376662950575994, "grad_norm": 2.2402587398135956, "learning_rate": 1.1807376577728348e-05, "loss": 0.5976, "step": 18523 }, { "epoch": 1.3767372723894464, "grad_norm": 2.079868298710092, "learning_rate": 1.1806587417588367e-05, "loss": 0.6975, "step": 18524 }, { "epoch": 1.3768115942028984, "grad_norm": 8.49114813921734, "learning_rate": 1.180579824581767e-05, "loss": 0.5866, "step": 18525 }, { "epoch": 1.376885916016351, "grad_norm": 2.2556209465822827, "learning_rate": 1.180500906242134e-05, "loss": 0.5309, "step": 18526 }, { "epoch": 1.376960237829803, "grad_norm": 1.7019271460539016, "learning_rate": 1.1804219867404455e-05, "loss": 0.516, "step": 18527 }, { "epoch": 1.3770345596432554, "grad_norm": 2.0765227579585708, "learning_rate": 1.1803430660772094e-05, "loss": 0.5586, "step": 18528 }, { "epoch": 1.3771088814567076, "grad_norm": 2.676583991552541, "learning_rate": 1.1802641442529341e-05, "loss": 0.6346, "step": 18529 }, { "epoch": 1.3771832032701599, "grad_norm": 3.1111067352442894, "learning_rate": 1.1801852212681276e-05, "loss": 0.4778, "step": 18530 }, { "epoch": 1.377257525083612, "grad_norm": 1.7481692329584326, "learning_rate": 1.1801062971232979e-05, "loss": 0.5266, "step": 18531 }, { "epoch": 1.3773318468970643, "grad_norm": 1.677130512071726, "learning_rate": 1.1800273718189535e-05, "loss": 0.5392, "step": 18532 }, { "epoch": 1.3774061687105166, "grad_norm": 2.0591250662374057, "learning_rate": 1.179948445355602e-05, "loss": 0.6988, "step": 18533 }, { "epoch": 1.3774804905239688, "grad_norm": 1.87856507815539, "learning_rate": 1.1798695177337517e-05, "loss": 0.5462, "step": 18534 }, { "epoch": 1.377554812337421, "grad_norm": 2.436321599765872, "learning_rate": 1.1797905889539107e-05, "loss": 0.7697, "step": 18535 }, { "epoch": 1.3776291341508733, "grad_norm": 2.301316279142428, "learning_rate": 1.1797116590165874e-05, "loss": 0.7347, "step": 18536 }, { "epoch": 1.3777034559643255, "grad_norm": 2.2996275414557275, "learning_rate": 1.17963272792229e-05, "loss": 0.6124, "step": 18537 }, { "epoch": 1.3777777777777778, "grad_norm": 1.6367889942643639, "learning_rate": 1.1795537956715261e-05, "loss": 0.418, "step": 18538 }, { "epoch": 1.37785209959123, "grad_norm": 1.9674521810865975, "learning_rate": 1.1794748622648042e-05, "loss": 0.5548, "step": 18539 }, { "epoch": 1.3779264214046822, "grad_norm": 2.8000514118132713, "learning_rate": 1.1793959277026324e-05, "loss": 0.7029, "step": 18540 }, { "epoch": 1.3780007432181345, "grad_norm": 1.644392620163018, "learning_rate": 1.1793169919855188e-05, "loss": 0.4121, "step": 18541 }, { "epoch": 1.3780750650315867, "grad_norm": 1.8373720134688736, "learning_rate": 1.179238055113972e-05, "loss": 0.5819, "step": 18542 }, { "epoch": 1.378149386845039, "grad_norm": 1.8763624785479862, "learning_rate": 1.1791591170884997e-05, "loss": 0.6973, "step": 18543 }, { "epoch": 1.3782237086584912, "grad_norm": 2.1309372440531407, "learning_rate": 1.1790801779096106e-05, "loss": 0.5565, "step": 18544 }, { "epoch": 1.3782980304719434, "grad_norm": 2.513568456544635, "learning_rate": 1.1790012375778124e-05, "loss": 0.8087, "step": 18545 }, { "epoch": 1.3783723522853957, "grad_norm": 2.2752143310348973, "learning_rate": 1.1789222960936135e-05, "loss": 0.6402, "step": 18546 }, { "epoch": 1.3784466740988481, "grad_norm": 1.9453397687668417, "learning_rate": 1.1788433534575221e-05, "loss": 0.6631, "step": 18547 }, { "epoch": 1.3785209959123002, "grad_norm": 2.1825181419443807, "learning_rate": 1.1787644096700465e-05, "loss": 0.8123, "step": 18548 }, { "epoch": 1.3785953177257526, "grad_norm": 1.7687572807969942, "learning_rate": 1.1786854647316952e-05, "loss": 0.569, "step": 18549 }, { "epoch": 1.3786696395392046, "grad_norm": 8.232433155012108, "learning_rate": 1.1786065186429758e-05, "loss": 0.5238, "step": 18550 }, { "epoch": 1.378743961352657, "grad_norm": 2.0934363157085096, "learning_rate": 1.1785275714043969e-05, "loss": 0.6686, "step": 18551 }, { "epoch": 1.3788182831661093, "grad_norm": 1.9183854761597516, "learning_rate": 1.178448623016467e-05, "loss": 0.5918, "step": 18552 }, { "epoch": 1.3788926049795616, "grad_norm": 1.8759873196194512, "learning_rate": 1.1783696734796937e-05, "loss": 0.6293, "step": 18553 }, { "epoch": 1.3789669267930138, "grad_norm": 1.7120564153724347, "learning_rate": 1.1782907227945861e-05, "loss": 0.5383, "step": 18554 }, { "epoch": 1.379041248606466, "grad_norm": 1.7390257093530168, "learning_rate": 1.178211770961652e-05, "loss": 0.5355, "step": 18555 }, { "epoch": 1.3791155704199183, "grad_norm": 2.1625604490818255, "learning_rate": 1.1781328179813997e-05, "loss": 0.5666, "step": 18556 }, { "epoch": 1.3791898922333705, "grad_norm": 1.5957316296395125, "learning_rate": 1.1780538638543376e-05, "loss": 0.4358, "step": 18557 }, { "epoch": 1.3792642140468228, "grad_norm": 1.8214449558320849, "learning_rate": 1.1779749085809739e-05, "loss": 0.5729, "step": 18558 }, { "epoch": 1.379338535860275, "grad_norm": 2.426171680199252, "learning_rate": 1.1778959521618173e-05, "loss": 0.7266, "step": 18559 }, { "epoch": 1.3794128576737272, "grad_norm": 1.9972510655112368, "learning_rate": 1.1778169945973755e-05, "loss": 0.6691, "step": 18560 }, { "epoch": 1.3794871794871795, "grad_norm": 6.284329188510454, "learning_rate": 1.177738035888157e-05, "loss": 0.6648, "step": 18561 }, { "epoch": 1.3795615013006317, "grad_norm": 2.223982708345679, "learning_rate": 1.1776590760346702e-05, "loss": 0.7334, "step": 18562 }, { "epoch": 1.379635823114084, "grad_norm": 2.2170275272616653, "learning_rate": 1.1775801150374238e-05, "loss": 0.7653, "step": 18563 }, { "epoch": 1.3797101449275362, "grad_norm": 1.664788195531337, "learning_rate": 1.1775011528969255e-05, "loss": 0.5501, "step": 18564 }, { "epoch": 1.3797844667409884, "grad_norm": 1.7030853425898564, "learning_rate": 1.1774221896136844e-05, "loss": 0.621, "step": 18565 }, { "epoch": 1.3798587885544407, "grad_norm": 1.9749006347708882, "learning_rate": 1.1773432251882081e-05, "loss": 0.6033, "step": 18566 }, { "epoch": 1.379933110367893, "grad_norm": 1.7277661934170883, "learning_rate": 1.1772642596210055e-05, "loss": 0.651, "step": 18567 }, { "epoch": 1.3800074321813454, "grad_norm": 2.10000635781159, "learning_rate": 1.1771852929125846e-05, "loss": 0.598, "step": 18568 }, { "epoch": 1.3800817539947974, "grad_norm": 2.1009262738091365, "learning_rate": 1.1771063250634542e-05, "loss": 0.5997, "step": 18569 }, { "epoch": 1.3801560758082498, "grad_norm": 1.911754479470666, "learning_rate": 1.1770273560741226e-05, "loss": 0.5975, "step": 18570 }, { "epoch": 1.3802303976217019, "grad_norm": 1.891889428539434, "learning_rate": 1.1769483859450977e-05, "loss": 0.6291, "step": 18571 }, { "epoch": 1.3803047194351543, "grad_norm": 1.9270796012261278, "learning_rate": 1.1768694146768883e-05, "loss": 0.5399, "step": 18572 }, { "epoch": 1.3803790412486063, "grad_norm": 1.9548303444959156, "learning_rate": 1.176790442270003e-05, "loss": 0.6849, "step": 18573 }, { "epoch": 1.3804533630620588, "grad_norm": 1.85458044049813, "learning_rate": 1.1767114687249499e-05, "loss": 0.6986, "step": 18574 }, { "epoch": 1.380527684875511, "grad_norm": 1.8491336963728886, "learning_rate": 1.1766324940422375e-05, "loss": 0.663, "step": 18575 }, { "epoch": 1.3806020066889633, "grad_norm": 2.2249406765643553, "learning_rate": 1.1765535182223745e-05, "loss": 0.7182, "step": 18576 }, { "epoch": 1.3806763285024155, "grad_norm": 1.7350831814005103, "learning_rate": 1.176474541265869e-05, "loss": 0.524, "step": 18577 }, { "epoch": 1.3807506503158677, "grad_norm": 2.1947593331271524, "learning_rate": 1.1763955631732293e-05, "loss": 0.6997, "step": 18578 }, { "epoch": 1.38082497212932, "grad_norm": 1.8612161615819658, "learning_rate": 1.1763165839449645e-05, "loss": 0.4954, "step": 18579 }, { "epoch": 1.3808992939427722, "grad_norm": 2.4527030173908893, "learning_rate": 1.1762376035815821e-05, "loss": 0.7191, "step": 18580 }, { "epoch": 1.3809736157562245, "grad_norm": 2.3282478373394757, "learning_rate": 1.1761586220835919e-05, "loss": 0.7667, "step": 18581 }, { "epoch": 1.3810479375696767, "grad_norm": 1.7900123479285956, "learning_rate": 1.176079639451501e-05, "loss": 0.5248, "step": 18582 }, { "epoch": 1.381122259383129, "grad_norm": 1.9676225250485149, "learning_rate": 1.176000655685819e-05, "loss": 0.5787, "step": 18583 }, { "epoch": 1.3811965811965812, "grad_norm": 2.5620071731940945, "learning_rate": 1.1759216707870536e-05, "loss": 0.7197, "step": 18584 }, { "epoch": 1.3812709030100334, "grad_norm": 2.1652504934756767, "learning_rate": 1.1758426847557136e-05, "loss": 0.6898, "step": 18585 }, { "epoch": 1.3813452248234857, "grad_norm": 2.2542517092455863, "learning_rate": 1.1757636975923077e-05, "loss": 0.6805, "step": 18586 }, { "epoch": 1.381419546636938, "grad_norm": 2.1087812600432545, "learning_rate": 1.1756847092973442e-05, "loss": 0.6189, "step": 18587 }, { "epoch": 1.3814938684503901, "grad_norm": 2.521183304014042, "learning_rate": 1.1756057198713318e-05, "loss": 0.8326, "step": 18588 }, { "epoch": 1.3815681902638424, "grad_norm": 2.207022068083986, "learning_rate": 1.1755267293147785e-05, "loss": 0.6168, "step": 18589 }, { "epoch": 1.3816425120772946, "grad_norm": 1.773475382169286, "learning_rate": 1.1754477376281935e-05, "loss": 0.5599, "step": 18590 }, { "epoch": 1.381716833890747, "grad_norm": 2.0648590503228865, "learning_rate": 1.175368744812085e-05, "loss": 0.6026, "step": 18591 }, { "epoch": 1.381791155704199, "grad_norm": 2.6724189427178193, "learning_rate": 1.1752897508669616e-05, "loss": 0.7, "step": 18592 }, { "epoch": 1.3818654775176515, "grad_norm": 2.215457747386553, "learning_rate": 1.175210755793332e-05, "loss": 0.662, "step": 18593 }, { "epoch": 1.3819397993311036, "grad_norm": 1.439189628442481, "learning_rate": 1.1751317595917044e-05, "loss": 0.4309, "step": 18594 }, { "epoch": 1.382014121144556, "grad_norm": 2.6894107467859643, "learning_rate": 1.1750527622625879e-05, "loss": 0.6336, "step": 18595 }, { "epoch": 1.3820884429580083, "grad_norm": 1.845012730624271, "learning_rate": 1.1749737638064905e-05, "loss": 0.6112, "step": 18596 }, { "epoch": 1.3821627647714605, "grad_norm": 2.2659012181706846, "learning_rate": 1.1748947642239212e-05, "loss": 0.8133, "step": 18597 }, { "epoch": 1.3822370865849127, "grad_norm": 2.2193278545638773, "learning_rate": 1.1748157635153887e-05, "loss": 0.5208, "step": 18598 }, { "epoch": 1.382311408398365, "grad_norm": 1.7746768691046189, "learning_rate": 1.1747367616814012e-05, "loss": 0.5689, "step": 18599 }, { "epoch": 1.3823857302118172, "grad_norm": 2.0874053237052523, "learning_rate": 1.1746577587224675e-05, "loss": 0.6935, "step": 18600 }, { "epoch": 1.3824600520252694, "grad_norm": 2.397914643654707, "learning_rate": 1.174578754639096e-05, "loss": 0.5963, "step": 18601 }, { "epoch": 1.3825343738387217, "grad_norm": 2.059507749098772, "learning_rate": 1.1744997494317957e-05, "loss": 0.6959, "step": 18602 }, { "epoch": 1.382608695652174, "grad_norm": 2.2321098991176243, "learning_rate": 1.1744207431010754e-05, "loss": 0.7623, "step": 18603 }, { "epoch": 1.3826830174656262, "grad_norm": 1.5451014273380725, "learning_rate": 1.174341735647443e-05, "loss": 0.428, "step": 18604 }, { "epoch": 1.3827573392790784, "grad_norm": 2.137857246081695, "learning_rate": 1.174262727071408e-05, "loss": 0.4945, "step": 18605 }, { "epoch": 1.3828316610925306, "grad_norm": 1.7344363936645453, "learning_rate": 1.174183717373478e-05, "loss": 0.5058, "step": 18606 }, { "epoch": 1.3829059829059829, "grad_norm": 1.7166509604247702, "learning_rate": 1.1741047065541626e-05, "loss": 0.5959, "step": 18607 }, { "epoch": 1.3829803047194351, "grad_norm": 1.960097042411048, "learning_rate": 1.1740256946139702e-05, "loss": 0.6929, "step": 18608 }, { "epoch": 1.3830546265328874, "grad_norm": 2.14515607221194, "learning_rate": 1.1739466815534093e-05, "loss": 0.6538, "step": 18609 }, { "epoch": 1.3831289483463396, "grad_norm": 2.292645718579195, "learning_rate": 1.1738676673729885e-05, "loss": 0.7303, "step": 18610 }, { "epoch": 1.3832032701597918, "grad_norm": 2.2310608062922532, "learning_rate": 1.1737886520732171e-05, "loss": 0.6751, "step": 18611 }, { "epoch": 1.383277591973244, "grad_norm": 1.7669555906806327, "learning_rate": 1.173709635654603e-05, "loss": 0.5146, "step": 18612 }, { "epoch": 1.3833519137866963, "grad_norm": 2.032136810601847, "learning_rate": 1.1736306181176554e-05, "loss": 0.7153, "step": 18613 }, { "epoch": 1.3834262356001488, "grad_norm": 1.8133861277174044, "learning_rate": 1.173551599462883e-05, "loss": 0.5955, "step": 18614 }, { "epoch": 1.3835005574136008, "grad_norm": 3.2910837453655186, "learning_rate": 1.1734725796907943e-05, "loss": 0.5453, "step": 18615 }, { "epoch": 1.3835748792270532, "grad_norm": 2.3979127820290755, "learning_rate": 1.1733935588018982e-05, "loss": 0.6098, "step": 18616 }, { "epoch": 1.3836492010405053, "grad_norm": 2.2289744407312675, "learning_rate": 1.173314536796703e-05, "loss": 0.505, "step": 18617 }, { "epoch": 1.3837235228539577, "grad_norm": 1.9534255890100085, "learning_rate": 1.1732355136757183e-05, "loss": 0.6312, "step": 18618 }, { "epoch": 1.38379784466741, "grad_norm": 2.657775347986584, "learning_rate": 1.1731564894394521e-05, "loss": 0.6411, "step": 18619 }, { "epoch": 1.3838721664808622, "grad_norm": 2.2262719402192013, "learning_rate": 1.1730774640884137e-05, "loss": 0.5887, "step": 18620 }, { "epoch": 1.3839464882943144, "grad_norm": 2.073837076691184, "learning_rate": 1.172998437623111e-05, "loss": 0.8077, "step": 18621 }, { "epoch": 1.3840208101077667, "grad_norm": 1.5826174109304998, "learning_rate": 1.172919410044054e-05, "loss": 0.4272, "step": 18622 }, { "epoch": 1.384095131921219, "grad_norm": 2.0755148075410674, "learning_rate": 1.1728403813517504e-05, "loss": 0.7007, "step": 18623 }, { "epoch": 1.3841694537346712, "grad_norm": 1.9441702662654619, "learning_rate": 1.1727613515467094e-05, "loss": 0.6671, "step": 18624 }, { "epoch": 1.3842437755481234, "grad_norm": 1.8944597569610948, "learning_rate": 1.17268232062944e-05, "loss": 0.6746, "step": 18625 }, { "epoch": 1.3843180973615756, "grad_norm": 2.126929644905134, "learning_rate": 1.1726032886004506e-05, "loss": 0.5663, "step": 18626 }, { "epoch": 1.3843924191750279, "grad_norm": 1.7207552637931236, "learning_rate": 1.17252425546025e-05, "loss": 0.5529, "step": 18627 }, { "epoch": 1.38446674098848, "grad_norm": 1.5061416437942718, "learning_rate": 1.1724452212093475e-05, "loss": 0.4404, "step": 18628 }, { "epoch": 1.3845410628019323, "grad_norm": 2.2426816846300257, "learning_rate": 1.1723661858482516e-05, "loss": 0.6218, "step": 18629 }, { "epoch": 1.3846153846153846, "grad_norm": 1.6212932838676295, "learning_rate": 1.172287149377471e-05, "loss": 0.5279, "step": 18630 }, { "epoch": 1.3846897064288368, "grad_norm": 2.221172989498387, "learning_rate": 1.1722081117975147e-05, "loss": 0.7062, "step": 18631 }, { "epoch": 1.384764028242289, "grad_norm": 1.816871643702365, "learning_rate": 1.1721290731088918e-05, "loss": 0.5716, "step": 18632 }, { "epoch": 1.3848383500557413, "grad_norm": 1.701706599772357, "learning_rate": 1.1720500333121105e-05, "loss": 0.5305, "step": 18633 }, { "epoch": 1.3849126718691935, "grad_norm": 2.377347218994992, "learning_rate": 1.1719709924076801e-05, "loss": 0.7719, "step": 18634 }, { "epoch": 1.384986993682646, "grad_norm": 1.9531633098835521, "learning_rate": 1.1718919503961094e-05, "loss": 0.4907, "step": 18635 }, { "epoch": 1.385061315496098, "grad_norm": 2.041192539912274, "learning_rate": 1.1718129072779075e-05, "loss": 0.635, "step": 18636 }, { "epoch": 1.3851356373095505, "grad_norm": 2.0943898168748403, "learning_rate": 1.1717338630535826e-05, "loss": 0.6812, "step": 18637 }, { "epoch": 1.3852099591230025, "grad_norm": 2.001924816191607, "learning_rate": 1.171654817723644e-05, "loss": 0.6602, "step": 18638 }, { "epoch": 1.385284280936455, "grad_norm": 2.219899857695941, "learning_rate": 1.171575771288601e-05, "loss": 0.6739, "step": 18639 }, { "epoch": 1.385358602749907, "grad_norm": 2.1213005529627322, "learning_rate": 1.1714967237489616e-05, "loss": 0.5563, "step": 18640 }, { "epoch": 1.3854329245633594, "grad_norm": 2.1211196638863097, "learning_rate": 1.1714176751052356e-05, "loss": 0.6031, "step": 18641 }, { "epoch": 1.3855072463768117, "grad_norm": 2.062954076885842, "learning_rate": 1.1713386253579313e-05, "loss": 0.6985, "step": 18642 }, { "epoch": 1.385581568190264, "grad_norm": 2.835498976857586, "learning_rate": 1.171259574507558e-05, "loss": 0.6056, "step": 18643 }, { "epoch": 1.3856558900037161, "grad_norm": 2.1439848191314583, "learning_rate": 1.171180522554624e-05, "loss": 0.5949, "step": 18644 }, { "epoch": 1.3857302118171684, "grad_norm": 2.349586253127084, "learning_rate": 1.1711014694996391e-05, "loss": 0.7305, "step": 18645 }, { "epoch": 1.3858045336306206, "grad_norm": 2.0274754097691092, "learning_rate": 1.1710224153431115e-05, "loss": 0.6738, "step": 18646 }, { "epoch": 1.3858788554440729, "grad_norm": 1.880567012812433, "learning_rate": 1.1709433600855508e-05, "loss": 0.5249, "step": 18647 }, { "epoch": 1.385953177257525, "grad_norm": 2.544569341831694, "learning_rate": 1.1708643037274657e-05, "loss": 0.6416, "step": 18648 }, { "epoch": 1.3860274990709773, "grad_norm": 1.7999857319315824, "learning_rate": 1.1707852462693645e-05, "loss": 0.5212, "step": 18649 }, { "epoch": 1.3861018208844296, "grad_norm": 2.1306191578778853, "learning_rate": 1.1707061877117572e-05, "loss": 0.6677, "step": 18650 }, { "epoch": 1.3861761426978818, "grad_norm": 2.2082739010977153, "learning_rate": 1.1706271280551523e-05, "loss": 0.6714, "step": 18651 }, { "epoch": 1.386250464511334, "grad_norm": 2.4835602980183693, "learning_rate": 1.170548067300059e-05, "loss": 0.5614, "step": 18652 }, { "epoch": 1.3863247863247863, "grad_norm": 1.523988067124756, "learning_rate": 1.1704690054469856e-05, "loss": 0.5552, "step": 18653 }, { "epoch": 1.3863991081382385, "grad_norm": 2.095739383044653, "learning_rate": 1.1703899424964418e-05, "loss": 0.6864, "step": 18654 }, { "epoch": 1.3864734299516908, "grad_norm": 2.0034480375830888, "learning_rate": 1.1703108784489363e-05, "loss": 0.68, "step": 18655 }, { "epoch": 1.386547751765143, "grad_norm": 2.2832139189710143, "learning_rate": 1.1702318133049783e-05, "loss": 0.767, "step": 18656 }, { "epoch": 1.3866220735785952, "grad_norm": 2.094452332108442, "learning_rate": 1.1701527470650766e-05, "loss": 0.7109, "step": 18657 }, { "epoch": 1.3866963953920477, "grad_norm": 1.9114175289687418, "learning_rate": 1.1700736797297407e-05, "loss": 0.6778, "step": 18658 }, { "epoch": 1.3867707172054997, "grad_norm": 1.8562725141053358, "learning_rate": 1.1699946112994789e-05, "loss": 0.5307, "step": 18659 }, { "epoch": 1.3868450390189522, "grad_norm": 1.88519717681648, "learning_rate": 1.1699155417748006e-05, "loss": 0.6008, "step": 18660 }, { "epoch": 1.3869193608324042, "grad_norm": 1.8345807021634049, "learning_rate": 1.1698364711562151e-05, "loss": 0.5895, "step": 18661 }, { "epoch": 1.3869936826458567, "grad_norm": 1.836602436803261, "learning_rate": 1.1697573994442312e-05, "loss": 0.4675, "step": 18662 }, { "epoch": 1.387068004459309, "grad_norm": 2.239711505202551, "learning_rate": 1.1696783266393581e-05, "loss": 0.7193, "step": 18663 }, { "epoch": 1.3871423262727611, "grad_norm": 1.7541372290347443, "learning_rate": 1.1695992527421042e-05, "loss": 0.4741, "step": 18664 }, { "epoch": 1.3872166480862134, "grad_norm": 2.484570633114219, "learning_rate": 1.1695201777529795e-05, "loss": 0.7209, "step": 18665 }, { "epoch": 1.3872909698996656, "grad_norm": 1.9682550204485667, "learning_rate": 1.1694411016724924e-05, "loss": 0.5611, "step": 18666 }, { "epoch": 1.3873652917131178, "grad_norm": 2.158300839459619, "learning_rate": 1.1693620245011526e-05, "loss": 0.4352, "step": 18667 }, { "epoch": 1.38743961352657, "grad_norm": 1.6030983012444742, "learning_rate": 1.1692829462394688e-05, "loss": 0.6124, "step": 18668 }, { "epoch": 1.3875139353400223, "grad_norm": 1.8112796714221473, "learning_rate": 1.1692038668879504e-05, "loss": 0.5458, "step": 18669 }, { "epoch": 1.3875882571534746, "grad_norm": 4.477904116911476, "learning_rate": 1.1691247864471058e-05, "loss": 0.5728, "step": 18670 }, { "epoch": 1.3876625789669268, "grad_norm": 1.7328172072290182, "learning_rate": 1.1690457049174449e-05, "loss": 0.5354, "step": 18671 }, { "epoch": 1.387736900780379, "grad_norm": 1.987026052029818, "learning_rate": 1.1689666222994764e-05, "loss": 0.4965, "step": 18672 }, { "epoch": 1.3878112225938313, "grad_norm": 2.3278479128126186, "learning_rate": 1.1688875385937097e-05, "loss": 0.7218, "step": 18673 }, { "epoch": 1.3878855444072835, "grad_norm": 2.2408015093113245, "learning_rate": 1.168808453800654e-05, "loss": 0.6038, "step": 18674 }, { "epoch": 1.3879598662207357, "grad_norm": 1.8164535565999438, "learning_rate": 1.1687293679208178e-05, "loss": 0.4906, "step": 18675 }, { "epoch": 1.388034188034188, "grad_norm": 1.896542950269339, "learning_rate": 1.1686502809547107e-05, "loss": 0.5159, "step": 18676 }, { "epoch": 1.3881085098476402, "grad_norm": 1.9364240465805687, "learning_rate": 1.1685711929028419e-05, "loss": 0.6159, "step": 18677 }, { "epoch": 1.3881828316610925, "grad_norm": 2.0138937242189825, "learning_rate": 1.1684921037657204e-05, "loss": 0.7801, "step": 18678 }, { "epoch": 1.3882571534745447, "grad_norm": 1.9077062153742994, "learning_rate": 1.1684130135438558e-05, "loss": 0.635, "step": 18679 }, { "epoch": 1.388331475287997, "grad_norm": 1.912389615401186, "learning_rate": 1.168333922237757e-05, "loss": 0.6871, "step": 18680 }, { "epoch": 1.3884057971014494, "grad_norm": 3.488387939420261, "learning_rate": 1.1682548298479328e-05, "loss": 0.8463, "step": 18681 }, { "epoch": 1.3884801189149014, "grad_norm": 1.924158393356936, "learning_rate": 1.1681757363748929e-05, "loss": 0.7441, "step": 18682 }, { "epoch": 1.3885544407283539, "grad_norm": 2.215379133429508, "learning_rate": 1.1680966418191461e-05, "loss": 0.6003, "step": 18683 }, { "epoch": 1.388628762541806, "grad_norm": 1.814688934460423, "learning_rate": 1.1680175461812023e-05, "loss": 0.5963, "step": 18684 }, { "epoch": 1.3887030843552584, "grad_norm": 2.1677634854313146, "learning_rate": 1.1679384494615703e-05, "loss": 0.6481, "step": 18685 }, { "epoch": 1.3887774061687106, "grad_norm": 1.9457822234827713, "learning_rate": 1.1678593516607588e-05, "loss": 0.6031, "step": 18686 }, { "epoch": 1.3888517279821628, "grad_norm": 2.0835985169161484, "learning_rate": 1.1677802527792774e-05, "loss": 0.5765, "step": 18687 }, { "epoch": 1.388926049795615, "grad_norm": 1.9703029484394312, "learning_rate": 1.1677011528176358e-05, "loss": 0.7374, "step": 18688 }, { "epoch": 1.3890003716090673, "grad_norm": 1.8902665971986874, "learning_rate": 1.1676220517763426e-05, "loss": 0.5877, "step": 18689 }, { "epoch": 1.3890746934225195, "grad_norm": 2.02300489369107, "learning_rate": 1.1675429496559078e-05, "loss": 0.5913, "step": 18690 }, { "epoch": 1.3891490152359718, "grad_norm": 1.6418475015298188, "learning_rate": 1.16746384645684e-05, "loss": 0.5012, "step": 18691 }, { "epoch": 1.389223337049424, "grad_norm": 1.8576729406587829, "learning_rate": 1.1673847421796485e-05, "loss": 0.4885, "step": 18692 }, { "epoch": 1.3892976588628763, "grad_norm": 2.02733111734376, "learning_rate": 1.1673056368248427e-05, "loss": 0.6069, "step": 18693 }, { "epoch": 1.3893719806763285, "grad_norm": 2.1661854347074403, "learning_rate": 1.167226530392932e-05, "loss": 0.4411, "step": 18694 }, { "epoch": 1.3894463024897807, "grad_norm": 2.0870453985586255, "learning_rate": 1.1671474228844256e-05, "loss": 0.7236, "step": 18695 }, { "epoch": 1.389520624303233, "grad_norm": 2.3145479481167817, "learning_rate": 1.167068314299833e-05, "loss": 0.7263, "step": 18696 }, { "epoch": 1.3895949461166852, "grad_norm": 2.0695572113801757, "learning_rate": 1.1669892046396627e-05, "loss": 0.5762, "step": 18697 }, { "epoch": 1.3896692679301375, "grad_norm": 2.0032237266749817, "learning_rate": 1.1669100939044249e-05, "loss": 0.5259, "step": 18698 }, { "epoch": 1.3897435897435897, "grad_norm": 2.002120943412564, "learning_rate": 1.1668309820946283e-05, "loss": 0.7256, "step": 18699 }, { "epoch": 1.389817911557042, "grad_norm": 1.6015378535856997, "learning_rate": 1.1667518692107827e-05, "loss": 0.4417, "step": 18700 }, { "epoch": 1.3898922333704942, "grad_norm": 2.216250638137443, "learning_rate": 1.1666727552533974e-05, "loss": 0.6743, "step": 18701 }, { "epoch": 1.3899665551839464, "grad_norm": 1.7101632095397694, "learning_rate": 1.1665936402229814e-05, "loss": 0.4637, "step": 18702 }, { "epoch": 1.3900408769973986, "grad_norm": 1.695039373215848, "learning_rate": 1.1665145241200442e-05, "loss": 0.6017, "step": 18703 }, { "epoch": 1.390115198810851, "grad_norm": 1.862224895937126, "learning_rate": 1.1664354069450949e-05, "loss": 0.5337, "step": 18704 }, { "epoch": 1.3901895206243031, "grad_norm": 1.8285051498201568, "learning_rate": 1.1663562886986433e-05, "loss": 0.4773, "step": 18705 }, { "epoch": 1.3902638424377556, "grad_norm": 1.8242262906507491, "learning_rate": 1.1662771693811985e-05, "loss": 0.5992, "step": 18706 }, { "epoch": 1.3903381642512076, "grad_norm": 1.88307331426745, "learning_rate": 1.1661980489932703e-05, "loss": 0.5797, "step": 18707 }, { "epoch": 1.39041248606466, "grad_norm": 1.9431121483673277, "learning_rate": 1.1661189275353673e-05, "loss": 0.6575, "step": 18708 }, { "epoch": 1.3904868078781123, "grad_norm": 1.773361812009236, "learning_rate": 1.166039805007999e-05, "loss": 0.6253, "step": 18709 }, { "epoch": 1.3905611296915645, "grad_norm": 2.0199657155790023, "learning_rate": 1.1659606814116753e-05, "loss": 0.5901, "step": 18710 }, { "epoch": 1.3906354515050168, "grad_norm": 1.8533272785810775, "learning_rate": 1.1658815567469052e-05, "loss": 0.5927, "step": 18711 }, { "epoch": 1.390709773318469, "grad_norm": 2.1938424751340917, "learning_rate": 1.1658024310141986e-05, "loss": 0.7197, "step": 18712 }, { "epoch": 1.3907840951319212, "grad_norm": 1.9185847750793823, "learning_rate": 1.1657233042140646e-05, "loss": 0.4887, "step": 18713 }, { "epoch": 1.3908584169453735, "grad_norm": 2.059941635602345, "learning_rate": 1.1656441763470123e-05, "loss": 0.5906, "step": 18714 }, { "epoch": 1.3909327387588257, "grad_norm": 2.2493158866519694, "learning_rate": 1.1655650474135514e-05, "loss": 0.6984, "step": 18715 }, { "epoch": 1.391007060572278, "grad_norm": 1.835085747792291, "learning_rate": 1.1654859174141912e-05, "loss": 0.466, "step": 18716 }, { "epoch": 1.3910813823857302, "grad_norm": 1.9800007419815289, "learning_rate": 1.1654067863494416e-05, "loss": 0.6466, "step": 18717 }, { "epoch": 1.3911557041991824, "grad_norm": 1.7759399470304167, "learning_rate": 1.1653276542198116e-05, "loss": 0.4366, "step": 18718 }, { "epoch": 1.3912300260126347, "grad_norm": 2.0042255552340795, "learning_rate": 1.1652485210258104e-05, "loss": 0.732, "step": 18719 }, { "epoch": 1.391304347826087, "grad_norm": 2.2663337514868656, "learning_rate": 1.1651693867679482e-05, "loss": 0.7962, "step": 18720 }, { "epoch": 1.3913786696395392, "grad_norm": 2.237535417984382, "learning_rate": 1.1650902514467337e-05, "loss": 0.5024, "step": 18721 }, { "epoch": 1.3914529914529914, "grad_norm": 1.8995244496741104, "learning_rate": 1.1650111150626768e-05, "loss": 0.5605, "step": 18722 }, { "epoch": 1.3915273132664436, "grad_norm": 2.0195639943590953, "learning_rate": 1.164931977616287e-05, "loss": 0.6717, "step": 18723 }, { "epoch": 1.3916016350798959, "grad_norm": 1.8381171424378215, "learning_rate": 1.164852839108074e-05, "loss": 0.5314, "step": 18724 }, { "epoch": 1.3916759568933483, "grad_norm": 1.688096175665994, "learning_rate": 1.1647736995385466e-05, "loss": 0.5467, "step": 18725 }, { "epoch": 1.3917502787068003, "grad_norm": 2.1181564581964496, "learning_rate": 1.1646945589082146e-05, "loss": 0.7073, "step": 18726 }, { "epoch": 1.3918246005202528, "grad_norm": 2.220517722796857, "learning_rate": 1.1646154172175877e-05, "loss": 0.7591, "step": 18727 }, { "epoch": 1.3918989223337048, "grad_norm": 2.4545168926354517, "learning_rate": 1.1645362744671753e-05, "loss": 0.6224, "step": 18728 }, { "epoch": 1.3919732441471573, "grad_norm": 2.387474392209752, "learning_rate": 1.164457130657487e-05, "loss": 0.665, "step": 18729 }, { "epoch": 1.3920475659606093, "grad_norm": 2.036135218873998, "learning_rate": 1.1643779857890323e-05, "loss": 0.6877, "step": 18730 }, { "epoch": 1.3921218877740618, "grad_norm": 1.7497780582630775, "learning_rate": 1.1642988398623203e-05, "loss": 0.6513, "step": 18731 }, { "epoch": 1.392196209587514, "grad_norm": 2.4462850123005015, "learning_rate": 1.164219692877861e-05, "loss": 0.7683, "step": 18732 }, { "epoch": 1.3922705314009662, "grad_norm": 2.5026289168698455, "learning_rate": 1.1641405448361638e-05, "loss": 0.7492, "step": 18733 }, { "epoch": 1.3923448532144185, "grad_norm": 1.5941691638896187, "learning_rate": 1.1640613957377387e-05, "loss": 0.6193, "step": 18734 }, { "epoch": 1.3924191750278707, "grad_norm": 1.9010693648947667, "learning_rate": 1.1639822455830947e-05, "loss": 0.4604, "step": 18735 }, { "epoch": 1.392493496841323, "grad_norm": 2.139072012171568, "learning_rate": 1.1639030943727412e-05, "loss": 0.8187, "step": 18736 }, { "epoch": 1.3925678186547752, "grad_norm": 2.774471494635137, "learning_rate": 1.1638239421071882e-05, "loss": 0.7035, "step": 18737 }, { "epoch": 1.3926421404682274, "grad_norm": 1.7697924833338072, "learning_rate": 1.1637447887869451e-05, "loss": 0.5773, "step": 18738 }, { "epoch": 1.3927164622816797, "grad_norm": 2.060010309552961, "learning_rate": 1.1636656344125219e-05, "loss": 0.6871, "step": 18739 }, { "epoch": 1.392790784095132, "grad_norm": 2.8213901783614244, "learning_rate": 1.1635864789844274e-05, "loss": 0.8653, "step": 18740 }, { "epoch": 1.3928651059085841, "grad_norm": 1.8409438641203484, "learning_rate": 1.1635073225031722e-05, "loss": 0.6784, "step": 18741 }, { "epoch": 1.3929394277220364, "grad_norm": 1.9945795368316186, "learning_rate": 1.1634281649692647e-05, "loss": 0.6369, "step": 18742 }, { "epoch": 1.3930137495354886, "grad_norm": 1.6236903360650916, "learning_rate": 1.1633490063832153e-05, "loss": 0.5806, "step": 18743 }, { "epoch": 1.3930880713489409, "grad_norm": 1.768720129248796, "learning_rate": 1.1632698467455336e-05, "loss": 0.6093, "step": 18744 }, { "epoch": 1.393162393162393, "grad_norm": 2.256852254132118, "learning_rate": 1.1631906860567293e-05, "loss": 0.6327, "step": 18745 }, { "epoch": 1.3932367149758453, "grad_norm": 1.8272129021151573, "learning_rate": 1.1631115243173114e-05, "loss": 0.5593, "step": 18746 }, { "epoch": 1.3933110367892976, "grad_norm": 1.9550042332061304, "learning_rate": 1.1630323615277903e-05, "loss": 0.6271, "step": 18747 }, { "epoch": 1.39338535860275, "grad_norm": 1.7039113504328312, "learning_rate": 1.1629531976886751e-05, "loss": 0.4699, "step": 18748 }, { "epoch": 1.393459680416202, "grad_norm": 2.165847775393923, "learning_rate": 1.162874032800476e-05, "loss": 0.7156, "step": 18749 }, { "epoch": 1.3935340022296545, "grad_norm": 2.3257105392185498, "learning_rate": 1.1627948668637021e-05, "loss": 0.6166, "step": 18750 }, { "epoch": 1.3936083240431065, "grad_norm": 1.9884616448936288, "learning_rate": 1.1627156998788633e-05, "loss": 0.4999, "step": 18751 }, { "epoch": 1.393682645856559, "grad_norm": 1.7000568480255203, "learning_rate": 1.1626365318464693e-05, "loss": 0.5276, "step": 18752 }, { "epoch": 1.3937569676700112, "grad_norm": 1.9923774570208954, "learning_rate": 1.1625573627670296e-05, "loss": 0.6202, "step": 18753 }, { "epoch": 1.3938312894834635, "grad_norm": 2.318580787958469, "learning_rate": 1.1624781926410542e-05, "loss": 0.4378, "step": 18754 }, { "epoch": 1.3939056112969157, "grad_norm": 2.109286285316404, "learning_rate": 1.1623990214690526e-05, "loss": 0.5708, "step": 18755 }, { "epoch": 1.393979933110368, "grad_norm": 1.9754479078988003, "learning_rate": 1.1623198492515346e-05, "loss": 0.6587, "step": 18756 }, { "epoch": 1.3940542549238202, "grad_norm": 1.7151455991507738, "learning_rate": 1.1622406759890095e-05, "loss": 0.4987, "step": 18757 }, { "epoch": 1.3941285767372724, "grad_norm": 2.4452905084777745, "learning_rate": 1.1621615016819879e-05, "loss": 0.7088, "step": 18758 }, { "epoch": 1.3942028985507247, "grad_norm": 2.176648022938824, "learning_rate": 1.1620823263309784e-05, "loss": 0.6646, "step": 18759 }, { "epoch": 1.394277220364177, "grad_norm": 1.6919628678986791, "learning_rate": 1.1620031499364917e-05, "loss": 0.5236, "step": 18760 }, { "epoch": 1.3943515421776291, "grad_norm": 1.8177994604039884, "learning_rate": 1.161923972499037e-05, "loss": 0.4913, "step": 18761 }, { "epoch": 1.3944258639910814, "grad_norm": 1.8152880380743317, "learning_rate": 1.1618447940191239e-05, "loss": 0.5668, "step": 18762 }, { "epoch": 1.3945001858045336, "grad_norm": 1.7290518096705771, "learning_rate": 1.1617656144972627e-05, "loss": 0.6162, "step": 18763 }, { "epoch": 1.3945745076179858, "grad_norm": 2.365304073595468, "learning_rate": 1.1616864339339629e-05, "loss": 0.6039, "step": 18764 }, { "epoch": 1.394648829431438, "grad_norm": 2.1072852163380564, "learning_rate": 1.1616072523297342e-05, "loss": 0.4814, "step": 18765 }, { "epoch": 1.3947231512448903, "grad_norm": 2.0679806333898147, "learning_rate": 1.161528069685086e-05, "loss": 0.7014, "step": 18766 }, { "epoch": 1.3947974730583426, "grad_norm": 1.5631931998972328, "learning_rate": 1.1614488860005292e-05, "loss": 0.6047, "step": 18767 }, { "epoch": 1.3948717948717948, "grad_norm": 2.40179289000258, "learning_rate": 1.1613697012765724e-05, "loss": 0.7634, "step": 18768 }, { "epoch": 1.394946116685247, "grad_norm": 2.0982176669550183, "learning_rate": 1.161290515513726e-05, "loss": 0.5302, "step": 18769 }, { "epoch": 1.3950204384986993, "grad_norm": 1.8651492117158668, "learning_rate": 1.1612113287124993e-05, "loss": 0.5754, "step": 18770 }, { "epoch": 1.3950947603121517, "grad_norm": 2.0193282965543173, "learning_rate": 1.1611321408734027e-05, "loss": 0.7551, "step": 18771 }, { "epoch": 1.3951690821256038, "grad_norm": 2.2355379652460594, "learning_rate": 1.1610529519969458e-05, "loss": 0.784, "step": 18772 }, { "epoch": 1.3952434039390562, "grad_norm": 1.9037829216967856, "learning_rate": 1.1609737620836381e-05, "loss": 0.623, "step": 18773 }, { "epoch": 1.3953177257525082, "grad_norm": 2.4628561450363224, "learning_rate": 1.1608945711339898e-05, "loss": 0.6682, "step": 18774 }, { "epoch": 1.3953920475659607, "grad_norm": 1.976065689185882, "learning_rate": 1.160815379148511e-05, "loss": 0.6438, "step": 18775 }, { "epoch": 1.395466369379413, "grad_norm": 4.689582120188163, "learning_rate": 1.1607361861277104e-05, "loss": 0.6589, "step": 18776 }, { "epoch": 1.3955406911928652, "grad_norm": 1.8970117052052378, "learning_rate": 1.1606569920720989e-05, "loss": 0.5292, "step": 18777 }, { "epoch": 1.3956150130063174, "grad_norm": 2.2928227365926386, "learning_rate": 1.160577796982186e-05, "loss": 0.7487, "step": 18778 }, { "epoch": 1.3956893348197696, "grad_norm": 2.0684522630740543, "learning_rate": 1.1604986008584818e-05, "loss": 0.55, "step": 18779 }, { "epoch": 1.3957636566332219, "grad_norm": 2.101964788546191, "learning_rate": 1.1604194037014955e-05, "loss": 0.7107, "step": 18780 }, { "epoch": 1.3958379784466741, "grad_norm": 1.7411903358565144, "learning_rate": 1.160340205511738e-05, "loss": 0.5037, "step": 18781 }, { "epoch": 1.3959123002601264, "grad_norm": 2.1512955992312177, "learning_rate": 1.1602610062897179e-05, "loss": 0.6667, "step": 18782 }, { "epoch": 1.3959866220735786, "grad_norm": 2.158364243042077, "learning_rate": 1.1601818060359462e-05, "loss": 0.5654, "step": 18783 }, { "epoch": 1.3960609438870308, "grad_norm": 2.2854206429795547, "learning_rate": 1.1601026047509323e-05, "loss": 0.6406, "step": 18784 }, { "epoch": 1.396135265700483, "grad_norm": 1.808446860223797, "learning_rate": 1.160023402435186e-05, "loss": 0.5536, "step": 18785 }, { "epoch": 1.3962095875139353, "grad_norm": 1.9244689553925942, "learning_rate": 1.1599441990892176e-05, "loss": 0.6457, "step": 18786 }, { "epoch": 1.3962839093273876, "grad_norm": 3.3311124715827076, "learning_rate": 1.1598649947135364e-05, "loss": 0.6537, "step": 18787 }, { "epoch": 1.3963582311408398, "grad_norm": 2.0908757224224717, "learning_rate": 1.1597857893086529e-05, "loss": 0.5626, "step": 18788 }, { "epoch": 1.396432552954292, "grad_norm": 2.0135756562953895, "learning_rate": 1.1597065828750766e-05, "loss": 0.6441, "step": 18789 }, { "epoch": 1.3965068747677443, "grad_norm": 1.464566730134047, "learning_rate": 1.1596273754133176e-05, "loss": 0.4049, "step": 18790 }, { "epoch": 1.3965811965811965, "grad_norm": 10.904695446156191, "learning_rate": 1.1595481669238859e-05, "loss": 0.6297, "step": 18791 }, { "epoch": 1.396655518394649, "grad_norm": 1.8014891401408701, "learning_rate": 1.1594689574072915e-05, "loss": 0.6968, "step": 18792 }, { "epoch": 1.396729840208101, "grad_norm": 2.202663801452025, "learning_rate": 1.159389746864044e-05, "loss": 0.6643, "step": 18793 }, { "epoch": 1.3968041620215534, "grad_norm": 1.9700169323677201, "learning_rate": 1.1593105352946541e-05, "loss": 0.6099, "step": 18794 }, { "epoch": 1.3968784838350055, "grad_norm": 2.4169960386561815, "learning_rate": 1.1592313226996305e-05, "loss": 0.5867, "step": 18795 }, { "epoch": 1.396952805648458, "grad_norm": 1.924823332536736, "learning_rate": 1.1591521090794844e-05, "loss": 0.6518, "step": 18796 }, { "epoch": 1.39702712746191, "grad_norm": 1.7280152651139273, "learning_rate": 1.1590728944347252e-05, "loss": 0.5731, "step": 18797 }, { "epoch": 1.3971014492753624, "grad_norm": 1.982192777837392, "learning_rate": 1.1589936787658629e-05, "loss": 0.6668, "step": 18798 }, { "epoch": 1.3971757710888146, "grad_norm": 1.8479865862440437, "learning_rate": 1.1589144620734076e-05, "loss": 0.5302, "step": 18799 }, { "epoch": 1.3972500929022669, "grad_norm": 2.244626653307638, "learning_rate": 1.1588352443578692e-05, "loss": 0.6951, "step": 18800 }, { "epoch": 1.397324414715719, "grad_norm": 2.786259708645946, "learning_rate": 1.1587560256197576e-05, "loss": 0.7129, "step": 18801 }, { "epoch": 1.3973987365291713, "grad_norm": 1.9824354650256188, "learning_rate": 1.158676805859583e-05, "loss": 0.6496, "step": 18802 }, { "epoch": 1.3974730583426236, "grad_norm": 1.9029140977062762, "learning_rate": 1.1585975850778555e-05, "loss": 0.582, "step": 18803 }, { "epoch": 1.3975473801560758, "grad_norm": 2.0039284815816423, "learning_rate": 1.158518363275085e-05, "loss": 0.5549, "step": 18804 }, { "epoch": 1.397621701969528, "grad_norm": 2.2949239815190148, "learning_rate": 1.1584391404517813e-05, "loss": 0.7712, "step": 18805 }, { "epoch": 1.3976960237829803, "grad_norm": 2.367926741736104, "learning_rate": 1.158359916608455e-05, "loss": 0.7436, "step": 18806 }, { "epoch": 1.3977703455964325, "grad_norm": 2.156800240097288, "learning_rate": 1.1582806917456153e-05, "loss": 0.7541, "step": 18807 }, { "epoch": 1.3978446674098848, "grad_norm": 1.8125721185901709, "learning_rate": 1.1582014658637728e-05, "loss": 0.5846, "step": 18808 }, { "epoch": 1.397918989223337, "grad_norm": 1.8339516400952502, "learning_rate": 1.1581222389634377e-05, "loss": 0.5567, "step": 18809 }, { "epoch": 1.3979933110367893, "grad_norm": 1.746849156836114, "learning_rate": 1.15804301104512e-05, "loss": 0.545, "step": 18810 }, { "epoch": 1.3980676328502415, "grad_norm": 1.7662807504419105, "learning_rate": 1.1579637821093293e-05, "loss": 0.5627, "step": 18811 }, { "epoch": 1.3981419546636937, "grad_norm": 1.3297428501972106, "learning_rate": 1.1578845521565758e-05, "loss": 0.3798, "step": 18812 }, { "epoch": 1.398216276477146, "grad_norm": 2.0651543609568357, "learning_rate": 1.15780532118737e-05, "loss": 0.7162, "step": 18813 }, { "epoch": 1.3982905982905982, "grad_norm": 2.010911542341353, "learning_rate": 1.1577260892022217e-05, "loss": 0.6086, "step": 18814 }, { "epoch": 1.3983649201040507, "grad_norm": 1.7469200243665912, "learning_rate": 1.1576468562016409e-05, "loss": 0.3459, "step": 18815 }, { "epoch": 1.3984392419175027, "grad_norm": 1.6731686802262709, "learning_rate": 1.1575676221861381e-05, "loss": 0.4948, "step": 18816 }, { "epoch": 1.3985135637309551, "grad_norm": 1.900101118449599, "learning_rate": 1.1574883871562229e-05, "loss": 0.5104, "step": 18817 }, { "epoch": 1.3985878855444072, "grad_norm": 2.1545877848959187, "learning_rate": 1.1574091511124055e-05, "loss": 0.5657, "step": 18818 }, { "epoch": 1.3986622073578596, "grad_norm": 1.742262582830911, "learning_rate": 1.1573299140551964e-05, "loss": 0.6297, "step": 18819 }, { "epoch": 1.3987365291713119, "grad_norm": 1.7090748257104575, "learning_rate": 1.1572506759851054e-05, "loss": 0.3875, "step": 18820 }, { "epoch": 1.398810850984764, "grad_norm": 3.6926592269726743, "learning_rate": 1.157171436902643e-05, "loss": 0.6212, "step": 18821 }, { "epoch": 1.3988851727982163, "grad_norm": 2.405902109266926, "learning_rate": 1.1570921968083187e-05, "loss": 0.6917, "step": 18822 }, { "epoch": 1.3989594946116686, "grad_norm": 2.004969346549553, "learning_rate": 1.1570129557026429e-05, "loss": 0.6064, "step": 18823 }, { "epoch": 1.3990338164251208, "grad_norm": 2.5527322515005872, "learning_rate": 1.1569337135861258e-05, "loss": 0.6459, "step": 18824 }, { "epoch": 1.399108138238573, "grad_norm": 2.067291112124132, "learning_rate": 1.1568544704592776e-05, "loss": 0.6469, "step": 18825 }, { "epoch": 1.3991824600520253, "grad_norm": 2.5014468914105787, "learning_rate": 1.1567752263226088e-05, "loss": 0.8718, "step": 18826 }, { "epoch": 1.3992567818654775, "grad_norm": 2.8737184841057255, "learning_rate": 1.1566959811766294e-05, "loss": 0.8271, "step": 18827 }, { "epoch": 1.3993311036789298, "grad_norm": 2.413601577083801, "learning_rate": 1.1566167350218489e-05, "loss": 0.7034, "step": 18828 }, { "epoch": 1.399405425492382, "grad_norm": 2.0405833824809085, "learning_rate": 1.156537487858778e-05, "loss": 0.6239, "step": 18829 }, { "epoch": 1.3994797473058342, "grad_norm": 2.2706200017455553, "learning_rate": 1.1564582396879271e-05, "loss": 0.6952, "step": 18830 }, { "epoch": 1.3995540691192865, "grad_norm": 1.8648936249086896, "learning_rate": 1.1563789905098061e-05, "loss": 0.5423, "step": 18831 }, { "epoch": 1.3996283909327387, "grad_norm": 2.3477541526671946, "learning_rate": 1.1562997403249255e-05, "loss": 0.5736, "step": 18832 }, { "epoch": 1.399702712746191, "grad_norm": 1.9299933863117582, "learning_rate": 1.156220489133795e-05, "loss": 0.6353, "step": 18833 }, { "epoch": 1.3997770345596432, "grad_norm": 1.8146306325029626, "learning_rate": 1.156141236936925e-05, "loss": 0.6669, "step": 18834 }, { "epoch": 1.3998513563730954, "grad_norm": 5.2971395735315365, "learning_rate": 1.156061983734826e-05, "loss": 0.6559, "step": 18835 }, { "epoch": 1.3999256781865477, "grad_norm": 1.9836849136286834, "learning_rate": 1.155982729528008e-05, "loss": 0.7601, "step": 18836 }, { "epoch": 1.4, "grad_norm": 1.806232377207768, "learning_rate": 1.1559034743169812e-05, "loss": 0.629, "step": 18837 }, { "epoch": 1.4000743218134524, "grad_norm": 1.9885694389233626, "learning_rate": 1.1558242181022563e-05, "loss": 0.5458, "step": 18838 }, { "epoch": 1.4001486436269044, "grad_norm": 1.7096699288279222, "learning_rate": 1.1557449608843428e-05, "loss": 0.6342, "step": 18839 }, { "epoch": 1.4002229654403568, "grad_norm": 1.8476502030107746, "learning_rate": 1.1556657026637512e-05, "loss": 0.556, "step": 18840 }, { "epoch": 1.4002972872538089, "grad_norm": 2.018518600446432, "learning_rate": 1.1555864434409921e-05, "loss": 0.6114, "step": 18841 }, { "epoch": 1.4003716090672613, "grad_norm": 2.5475303206377666, "learning_rate": 1.1555071832165757e-05, "loss": 0.6985, "step": 18842 }, { "epoch": 1.4004459308807136, "grad_norm": 1.8736655654353538, "learning_rate": 1.155427921991012e-05, "loss": 0.6538, "step": 18843 }, { "epoch": 1.4005202526941658, "grad_norm": 1.859493022464501, "learning_rate": 1.1553486597648113e-05, "loss": 0.5514, "step": 18844 }, { "epoch": 1.400594574507618, "grad_norm": 1.8139794034015149, "learning_rate": 1.155269396538484e-05, "loss": 0.6354, "step": 18845 }, { "epoch": 1.4006688963210703, "grad_norm": 1.745492588037378, "learning_rate": 1.1551901323125405e-05, "loss": 0.4971, "step": 18846 }, { "epoch": 1.4007432181345225, "grad_norm": 1.948099399994108, "learning_rate": 1.155110867087491e-05, "loss": 0.6642, "step": 18847 }, { "epoch": 1.4008175399479748, "grad_norm": 2.509079379409896, "learning_rate": 1.1550316008638456e-05, "loss": 0.663, "step": 18848 }, { "epoch": 1.400891861761427, "grad_norm": 2.468297374095429, "learning_rate": 1.154952333642115e-05, "loss": 0.6574, "step": 18849 }, { "epoch": 1.4009661835748792, "grad_norm": 1.6958540215196118, "learning_rate": 1.1548730654228093e-05, "loss": 0.5152, "step": 18850 }, { "epoch": 1.4010405053883315, "grad_norm": 2.0407876748126696, "learning_rate": 1.1547937962064387e-05, "loss": 0.6849, "step": 18851 }, { "epoch": 1.4011148272017837, "grad_norm": 2.015771242979901, "learning_rate": 1.1547145259935134e-05, "loss": 0.6021, "step": 18852 }, { "epoch": 1.401189149015236, "grad_norm": 1.7034006275215725, "learning_rate": 1.1546352547845446e-05, "loss": 0.4323, "step": 18853 }, { "epoch": 1.4012634708286882, "grad_norm": 1.9617034382116876, "learning_rate": 1.154555982580042e-05, "loss": 0.65, "step": 18854 }, { "epoch": 1.4013377926421404, "grad_norm": 2.219078102204094, "learning_rate": 1.1544767093805156e-05, "loss": 0.5124, "step": 18855 }, { "epoch": 1.4014121144555927, "grad_norm": 1.803716439200748, "learning_rate": 1.1543974351864764e-05, "loss": 0.5177, "step": 18856 }, { "epoch": 1.401486436269045, "grad_norm": 1.8766589027270366, "learning_rate": 1.1543181599984343e-05, "loss": 0.5367, "step": 18857 }, { "epoch": 1.4015607580824971, "grad_norm": 2.2149473232380936, "learning_rate": 1.1542388838169e-05, "loss": 0.7094, "step": 18858 }, { "epoch": 1.4016350798959496, "grad_norm": 2.107889838041932, "learning_rate": 1.154159606642384e-05, "loss": 0.8335, "step": 18859 }, { "epoch": 1.4017094017094016, "grad_norm": 1.5309120486711156, "learning_rate": 1.1540803284753965e-05, "loss": 0.5964, "step": 18860 }, { "epoch": 1.401783723522854, "grad_norm": 2.285300939767816, "learning_rate": 1.1540010493164475e-05, "loss": 0.5596, "step": 18861 }, { "epoch": 1.401858045336306, "grad_norm": 1.7154336366619234, "learning_rate": 1.1539217691660478e-05, "loss": 0.623, "step": 18862 }, { "epoch": 1.4019323671497586, "grad_norm": 2.249810797604622, "learning_rate": 1.1538424880247076e-05, "loss": 0.563, "step": 18863 }, { "epoch": 1.4020066889632106, "grad_norm": 1.8243017100072085, "learning_rate": 1.153763205892938e-05, "loss": 0.5463, "step": 18864 }, { "epoch": 1.402081010776663, "grad_norm": 1.8737855510898775, "learning_rate": 1.1536839227712486e-05, "loss": 0.7303, "step": 18865 }, { "epoch": 1.4021553325901153, "grad_norm": 1.6635880507687353, "learning_rate": 1.1536046386601499e-05, "loss": 0.5517, "step": 18866 }, { "epoch": 1.4022296544035675, "grad_norm": 1.8890174169926632, "learning_rate": 1.1535253535601526e-05, "loss": 0.6762, "step": 18867 }, { "epoch": 1.4023039762170197, "grad_norm": 2.0969748465222118, "learning_rate": 1.1534460674717669e-05, "loss": 0.5368, "step": 18868 }, { "epoch": 1.402378298030472, "grad_norm": 1.8436003316291592, "learning_rate": 1.1533667803955033e-05, "loss": 0.5607, "step": 18869 }, { "epoch": 1.4024526198439242, "grad_norm": 1.6243828365805353, "learning_rate": 1.1532874923318728e-05, "loss": 0.5533, "step": 18870 }, { "epoch": 1.4025269416573765, "grad_norm": 1.7444694034771775, "learning_rate": 1.1532082032813851e-05, "loss": 0.5053, "step": 18871 }, { "epoch": 1.4026012634708287, "grad_norm": 1.7528524500027158, "learning_rate": 1.153128913244551e-05, "loss": 0.6607, "step": 18872 }, { "epoch": 1.402675585284281, "grad_norm": 1.621454310872506, "learning_rate": 1.1530496222218807e-05, "loss": 0.5725, "step": 18873 }, { "epoch": 1.4027499070977332, "grad_norm": 2.1385532314900795, "learning_rate": 1.152970330213885e-05, "loss": 0.6288, "step": 18874 }, { "epoch": 1.4028242289111854, "grad_norm": 2.0557974830748162, "learning_rate": 1.1528910372210744e-05, "loss": 0.5795, "step": 18875 }, { "epoch": 1.4028985507246376, "grad_norm": 1.7641365222805165, "learning_rate": 1.1528117432439592e-05, "loss": 0.5304, "step": 18876 }, { "epoch": 1.4029728725380899, "grad_norm": 1.6148550790782925, "learning_rate": 1.1527324482830497e-05, "loss": 0.5744, "step": 18877 }, { "epoch": 1.4030471943515421, "grad_norm": 1.6401795831653634, "learning_rate": 1.1526531523388569e-05, "loss": 0.6188, "step": 18878 }, { "epoch": 1.4031215161649944, "grad_norm": 1.9359518388946313, "learning_rate": 1.1525738554118908e-05, "loss": 0.5745, "step": 18879 }, { "epoch": 1.4031958379784466, "grad_norm": 1.979176565826122, "learning_rate": 1.152494557502662e-05, "loss": 0.4585, "step": 18880 }, { "epoch": 1.4032701597918988, "grad_norm": 1.8419530407398375, "learning_rate": 1.1524152586116817e-05, "loss": 0.5745, "step": 18881 }, { "epoch": 1.4033444816053513, "grad_norm": 2.0965944884859984, "learning_rate": 1.1523359587394596e-05, "loss": 0.5496, "step": 18882 }, { "epoch": 1.4034188034188033, "grad_norm": 1.9407006151006336, "learning_rate": 1.1522566578865065e-05, "loss": 0.4975, "step": 18883 }, { "epoch": 1.4034931252322558, "grad_norm": 2.276753347193647, "learning_rate": 1.1521773560533328e-05, "loss": 0.4616, "step": 18884 }, { "epoch": 1.4035674470457078, "grad_norm": 1.9045588856756894, "learning_rate": 1.152098053240449e-05, "loss": 0.5465, "step": 18885 }, { "epoch": 1.4036417688591603, "grad_norm": 1.853249410666885, "learning_rate": 1.1520187494483663e-05, "loss": 0.6029, "step": 18886 }, { "epoch": 1.4037160906726125, "grad_norm": 2.0682455394519557, "learning_rate": 1.1519394446775945e-05, "loss": 0.5231, "step": 18887 }, { "epoch": 1.4037904124860647, "grad_norm": 1.7303321080830492, "learning_rate": 1.1518601389286445e-05, "loss": 0.5566, "step": 18888 }, { "epoch": 1.403864734299517, "grad_norm": 2.0243168259151347, "learning_rate": 1.151780832202027e-05, "loss": 0.6531, "step": 18889 }, { "epoch": 1.4039390561129692, "grad_norm": 2.3345322998626554, "learning_rate": 1.1517015244982519e-05, "loss": 0.7626, "step": 18890 }, { "epoch": 1.4040133779264214, "grad_norm": 1.8156703508267538, "learning_rate": 1.1516222158178306e-05, "loss": 0.4732, "step": 18891 }, { "epoch": 1.4040876997398737, "grad_norm": 2.1875072783485785, "learning_rate": 1.1515429061612734e-05, "loss": 0.6166, "step": 18892 }, { "epoch": 1.404162021553326, "grad_norm": 2.0531406378830592, "learning_rate": 1.151463595529091e-05, "loss": 0.6009, "step": 18893 }, { "epoch": 1.4042363433667782, "grad_norm": 2.0959523221865637, "learning_rate": 1.1513842839217933e-05, "loss": 0.5506, "step": 18894 }, { "epoch": 1.4043106651802304, "grad_norm": 2.9303478129470655, "learning_rate": 1.1513049713398916e-05, "loss": 0.5847, "step": 18895 }, { "epoch": 1.4043849869936826, "grad_norm": 1.9777376429428, "learning_rate": 1.1512256577838964e-05, "loss": 0.6901, "step": 18896 }, { "epoch": 1.4044593088071349, "grad_norm": 2.1660153100933024, "learning_rate": 1.1511463432543184e-05, "loss": 0.7334, "step": 18897 }, { "epoch": 1.4045336306205871, "grad_norm": 2.7442543346277164, "learning_rate": 1.151067027751668e-05, "loss": 0.5828, "step": 18898 }, { "epoch": 1.4046079524340394, "grad_norm": 2.1529426131638036, "learning_rate": 1.150987711276456e-05, "loss": 0.5674, "step": 18899 }, { "epoch": 1.4046822742474916, "grad_norm": 2.2026204451446905, "learning_rate": 1.1509083938291928e-05, "loss": 0.7024, "step": 18900 }, { "epoch": 1.4047565960609438, "grad_norm": 2.019913757125375, "learning_rate": 1.150829075410389e-05, "loss": 0.6385, "step": 18901 }, { "epoch": 1.404830917874396, "grad_norm": 2.2874876649212323, "learning_rate": 1.1507497560205556e-05, "loss": 0.7276, "step": 18902 }, { "epoch": 1.4049052396878483, "grad_norm": 2.0591934100803733, "learning_rate": 1.1506704356602035e-05, "loss": 0.6056, "step": 18903 }, { "epoch": 1.4049795615013005, "grad_norm": 1.9275213191189131, "learning_rate": 1.1505911143298425e-05, "loss": 0.4898, "step": 18904 }, { "epoch": 1.405053883314753, "grad_norm": 2.590499820545078, "learning_rate": 1.1505117920299838e-05, "loss": 0.5926, "step": 18905 }, { "epoch": 1.405128205128205, "grad_norm": 1.8352660656018767, "learning_rate": 1.1504324687611379e-05, "loss": 0.6034, "step": 18906 }, { "epoch": 1.4052025269416575, "grad_norm": 1.5785428246853719, "learning_rate": 1.1503531445238157e-05, "loss": 0.5201, "step": 18907 }, { "epoch": 1.4052768487551095, "grad_norm": 1.9160302871318444, "learning_rate": 1.1502738193185279e-05, "loss": 0.5917, "step": 18908 }, { "epoch": 1.405351170568562, "grad_norm": 3.4129477084670667, "learning_rate": 1.1501944931457849e-05, "loss": 0.6554, "step": 18909 }, { "epoch": 1.4054254923820142, "grad_norm": 1.7748800070436148, "learning_rate": 1.1501151660060977e-05, "loss": 0.6195, "step": 18910 }, { "epoch": 1.4054998141954664, "grad_norm": 1.9577363297151722, "learning_rate": 1.1500358378999765e-05, "loss": 0.4752, "step": 18911 }, { "epoch": 1.4055741360089187, "grad_norm": 1.6346686595918862, "learning_rate": 1.1499565088279326e-05, "loss": 0.5385, "step": 18912 }, { "epoch": 1.405648457822371, "grad_norm": 1.7546173276938495, "learning_rate": 1.1498771787904765e-05, "loss": 0.6065, "step": 18913 }, { "epoch": 1.4057227796358231, "grad_norm": 2.1054058992821165, "learning_rate": 1.1497978477881188e-05, "loss": 0.6469, "step": 18914 }, { "epoch": 1.4057971014492754, "grad_norm": 2.0507942467135676, "learning_rate": 1.1497185158213703e-05, "loss": 0.7308, "step": 18915 }, { "epoch": 1.4058714232627276, "grad_norm": 2.268363148432723, "learning_rate": 1.149639182890742e-05, "loss": 0.6076, "step": 18916 }, { "epoch": 1.4059457450761799, "grad_norm": 1.7837887110089998, "learning_rate": 1.1495598489967442e-05, "loss": 0.5268, "step": 18917 }, { "epoch": 1.406020066889632, "grad_norm": 2.3303221731152415, "learning_rate": 1.1494805141398877e-05, "loss": 0.5987, "step": 18918 }, { "epoch": 1.4060943887030843, "grad_norm": 2.153143305139717, "learning_rate": 1.1494011783206838e-05, "loss": 0.5906, "step": 18919 }, { "epoch": 1.4061687105165366, "grad_norm": 2.127223425219084, "learning_rate": 1.1493218415396426e-05, "loss": 0.7512, "step": 18920 }, { "epoch": 1.4062430323299888, "grad_norm": 2.088174030397008, "learning_rate": 1.149242503797275e-05, "loss": 0.5975, "step": 18921 }, { "epoch": 1.406317354143441, "grad_norm": 2.2656619252301486, "learning_rate": 1.1491631650940922e-05, "loss": 0.6602, "step": 18922 }, { "epoch": 1.4063916759568933, "grad_norm": 2.1492832289745047, "learning_rate": 1.1490838254306044e-05, "loss": 0.57, "step": 18923 }, { "epoch": 1.4064659977703455, "grad_norm": 2.3134871812618165, "learning_rate": 1.1490044848073231e-05, "loss": 0.6657, "step": 18924 }, { "epoch": 1.4065403195837978, "grad_norm": 1.6857548497161134, "learning_rate": 1.1489251432247584e-05, "loss": 0.5434, "step": 18925 }, { "epoch": 1.4066146413972502, "grad_norm": 1.8314225843009662, "learning_rate": 1.1488458006834213e-05, "loss": 0.6189, "step": 18926 }, { "epoch": 1.4066889632107022, "grad_norm": 2.6651120416722938, "learning_rate": 1.1487664571838228e-05, "loss": 0.7113, "step": 18927 }, { "epoch": 1.4067632850241547, "grad_norm": 2.0941225901471525, "learning_rate": 1.1486871127264734e-05, "loss": 0.6145, "step": 18928 }, { "epoch": 1.4068376068376067, "grad_norm": 1.7967705690701763, "learning_rate": 1.148607767311884e-05, "loss": 0.6787, "step": 18929 }, { "epoch": 1.4069119286510592, "grad_norm": 1.900490183386219, "learning_rate": 1.1485284209405658e-05, "loss": 0.5441, "step": 18930 }, { "epoch": 1.4069862504645112, "grad_norm": 1.8998766171991626, "learning_rate": 1.1484490736130294e-05, "loss": 0.5655, "step": 18931 }, { "epoch": 1.4070605722779637, "grad_norm": 1.7856936338628244, "learning_rate": 1.1483697253297852e-05, "loss": 0.6199, "step": 18932 }, { "epoch": 1.407134894091416, "grad_norm": 2.1043015103802665, "learning_rate": 1.1482903760913447e-05, "loss": 0.6551, "step": 18933 }, { "epoch": 1.4072092159048681, "grad_norm": 1.9140818417974201, "learning_rate": 1.1482110258982182e-05, "loss": 0.4435, "step": 18934 }, { "epoch": 1.4072835377183204, "grad_norm": 1.7056696559847624, "learning_rate": 1.148131674750917e-05, "loss": 0.5414, "step": 18935 }, { "epoch": 1.4073578595317726, "grad_norm": 1.9161029970507464, "learning_rate": 1.1480523226499518e-05, "loss": 0.7669, "step": 18936 }, { "epoch": 1.4074321813452249, "grad_norm": 1.4373771301434173, "learning_rate": 1.1479729695958333e-05, "loss": 0.3519, "step": 18937 }, { "epoch": 1.407506503158677, "grad_norm": 2.7201659303913965, "learning_rate": 1.1478936155890723e-05, "loss": 0.6758, "step": 18938 }, { "epoch": 1.4075808249721293, "grad_norm": 2.395751048916644, "learning_rate": 1.1478142606301804e-05, "loss": 0.5576, "step": 18939 }, { "epoch": 1.4076551467855816, "grad_norm": 1.5930061603191563, "learning_rate": 1.1477349047196676e-05, "loss": 0.4712, "step": 18940 }, { "epoch": 1.4077294685990338, "grad_norm": 1.7597715380783772, "learning_rate": 1.147655547858045e-05, "loss": 0.6771, "step": 18941 }, { "epoch": 1.407803790412486, "grad_norm": 1.8195140631855622, "learning_rate": 1.147576190045824e-05, "loss": 0.6609, "step": 18942 }, { "epoch": 1.4078781122259383, "grad_norm": 1.6022980838130443, "learning_rate": 1.1474968312835148e-05, "loss": 0.3644, "step": 18943 }, { "epoch": 1.4079524340393905, "grad_norm": 2.192829963912259, "learning_rate": 1.1474174715716291e-05, "loss": 0.6754, "step": 18944 }, { "epoch": 1.4080267558528428, "grad_norm": 2.244625028684674, "learning_rate": 1.1473381109106772e-05, "loss": 0.5837, "step": 18945 }, { "epoch": 1.408101077666295, "grad_norm": 1.4843750655614036, "learning_rate": 1.1472587493011702e-05, "loss": 0.4491, "step": 18946 }, { "epoch": 1.4081753994797472, "grad_norm": 1.6628456007753412, "learning_rate": 1.1471793867436187e-05, "loss": 0.5205, "step": 18947 }, { "epoch": 1.4082497212931995, "grad_norm": 4.130381817984714, "learning_rate": 1.1471000232385341e-05, "loss": 0.5689, "step": 18948 }, { "epoch": 1.408324043106652, "grad_norm": 1.865850656489947, "learning_rate": 1.1470206587864273e-05, "loss": 0.5477, "step": 18949 }, { "epoch": 1.408398364920104, "grad_norm": 2.0877712136234567, "learning_rate": 1.1469412933878092e-05, "loss": 0.5554, "step": 18950 }, { "epoch": 1.4084726867335564, "grad_norm": 1.9773512585381565, "learning_rate": 1.1468619270431905e-05, "loss": 0.6484, "step": 18951 }, { "epoch": 1.4085470085470084, "grad_norm": 1.640275084375555, "learning_rate": 1.1467825597530825e-05, "loss": 0.4951, "step": 18952 }, { "epoch": 1.4086213303604609, "grad_norm": 1.6573899486894925, "learning_rate": 1.1467031915179958e-05, "loss": 0.5036, "step": 18953 }, { "epoch": 1.4086956521739131, "grad_norm": 1.8295162471142101, "learning_rate": 1.1466238223384416e-05, "loss": 0.5913, "step": 18954 }, { "epoch": 1.4087699739873654, "grad_norm": 1.771898958019098, "learning_rate": 1.1465444522149307e-05, "loss": 0.6407, "step": 18955 }, { "epoch": 1.4088442958008176, "grad_norm": 2.2661552214988134, "learning_rate": 1.1464650811479748e-05, "loss": 0.6792, "step": 18956 }, { "epoch": 1.4089186176142698, "grad_norm": 1.693001235307968, "learning_rate": 1.146385709138084e-05, "loss": 0.4822, "step": 18957 }, { "epoch": 1.408992939427722, "grad_norm": 1.7937507623001598, "learning_rate": 1.1463063361857694e-05, "loss": 0.4466, "step": 18958 }, { "epoch": 1.4090672612411743, "grad_norm": 2.0524563553444772, "learning_rate": 1.1462269622915421e-05, "loss": 0.6549, "step": 18959 }, { "epoch": 1.4091415830546266, "grad_norm": 1.6944780451242636, "learning_rate": 1.1461475874559133e-05, "loss": 0.633, "step": 18960 }, { "epoch": 1.4092159048680788, "grad_norm": 2.0953195146565866, "learning_rate": 1.1460682116793942e-05, "loss": 0.7871, "step": 18961 }, { "epoch": 1.409290226681531, "grad_norm": 1.5218564337868177, "learning_rate": 1.1459888349624952e-05, "loss": 0.4366, "step": 18962 }, { "epoch": 1.4093645484949833, "grad_norm": 1.7520937584716747, "learning_rate": 1.1459094573057281e-05, "loss": 0.4438, "step": 18963 }, { "epoch": 1.4094388703084355, "grad_norm": 1.7585656533898442, "learning_rate": 1.145830078709603e-05, "loss": 0.5097, "step": 18964 }, { "epoch": 1.4095131921218877, "grad_norm": 1.8644443567014863, "learning_rate": 1.1457506991746317e-05, "loss": 0.5812, "step": 18965 }, { "epoch": 1.40958751393534, "grad_norm": 1.6124947638349607, "learning_rate": 1.1456713187013249e-05, "loss": 0.457, "step": 18966 }, { "epoch": 1.4096618357487922, "grad_norm": 2.9424983002758522, "learning_rate": 1.1455919372901939e-05, "loss": 0.5729, "step": 18967 }, { "epoch": 1.4097361575622445, "grad_norm": 1.9640454341590499, "learning_rate": 1.1455125549417494e-05, "loss": 0.663, "step": 18968 }, { "epoch": 1.4098104793756967, "grad_norm": 2.4375768047648463, "learning_rate": 1.1454331716565024e-05, "loss": 0.6734, "step": 18969 }, { "epoch": 1.409884801189149, "grad_norm": 2.0174003650306513, "learning_rate": 1.1453537874349642e-05, "loss": 0.6118, "step": 18970 }, { "epoch": 1.4099591230026012, "grad_norm": 1.7174337889242968, "learning_rate": 1.145274402277646e-05, "loss": 0.5466, "step": 18971 }, { "epoch": 1.4100334448160536, "grad_norm": 2.0338604244946326, "learning_rate": 1.1451950161850586e-05, "loss": 0.6198, "step": 18972 }, { "epoch": 1.4101077666295057, "grad_norm": 2.04001497786335, "learning_rate": 1.1451156291577137e-05, "loss": 0.6355, "step": 18973 }, { "epoch": 1.4101820884429581, "grad_norm": 1.8299004668408791, "learning_rate": 1.1450362411961217e-05, "loss": 0.5421, "step": 18974 }, { "epoch": 1.4102564102564101, "grad_norm": 1.714945933485973, "learning_rate": 1.1449568523007937e-05, "loss": 0.559, "step": 18975 }, { "epoch": 1.4103307320698626, "grad_norm": 1.65005633128496, "learning_rate": 1.1448774624722411e-05, "loss": 0.6197, "step": 18976 }, { "epoch": 1.4104050538833148, "grad_norm": 2.60313148542371, "learning_rate": 1.1447980717109748e-05, "loss": 0.5158, "step": 18977 }, { "epoch": 1.410479375696767, "grad_norm": 2.565680665882871, "learning_rate": 1.1447186800175064e-05, "loss": 0.6934, "step": 18978 }, { "epoch": 1.4105536975102193, "grad_norm": 2.302557791536229, "learning_rate": 1.1446392873923464e-05, "loss": 0.7639, "step": 18979 }, { "epoch": 1.4106280193236715, "grad_norm": 1.8581425492181212, "learning_rate": 1.144559893836006e-05, "loss": 0.6652, "step": 18980 }, { "epoch": 1.4107023411371238, "grad_norm": 1.5382470928755174, "learning_rate": 1.1444804993489967e-05, "loss": 0.4873, "step": 18981 }, { "epoch": 1.410776662950576, "grad_norm": 1.4971541588254325, "learning_rate": 1.1444011039318293e-05, "loss": 0.6017, "step": 18982 }, { "epoch": 1.4108509847640283, "grad_norm": 1.7393458429583997, "learning_rate": 1.1443217075850151e-05, "loss": 0.5019, "step": 18983 }, { "epoch": 1.4109253065774805, "grad_norm": 2.0043642912053277, "learning_rate": 1.1442423103090654e-05, "loss": 0.6841, "step": 18984 }, { "epoch": 1.4109996283909327, "grad_norm": 2.154951688889789, "learning_rate": 1.1441629121044913e-05, "loss": 0.6182, "step": 18985 }, { "epoch": 1.411073950204385, "grad_norm": 2.680645065601219, "learning_rate": 1.1440835129718035e-05, "loss": 0.6587, "step": 18986 }, { "epoch": 1.4111482720178372, "grad_norm": 1.9756295817799052, "learning_rate": 1.1440041129115137e-05, "loss": 0.6609, "step": 18987 }, { "epoch": 1.4112225938312895, "grad_norm": 1.9470021789361027, "learning_rate": 1.1439247119241328e-05, "loss": 0.6732, "step": 18988 }, { "epoch": 1.4112969156447417, "grad_norm": 1.6409662205817999, "learning_rate": 1.1438453100101722e-05, "loss": 0.5131, "step": 18989 }, { "epoch": 1.411371237458194, "grad_norm": 1.5789759752288404, "learning_rate": 1.1437659071701432e-05, "loss": 0.5413, "step": 18990 }, { "epoch": 1.4114455592716462, "grad_norm": 1.547857245413305, "learning_rate": 1.1436865034045564e-05, "loss": 0.46, "step": 18991 }, { "epoch": 1.4115198810850984, "grad_norm": 2.0991033388924247, "learning_rate": 1.1436070987139234e-05, "loss": 0.7099, "step": 18992 }, { "epoch": 1.4115942028985506, "grad_norm": 2.1429784031843235, "learning_rate": 1.1435276930987551e-05, "loss": 0.7434, "step": 18993 }, { "epoch": 1.4116685247120029, "grad_norm": 2.4836482142214744, "learning_rate": 1.1434482865595633e-05, "loss": 0.6578, "step": 18994 }, { "epoch": 1.4117428465254553, "grad_norm": 1.6729048947148262, "learning_rate": 1.143368879096859e-05, "loss": 0.5316, "step": 18995 }, { "epoch": 1.4118171683389074, "grad_norm": 1.8483279005407192, "learning_rate": 1.1432894707111531e-05, "loss": 0.6299, "step": 18996 }, { "epoch": 1.4118914901523598, "grad_norm": 2.0169900890058434, "learning_rate": 1.143210061402957e-05, "loss": 0.6328, "step": 18997 }, { "epoch": 1.4119658119658118, "grad_norm": 1.8278998973713536, "learning_rate": 1.1431306511727818e-05, "loss": 0.4641, "step": 18998 }, { "epoch": 1.4120401337792643, "grad_norm": 2.2231521255895177, "learning_rate": 1.1430512400211392e-05, "loss": 0.5411, "step": 18999 }, { "epoch": 1.4121144555927165, "grad_norm": 1.9133380185728568, "learning_rate": 1.14297182794854e-05, "loss": 0.4885, "step": 19000 }, { "epoch": 1.4121887774061688, "grad_norm": 2.0638416893058977, "learning_rate": 1.1428924149554957e-05, "loss": 0.7123, "step": 19001 }, { "epoch": 1.412263099219621, "grad_norm": 2.6979001590483405, "learning_rate": 1.1428130010425172e-05, "loss": 0.833, "step": 19002 }, { "epoch": 1.4123374210330732, "grad_norm": 1.9880083359638598, "learning_rate": 1.142733586210116e-05, "loss": 0.6141, "step": 19003 }, { "epoch": 1.4124117428465255, "grad_norm": 2.606287637425793, "learning_rate": 1.1426541704588035e-05, "loss": 0.5623, "step": 19004 }, { "epoch": 1.4124860646599777, "grad_norm": 1.6970206775002972, "learning_rate": 1.1425747537890907e-05, "loss": 0.5471, "step": 19005 }, { "epoch": 1.41256038647343, "grad_norm": 1.6817155626507059, "learning_rate": 1.1424953362014895e-05, "loss": 0.5709, "step": 19006 }, { "epoch": 1.4126347082868822, "grad_norm": 1.7724527924802038, "learning_rate": 1.1424159176965106e-05, "loss": 0.6411, "step": 19007 }, { "epoch": 1.4127090301003344, "grad_norm": 1.627941156073587, "learning_rate": 1.1423364982746653e-05, "loss": 0.4821, "step": 19008 }, { "epoch": 1.4127833519137867, "grad_norm": 1.9338895996090733, "learning_rate": 1.1422570779364648e-05, "loss": 0.6206, "step": 19009 }, { "epoch": 1.412857673727239, "grad_norm": 2.1401068318991494, "learning_rate": 1.1421776566824205e-05, "loss": 0.7369, "step": 19010 }, { "epoch": 1.4129319955406912, "grad_norm": 2.209351232085757, "learning_rate": 1.1420982345130442e-05, "loss": 0.6596, "step": 19011 }, { "epoch": 1.4130063173541434, "grad_norm": 1.9350453200936668, "learning_rate": 1.142018811428847e-05, "loss": 0.5419, "step": 19012 }, { "epoch": 1.4130806391675956, "grad_norm": 1.8980293099295182, "learning_rate": 1.1419393874303397e-05, "loss": 0.6574, "step": 19013 }, { "epoch": 1.4131549609810479, "grad_norm": 2.48127483414867, "learning_rate": 1.141859962518034e-05, "loss": 0.7699, "step": 19014 }, { "epoch": 1.4132292827945, "grad_norm": 2.442560027942268, "learning_rate": 1.1417805366924413e-05, "loss": 0.6632, "step": 19015 }, { "epoch": 1.4133036046079526, "grad_norm": 2.191031107561362, "learning_rate": 1.1417011099540726e-05, "loss": 0.6097, "step": 19016 }, { "epoch": 1.4133779264214046, "grad_norm": 2.3010647306589456, "learning_rate": 1.1416216823034398e-05, "loss": 0.6535, "step": 19017 }, { "epoch": 1.413452248234857, "grad_norm": 2.342997582361619, "learning_rate": 1.1415422537410542e-05, "loss": 0.6165, "step": 19018 }, { "epoch": 1.413526570048309, "grad_norm": 1.91785244483683, "learning_rate": 1.1414628242674266e-05, "loss": 0.574, "step": 19019 }, { "epoch": 1.4136008918617615, "grad_norm": 1.9451843785966585, "learning_rate": 1.1413833938830684e-05, "loss": 0.5234, "step": 19020 }, { "epoch": 1.4136752136752135, "grad_norm": 2.3360344404411104, "learning_rate": 1.1413039625884916e-05, "loss": 0.7195, "step": 19021 }, { "epoch": 1.413749535488666, "grad_norm": 2.017598401129812, "learning_rate": 1.1412245303842074e-05, "loss": 0.639, "step": 19022 }, { "epoch": 1.4138238573021182, "grad_norm": 1.9585617488567817, "learning_rate": 1.141145097270727e-05, "loss": 0.6394, "step": 19023 }, { "epoch": 1.4138981791155705, "grad_norm": 4.135484785338281, "learning_rate": 1.1410656632485614e-05, "loss": 0.7422, "step": 19024 }, { "epoch": 1.4139725009290227, "grad_norm": 2.043972771123724, "learning_rate": 1.1409862283182225e-05, "loss": 0.7043, "step": 19025 }, { "epoch": 1.414046822742475, "grad_norm": 2.0520266374943814, "learning_rate": 1.1409067924802215e-05, "loss": 0.6615, "step": 19026 }, { "epoch": 1.4141211445559272, "grad_norm": 1.9114130496437716, "learning_rate": 1.14082735573507e-05, "loss": 0.62, "step": 19027 }, { "epoch": 1.4141954663693794, "grad_norm": 1.9503607839466592, "learning_rate": 1.1407479180832794e-05, "loss": 0.6039, "step": 19028 }, { "epoch": 1.4142697881828317, "grad_norm": 2.2766360321516372, "learning_rate": 1.1406684795253611e-05, "loss": 0.628, "step": 19029 }, { "epoch": 1.414344109996284, "grad_norm": 1.7101229826888293, "learning_rate": 1.140589040061826e-05, "loss": 0.4858, "step": 19030 }, { "epoch": 1.4144184318097361, "grad_norm": 2.0741251061851633, "learning_rate": 1.1405095996931861e-05, "loss": 0.546, "step": 19031 }, { "epoch": 1.4144927536231884, "grad_norm": 1.9049160462841364, "learning_rate": 1.1404301584199528e-05, "loss": 0.5478, "step": 19032 }, { "epoch": 1.4145670754366406, "grad_norm": 1.4477231069651282, "learning_rate": 1.1403507162426375e-05, "loss": 0.4599, "step": 19033 }, { "epoch": 1.4146413972500929, "grad_norm": 2.124454766092195, "learning_rate": 1.1402712731617513e-05, "loss": 0.6285, "step": 19034 }, { "epoch": 1.414715719063545, "grad_norm": 2.2348782274835206, "learning_rate": 1.1401918291778062e-05, "loss": 0.6809, "step": 19035 }, { "epoch": 1.4147900408769973, "grad_norm": 1.766617211960961, "learning_rate": 1.1401123842913133e-05, "loss": 0.522, "step": 19036 }, { "epoch": 1.4148643626904496, "grad_norm": 2.462880740612821, "learning_rate": 1.1400329385027841e-05, "loss": 0.5686, "step": 19037 }, { "epoch": 1.4149386845039018, "grad_norm": 2.0235311062858075, "learning_rate": 1.13995349181273e-05, "loss": 0.7698, "step": 19038 }, { "epoch": 1.4150130063173543, "grad_norm": 2.777983755971666, "learning_rate": 1.139874044221663e-05, "loss": 0.6017, "step": 19039 }, { "epoch": 1.4150873281308063, "grad_norm": 1.5488838735509558, "learning_rate": 1.1397945957300938e-05, "loss": 0.4699, "step": 19040 }, { "epoch": 1.4151616499442587, "grad_norm": 1.9810801528886255, "learning_rate": 1.1397151463385343e-05, "loss": 0.5171, "step": 19041 }, { "epoch": 1.4152359717577108, "grad_norm": 1.8818657170825206, "learning_rate": 1.139635696047496e-05, "loss": 0.6578, "step": 19042 }, { "epoch": 1.4153102935711632, "grad_norm": 1.7928306762097388, "learning_rate": 1.1395562448574899e-05, "loss": 0.6183, "step": 19043 }, { "epoch": 1.4153846153846155, "grad_norm": 2.2298863136572904, "learning_rate": 1.1394767927690287e-05, "loss": 0.7792, "step": 19044 }, { "epoch": 1.4154589371980677, "grad_norm": 1.787672485017207, "learning_rate": 1.1393973397826226e-05, "loss": 0.5782, "step": 19045 }, { "epoch": 1.41553325901152, "grad_norm": 2.0079979523945273, "learning_rate": 1.139317885898784e-05, "loss": 0.5855, "step": 19046 }, { "epoch": 1.4156075808249722, "grad_norm": 2.6352731026829463, "learning_rate": 1.1392384311180238e-05, "loss": 0.7275, "step": 19047 }, { "epoch": 1.4156819026384244, "grad_norm": 1.8252080984488244, "learning_rate": 1.1391589754408539e-05, "loss": 0.6197, "step": 19048 }, { "epoch": 1.4157562244518767, "grad_norm": 2.369827467070537, "learning_rate": 1.1390795188677857e-05, "loss": 0.7121, "step": 19049 }, { "epoch": 1.415830546265329, "grad_norm": 1.9871787368758302, "learning_rate": 1.1390000613993309e-05, "loss": 0.63, "step": 19050 }, { "epoch": 1.4159048680787811, "grad_norm": 1.8059164967368635, "learning_rate": 1.1389206030360008e-05, "loss": 0.6178, "step": 19051 }, { "epoch": 1.4159791898922334, "grad_norm": 1.6292453636559823, "learning_rate": 1.1388411437783071e-05, "loss": 0.5778, "step": 19052 }, { "epoch": 1.4160535117056856, "grad_norm": 2.0357865707763545, "learning_rate": 1.1387616836267612e-05, "loss": 0.6567, "step": 19053 }, { "epoch": 1.4161278335191378, "grad_norm": 2.2541256626550545, "learning_rate": 1.1386822225818748e-05, "loss": 0.7125, "step": 19054 }, { "epoch": 1.41620215533259, "grad_norm": 1.9988576321766751, "learning_rate": 1.1386027606441597e-05, "loss": 0.5555, "step": 19055 }, { "epoch": 1.4162764771460423, "grad_norm": 1.7460114985996842, "learning_rate": 1.1385232978141269e-05, "loss": 0.5472, "step": 19056 }, { "epoch": 1.4163507989594946, "grad_norm": 2.15081893249694, "learning_rate": 1.1384438340922887e-05, "loss": 0.5862, "step": 19057 }, { "epoch": 1.4164251207729468, "grad_norm": 2.302246835581337, "learning_rate": 1.138364369479156e-05, "loss": 0.6003, "step": 19058 }, { "epoch": 1.416499442586399, "grad_norm": 2.1876204713409817, "learning_rate": 1.1382849039752406e-05, "loss": 0.5963, "step": 19059 }, { "epoch": 1.4165737643998513, "grad_norm": 2.009761970805053, "learning_rate": 1.1382054375810543e-05, "loss": 0.5942, "step": 19060 }, { "epoch": 1.4166480862133035, "grad_norm": 1.9590979868991067, "learning_rate": 1.1381259702971086e-05, "loss": 0.5877, "step": 19061 }, { "epoch": 1.416722408026756, "grad_norm": 1.6855773726936476, "learning_rate": 1.138046502123915e-05, "loss": 0.5296, "step": 19062 }, { "epoch": 1.416796729840208, "grad_norm": 1.6608029744335193, "learning_rate": 1.1379670330619851e-05, "loss": 0.4918, "step": 19063 }, { "epoch": 1.4168710516536605, "grad_norm": 1.9553407277713772, "learning_rate": 1.1378875631118307e-05, "loss": 0.5435, "step": 19064 }, { "epoch": 1.4169453734671125, "grad_norm": 2.1208357193874905, "learning_rate": 1.1378080922739631e-05, "loss": 0.5492, "step": 19065 }, { "epoch": 1.417019695280565, "grad_norm": 2.1852554170746137, "learning_rate": 1.1377286205488947e-05, "loss": 0.4758, "step": 19066 }, { "epoch": 1.4170940170940172, "grad_norm": 2.086568488604662, "learning_rate": 1.1376491479371362e-05, "loss": 0.6266, "step": 19067 }, { "epoch": 1.4171683389074694, "grad_norm": 1.6952141242744818, "learning_rate": 1.1375696744391994e-05, "loss": 0.5201, "step": 19068 }, { "epoch": 1.4172426607209216, "grad_norm": 2.261355770637268, "learning_rate": 1.1374902000555968e-05, "loss": 0.5644, "step": 19069 }, { "epoch": 1.4173169825343739, "grad_norm": 2.1717165284496045, "learning_rate": 1.1374107247868389e-05, "loss": 0.7114, "step": 19070 }, { "epoch": 1.4173913043478261, "grad_norm": 7.176744936677812, "learning_rate": 1.1373312486334383e-05, "loss": 0.6819, "step": 19071 }, { "epoch": 1.4174656261612784, "grad_norm": 1.9556421988483155, "learning_rate": 1.1372517715959057e-05, "loss": 0.5909, "step": 19072 }, { "epoch": 1.4175399479747306, "grad_norm": 1.769488137567266, "learning_rate": 1.1371722936747536e-05, "loss": 0.5969, "step": 19073 }, { "epoch": 1.4176142697881828, "grad_norm": 2.402570202809854, "learning_rate": 1.1370928148704936e-05, "loss": 0.6946, "step": 19074 }, { "epoch": 1.417688591601635, "grad_norm": 2.1141401614548787, "learning_rate": 1.137013335183637e-05, "loss": 0.6278, "step": 19075 }, { "epoch": 1.4177629134150873, "grad_norm": 1.6564445640260634, "learning_rate": 1.1369338546146955e-05, "loss": 0.5485, "step": 19076 }, { "epoch": 1.4178372352285395, "grad_norm": 1.900418215979953, "learning_rate": 1.1368543731641812e-05, "loss": 0.6574, "step": 19077 }, { "epoch": 1.4179115570419918, "grad_norm": 1.6551529002117247, "learning_rate": 1.1367748908326053e-05, "loss": 0.6054, "step": 19078 }, { "epoch": 1.417985878855444, "grad_norm": 2.3519990929252375, "learning_rate": 1.1366954076204799e-05, "loss": 0.7267, "step": 19079 }, { "epoch": 1.4180602006688963, "grad_norm": 3.214861200774606, "learning_rate": 1.1366159235283167e-05, "loss": 0.5415, "step": 19080 }, { "epoch": 1.4181345224823485, "grad_norm": 2.0591543159416865, "learning_rate": 1.1365364385566272e-05, "loss": 0.5716, "step": 19081 }, { "epoch": 1.4182088442958007, "grad_norm": 1.7816199653986466, "learning_rate": 1.1364569527059231e-05, "loss": 0.71, "step": 19082 }, { "epoch": 1.4182831661092532, "grad_norm": 1.8813835803330787, "learning_rate": 1.1363774659767161e-05, "loss": 0.6246, "step": 19083 }, { "epoch": 1.4183574879227052, "grad_norm": 2.200488839655296, "learning_rate": 1.136297978369518e-05, "loss": 0.5867, "step": 19084 }, { "epoch": 1.4184318097361577, "grad_norm": 1.7100807613047357, "learning_rate": 1.1362184898848408e-05, "loss": 0.4595, "step": 19085 }, { "epoch": 1.4185061315496097, "grad_norm": 1.861077990642053, "learning_rate": 1.136139000523196e-05, "loss": 0.5568, "step": 19086 }, { "epoch": 1.4185804533630622, "grad_norm": 2.0275201849920403, "learning_rate": 1.1360595102850953e-05, "loss": 0.543, "step": 19087 }, { "epoch": 1.4186547751765142, "grad_norm": 1.5857602993434445, "learning_rate": 1.1359800191710508e-05, "loss": 0.5382, "step": 19088 }, { "epoch": 1.4187290969899666, "grad_norm": 2.0155290793910177, "learning_rate": 1.1359005271815737e-05, "loss": 0.6742, "step": 19089 }, { "epoch": 1.4188034188034189, "grad_norm": 1.8740530526241408, "learning_rate": 1.1358210343171763e-05, "loss": 0.5886, "step": 19090 }, { "epoch": 1.418877740616871, "grad_norm": 1.623866541849389, "learning_rate": 1.13574154057837e-05, "loss": 0.4788, "step": 19091 }, { "epoch": 1.4189520624303233, "grad_norm": 1.7743836830673116, "learning_rate": 1.1356620459656666e-05, "loss": 0.6043, "step": 19092 }, { "epoch": 1.4190263842437756, "grad_norm": 1.8331026493085023, "learning_rate": 1.1355825504795784e-05, "loss": 0.5967, "step": 19093 }, { "epoch": 1.4191007060572278, "grad_norm": 2.0277058241646615, "learning_rate": 1.1355030541206163e-05, "loss": 0.5486, "step": 19094 }, { "epoch": 1.41917502787068, "grad_norm": 1.7514989043681628, "learning_rate": 1.1354235568892924e-05, "loss": 0.4955, "step": 19095 }, { "epoch": 1.4192493496841323, "grad_norm": 2.2630999343005973, "learning_rate": 1.135344058786119e-05, "loss": 0.5544, "step": 19096 }, { "epoch": 1.4193236714975845, "grad_norm": 1.9093563044208854, "learning_rate": 1.1352645598116077e-05, "loss": 0.5465, "step": 19097 }, { "epoch": 1.4193979933110368, "grad_norm": 2.1110168291796065, "learning_rate": 1.13518505996627e-05, "loss": 0.5372, "step": 19098 }, { "epoch": 1.419472315124489, "grad_norm": 1.8971579219305634, "learning_rate": 1.1351055592506181e-05, "loss": 0.5753, "step": 19099 }, { "epoch": 1.4195466369379413, "grad_norm": 2.0099992068584878, "learning_rate": 1.1350260576651632e-05, "loss": 0.5463, "step": 19100 }, { "epoch": 1.4196209587513935, "grad_norm": 1.6325771121814219, "learning_rate": 1.1349465552104179e-05, "loss": 0.5654, "step": 19101 }, { "epoch": 1.4196952805648457, "grad_norm": 2.2115856564892105, "learning_rate": 1.1348670518868938e-05, "loss": 0.5738, "step": 19102 }, { "epoch": 1.419769602378298, "grad_norm": 2.7475527856331676, "learning_rate": 1.1347875476951022e-05, "loss": 0.7832, "step": 19103 }, { "epoch": 1.4198439241917502, "grad_norm": 1.9589271046411019, "learning_rate": 1.1347080426355557e-05, "loss": 0.6287, "step": 19104 }, { "epoch": 1.4199182460052024, "grad_norm": 2.296960742315469, "learning_rate": 1.1346285367087658e-05, "loss": 0.6681, "step": 19105 }, { "epoch": 1.419992567818655, "grad_norm": 2.165018915972535, "learning_rate": 1.134549029915244e-05, "loss": 0.6205, "step": 19106 }, { "epoch": 1.420066889632107, "grad_norm": 1.6794718876814034, "learning_rate": 1.1344695222555027e-05, "loss": 0.5103, "step": 19107 }, { "epoch": 1.4201412114455594, "grad_norm": 3.0971655263834426, "learning_rate": 1.1343900137300539e-05, "loss": 0.7846, "step": 19108 }, { "epoch": 1.4202155332590114, "grad_norm": 2.053245480997582, "learning_rate": 1.1343105043394088e-05, "loss": 0.5252, "step": 19109 }, { "epoch": 1.4202898550724639, "grad_norm": 1.876735686135554, "learning_rate": 1.1342309940840799e-05, "loss": 0.4833, "step": 19110 }, { "epoch": 1.420364176885916, "grad_norm": 2.531957560338292, "learning_rate": 1.1341514829645785e-05, "loss": 0.6997, "step": 19111 }, { "epoch": 1.4204384986993683, "grad_norm": 2.102467649614778, "learning_rate": 1.134071970981417e-05, "loss": 0.7087, "step": 19112 }, { "epoch": 1.4205128205128206, "grad_norm": 2.29412483050064, "learning_rate": 1.1339924581351069e-05, "loss": 0.5009, "step": 19113 }, { "epoch": 1.4205871423262728, "grad_norm": 2.1580475097002503, "learning_rate": 1.1339129444261607e-05, "loss": 0.6077, "step": 19114 }, { "epoch": 1.420661464139725, "grad_norm": 2.2884663959662848, "learning_rate": 1.13383342985509e-05, "loss": 0.7171, "step": 19115 }, { "epoch": 1.4207357859531773, "grad_norm": 2.439234069878668, "learning_rate": 1.1337539144224064e-05, "loss": 0.772, "step": 19116 }, { "epoch": 1.4208101077666295, "grad_norm": 1.9069572730538338, "learning_rate": 1.1336743981286218e-05, "loss": 0.4363, "step": 19117 }, { "epoch": 1.4208844295800818, "grad_norm": 2.4059923661215787, "learning_rate": 1.1335948809742485e-05, "loss": 0.6481, "step": 19118 }, { "epoch": 1.420958751393534, "grad_norm": 1.900973637746212, "learning_rate": 1.1335153629597983e-05, "loss": 0.6704, "step": 19119 }, { "epoch": 1.4210330732069862, "grad_norm": 1.786737209720059, "learning_rate": 1.1334358440857831e-05, "loss": 0.5234, "step": 19120 }, { "epoch": 1.4211073950204385, "grad_norm": 2.028290041870549, "learning_rate": 1.1333563243527153e-05, "loss": 0.5966, "step": 19121 }, { "epoch": 1.4211817168338907, "grad_norm": 1.64679964616002, "learning_rate": 1.1332768037611056e-05, "loss": 0.5214, "step": 19122 }, { "epoch": 1.421256038647343, "grad_norm": 1.9923645841496276, "learning_rate": 1.1331972823114672e-05, "loss": 0.6138, "step": 19123 }, { "epoch": 1.4213303604607952, "grad_norm": 2.2379077056007564, "learning_rate": 1.1331177600043115e-05, "loss": 0.6172, "step": 19124 }, { "epoch": 1.4214046822742474, "grad_norm": 2.504557852438347, "learning_rate": 1.1330382368401508e-05, "loss": 0.7359, "step": 19125 }, { "epoch": 1.4214790040876997, "grad_norm": 1.4571361978499484, "learning_rate": 1.132958712819497e-05, "loss": 0.4197, "step": 19126 }, { "epoch": 1.421553325901152, "grad_norm": 1.8622915815499272, "learning_rate": 1.1328791879428616e-05, "loss": 0.6648, "step": 19127 }, { "epoch": 1.4216276477146041, "grad_norm": 2.1731517690260884, "learning_rate": 1.1327996622107566e-05, "loss": 0.5587, "step": 19128 }, { "epoch": 1.4217019695280566, "grad_norm": 2.0801092377467603, "learning_rate": 1.1327201356236945e-05, "loss": 0.5822, "step": 19129 }, { "epoch": 1.4217762913415086, "grad_norm": 2.2269480237029624, "learning_rate": 1.132640608182187e-05, "loss": 0.7359, "step": 19130 }, { "epoch": 1.421850613154961, "grad_norm": 2.2115273194400213, "learning_rate": 1.1325610798867463e-05, "loss": 0.6145, "step": 19131 }, { "epoch": 1.421924934968413, "grad_norm": 2.4625677775827577, "learning_rate": 1.1324815507378846e-05, "loss": 0.6671, "step": 19132 }, { "epoch": 1.4219992567818656, "grad_norm": 2.025013532808204, "learning_rate": 1.1324020207361128e-05, "loss": 0.564, "step": 19133 }, { "epoch": 1.4220735785953178, "grad_norm": 2.1454152340800596, "learning_rate": 1.132322489881944e-05, "loss": 0.6959, "step": 19134 }, { "epoch": 1.42214790040877, "grad_norm": 1.805256183494477, "learning_rate": 1.1322429581758897e-05, "loss": 0.678, "step": 19135 }, { "epoch": 1.4222222222222223, "grad_norm": 1.7437563770670184, "learning_rate": 1.1321634256184625e-05, "loss": 0.5899, "step": 19136 }, { "epoch": 1.4222965440356745, "grad_norm": 2.3004940859177485, "learning_rate": 1.132083892210174e-05, "loss": 0.5188, "step": 19137 }, { "epoch": 1.4223708658491268, "grad_norm": 1.9940301053405693, "learning_rate": 1.132004357951536e-05, "loss": 0.6873, "step": 19138 }, { "epoch": 1.422445187662579, "grad_norm": 2.731177164612537, "learning_rate": 1.1319248228430608e-05, "loss": 0.5429, "step": 19139 }, { "epoch": 1.4225195094760312, "grad_norm": 1.9863311056706703, "learning_rate": 1.1318452868852603e-05, "loss": 0.5435, "step": 19140 }, { "epoch": 1.4225938312894835, "grad_norm": 1.8988822403842185, "learning_rate": 1.1317657500786467e-05, "loss": 0.475, "step": 19141 }, { "epoch": 1.4226681531029357, "grad_norm": 2.480625396233914, "learning_rate": 1.1316862124237324e-05, "loss": 0.6851, "step": 19142 }, { "epoch": 1.422742474916388, "grad_norm": 1.8685585826050202, "learning_rate": 1.1316066739210292e-05, "loss": 0.6341, "step": 19143 }, { "epoch": 1.4228167967298402, "grad_norm": 1.5206914701002066, "learning_rate": 1.1315271345710486e-05, "loss": 0.4492, "step": 19144 }, { "epoch": 1.4228911185432924, "grad_norm": 1.466900663451967, "learning_rate": 1.1314475943743034e-05, "loss": 0.3253, "step": 19145 }, { "epoch": 1.4229654403567447, "grad_norm": 1.6577084440228664, "learning_rate": 1.131368053331305e-05, "loss": 0.5809, "step": 19146 }, { "epoch": 1.423039762170197, "grad_norm": 1.7213867469111996, "learning_rate": 1.1312885114425663e-05, "loss": 0.5266, "step": 19147 }, { "epoch": 1.4231140839836491, "grad_norm": 1.4897814968834884, "learning_rate": 1.1312089687085989e-05, "loss": 0.4672, "step": 19148 }, { "epoch": 1.4231884057971014, "grad_norm": 2.2664713209602314, "learning_rate": 1.131129425129915e-05, "loss": 0.6998, "step": 19149 }, { "epoch": 1.4232627276105538, "grad_norm": 2.005459205477244, "learning_rate": 1.1310498807070266e-05, "loss": 0.6531, "step": 19150 }, { "epoch": 1.4233370494240059, "grad_norm": 2.1375313087874246, "learning_rate": 1.1309703354404458e-05, "loss": 0.6207, "step": 19151 }, { "epoch": 1.4234113712374583, "grad_norm": 1.9743147387334492, "learning_rate": 1.1308907893306847e-05, "loss": 0.621, "step": 19152 }, { "epoch": 1.4234856930509103, "grad_norm": 1.7593950268935044, "learning_rate": 1.1308112423782558e-05, "loss": 0.5835, "step": 19153 }, { "epoch": 1.4235600148643628, "grad_norm": 1.9592343292992163, "learning_rate": 1.1307316945836708e-05, "loss": 0.375, "step": 19154 }, { "epoch": 1.4236343366778148, "grad_norm": 1.7574862790178547, "learning_rate": 1.1306521459474418e-05, "loss": 0.5863, "step": 19155 }, { "epoch": 1.4237086584912673, "grad_norm": 2.650200219839255, "learning_rate": 1.130572596470081e-05, "loss": 0.5825, "step": 19156 }, { "epoch": 1.4237829803047195, "grad_norm": 2.3249522015225486, "learning_rate": 1.1304930461521007e-05, "loss": 0.5825, "step": 19157 }, { "epoch": 1.4238573021181717, "grad_norm": 2.363786640592269, "learning_rate": 1.130413494994013e-05, "loss": 0.6989, "step": 19158 }, { "epoch": 1.423931623931624, "grad_norm": 2.256404642475395, "learning_rate": 1.1303339429963303e-05, "loss": 0.6552, "step": 19159 }, { "epoch": 1.4240059457450762, "grad_norm": 1.8203283235044712, "learning_rate": 1.130254390159564e-05, "loss": 0.6041, "step": 19160 }, { "epoch": 1.4240802675585285, "grad_norm": 1.547674607782871, "learning_rate": 1.1301748364842266e-05, "loss": 0.5252, "step": 19161 }, { "epoch": 1.4241545893719807, "grad_norm": 1.6106813833512847, "learning_rate": 1.1300952819708306e-05, "loss": 0.4456, "step": 19162 }, { "epoch": 1.424228911185433, "grad_norm": 1.8693574695535335, "learning_rate": 1.1300157266198877e-05, "loss": 0.6709, "step": 19163 }, { "epoch": 1.4243032329988852, "grad_norm": 2.2630277400157017, "learning_rate": 1.1299361704319108e-05, "loss": 0.741, "step": 19164 }, { "epoch": 1.4243775548123374, "grad_norm": 2.643665224284388, "learning_rate": 1.1298566134074112e-05, "loss": 0.7682, "step": 19165 }, { "epoch": 1.4244518766257896, "grad_norm": 1.9238235125312162, "learning_rate": 1.1297770555469015e-05, "loss": 0.6495, "step": 19166 }, { "epoch": 1.4245261984392419, "grad_norm": 1.951596507943887, "learning_rate": 1.1296974968508938e-05, "loss": 0.6101, "step": 19167 }, { "epoch": 1.4246005202526941, "grad_norm": 2.005260546376274, "learning_rate": 1.1296179373199003e-05, "loss": 0.5462, "step": 19168 }, { "epoch": 1.4246748420661464, "grad_norm": 2.3829638914841804, "learning_rate": 1.1295383769544334e-05, "loss": 0.8, "step": 19169 }, { "epoch": 1.4247491638795986, "grad_norm": 2.3929765419448974, "learning_rate": 1.1294588157550053e-05, "loss": 0.6671, "step": 19170 }, { "epoch": 1.4248234856930508, "grad_norm": 1.954718350306131, "learning_rate": 1.1293792537221277e-05, "loss": 0.6782, "step": 19171 }, { "epoch": 1.424897807506503, "grad_norm": 1.6354893392238699, "learning_rate": 1.1292996908563134e-05, "loss": 0.5918, "step": 19172 }, { "epoch": 1.4249721293199555, "grad_norm": 2.0429049316033607, "learning_rate": 1.1292201271580743e-05, "loss": 0.5942, "step": 19173 }, { "epoch": 1.4250464511334076, "grad_norm": 1.7304783849355043, "learning_rate": 1.1291405626279228e-05, "loss": 0.5099, "step": 19174 }, { "epoch": 1.42512077294686, "grad_norm": 2.1260109464775536, "learning_rate": 1.1290609972663708e-05, "loss": 0.6293, "step": 19175 }, { "epoch": 1.425195094760312, "grad_norm": 1.8912473358848993, "learning_rate": 1.1289814310739314e-05, "loss": 0.6489, "step": 19176 }, { "epoch": 1.4252694165737645, "grad_norm": 1.7027988940569652, "learning_rate": 1.1289018640511159e-05, "loss": 0.542, "step": 19177 }, { "epoch": 1.4253437383872167, "grad_norm": 2.121710322444788, "learning_rate": 1.1288222961984364e-05, "loss": 0.6367, "step": 19178 }, { "epoch": 1.425418060200669, "grad_norm": 1.8776550764769009, "learning_rate": 1.1287427275164062e-05, "loss": 0.4999, "step": 19179 }, { "epoch": 1.4254923820141212, "grad_norm": 1.5926409298238344, "learning_rate": 1.128663158005537e-05, "loss": 0.4148, "step": 19180 }, { "epoch": 1.4255667038275734, "grad_norm": 1.735923648570526, "learning_rate": 1.1285835876663409e-05, "loss": 0.4625, "step": 19181 }, { "epoch": 1.4256410256410257, "grad_norm": 1.7074026917860783, "learning_rate": 1.1285040164993304e-05, "loss": 0.5893, "step": 19182 }, { "epoch": 1.425715347454478, "grad_norm": 2.201719566162286, "learning_rate": 1.1284244445050177e-05, "loss": 0.5409, "step": 19183 }, { "epoch": 1.4257896692679302, "grad_norm": 2.21488114693062, "learning_rate": 1.128344871683915e-05, "loss": 0.5804, "step": 19184 }, { "epoch": 1.4258639910813824, "grad_norm": 2.0842875334086535, "learning_rate": 1.1282652980365349e-05, "loss": 0.8139, "step": 19185 }, { "epoch": 1.4259383128948346, "grad_norm": 2.1502297144074074, "learning_rate": 1.1281857235633894e-05, "loss": 0.6283, "step": 19186 }, { "epoch": 1.4260126347082869, "grad_norm": 2.184799457348696, "learning_rate": 1.1281061482649908e-05, "loss": 0.5253, "step": 19187 }, { "epoch": 1.4260869565217391, "grad_norm": 1.7223449335598198, "learning_rate": 1.1280265721418514e-05, "loss": 0.5479, "step": 19188 }, { "epoch": 1.4261612783351914, "grad_norm": 1.9987630094164988, "learning_rate": 1.1279469951944835e-05, "loss": 0.6168, "step": 19189 }, { "epoch": 1.4262356001486436, "grad_norm": 2.053753845221256, "learning_rate": 1.1278674174233996e-05, "loss": 0.5741, "step": 19190 }, { "epoch": 1.4263099219620958, "grad_norm": 1.9581071698718469, "learning_rate": 1.1277878388291122e-05, "loss": 0.583, "step": 19191 }, { "epoch": 1.426384243775548, "grad_norm": 2.087387643847917, "learning_rate": 1.1277082594121327e-05, "loss": 0.5362, "step": 19192 }, { "epoch": 1.4264585655890003, "grad_norm": 1.9236746100337845, "learning_rate": 1.1276286791729747e-05, "loss": 0.5055, "step": 19193 }, { "epoch": 1.4265328874024525, "grad_norm": 2.0699999901633594, "learning_rate": 1.1275490981121493e-05, "loss": 0.5215, "step": 19194 }, { "epoch": 1.4266072092159048, "grad_norm": 1.9249519782075213, "learning_rate": 1.1274695162301696e-05, "loss": 0.5908, "step": 19195 }, { "epoch": 1.4266815310293572, "grad_norm": 1.615956507963641, "learning_rate": 1.1273899335275483e-05, "loss": 0.5399, "step": 19196 }, { "epoch": 1.4267558528428093, "grad_norm": 1.584630315331636, "learning_rate": 1.1273103500047966e-05, "loss": 0.5253, "step": 19197 }, { "epoch": 1.4268301746562617, "grad_norm": 2.1532570095964414, "learning_rate": 1.1272307656624275e-05, "loss": 0.6676, "step": 19198 }, { "epoch": 1.4269044964697137, "grad_norm": 1.9987344451321216, "learning_rate": 1.1271511805009536e-05, "loss": 0.5925, "step": 19199 }, { "epoch": 1.4269788182831662, "grad_norm": 1.8143300975433745, "learning_rate": 1.1270715945208868e-05, "loss": 0.5161, "step": 19200 }, { "epoch": 1.4270531400966184, "grad_norm": 1.4222605573982157, "learning_rate": 1.1269920077227397e-05, "loss": 0.4183, "step": 19201 }, { "epoch": 1.4271274619100707, "grad_norm": 2.2405329861387155, "learning_rate": 1.1269124201070247e-05, "loss": 0.4601, "step": 19202 }, { "epoch": 1.427201783723523, "grad_norm": 2.4998503346333396, "learning_rate": 1.1268328316742543e-05, "loss": 0.7743, "step": 19203 }, { "epoch": 1.4272761055369751, "grad_norm": 1.8592697874995325, "learning_rate": 1.1267532424249405e-05, "loss": 0.6756, "step": 19204 }, { "epoch": 1.4273504273504274, "grad_norm": 1.994769477348593, "learning_rate": 1.1266736523595961e-05, "loss": 0.5433, "step": 19205 }, { "epoch": 1.4274247491638796, "grad_norm": 1.6321244254621787, "learning_rate": 1.1265940614787329e-05, "loss": 0.5193, "step": 19206 }, { "epoch": 1.4274990709773319, "grad_norm": 1.5011712690616918, "learning_rate": 1.1265144697828641e-05, "loss": 0.4534, "step": 19207 }, { "epoch": 1.427573392790784, "grad_norm": 1.898441970820532, "learning_rate": 1.1264348772725013e-05, "loss": 0.6667, "step": 19208 }, { "epoch": 1.4276477146042363, "grad_norm": 2.2361045850671055, "learning_rate": 1.1263552839481574e-05, "loss": 0.6603, "step": 19209 }, { "epoch": 1.4277220364176886, "grad_norm": 2.3085564819638558, "learning_rate": 1.1262756898103453e-05, "loss": 0.7312, "step": 19210 }, { "epoch": 1.4277963582311408, "grad_norm": 2.000999522235595, "learning_rate": 1.1261960948595762e-05, "loss": 0.6253, "step": 19211 }, { "epoch": 1.427870680044593, "grad_norm": 2.3178269176957222, "learning_rate": 1.1261164990963635e-05, "loss": 0.6981, "step": 19212 }, { "epoch": 1.4279450018580453, "grad_norm": 3.044001801717146, "learning_rate": 1.1260369025212195e-05, "loss": 0.6827, "step": 19213 }, { "epoch": 1.4280193236714975, "grad_norm": 2.1712920293360405, "learning_rate": 1.1259573051346559e-05, "loss": 0.6855, "step": 19214 }, { "epoch": 1.4280936454849498, "grad_norm": 1.8770523742260605, "learning_rate": 1.1258777069371863e-05, "loss": 0.7428, "step": 19215 }, { "epoch": 1.428167967298402, "grad_norm": 2.166084282908816, "learning_rate": 1.1257981079293223e-05, "loss": 0.57, "step": 19216 }, { "epoch": 1.4282422891118542, "grad_norm": 2.0744933251734006, "learning_rate": 1.1257185081115765e-05, "loss": 0.5482, "step": 19217 }, { "epoch": 1.4283166109253065, "grad_norm": 2.03758133759184, "learning_rate": 1.1256389074844617e-05, "loss": 0.7085, "step": 19218 }, { "epoch": 1.428390932738759, "grad_norm": 1.9334329448590206, "learning_rate": 1.12555930604849e-05, "loss": 0.5695, "step": 19219 }, { "epoch": 1.428465254552211, "grad_norm": 2.061571444473786, "learning_rate": 1.1254797038041738e-05, "loss": 0.5048, "step": 19220 }, { "epoch": 1.4285395763656634, "grad_norm": 1.7973077504102768, "learning_rate": 1.125400100752026e-05, "loss": 0.4795, "step": 19221 }, { "epoch": 1.4286138981791154, "grad_norm": 2.0983184797084213, "learning_rate": 1.1253204968925588e-05, "loss": 0.5597, "step": 19222 }, { "epoch": 1.428688219992568, "grad_norm": 2.106998179493848, "learning_rate": 1.1252408922262847e-05, "loss": 0.7813, "step": 19223 }, { "epoch": 1.4287625418060201, "grad_norm": 2.0272309146106133, "learning_rate": 1.1251612867537166e-05, "loss": 0.645, "step": 19224 }, { "epoch": 1.4288368636194724, "grad_norm": 1.9463970697122257, "learning_rate": 1.1250816804753661e-05, "loss": 0.5597, "step": 19225 }, { "epoch": 1.4289111854329246, "grad_norm": 1.8469030666810764, "learning_rate": 1.1250020733917466e-05, "loss": 0.5898, "step": 19226 }, { "epoch": 1.4289855072463769, "grad_norm": 1.8099136311660653, "learning_rate": 1.1249224655033702e-05, "loss": 0.6019, "step": 19227 }, { "epoch": 1.429059829059829, "grad_norm": 2.4885130593025173, "learning_rate": 1.1248428568107492e-05, "loss": 0.7976, "step": 19228 }, { "epoch": 1.4291341508732813, "grad_norm": 4.069800676902501, "learning_rate": 1.1247632473143968e-05, "loss": 0.554, "step": 19229 }, { "epoch": 1.4292084726867336, "grad_norm": 1.8502150439197058, "learning_rate": 1.1246836370148247e-05, "loss": 0.584, "step": 19230 }, { "epoch": 1.4292827945001858, "grad_norm": 1.6583862016207132, "learning_rate": 1.1246040259125459e-05, "loss": 0.5426, "step": 19231 }, { "epoch": 1.429357116313638, "grad_norm": 2.534766759213053, "learning_rate": 1.1245244140080729e-05, "loss": 0.6256, "step": 19232 }, { "epoch": 1.4294314381270903, "grad_norm": 2.900506314971587, "learning_rate": 1.1244448013019183e-05, "loss": 0.6183, "step": 19233 }, { "epoch": 1.4295057599405425, "grad_norm": 2.9977378677452498, "learning_rate": 1.1243651877945943e-05, "loss": 0.6226, "step": 19234 }, { "epoch": 1.4295800817539948, "grad_norm": 2.1976469135464374, "learning_rate": 1.1242855734866138e-05, "loss": 0.624, "step": 19235 }, { "epoch": 1.429654403567447, "grad_norm": 1.5263828610920902, "learning_rate": 1.124205958378489e-05, "loss": 0.4865, "step": 19236 }, { "epoch": 1.4297287253808992, "grad_norm": 2.4807057439731963, "learning_rate": 1.124126342470733e-05, "loss": 0.6438, "step": 19237 }, { "epoch": 1.4298030471943515, "grad_norm": 1.7068987601785313, "learning_rate": 1.124046725763858e-05, "loss": 0.7023, "step": 19238 }, { "epoch": 1.4298773690078037, "grad_norm": 2.2755026348287557, "learning_rate": 1.1239671082583763e-05, "loss": 0.6013, "step": 19239 }, { "epoch": 1.4299516908212562, "grad_norm": 2.090494368358881, "learning_rate": 1.1238874899548013e-05, "loss": 0.6716, "step": 19240 }, { "epoch": 1.4300260126347082, "grad_norm": 2.4571635379685004, "learning_rate": 1.1238078708536447e-05, "loss": 0.654, "step": 19241 }, { "epoch": 1.4301003344481606, "grad_norm": 1.6852123853397163, "learning_rate": 1.1237282509554195e-05, "loss": 0.3998, "step": 19242 }, { "epoch": 1.4301746562616127, "grad_norm": 1.8274584519408454, "learning_rate": 1.1236486302606381e-05, "loss": 0.6003, "step": 19243 }, { "epoch": 1.4302489780750651, "grad_norm": 2.335892207070691, "learning_rate": 1.1235690087698136e-05, "loss": 0.648, "step": 19244 }, { "epoch": 1.4303232998885171, "grad_norm": 1.9051594909287088, "learning_rate": 1.123489386483458e-05, "loss": 0.6863, "step": 19245 }, { "epoch": 1.4303976217019696, "grad_norm": 2.4696520940668933, "learning_rate": 1.1234097634020844e-05, "loss": 0.7715, "step": 19246 }, { "epoch": 1.4304719435154218, "grad_norm": 1.7294753316531335, "learning_rate": 1.1233301395262049e-05, "loss": 0.5706, "step": 19247 }, { "epoch": 1.430546265328874, "grad_norm": 2.1974733108677453, "learning_rate": 1.1232505148563321e-05, "loss": 0.4958, "step": 19248 }, { "epoch": 1.4306205871423263, "grad_norm": 1.8725009631657348, "learning_rate": 1.1231708893929794e-05, "loss": 0.5671, "step": 19249 }, { "epoch": 1.4306949089557786, "grad_norm": 1.9247241310031689, "learning_rate": 1.1230912631366586e-05, "loss": 0.6815, "step": 19250 }, { "epoch": 1.4307692307692308, "grad_norm": 2.019846546265941, "learning_rate": 1.123011636087883e-05, "loss": 0.6649, "step": 19251 }, { "epoch": 1.430843552582683, "grad_norm": 1.8181471977037922, "learning_rate": 1.1229320082471645e-05, "loss": 0.5461, "step": 19252 }, { "epoch": 1.4309178743961353, "grad_norm": 2.236293101178379, "learning_rate": 1.1228523796150159e-05, "loss": 0.6919, "step": 19253 }, { "epoch": 1.4309921962095875, "grad_norm": 2.2931730194932802, "learning_rate": 1.1227727501919503e-05, "loss": 0.7325, "step": 19254 }, { "epoch": 1.4310665180230397, "grad_norm": 1.728104954721975, "learning_rate": 1.1226931199784803e-05, "loss": 0.6413, "step": 19255 }, { "epoch": 1.431140839836492, "grad_norm": 1.7756478922383594, "learning_rate": 1.122613488975118e-05, "loss": 0.5431, "step": 19256 }, { "epoch": 1.4312151616499442, "grad_norm": 1.9844929396258195, "learning_rate": 1.122533857182377e-05, "loss": 0.4921, "step": 19257 }, { "epoch": 1.4312894834633965, "grad_norm": 2.3951110840550807, "learning_rate": 1.1224542246007688e-05, "loss": 0.6199, "step": 19258 }, { "epoch": 1.4313638052768487, "grad_norm": 2.0338492935495744, "learning_rate": 1.1223745912308068e-05, "loss": 0.657, "step": 19259 }, { "epoch": 1.431438127090301, "grad_norm": 1.8100075340498827, "learning_rate": 1.1222949570730037e-05, "loss": 0.5557, "step": 19260 }, { "epoch": 1.4315124489037532, "grad_norm": 1.7992581457332788, "learning_rate": 1.122215322127872e-05, "loss": 0.4901, "step": 19261 }, { "epoch": 1.4315867707172054, "grad_norm": 1.729901403267427, "learning_rate": 1.1221356863959246e-05, "loss": 0.5467, "step": 19262 }, { "epoch": 1.4316610925306579, "grad_norm": 1.5014407906867961, "learning_rate": 1.1220560498776733e-05, "loss": 0.4399, "step": 19263 }, { "epoch": 1.43173541434411, "grad_norm": 3.5420166275916043, "learning_rate": 1.1219764125736319e-05, "loss": 0.5371, "step": 19264 }, { "epoch": 1.4318097361575624, "grad_norm": 1.5766150726623127, "learning_rate": 1.1218967744843129e-05, "loss": 0.4941, "step": 19265 }, { "epoch": 1.4318840579710144, "grad_norm": 1.9428641374605948, "learning_rate": 1.1218171356102285e-05, "loss": 0.6473, "step": 19266 }, { "epoch": 1.4319583797844668, "grad_norm": 1.9959322119159295, "learning_rate": 1.1217374959518917e-05, "loss": 0.6713, "step": 19267 }, { "epoch": 1.432032701597919, "grad_norm": 1.8028713112114998, "learning_rate": 1.1216578555098155e-05, "loss": 0.4915, "step": 19268 }, { "epoch": 1.4321070234113713, "grad_norm": 2.1505070587184507, "learning_rate": 1.121578214284512e-05, "loss": 0.6049, "step": 19269 }, { "epoch": 1.4321813452248235, "grad_norm": 2.540473872554132, "learning_rate": 1.1214985722764941e-05, "loss": 0.7662, "step": 19270 }, { "epoch": 1.4322556670382758, "grad_norm": 2.1123463313180824, "learning_rate": 1.121418929486275e-05, "loss": 0.5398, "step": 19271 }, { "epoch": 1.432329988851728, "grad_norm": 3.028130122080927, "learning_rate": 1.1213392859143672e-05, "loss": 0.6267, "step": 19272 }, { "epoch": 1.4324043106651803, "grad_norm": 3.433522858756695, "learning_rate": 1.1212596415612835e-05, "loss": 0.5536, "step": 19273 }, { "epoch": 1.4324786324786325, "grad_norm": 2.320768941283517, "learning_rate": 1.1211799964275362e-05, "loss": 0.5574, "step": 19274 }, { "epoch": 1.4325529542920847, "grad_norm": 1.5476767211889193, "learning_rate": 1.1211003505136384e-05, "loss": 0.4883, "step": 19275 }, { "epoch": 1.432627276105537, "grad_norm": 1.6501566730709916, "learning_rate": 1.1210207038201029e-05, "loss": 0.5274, "step": 19276 }, { "epoch": 1.4327015979189892, "grad_norm": 1.9230528864692156, "learning_rate": 1.1209410563474423e-05, "loss": 0.5008, "step": 19277 }, { "epoch": 1.4327759197324414, "grad_norm": 1.6315286829809388, "learning_rate": 1.1208614080961697e-05, "loss": 0.5089, "step": 19278 }, { "epoch": 1.4328502415458937, "grad_norm": 1.943771096203383, "learning_rate": 1.1207817590667975e-05, "loss": 0.7677, "step": 19279 }, { "epoch": 1.432924563359346, "grad_norm": 1.6665959420256102, "learning_rate": 1.1207021092598388e-05, "loss": 0.5117, "step": 19280 }, { "epoch": 1.4329988851727982, "grad_norm": 2.203459458807889, "learning_rate": 1.1206224586758056e-05, "loss": 0.6785, "step": 19281 }, { "epoch": 1.4330732069862504, "grad_norm": 2.166279719424209, "learning_rate": 1.1205428073152116e-05, "loss": 0.6267, "step": 19282 }, { "epoch": 1.4331475287997026, "grad_norm": 2.2525625855326994, "learning_rate": 1.1204631551785694e-05, "loss": 0.7173, "step": 19283 }, { "epoch": 1.4332218506131549, "grad_norm": 1.8983439602121353, "learning_rate": 1.1203835022663918e-05, "loss": 0.604, "step": 19284 }, { "epoch": 1.4332961724266071, "grad_norm": 2.303837010906561, "learning_rate": 1.1203038485791911e-05, "loss": 0.6508, "step": 19285 }, { "epoch": 1.4333704942400596, "grad_norm": 1.5136166674844913, "learning_rate": 1.1202241941174804e-05, "loss": 0.4158, "step": 19286 }, { "epoch": 1.4334448160535116, "grad_norm": 2.169690908286955, "learning_rate": 1.1201445388817727e-05, "loss": 0.5442, "step": 19287 }, { "epoch": 1.433519137866964, "grad_norm": 2.2922485351059714, "learning_rate": 1.1200648828725806e-05, "loss": 0.6922, "step": 19288 }, { "epoch": 1.433593459680416, "grad_norm": 1.931829435307874, "learning_rate": 1.1199852260904174e-05, "loss": 0.6009, "step": 19289 }, { "epoch": 1.4336677814938685, "grad_norm": 1.8541032612593504, "learning_rate": 1.1199055685357954e-05, "loss": 0.4972, "step": 19290 }, { "epoch": 1.4337421033073208, "grad_norm": 2.2165941445797874, "learning_rate": 1.1198259102092273e-05, "loss": 0.6966, "step": 19291 }, { "epoch": 1.433816425120773, "grad_norm": 2.246513784643627, "learning_rate": 1.1197462511112262e-05, "loss": 0.6007, "step": 19292 }, { "epoch": 1.4338907469342252, "grad_norm": 2.11963548575414, "learning_rate": 1.1196665912423052e-05, "loss": 0.6208, "step": 19293 }, { "epoch": 1.4339650687476775, "grad_norm": 2.0776432217553333, "learning_rate": 1.1195869306029769e-05, "loss": 0.6059, "step": 19294 }, { "epoch": 1.4340393905611297, "grad_norm": 5.31436297926087, "learning_rate": 1.1195072691937543e-05, "loss": 0.5205, "step": 19295 }, { "epoch": 1.434113712374582, "grad_norm": 1.8493277996492707, "learning_rate": 1.1194276070151497e-05, "loss": 0.6459, "step": 19296 }, { "epoch": 1.4341880341880342, "grad_norm": 1.949472794570086, "learning_rate": 1.1193479440676765e-05, "loss": 0.5402, "step": 19297 }, { "epoch": 1.4342623560014864, "grad_norm": 1.9807216107438959, "learning_rate": 1.1192682803518474e-05, "loss": 0.6831, "step": 19298 }, { "epoch": 1.4343366778149387, "grad_norm": 1.9839363065378037, "learning_rate": 1.1191886158681753e-05, "loss": 0.5321, "step": 19299 }, { "epoch": 1.434410999628391, "grad_norm": 2.084156220756773, "learning_rate": 1.1191089506171734e-05, "loss": 0.5346, "step": 19300 }, { "epoch": 1.4344853214418432, "grad_norm": 2.0638593834739885, "learning_rate": 1.119029284599354e-05, "loss": 0.6627, "step": 19301 }, { "epoch": 1.4345596432552954, "grad_norm": 2.314099897068268, "learning_rate": 1.1189496178152302e-05, "loss": 0.8199, "step": 19302 }, { "epoch": 1.4346339650687476, "grad_norm": 2.9602315825796315, "learning_rate": 1.1188699502653146e-05, "loss": 0.7478, "step": 19303 }, { "epoch": 1.4347082868821999, "grad_norm": 1.5802583705699604, "learning_rate": 1.1187902819501209e-05, "loss": 0.5343, "step": 19304 }, { "epoch": 1.434782608695652, "grad_norm": 1.9847301696374469, "learning_rate": 1.1187106128701615e-05, "loss": 0.6394, "step": 19305 }, { "epoch": 1.4348569305091043, "grad_norm": 1.9839253204849727, "learning_rate": 1.1186309430259494e-05, "loss": 0.5924, "step": 19306 }, { "epoch": 1.4349312523225568, "grad_norm": 2.426560221376187, "learning_rate": 1.1185512724179972e-05, "loss": 0.6593, "step": 19307 }, { "epoch": 1.4350055741360088, "grad_norm": 1.8284690219442363, "learning_rate": 1.1184716010468182e-05, "loss": 0.5287, "step": 19308 }, { "epoch": 1.4350798959494613, "grad_norm": 1.9401130503802524, "learning_rate": 1.118391928912925e-05, "loss": 0.5986, "step": 19309 }, { "epoch": 1.4351542177629133, "grad_norm": 2.140861314704038, "learning_rate": 1.1183122560168309e-05, "loss": 0.7178, "step": 19310 }, { "epoch": 1.4352285395763658, "grad_norm": 2.004622070737625, "learning_rate": 1.1182325823590487e-05, "loss": 0.5789, "step": 19311 }, { "epoch": 1.4353028613898178, "grad_norm": 1.9710748849623314, "learning_rate": 1.118152907940091e-05, "loss": 0.4933, "step": 19312 }, { "epoch": 1.4353771832032702, "grad_norm": 2.077381141443989, "learning_rate": 1.118073232760471e-05, "loss": 0.6529, "step": 19313 }, { "epoch": 1.4354515050167225, "grad_norm": 1.949717417134332, "learning_rate": 1.1179935568207018e-05, "loss": 0.6322, "step": 19314 }, { "epoch": 1.4355258268301747, "grad_norm": 3.1617454114383623, "learning_rate": 1.1179138801212963e-05, "loss": 0.7291, "step": 19315 }, { "epoch": 1.435600148643627, "grad_norm": 1.4965487724784245, "learning_rate": 1.1178342026627673e-05, "loss": 0.5078, "step": 19316 }, { "epoch": 1.4356744704570792, "grad_norm": 2.1084940678508923, "learning_rate": 1.1177545244456281e-05, "loss": 0.7099, "step": 19317 }, { "epoch": 1.4357487922705314, "grad_norm": 2.1072698576349636, "learning_rate": 1.1176748454703907e-05, "loss": 0.6213, "step": 19318 }, { "epoch": 1.4358231140839837, "grad_norm": 2.0837821491908963, "learning_rate": 1.1175951657375693e-05, "loss": 0.601, "step": 19319 }, { "epoch": 1.435897435897436, "grad_norm": 2.129516747708355, "learning_rate": 1.1175154852476759e-05, "loss": 0.7264, "step": 19320 }, { "epoch": 1.4359717577108881, "grad_norm": 2.756663720385391, "learning_rate": 1.1174358040012243e-05, "loss": 0.5985, "step": 19321 }, { "epoch": 1.4360460795243404, "grad_norm": 1.8698342641294903, "learning_rate": 1.1173561219987268e-05, "loss": 0.5425, "step": 19322 }, { "epoch": 1.4361204013377926, "grad_norm": 1.8180350712274118, "learning_rate": 1.117276439240697e-05, "loss": 0.6959, "step": 19323 }, { "epoch": 1.4361947231512449, "grad_norm": 2.000724402494149, "learning_rate": 1.1171967557276473e-05, "loss": 0.6596, "step": 19324 }, { "epoch": 1.436269044964697, "grad_norm": 1.9456455541506137, "learning_rate": 1.117117071460091e-05, "loss": 0.6051, "step": 19325 }, { "epoch": 1.4363433667781493, "grad_norm": 3.179465212829247, "learning_rate": 1.1170373864385412e-05, "loss": 0.6877, "step": 19326 }, { "epoch": 1.4364176885916016, "grad_norm": 1.97415645975855, "learning_rate": 1.1169577006635107e-05, "loss": 0.6704, "step": 19327 }, { "epoch": 1.4364920104050538, "grad_norm": 2.005213193003434, "learning_rate": 1.1168780141355127e-05, "loss": 0.5779, "step": 19328 }, { "epoch": 1.436566332218506, "grad_norm": 1.7431989250401914, "learning_rate": 1.1167983268550599e-05, "loss": 0.6088, "step": 19329 }, { "epoch": 1.4366406540319585, "grad_norm": 2.059236533332765, "learning_rate": 1.1167186388226654e-05, "loss": 0.6076, "step": 19330 }, { "epoch": 1.4367149758454105, "grad_norm": 2.1391037283031396, "learning_rate": 1.1166389500388426e-05, "loss": 0.694, "step": 19331 }, { "epoch": 1.436789297658863, "grad_norm": 1.705802594854254, "learning_rate": 1.1165592605041045e-05, "loss": 0.4202, "step": 19332 }, { "epoch": 1.436863619472315, "grad_norm": 2.4874585219282066, "learning_rate": 1.1164795702189636e-05, "loss": 0.6348, "step": 19333 }, { "epoch": 1.4369379412857675, "grad_norm": 2.5053260774773634, "learning_rate": 1.1163998791839335e-05, "loss": 0.6484, "step": 19334 }, { "epoch": 1.4370122630992197, "grad_norm": 2.046238406606073, "learning_rate": 1.116320187399527e-05, "loss": 0.6584, "step": 19335 }, { "epoch": 1.437086584912672, "grad_norm": 1.6111196535685968, "learning_rate": 1.1162404948662568e-05, "loss": 0.5498, "step": 19336 }, { "epoch": 1.4371609067261242, "grad_norm": 1.8251456367224628, "learning_rate": 1.1161608015846364e-05, "loss": 0.5407, "step": 19337 }, { "epoch": 1.4372352285395764, "grad_norm": 2.295092964902172, "learning_rate": 1.1160811075551792e-05, "loss": 0.6223, "step": 19338 }, { "epoch": 1.4373095503530287, "grad_norm": 1.8290646690406995, "learning_rate": 1.1160014127783976e-05, "loss": 0.5921, "step": 19339 }, { "epoch": 1.437383872166481, "grad_norm": 2.1886794981791753, "learning_rate": 1.115921717254805e-05, "loss": 0.7134, "step": 19340 }, { "epoch": 1.4374581939799331, "grad_norm": 1.8761228362400477, "learning_rate": 1.1158420209849143e-05, "loss": 0.5437, "step": 19341 }, { "epoch": 1.4375325157933854, "grad_norm": 1.6307183688572362, "learning_rate": 1.1157623239692385e-05, "loss": 0.4474, "step": 19342 }, { "epoch": 1.4376068376068376, "grad_norm": 2.073731547381068, "learning_rate": 1.1156826262082913e-05, "loss": 0.6284, "step": 19343 }, { "epoch": 1.4376811594202898, "grad_norm": 1.94688375112128, "learning_rate": 1.1156029277025851e-05, "loss": 0.5503, "step": 19344 }, { "epoch": 1.437755481233742, "grad_norm": 1.618038139797829, "learning_rate": 1.1155232284526335e-05, "loss": 0.5344, "step": 19345 }, { "epoch": 1.4378298030471943, "grad_norm": 1.9776461560481169, "learning_rate": 1.1154435284589491e-05, "loss": 0.5424, "step": 19346 }, { "epoch": 1.4379041248606466, "grad_norm": 2.139085343738313, "learning_rate": 1.1153638277220451e-05, "loss": 0.6448, "step": 19347 }, { "epoch": 1.4379784466740988, "grad_norm": 2.0948569502239427, "learning_rate": 1.115284126242435e-05, "loss": 0.4777, "step": 19348 }, { "epoch": 1.438052768487551, "grad_norm": 1.92095936923955, "learning_rate": 1.1152044240206317e-05, "loss": 0.5624, "step": 19349 }, { "epoch": 1.4381270903010033, "grad_norm": 1.747774889869578, "learning_rate": 1.115124721057148e-05, "loss": 0.6068, "step": 19350 }, { "epoch": 1.4382014121144555, "grad_norm": 1.9834949134918627, "learning_rate": 1.1150450173524978e-05, "loss": 0.7121, "step": 19351 }, { "epoch": 1.4382757339279078, "grad_norm": 1.5358447406127949, "learning_rate": 1.1149653129071935e-05, "loss": 0.4757, "step": 19352 }, { "epoch": 1.4383500557413602, "grad_norm": 2.197463256925011, "learning_rate": 1.1148856077217484e-05, "loss": 0.6955, "step": 19353 }, { "epoch": 1.4384243775548122, "grad_norm": 2.593030970947079, "learning_rate": 1.1148059017966758e-05, "loss": 0.6283, "step": 19354 }, { "epoch": 1.4384986993682647, "grad_norm": 1.7451008727497714, "learning_rate": 1.1147261951324888e-05, "loss": 0.589, "step": 19355 }, { "epoch": 1.4385730211817167, "grad_norm": 2.3577161048902604, "learning_rate": 1.1146464877297003e-05, "loss": 0.655, "step": 19356 }, { "epoch": 1.4386473429951692, "grad_norm": 2.422620067623085, "learning_rate": 1.1145667795888243e-05, "loss": 0.8155, "step": 19357 }, { "epoch": 1.4387216648086214, "grad_norm": 2.370615339223353, "learning_rate": 1.1144870707103725e-05, "loss": 0.6608, "step": 19358 }, { "epoch": 1.4387959866220736, "grad_norm": 1.8227415573423793, "learning_rate": 1.1144073610948592e-05, "loss": 0.5201, "step": 19359 }, { "epoch": 1.4388703084355259, "grad_norm": 1.6298551993030734, "learning_rate": 1.1143276507427974e-05, "loss": 0.5837, "step": 19360 }, { "epoch": 1.4389446302489781, "grad_norm": 1.6852497634493835, "learning_rate": 1.1142479396546998e-05, "loss": 0.5087, "step": 19361 }, { "epoch": 1.4390189520624304, "grad_norm": 1.9433597400841065, "learning_rate": 1.1141682278310804e-05, "loss": 0.6984, "step": 19362 }, { "epoch": 1.4390932738758826, "grad_norm": 2.2246696940569692, "learning_rate": 1.1140885152724514e-05, "loss": 0.6439, "step": 19363 }, { "epoch": 1.4391675956893348, "grad_norm": 2.0400697006253754, "learning_rate": 1.1140088019793265e-05, "loss": 0.574, "step": 19364 }, { "epoch": 1.439241917502787, "grad_norm": 2.532376807914415, "learning_rate": 1.1139290879522191e-05, "loss": 0.5952, "step": 19365 }, { "epoch": 1.4393162393162393, "grad_norm": 1.9061695187712484, "learning_rate": 1.113849373191642e-05, "loss": 0.5371, "step": 19366 }, { "epoch": 1.4393905611296915, "grad_norm": 2.0312623208818406, "learning_rate": 1.1137696576981086e-05, "loss": 0.6673, "step": 19367 }, { "epoch": 1.4394648829431438, "grad_norm": 1.9526477290284776, "learning_rate": 1.1136899414721321e-05, "loss": 0.4972, "step": 19368 }, { "epoch": 1.439539204756596, "grad_norm": 2.0726679211306203, "learning_rate": 1.1136102245142254e-05, "loss": 0.6067, "step": 19369 }, { "epoch": 1.4396135265700483, "grad_norm": 2.1136520913921113, "learning_rate": 1.1135305068249021e-05, "loss": 0.4644, "step": 19370 }, { "epoch": 1.4396878483835005, "grad_norm": 2.4204283851874178, "learning_rate": 1.1134507884046753e-05, "loss": 0.7399, "step": 19371 }, { "epoch": 1.4397621701969527, "grad_norm": 2.0972982169862617, "learning_rate": 1.1133710692540582e-05, "loss": 0.6874, "step": 19372 }, { "epoch": 1.439836492010405, "grad_norm": 2.1717921689780826, "learning_rate": 1.1132913493735639e-05, "loss": 0.7731, "step": 19373 }, { "epoch": 1.4399108138238574, "grad_norm": 2.4783846410635384, "learning_rate": 1.1132116287637063e-05, "loss": 0.4679, "step": 19374 }, { "epoch": 1.4399851356373095, "grad_norm": 1.9215049667026536, "learning_rate": 1.1131319074249975e-05, "loss": 0.622, "step": 19375 }, { "epoch": 1.440059457450762, "grad_norm": 3.9133734438106167, "learning_rate": 1.1130521853579516e-05, "loss": 0.6205, "step": 19376 }, { "epoch": 1.440133779264214, "grad_norm": 1.6099239628446966, "learning_rate": 1.1129724625630815e-05, "loss": 0.4487, "step": 19377 }, { "epoch": 1.4402081010776664, "grad_norm": 3.179314341663835, "learning_rate": 1.1128927390409008e-05, "loss": 0.5836, "step": 19378 }, { "epoch": 1.4402824228911184, "grad_norm": 1.8732985231102555, "learning_rate": 1.1128130147919223e-05, "loss": 0.503, "step": 19379 }, { "epoch": 1.4403567447045709, "grad_norm": 1.8336403510036061, "learning_rate": 1.1127332898166593e-05, "loss": 0.6429, "step": 19380 }, { "epoch": 1.440431066518023, "grad_norm": 1.7261148960123143, "learning_rate": 1.1126535641156255e-05, "loss": 0.564, "step": 19381 }, { "epoch": 1.4405053883314753, "grad_norm": 1.7333609991016667, "learning_rate": 1.112573837689334e-05, "loss": 0.5002, "step": 19382 }, { "epoch": 1.4405797101449276, "grad_norm": 1.7522967993820124, "learning_rate": 1.1124941105382979e-05, "loss": 0.498, "step": 19383 }, { "epoch": 1.4406540319583798, "grad_norm": 1.7095805845836591, "learning_rate": 1.1124143826630302e-05, "loss": 0.5797, "step": 19384 }, { "epoch": 1.440728353771832, "grad_norm": 1.7995160017830378, "learning_rate": 1.1123346540640449e-05, "loss": 0.5833, "step": 19385 }, { "epoch": 1.4408026755852843, "grad_norm": 1.984339310975415, "learning_rate": 1.1122549247418549e-05, "loss": 0.6733, "step": 19386 }, { "epoch": 1.4408769973987365, "grad_norm": 2.06528365441853, "learning_rate": 1.1121751946969735e-05, "loss": 0.6291, "step": 19387 }, { "epoch": 1.4409513192121888, "grad_norm": 2.649902401822462, "learning_rate": 1.1120954639299137e-05, "loss": 0.6684, "step": 19388 }, { "epoch": 1.441025641025641, "grad_norm": 1.7809365103803638, "learning_rate": 1.1120157324411894e-05, "loss": 0.5977, "step": 19389 }, { "epoch": 1.4410999628390933, "grad_norm": 1.753520141748878, "learning_rate": 1.1119360002313135e-05, "loss": 0.5819, "step": 19390 }, { "epoch": 1.4411742846525455, "grad_norm": 1.9322843682182005, "learning_rate": 1.1118562673007997e-05, "loss": 0.6414, "step": 19391 }, { "epoch": 1.4412486064659977, "grad_norm": 1.7066646688815503, "learning_rate": 1.1117765336501609e-05, "loss": 0.5307, "step": 19392 }, { "epoch": 1.44132292827945, "grad_norm": 2.1358423651175795, "learning_rate": 1.1116967992799107e-05, "loss": 0.7528, "step": 19393 }, { "epoch": 1.4413972500929022, "grad_norm": 2.2607375284620583, "learning_rate": 1.1116170641905621e-05, "loss": 0.6337, "step": 19394 }, { "epoch": 1.4414715719063544, "grad_norm": 3.6085991087008056, "learning_rate": 1.1115373283826285e-05, "loss": 0.6104, "step": 19395 }, { "epoch": 1.4415458937198067, "grad_norm": 2.0335732382686604, "learning_rate": 1.1114575918566237e-05, "loss": 0.5534, "step": 19396 }, { "epoch": 1.4416202155332591, "grad_norm": 1.6814345270152988, "learning_rate": 1.1113778546130605e-05, "loss": 0.5592, "step": 19397 }, { "epoch": 1.4416945373467112, "grad_norm": 2.1909672233105946, "learning_rate": 1.1112981166524528e-05, "loss": 0.681, "step": 19398 }, { "epoch": 1.4417688591601636, "grad_norm": 1.7755608862856878, "learning_rate": 1.1112183779753132e-05, "loss": 0.6103, "step": 19399 }, { "epoch": 1.4418431809736156, "grad_norm": 1.9371512900802625, "learning_rate": 1.1111386385821554e-05, "loss": 0.554, "step": 19400 }, { "epoch": 1.441917502787068, "grad_norm": 1.8274002225380999, "learning_rate": 1.111058898473493e-05, "loss": 0.4997, "step": 19401 }, { "epoch": 1.4419918246005203, "grad_norm": 1.809325241921516, "learning_rate": 1.1109791576498391e-05, "loss": 0.5141, "step": 19402 }, { "epoch": 1.4420661464139726, "grad_norm": 2.0314184262703234, "learning_rate": 1.1108994161117073e-05, "loss": 0.5267, "step": 19403 }, { "epoch": 1.4421404682274248, "grad_norm": 2.0477900506242563, "learning_rate": 1.1108196738596108e-05, "loss": 0.7331, "step": 19404 }, { "epoch": 1.442214790040877, "grad_norm": 1.6599113806006809, "learning_rate": 1.110739930894063e-05, "loss": 0.4668, "step": 19405 }, { "epoch": 1.4422891118543293, "grad_norm": 2.0516998811185894, "learning_rate": 1.110660187215577e-05, "loss": 0.4608, "step": 19406 }, { "epoch": 1.4423634336677815, "grad_norm": 1.921801350178109, "learning_rate": 1.1105804428246667e-05, "loss": 0.6261, "step": 19407 }, { "epoch": 1.4424377554812338, "grad_norm": 2.0445900158408707, "learning_rate": 1.1105006977218453e-05, "loss": 0.816, "step": 19408 }, { "epoch": 1.442512077294686, "grad_norm": 2.731128815534039, "learning_rate": 1.1104209519076262e-05, "loss": 0.6401, "step": 19409 }, { "epoch": 1.4425863991081382, "grad_norm": 1.7529856022223291, "learning_rate": 1.1103412053825224e-05, "loss": 0.5634, "step": 19410 }, { "epoch": 1.4426607209215905, "grad_norm": 2.159893728247142, "learning_rate": 1.1102614581470477e-05, "loss": 0.6544, "step": 19411 }, { "epoch": 1.4427350427350427, "grad_norm": 1.970034474313475, "learning_rate": 1.1101817102017154e-05, "loss": 0.5344, "step": 19412 }, { "epoch": 1.442809364548495, "grad_norm": 1.9491874656210848, "learning_rate": 1.1101019615470394e-05, "loss": 0.6358, "step": 19413 }, { "epoch": 1.4428836863619472, "grad_norm": 2.027666154994467, "learning_rate": 1.1100222121835323e-05, "loss": 0.5176, "step": 19414 }, { "epoch": 1.4429580081753994, "grad_norm": 2.162408688598918, "learning_rate": 1.1099424621117081e-05, "loss": 0.6994, "step": 19415 }, { "epoch": 1.4430323299888517, "grad_norm": 1.6818525871960202, "learning_rate": 1.1098627113320799e-05, "loss": 0.501, "step": 19416 }, { "epoch": 1.443106651802304, "grad_norm": 1.7932564451198625, "learning_rate": 1.1097829598451613e-05, "loss": 0.4029, "step": 19417 }, { "epoch": 1.4431809736157561, "grad_norm": 2.1720439285452624, "learning_rate": 1.1097032076514656e-05, "loss": 0.6181, "step": 19418 }, { "epoch": 1.4432552954292084, "grad_norm": 1.468503556422359, "learning_rate": 1.1096234547515065e-05, "loss": 0.4509, "step": 19419 }, { "epoch": 1.4433296172426608, "grad_norm": 1.540651540421566, "learning_rate": 1.1095437011457974e-05, "loss": 0.3929, "step": 19420 }, { "epoch": 1.4434039390561129, "grad_norm": 1.9724661989433605, "learning_rate": 1.1094639468348515e-05, "loss": 0.7858, "step": 19421 }, { "epoch": 1.4434782608695653, "grad_norm": 2.0222306466578424, "learning_rate": 1.1093841918191824e-05, "loss": 0.5865, "step": 19422 }, { "epoch": 1.4435525826830173, "grad_norm": 2.183492224276666, "learning_rate": 1.1093044360993032e-05, "loss": 0.5579, "step": 19423 }, { "epoch": 1.4436269044964698, "grad_norm": 1.7965996102709227, "learning_rate": 1.1092246796757284e-05, "loss": 0.6282, "step": 19424 }, { "epoch": 1.443701226309922, "grad_norm": 1.818747045945938, "learning_rate": 1.1091449225489706e-05, "loss": 0.6623, "step": 19425 }, { "epoch": 1.4437755481233743, "grad_norm": 1.9201342744738883, "learning_rate": 1.1090651647195432e-05, "loss": 0.5594, "step": 19426 }, { "epoch": 1.4438498699368265, "grad_norm": 1.886108349598299, "learning_rate": 1.1089854061879598e-05, "loss": 0.5603, "step": 19427 }, { "epoch": 1.4439241917502788, "grad_norm": 2.149347552205973, "learning_rate": 1.108905646954734e-05, "loss": 0.7225, "step": 19428 }, { "epoch": 1.443998513563731, "grad_norm": 1.8077373178409462, "learning_rate": 1.1088258870203796e-05, "loss": 0.6407, "step": 19429 }, { "epoch": 1.4440728353771832, "grad_norm": 2.412627607651271, "learning_rate": 1.1087461263854097e-05, "loss": 0.7518, "step": 19430 }, { "epoch": 1.4441471571906355, "grad_norm": 2.252541328002291, "learning_rate": 1.1086663650503383e-05, "loss": 0.6983, "step": 19431 }, { "epoch": 1.4442214790040877, "grad_norm": 1.7243987039867779, "learning_rate": 1.1085866030156777e-05, "loss": 0.7137, "step": 19432 }, { "epoch": 1.44429580081754, "grad_norm": 1.8008291738599294, "learning_rate": 1.1085068402819426e-05, "loss": 0.5532, "step": 19433 }, { "epoch": 1.4443701226309922, "grad_norm": 2.068950597333438, "learning_rate": 1.1084270768496458e-05, "loss": 0.7648, "step": 19434 }, { "epoch": 1.4444444444444444, "grad_norm": 4.084109767859954, "learning_rate": 1.1083473127193016e-05, "loss": 0.7214, "step": 19435 }, { "epoch": 1.4445187662578967, "grad_norm": 1.6458402037740252, "learning_rate": 1.1082675478914229e-05, "loss": 0.4567, "step": 19436 }, { "epoch": 1.444593088071349, "grad_norm": 1.6817987415058753, "learning_rate": 1.108187782366523e-05, "loss": 0.4976, "step": 19437 }, { "epoch": 1.4446674098848011, "grad_norm": 1.404185010864763, "learning_rate": 1.108108016145116e-05, "loss": 0.4011, "step": 19438 }, { "epoch": 1.4447417316982534, "grad_norm": 1.7921728733375542, "learning_rate": 1.108028249227715e-05, "loss": 0.5395, "step": 19439 }, { "epoch": 1.4448160535117056, "grad_norm": 1.6830417947267189, "learning_rate": 1.1079484816148338e-05, "loss": 0.5703, "step": 19440 }, { "epoch": 1.444890375325158, "grad_norm": 1.954360404973945, "learning_rate": 1.1078687133069861e-05, "loss": 0.5412, "step": 19441 }, { "epoch": 1.44496469713861, "grad_norm": 1.6030735245901915, "learning_rate": 1.1077889443046854e-05, "loss": 0.5365, "step": 19442 }, { "epoch": 1.4450390189520625, "grad_norm": 2.917288055097657, "learning_rate": 1.1077091746084445e-05, "loss": 0.6878, "step": 19443 }, { "epoch": 1.4451133407655146, "grad_norm": 1.7203896840856985, "learning_rate": 1.1076294042187778e-05, "loss": 0.4756, "step": 19444 }, { "epoch": 1.445187662578967, "grad_norm": 2.2399242976701643, "learning_rate": 1.1075496331361986e-05, "loss": 0.6929, "step": 19445 }, { "epoch": 1.445261984392419, "grad_norm": 2.1922312441716074, "learning_rate": 1.1074698613612204e-05, "loss": 0.6404, "step": 19446 }, { "epoch": 1.4453363062058715, "grad_norm": 1.89861081325687, "learning_rate": 1.1073900888943569e-05, "loss": 0.5447, "step": 19447 }, { "epoch": 1.4454106280193237, "grad_norm": 1.9104805536577374, "learning_rate": 1.1073103157361216e-05, "loss": 0.6193, "step": 19448 }, { "epoch": 1.445484949832776, "grad_norm": 2.6421378511339073, "learning_rate": 1.1072305418870279e-05, "loss": 0.7211, "step": 19449 }, { "epoch": 1.4455592716462282, "grad_norm": 2.0171902662830044, "learning_rate": 1.1071507673475894e-05, "loss": 0.6693, "step": 19450 }, { "epoch": 1.4456335934596805, "grad_norm": 2.1007607485713065, "learning_rate": 1.1070709921183201e-05, "loss": 0.5518, "step": 19451 }, { "epoch": 1.4457079152731327, "grad_norm": 1.8653549252927717, "learning_rate": 1.1069912161997333e-05, "loss": 0.4819, "step": 19452 }, { "epoch": 1.445782237086585, "grad_norm": 2.0338341517285516, "learning_rate": 1.1069114395923425e-05, "loss": 0.6478, "step": 19453 }, { "epoch": 1.4458565589000372, "grad_norm": 2.188041420465582, "learning_rate": 1.1068316622966614e-05, "loss": 0.7166, "step": 19454 }, { "epoch": 1.4459308807134894, "grad_norm": 2.02150065116836, "learning_rate": 1.1067518843132037e-05, "loss": 0.6096, "step": 19455 }, { "epoch": 1.4460052025269416, "grad_norm": 1.92869913079677, "learning_rate": 1.1066721056424828e-05, "loss": 0.6097, "step": 19456 }, { "epoch": 1.4460795243403939, "grad_norm": 1.7329117263681981, "learning_rate": 1.1065923262850124e-05, "loss": 0.5929, "step": 19457 }, { "epoch": 1.4461538461538461, "grad_norm": 1.7150117179580602, "learning_rate": 1.1065125462413064e-05, "loss": 0.5882, "step": 19458 }, { "epoch": 1.4462281679672984, "grad_norm": 2.05938377215521, "learning_rate": 1.1064327655118779e-05, "loss": 0.644, "step": 19459 }, { "epoch": 1.4463024897807506, "grad_norm": 2.289077752210911, "learning_rate": 1.1063529840972407e-05, "loss": 0.6006, "step": 19460 }, { "epoch": 1.4463768115942028, "grad_norm": 1.6711605590254006, "learning_rate": 1.1062732019979086e-05, "loss": 0.5317, "step": 19461 }, { "epoch": 1.446451133407655, "grad_norm": 1.8766469489976527, "learning_rate": 1.106193419214395e-05, "loss": 0.6074, "step": 19462 }, { "epoch": 1.4465254552211073, "grad_norm": 5.959630146406925, "learning_rate": 1.1061136357472139e-05, "loss": 0.6597, "step": 19463 }, { "epoch": 1.4465997770345598, "grad_norm": 2.2910137057139006, "learning_rate": 1.1060338515968789e-05, "loss": 0.5927, "step": 19464 }, { "epoch": 1.4466740988480118, "grad_norm": 1.9642903006559647, "learning_rate": 1.1059540667639032e-05, "loss": 0.5704, "step": 19465 }, { "epoch": 1.4467484206614643, "grad_norm": 2.394750653919167, "learning_rate": 1.1058742812488006e-05, "loss": 0.6508, "step": 19466 }, { "epoch": 1.4468227424749163, "grad_norm": 1.7388552856408792, "learning_rate": 1.105794495052085e-05, "loss": 0.5408, "step": 19467 }, { "epoch": 1.4468970642883687, "grad_norm": 1.943859385165533, "learning_rate": 1.1057147081742701e-05, "loss": 0.6203, "step": 19468 }, { "epoch": 1.446971386101821, "grad_norm": 1.8613350989110562, "learning_rate": 1.1056349206158692e-05, "loss": 0.5184, "step": 19469 }, { "epoch": 1.4470457079152732, "grad_norm": 2.6050862487050828, "learning_rate": 1.1055551323773964e-05, "loss": 0.5373, "step": 19470 }, { "epoch": 1.4471200297287254, "grad_norm": 2.3774615438422595, "learning_rate": 1.1054753434593648e-05, "loss": 0.7418, "step": 19471 }, { "epoch": 1.4471943515421777, "grad_norm": 1.9760730853804371, "learning_rate": 1.1053955538622884e-05, "loss": 0.6599, "step": 19472 }, { "epoch": 1.44726867335563, "grad_norm": 1.7113873913544795, "learning_rate": 1.1053157635866809e-05, "loss": 0.5364, "step": 19473 }, { "epoch": 1.4473429951690822, "grad_norm": 1.6381151470338955, "learning_rate": 1.1052359726330563e-05, "loss": 0.5568, "step": 19474 }, { "epoch": 1.4474173169825344, "grad_norm": 1.8567552205402098, "learning_rate": 1.105156181001928e-05, "loss": 0.5957, "step": 19475 }, { "epoch": 1.4474916387959866, "grad_norm": 2.0240173460728856, "learning_rate": 1.1050763886938092e-05, "loss": 0.6815, "step": 19476 }, { "epoch": 1.4475659606094389, "grad_norm": 1.692569568293606, "learning_rate": 1.1049965957092143e-05, "loss": 0.5126, "step": 19477 }, { "epoch": 1.4476402824228911, "grad_norm": 1.9213005127194058, "learning_rate": 1.1049168020486568e-05, "loss": 0.4594, "step": 19478 }, { "epoch": 1.4477146042363433, "grad_norm": 1.5123660573181203, "learning_rate": 1.1048370077126504e-05, "loss": 0.4402, "step": 19479 }, { "epoch": 1.4477889260497956, "grad_norm": 2.0334153199801297, "learning_rate": 1.1047572127017086e-05, "loss": 0.7259, "step": 19480 }, { "epoch": 1.4478632478632478, "grad_norm": 2.0706802753660445, "learning_rate": 1.1046774170163457e-05, "loss": 0.659, "step": 19481 }, { "epoch": 1.4479375696767, "grad_norm": 1.6423960906711046, "learning_rate": 1.1045976206570746e-05, "loss": 0.5889, "step": 19482 }, { "epoch": 1.4480118914901523, "grad_norm": 2.1284065222673396, "learning_rate": 1.1045178236244095e-05, "loss": 0.7794, "step": 19483 }, { "epoch": 1.4480862133036045, "grad_norm": 2.29551876009645, "learning_rate": 1.104438025918864e-05, "loss": 0.4235, "step": 19484 }, { "epoch": 1.4481605351170568, "grad_norm": 1.6084685698309833, "learning_rate": 1.1043582275409523e-05, "loss": 0.4222, "step": 19485 }, { "epoch": 1.448234856930509, "grad_norm": 2.021618802548837, "learning_rate": 1.1042784284911873e-05, "loss": 0.5955, "step": 19486 }, { "epoch": 1.4483091787439615, "grad_norm": 1.9748961025278806, "learning_rate": 1.1041986287700836e-05, "loss": 0.6228, "step": 19487 }, { "epoch": 1.4483835005574135, "grad_norm": 1.8596303704851271, "learning_rate": 1.1041188283781544e-05, "loss": 0.4558, "step": 19488 }, { "epoch": 1.448457822370866, "grad_norm": 2.421056174743704, "learning_rate": 1.1040390273159135e-05, "loss": 0.6305, "step": 19489 }, { "epoch": 1.448532144184318, "grad_norm": 2.3430702840259863, "learning_rate": 1.103959225583875e-05, "loss": 0.578, "step": 19490 }, { "epoch": 1.4486064659977704, "grad_norm": 1.724719598601908, "learning_rate": 1.103879423182552e-05, "loss": 0.4511, "step": 19491 }, { "epoch": 1.4486807878112227, "grad_norm": 1.9762696931536716, "learning_rate": 1.103799620112459e-05, "loss": 0.6301, "step": 19492 }, { "epoch": 1.448755109624675, "grad_norm": 1.7473978959780947, "learning_rate": 1.1037198163741092e-05, "loss": 0.466, "step": 19493 }, { "epoch": 1.4488294314381271, "grad_norm": 2.0525393108280148, "learning_rate": 1.1036400119680163e-05, "loss": 0.6767, "step": 19494 }, { "epoch": 1.4489037532515794, "grad_norm": 2.019057042971924, "learning_rate": 1.1035602068946949e-05, "loss": 0.5834, "step": 19495 }, { "epoch": 1.4489780750650316, "grad_norm": 1.9324839900285533, "learning_rate": 1.1034804011546583e-05, "loss": 0.6467, "step": 19496 }, { "epoch": 1.4490523968784839, "grad_norm": 1.7906084150238044, "learning_rate": 1.1034005947484199e-05, "loss": 0.6289, "step": 19497 }, { "epoch": 1.449126718691936, "grad_norm": 1.8933914295116558, "learning_rate": 1.1033207876764943e-05, "loss": 0.6392, "step": 19498 }, { "epoch": 1.4492010405053883, "grad_norm": 1.9132738950013082, "learning_rate": 1.1032409799393943e-05, "loss": 0.599, "step": 19499 }, { "epoch": 1.4492753623188406, "grad_norm": 2.684014337333997, "learning_rate": 1.1031611715376344e-05, "loss": 0.5343, "step": 19500 }, { "epoch": 1.4493496841322928, "grad_norm": 1.8519223120811035, "learning_rate": 1.1030813624717285e-05, "loss": 0.511, "step": 19501 }, { "epoch": 1.449424005945745, "grad_norm": 2.4099141679707636, "learning_rate": 1.1030015527421897e-05, "loss": 0.5731, "step": 19502 }, { "epoch": 1.4494983277591973, "grad_norm": 2.254004466043974, "learning_rate": 1.1029217423495324e-05, "loss": 0.5976, "step": 19503 }, { "epoch": 1.4495726495726495, "grad_norm": 1.7114398886999267, "learning_rate": 1.1028419312942704e-05, "loss": 0.52, "step": 19504 }, { "epoch": 1.4496469713861018, "grad_norm": 1.766538979415942, "learning_rate": 1.1027621195769174e-05, "loss": 0.505, "step": 19505 }, { "epoch": 1.449721293199554, "grad_norm": 2.2054114906696904, "learning_rate": 1.102682307197987e-05, "loss": 0.6373, "step": 19506 }, { "epoch": 1.4497956150130062, "grad_norm": 1.8956777276384018, "learning_rate": 1.1026024941579934e-05, "loss": 0.55, "step": 19507 }, { "epoch": 1.4498699368264585, "grad_norm": 2.1876096930541364, "learning_rate": 1.1025226804574503e-05, "loss": 0.7253, "step": 19508 }, { "epoch": 1.4499442586399107, "grad_norm": 2.0975166897824375, "learning_rate": 1.1024428660968715e-05, "loss": 0.5535, "step": 19509 }, { "epoch": 1.4500185804533632, "grad_norm": 1.9968031649123164, "learning_rate": 1.1023630510767707e-05, "loss": 0.6367, "step": 19510 }, { "epoch": 1.4500929022668152, "grad_norm": 1.907164205915308, "learning_rate": 1.1022832353976618e-05, "loss": 0.6245, "step": 19511 }, { "epoch": 1.4501672240802677, "grad_norm": 1.710914860332461, "learning_rate": 1.102203419060059e-05, "loss": 0.6306, "step": 19512 }, { "epoch": 1.4502415458937197, "grad_norm": 1.5323278936705402, "learning_rate": 1.1021236020644757e-05, "loss": 0.5024, "step": 19513 }, { "epoch": 1.4503158677071721, "grad_norm": 2.2869799903667962, "learning_rate": 1.102043784411426e-05, "loss": 0.638, "step": 19514 }, { "epoch": 1.4503901895206244, "grad_norm": 2.0497159098024738, "learning_rate": 1.1019639661014238e-05, "loss": 0.6993, "step": 19515 }, { "epoch": 1.4504645113340766, "grad_norm": 2.0059086615137187, "learning_rate": 1.1018841471349828e-05, "loss": 0.6407, "step": 19516 }, { "epoch": 1.4505388331475288, "grad_norm": 1.943362325133793, "learning_rate": 1.1018043275126169e-05, "loss": 0.6448, "step": 19517 }, { "epoch": 1.450613154960981, "grad_norm": 1.9016222763513095, "learning_rate": 1.1017245072348404e-05, "loss": 0.5947, "step": 19518 }, { "epoch": 1.4506874767744333, "grad_norm": 2.001437176103663, "learning_rate": 1.1016446863021664e-05, "loss": 0.522, "step": 19519 }, { "epoch": 1.4507617985878856, "grad_norm": 13.694716377102504, "learning_rate": 1.1015648647151093e-05, "loss": 0.7207, "step": 19520 }, { "epoch": 1.4508361204013378, "grad_norm": 2.0467718880113974, "learning_rate": 1.1014850424741827e-05, "loss": 0.548, "step": 19521 }, { "epoch": 1.45091044221479, "grad_norm": 1.7494154307451022, "learning_rate": 1.1014052195799008e-05, "loss": 0.668, "step": 19522 }, { "epoch": 1.4509847640282423, "grad_norm": 2.038798083017645, "learning_rate": 1.1013253960327775e-05, "loss": 0.6456, "step": 19523 }, { "epoch": 1.4510590858416945, "grad_norm": 1.8708986378725219, "learning_rate": 1.1012455718333264e-05, "loss": 0.6296, "step": 19524 }, { "epoch": 1.4511334076551468, "grad_norm": 2.188339279959638, "learning_rate": 1.1011657469820613e-05, "loss": 0.6809, "step": 19525 }, { "epoch": 1.451207729468599, "grad_norm": 2.5603443180962087, "learning_rate": 1.101085921479497e-05, "loss": 0.6816, "step": 19526 }, { "epoch": 1.4512820512820512, "grad_norm": 1.790914573000111, "learning_rate": 1.1010060953261464e-05, "loss": 0.5811, "step": 19527 }, { "epoch": 1.4513563730955035, "grad_norm": 2.3572181441267883, "learning_rate": 1.1009262685225237e-05, "loss": 0.7148, "step": 19528 }, { "epoch": 1.4514306949089557, "grad_norm": 1.8610841353558294, "learning_rate": 1.100846441069143e-05, "loss": 0.5658, "step": 19529 }, { "epoch": 1.451505016722408, "grad_norm": 1.981624355579907, "learning_rate": 1.1007666129665183e-05, "loss": 0.6099, "step": 19530 }, { "epoch": 1.4515793385358604, "grad_norm": 2.1045392391435986, "learning_rate": 1.1006867842151631e-05, "loss": 0.6294, "step": 19531 }, { "epoch": 1.4516536603493124, "grad_norm": 1.8193712027109745, "learning_rate": 1.100606954815592e-05, "loss": 0.6181, "step": 19532 }, { "epoch": 1.4517279821627649, "grad_norm": 1.5269448817372333, "learning_rate": 1.1005271247683183e-05, "loss": 0.4541, "step": 19533 }, { "epoch": 1.451802303976217, "grad_norm": 1.8278553406531497, "learning_rate": 1.1004472940738562e-05, "loss": 0.5524, "step": 19534 }, { "epoch": 1.4518766257896694, "grad_norm": 1.5553607408372174, "learning_rate": 1.1003674627327197e-05, "loss": 0.5115, "step": 19535 }, { "epoch": 1.4519509476031214, "grad_norm": 1.8496417694473217, "learning_rate": 1.1002876307454224e-05, "loss": 0.6239, "step": 19536 }, { "epoch": 1.4520252694165738, "grad_norm": 1.9221415554426102, "learning_rate": 1.100207798112479e-05, "loss": 0.5877, "step": 19537 }, { "epoch": 1.452099591230026, "grad_norm": 2.562829013526172, "learning_rate": 1.1001279648344027e-05, "loss": 0.702, "step": 19538 }, { "epoch": 1.4521739130434783, "grad_norm": 1.8537572144954775, "learning_rate": 1.1000481309117077e-05, "loss": 0.5494, "step": 19539 }, { "epoch": 1.4522482348569306, "grad_norm": 1.5674018807980232, "learning_rate": 1.0999682963449083e-05, "loss": 0.5175, "step": 19540 }, { "epoch": 1.4523225566703828, "grad_norm": 2.190100806991982, "learning_rate": 1.099888461134518e-05, "loss": 0.6121, "step": 19541 }, { "epoch": 1.452396878483835, "grad_norm": 2.2096776939172353, "learning_rate": 1.099808625281051e-05, "loss": 0.6483, "step": 19542 }, { "epoch": 1.4524712002972873, "grad_norm": 1.5880257463376608, "learning_rate": 1.0997287887850211e-05, "loss": 0.4751, "step": 19543 }, { "epoch": 1.4525455221107395, "grad_norm": 2.455155736015645, "learning_rate": 1.0996489516469426e-05, "loss": 0.715, "step": 19544 }, { "epoch": 1.4526198439241917, "grad_norm": 1.9559428515495356, "learning_rate": 1.0995691138673294e-05, "loss": 0.5176, "step": 19545 }, { "epoch": 1.452694165737644, "grad_norm": 2.2256845549176663, "learning_rate": 1.0994892754466954e-05, "loss": 0.4729, "step": 19546 }, { "epoch": 1.4527684875510962, "grad_norm": 2.3620697173326946, "learning_rate": 1.0994094363855546e-05, "loss": 0.7042, "step": 19547 }, { "epoch": 1.4528428093645485, "grad_norm": 1.9681445151562114, "learning_rate": 1.0993295966844206e-05, "loss": 0.5478, "step": 19548 }, { "epoch": 1.4529171311780007, "grad_norm": 2.120370532488785, "learning_rate": 1.0992497563438085e-05, "loss": 0.6825, "step": 19549 }, { "epoch": 1.452991452991453, "grad_norm": 2.016229372592611, "learning_rate": 1.0991699153642311e-05, "loss": 0.6106, "step": 19550 }, { "epoch": 1.4530657748049052, "grad_norm": 1.749835379909265, "learning_rate": 1.099090073746203e-05, "loss": 0.5116, "step": 19551 }, { "epoch": 1.4531400966183574, "grad_norm": 2.0626612601754672, "learning_rate": 1.0990102314902385e-05, "loss": 0.6093, "step": 19552 }, { "epoch": 1.4532144184318097, "grad_norm": 2.328706356511252, "learning_rate": 1.0989303885968507e-05, "loss": 0.73, "step": 19553 }, { "epoch": 1.4532887402452621, "grad_norm": 2.4666922743581323, "learning_rate": 1.0988505450665547e-05, "loss": 0.7092, "step": 19554 }, { "epoch": 1.4533630620587141, "grad_norm": 2.216883497116418, "learning_rate": 1.0987707008998638e-05, "loss": 0.6171, "step": 19555 }, { "epoch": 1.4534373838721666, "grad_norm": 2.0550606178470807, "learning_rate": 1.0986908560972924e-05, "loss": 0.6886, "step": 19556 }, { "epoch": 1.4535117056856186, "grad_norm": 2.4164778174246786, "learning_rate": 1.0986110106593542e-05, "loss": 0.6872, "step": 19557 }, { "epoch": 1.453586027499071, "grad_norm": 2.3082291808069626, "learning_rate": 1.0985311645865632e-05, "loss": 0.5251, "step": 19558 }, { "epoch": 1.4536603493125233, "grad_norm": 2.1531392000950316, "learning_rate": 1.098451317879434e-05, "loss": 0.6339, "step": 19559 }, { "epoch": 1.4537346711259755, "grad_norm": 1.5616516369915228, "learning_rate": 1.0983714705384804e-05, "loss": 0.5192, "step": 19560 }, { "epoch": 1.4538089929394278, "grad_norm": 1.6050646733261156, "learning_rate": 1.0982916225642164e-05, "loss": 0.5396, "step": 19561 }, { "epoch": 1.45388331475288, "grad_norm": 2.47251519556001, "learning_rate": 1.0982117739571559e-05, "loss": 0.6358, "step": 19562 }, { "epoch": 1.4539576365663323, "grad_norm": 2.237783022110145, "learning_rate": 1.098131924717813e-05, "loss": 0.6426, "step": 19563 }, { "epoch": 1.4540319583797845, "grad_norm": 1.770779538285335, "learning_rate": 1.0980520748467019e-05, "loss": 0.4708, "step": 19564 }, { "epoch": 1.4541062801932367, "grad_norm": 1.8216505431894099, "learning_rate": 1.0979722243443367e-05, "loss": 0.4996, "step": 19565 }, { "epoch": 1.454180602006689, "grad_norm": 1.5786566291485724, "learning_rate": 1.0978923732112315e-05, "loss": 0.4592, "step": 19566 }, { "epoch": 1.4542549238201412, "grad_norm": 1.697114485310794, "learning_rate": 1.0978125214479002e-05, "loss": 0.5808, "step": 19567 }, { "epoch": 1.4543292456335934, "grad_norm": 9.868089216762337, "learning_rate": 1.0977326690548569e-05, "loss": 0.6432, "step": 19568 }, { "epoch": 1.4544035674470457, "grad_norm": 1.757841378558037, "learning_rate": 1.0976528160326156e-05, "loss": 0.6, "step": 19569 }, { "epoch": 1.454477889260498, "grad_norm": 2.751124287995249, "learning_rate": 1.0975729623816906e-05, "loss": 0.7351, "step": 19570 }, { "epoch": 1.4545522110739502, "grad_norm": 1.9972470626571457, "learning_rate": 1.0974931081025964e-05, "loss": 0.6314, "step": 19571 }, { "epoch": 1.4546265328874024, "grad_norm": 1.9498914164220693, "learning_rate": 1.0974132531958463e-05, "loss": 0.7666, "step": 19572 }, { "epoch": 1.4547008547008546, "grad_norm": 1.6251677615265547, "learning_rate": 1.0973333976619546e-05, "loss": 0.5058, "step": 19573 }, { "epoch": 1.4547751765143069, "grad_norm": 2.4158148460470783, "learning_rate": 1.0972535415014356e-05, "loss": 0.6749, "step": 19574 }, { "epoch": 1.4548494983277591, "grad_norm": 2.150831565043128, "learning_rate": 1.0971736847148034e-05, "loss": 0.6409, "step": 19575 }, { "epoch": 1.4549238201412114, "grad_norm": 2.3152508375820067, "learning_rate": 1.097093827302572e-05, "loss": 0.7078, "step": 19576 }, { "epoch": 1.4549981419546638, "grad_norm": 1.8524858651161387, "learning_rate": 1.0970139692652557e-05, "loss": 0.5343, "step": 19577 }, { "epoch": 1.4550724637681158, "grad_norm": 2.1844046665713464, "learning_rate": 1.0969341106033684e-05, "loss": 0.5832, "step": 19578 }, { "epoch": 1.4551467855815683, "grad_norm": 1.8810411749714782, "learning_rate": 1.0968542513174244e-05, "loss": 0.5727, "step": 19579 }, { "epoch": 1.4552211073950203, "grad_norm": 2.0640108824821546, "learning_rate": 1.0967743914079377e-05, "loss": 0.5829, "step": 19580 }, { "epoch": 1.4552954292084728, "grad_norm": 2.0725226963012955, "learning_rate": 1.0966945308754222e-05, "loss": 0.5241, "step": 19581 }, { "epoch": 1.455369751021925, "grad_norm": 1.8315848214976669, "learning_rate": 1.096614669720393e-05, "loss": 0.63, "step": 19582 }, { "epoch": 1.4554440728353772, "grad_norm": 1.8660790962461928, "learning_rate": 1.0965348079433633e-05, "loss": 0.4984, "step": 19583 }, { "epoch": 1.4555183946488295, "grad_norm": 1.7710607705048864, "learning_rate": 1.0964549455448473e-05, "loss": 0.6871, "step": 19584 }, { "epoch": 1.4555927164622817, "grad_norm": 1.7193119497827203, "learning_rate": 1.0963750825253594e-05, "loss": 0.4678, "step": 19585 }, { "epoch": 1.455667038275734, "grad_norm": 2.0701087789093022, "learning_rate": 1.0962952188854138e-05, "loss": 0.5284, "step": 19586 }, { "epoch": 1.4557413600891862, "grad_norm": 1.950210904685638, "learning_rate": 1.0962153546255244e-05, "loss": 0.5799, "step": 19587 }, { "epoch": 1.4558156819026384, "grad_norm": 1.8630411598136754, "learning_rate": 1.0961354897462057e-05, "loss": 0.5187, "step": 19588 }, { "epoch": 1.4558900037160907, "grad_norm": 1.9195967613104366, "learning_rate": 1.0960556242479718e-05, "loss": 0.5634, "step": 19589 }, { "epoch": 1.455964325529543, "grad_norm": 1.8715825519498384, "learning_rate": 1.0959757581313369e-05, "loss": 0.5694, "step": 19590 }, { "epoch": 1.4560386473429952, "grad_norm": 1.9529250005064394, "learning_rate": 1.0958958913968145e-05, "loss": 0.4148, "step": 19591 }, { "epoch": 1.4561129691564474, "grad_norm": 1.6949975164081499, "learning_rate": 1.0958160240449196e-05, "loss": 0.5725, "step": 19592 }, { "epoch": 1.4561872909698996, "grad_norm": 2.3083008541908687, "learning_rate": 1.0957361560761664e-05, "loss": 0.6243, "step": 19593 }, { "epoch": 1.4562616127833519, "grad_norm": 1.9291327762075496, "learning_rate": 1.0956562874910688e-05, "loss": 0.5755, "step": 19594 }, { "epoch": 1.456335934596804, "grad_norm": 1.9804366448642294, "learning_rate": 1.0955764182901406e-05, "loss": 0.603, "step": 19595 }, { "epoch": 1.4564102564102563, "grad_norm": 1.9269891186412047, "learning_rate": 1.0954965484738965e-05, "loss": 0.632, "step": 19596 }, { "epoch": 1.4564845782237086, "grad_norm": 2.013376610519367, "learning_rate": 1.0954166780428505e-05, "loss": 0.5253, "step": 19597 }, { "epoch": 1.456558900037161, "grad_norm": 2.0117395326112484, "learning_rate": 1.0953368069975167e-05, "loss": 0.7127, "step": 19598 }, { "epoch": 1.456633221850613, "grad_norm": 2.135721810781251, "learning_rate": 1.0952569353384102e-05, "loss": 0.4844, "step": 19599 }, { "epoch": 1.4567075436640655, "grad_norm": 2.0172864787840363, "learning_rate": 1.0951770630660441e-05, "loss": 0.6943, "step": 19600 }, { "epoch": 1.4567818654775175, "grad_norm": 1.7381384777450335, "learning_rate": 1.095097190180933e-05, "loss": 0.5486, "step": 19601 }, { "epoch": 1.45685618729097, "grad_norm": 6.361132288287594, "learning_rate": 1.0950173166835909e-05, "loss": 0.6951, "step": 19602 }, { "epoch": 1.456930509104422, "grad_norm": 1.6884095817898865, "learning_rate": 1.0949374425745323e-05, "loss": 0.4779, "step": 19603 }, { "epoch": 1.4570048309178745, "grad_norm": 1.7781528582622361, "learning_rate": 1.0948575678542717e-05, "loss": 0.5459, "step": 19604 }, { "epoch": 1.4570791527313267, "grad_norm": 1.8127994720111336, "learning_rate": 1.094777692523323e-05, "loss": 0.5892, "step": 19605 }, { "epoch": 1.457153474544779, "grad_norm": 1.7059559973965812, "learning_rate": 1.0946978165822e-05, "loss": 0.5614, "step": 19606 }, { "epoch": 1.4572277963582312, "grad_norm": 2.007425917470975, "learning_rate": 1.0946179400314176e-05, "loss": 0.6277, "step": 19607 }, { "epoch": 1.4573021181716834, "grad_norm": 1.8126855845475092, "learning_rate": 1.0945380628714897e-05, "loss": 0.6333, "step": 19608 }, { "epoch": 1.4573764399851357, "grad_norm": 2.10995179395452, "learning_rate": 1.0944581851029308e-05, "loss": 0.6806, "step": 19609 }, { "epoch": 1.457450761798588, "grad_norm": 1.7187182608359515, "learning_rate": 1.094378306726255e-05, "loss": 0.4741, "step": 19610 }, { "epoch": 1.4575250836120401, "grad_norm": 2.140675758278205, "learning_rate": 1.0942984277419768e-05, "loss": 0.6924, "step": 19611 }, { "epoch": 1.4575994054254924, "grad_norm": 2.3624268366626695, "learning_rate": 1.0942185481506098e-05, "loss": 0.6889, "step": 19612 }, { "epoch": 1.4576737272389446, "grad_norm": 1.874917152734297, "learning_rate": 1.0941386679526688e-05, "loss": 0.579, "step": 19613 }, { "epoch": 1.4577480490523969, "grad_norm": 1.8915014619223685, "learning_rate": 1.094058787148668e-05, "loss": 0.5483, "step": 19614 }, { "epoch": 1.457822370865849, "grad_norm": 2.1896082686612255, "learning_rate": 1.0939789057391217e-05, "loss": 0.5563, "step": 19615 }, { "epoch": 1.4578966926793013, "grad_norm": 1.7172928810979424, "learning_rate": 1.0938990237245439e-05, "loss": 0.4962, "step": 19616 }, { "epoch": 1.4579710144927536, "grad_norm": 1.7394391361122579, "learning_rate": 1.0938191411054491e-05, "loss": 0.4643, "step": 19617 }, { "epoch": 1.4580453363062058, "grad_norm": 1.9931094815301138, "learning_rate": 1.0937392578823516e-05, "loss": 0.6686, "step": 19618 }, { "epoch": 1.458119658119658, "grad_norm": 1.7940745041389083, "learning_rate": 1.0936593740557655e-05, "loss": 0.6382, "step": 19619 }, { "epoch": 1.4581939799331103, "grad_norm": 1.8304044144241136, "learning_rate": 1.0935794896262051e-05, "loss": 0.6086, "step": 19620 }, { "epoch": 1.4582683017465627, "grad_norm": 1.6557778300496977, "learning_rate": 1.0934996045941851e-05, "loss": 0.4506, "step": 19621 }, { "epoch": 1.4583426235600148, "grad_norm": 2.0243374049929104, "learning_rate": 1.0934197189602196e-05, "loss": 0.64, "step": 19622 }, { "epoch": 1.4584169453734672, "grad_norm": 1.959890804270453, "learning_rate": 1.0933398327248224e-05, "loss": 0.6261, "step": 19623 }, { "epoch": 1.4584912671869192, "grad_norm": 2.337537091450035, "learning_rate": 1.0932599458885085e-05, "loss": 0.6075, "step": 19624 }, { "epoch": 1.4585655890003717, "grad_norm": 1.8572632152932433, "learning_rate": 1.0931800584517915e-05, "loss": 0.5435, "step": 19625 }, { "epoch": 1.458639910813824, "grad_norm": 2.226127487582312, "learning_rate": 1.0931001704151867e-05, "loss": 0.5357, "step": 19626 }, { "epoch": 1.4587142326272762, "grad_norm": 1.8118978938891597, "learning_rate": 1.0930202817792075e-05, "loss": 0.5391, "step": 19627 }, { "epoch": 1.4587885544407284, "grad_norm": 1.8457211021029751, "learning_rate": 1.0929403925443687e-05, "loss": 0.6716, "step": 19628 }, { "epoch": 1.4588628762541807, "grad_norm": 1.978957773722038, "learning_rate": 1.0928605027111843e-05, "loss": 0.5419, "step": 19629 }, { "epoch": 1.458937198067633, "grad_norm": 1.9480803299302012, "learning_rate": 1.0927806122801687e-05, "loss": 0.7618, "step": 19630 }, { "epoch": 1.4590115198810851, "grad_norm": 1.737613215012735, "learning_rate": 1.0927007212518364e-05, "loss": 0.4407, "step": 19631 }, { "epoch": 1.4590858416945374, "grad_norm": 1.7789070589071987, "learning_rate": 1.092620829626702e-05, "loss": 0.6618, "step": 19632 }, { "epoch": 1.4591601635079896, "grad_norm": 2.093253602874517, "learning_rate": 1.0925409374052794e-05, "loss": 0.4954, "step": 19633 }, { "epoch": 1.4592344853214418, "grad_norm": 2.1930365979502007, "learning_rate": 1.0924610445880828e-05, "loss": 0.6435, "step": 19634 }, { "epoch": 1.459308807134894, "grad_norm": 2.037565771702177, "learning_rate": 1.0923811511756269e-05, "loss": 0.5496, "step": 19635 }, { "epoch": 1.4593831289483463, "grad_norm": 2.0170581705463113, "learning_rate": 1.0923012571684257e-05, "loss": 0.626, "step": 19636 }, { "epoch": 1.4594574507617986, "grad_norm": 2.186588500023387, "learning_rate": 1.0922213625669942e-05, "loss": 0.5477, "step": 19637 }, { "epoch": 1.4595317725752508, "grad_norm": 2.1640765746272095, "learning_rate": 1.0921414673718463e-05, "loss": 0.7443, "step": 19638 }, { "epoch": 1.459606094388703, "grad_norm": 2.033052273458436, "learning_rate": 1.0920615715834963e-05, "loss": 0.7301, "step": 19639 }, { "epoch": 1.4596804162021553, "grad_norm": 1.76208880285013, "learning_rate": 1.0919816752024587e-05, "loss": 0.4834, "step": 19640 }, { "epoch": 1.4597547380156075, "grad_norm": 2.040774425292279, "learning_rate": 1.0919017782292477e-05, "loss": 0.5503, "step": 19641 }, { "epoch": 1.4598290598290597, "grad_norm": 1.974148493309325, "learning_rate": 1.091821880664378e-05, "loss": 0.6171, "step": 19642 }, { "epoch": 1.459903381642512, "grad_norm": 1.8947794556777553, "learning_rate": 1.0917419825083639e-05, "loss": 0.5009, "step": 19643 }, { "epoch": 1.4599777034559644, "grad_norm": 1.7437004896187205, "learning_rate": 1.0916620837617193e-05, "loss": 0.6039, "step": 19644 }, { "epoch": 1.4600520252694165, "grad_norm": 2.4104295893868977, "learning_rate": 1.0915821844249594e-05, "loss": 0.5905, "step": 19645 }, { "epoch": 1.460126347082869, "grad_norm": 1.7830446660933452, "learning_rate": 1.0915022844985978e-05, "loss": 0.6714, "step": 19646 }, { "epoch": 1.460200668896321, "grad_norm": 1.6933379960949744, "learning_rate": 1.0914223839831494e-05, "loss": 0.4566, "step": 19647 }, { "epoch": 1.4602749907097734, "grad_norm": 1.7241472605772625, "learning_rate": 1.0913424828791287e-05, "loss": 0.5468, "step": 19648 }, { "epoch": 1.4603493125232256, "grad_norm": 2.101952413103202, "learning_rate": 1.0912625811870495e-05, "loss": 0.706, "step": 19649 }, { "epoch": 1.4604236343366779, "grad_norm": 1.9485236401479158, "learning_rate": 1.0911826789074266e-05, "loss": 0.498, "step": 19650 }, { "epoch": 1.4604979561501301, "grad_norm": 1.678827926377236, "learning_rate": 1.0911027760407741e-05, "loss": 0.3559, "step": 19651 }, { "epoch": 1.4605722779635824, "grad_norm": 1.9559466691226757, "learning_rate": 1.091022872587607e-05, "loss": 0.627, "step": 19652 }, { "epoch": 1.4606465997770346, "grad_norm": 1.9984113015885119, "learning_rate": 1.0909429685484391e-05, "loss": 0.6307, "step": 19653 }, { "epoch": 1.4607209215904868, "grad_norm": 2.014582420451642, "learning_rate": 1.0908630639237854e-05, "loss": 0.6328, "step": 19654 }, { "epoch": 1.460795243403939, "grad_norm": 2.3309437016175436, "learning_rate": 1.0907831587141598e-05, "loss": 0.6828, "step": 19655 }, { "epoch": 1.4608695652173913, "grad_norm": 1.9907019653877571, "learning_rate": 1.0907032529200771e-05, "loss": 0.6353, "step": 19656 }, { "epoch": 1.4609438870308435, "grad_norm": 1.7393701783330904, "learning_rate": 1.0906233465420514e-05, "loss": 0.5465, "step": 19657 }, { "epoch": 1.4610182088442958, "grad_norm": 1.5228953720393676, "learning_rate": 1.0905434395805974e-05, "loss": 0.4315, "step": 19658 }, { "epoch": 1.461092530657748, "grad_norm": 1.8610984336081369, "learning_rate": 1.0904635320362294e-05, "loss": 0.637, "step": 19659 }, { "epoch": 1.4611668524712003, "grad_norm": 1.5267040733319246, "learning_rate": 1.0903836239094618e-05, "loss": 0.508, "step": 19660 }, { "epoch": 1.4612411742846525, "grad_norm": 1.7506113833715418, "learning_rate": 1.090303715200809e-05, "loss": 0.5395, "step": 19661 }, { "epoch": 1.4613154960981047, "grad_norm": 1.635191658402317, "learning_rate": 1.0902238059107859e-05, "loss": 0.6559, "step": 19662 }, { "epoch": 1.461389817911557, "grad_norm": 1.9280374729464178, "learning_rate": 1.0901438960399064e-05, "loss": 0.6373, "step": 19663 }, { "epoch": 1.4614641397250092, "grad_norm": 1.5574795987553918, "learning_rate": 1.0900639855886853e-05, "loss": 0.5444, "step": 19664 }, { "epoch": 1.4615384615384617, "grad_norm": 2.346176255336265, "learning_rate": 1.0899840745576369e-05, "loss": 0.5666, "step": 19665 }, { "epoch": 1.4616127833519137, "grad_norm": 1.9172921276998878, "learning_rate": 1.0899041629472757e-05, "loss": 0.6339, "step": 19666 }, { "epoch": 1.4616871051653662, "grad_norm": 2.026150460577942, "learning_rate": 1.0898242507581163e-05, "loss": 0.6815, "step": 19667 }, { "epoch": 1.4617614269788182, "grad_norm": 2.3118193854907383, "learning_rate": 1.0897443379906728e-05, "loss": 0.6054, "step": 19668 }, { "epoch": 1.4618357487922706, "grad_norm": 2.067897785399506, "learning_rate": 1.0896644246454597e-05, "loss": 0.5811, "step": 19669 }, { "epoch": 1.4619100706057226, "grad_norm": 1.9417775209418835, "learning_rate": 1.0895845107229924e-05, "loss": 0.5925, "step": 19670 }, { "epoch": 1.461984392419175, "grad_norm": 1.969094104904288, "learning_rate": 1.089504596223784e-05, "loss": 0.5951, "step": 19671 }, { "epoch": 1.4620587142326273, "grad_norm": 1.893136204354877, "learning_rate": 1.0894246811483497e-05, "loss": 0.6298, "step": 19672 }, { "epoch": 1.4621330360460796, "grad_norm": 1.711767696359167, "learning_rate": 1.0893447654972045e-05, "loss": 0.5422, "step": 19673 }, { "epoch": 1.4622073578595318, "grad_norm": 2.1123884961140287, "learning_rate": 1.0892648492708617e-05, "loss": 0.592, "step": 19674 }, { "epoch": 1.462281679672984, "grad_norm": 1.8828432578735892, "learning_rate": 1.0891849324698366e-05, "loss": 0.5057, "step": 19675 }, { "epoch": 1.4623560014864363, "grad_norm": 2.248008904131377, "learning_rate": 1.0891050150946438e-05, "loss": 0.5393, "step": 19676 }, { "epoch": 1.4624303232998885, "grad_norm": 3.0751915225374984, "learning_rate": 1.0890250971457974e-05, "loss": 0.7506, "step": 19677 }, { "epoch": 1.4625046451133408, "grad_norm": 2.1944737841516098, "learning_rate": 1.0889451786238119e-05, "loss": 0.7331, "step": 19678 }, { "epoch": 1.462578966926793, "grad_norm": 1.4860947939028608, "learning_rate": 1.0888652595292021e-05, "loss": 0.4173, "step": 19679 }, { "epoch": 1.4626532887402452, "grad_norm": 1.7712712434666136, "learning_rate": 1.0887853398624824e-05, "loss": 0.7238, "step": 19680 }, { "epoch": 1.4627276105536975, "grad_norm": 2.036608936878187, "learning_rate": 1.0887054196241673e-05, "loss": 0.5231, "step": 19681 }, { "epoch": 1.4628019323671497, "grad_norm": 2.31203745353385, "learning_rate": 1.0886254988147712e-05, "loss": 0.7286, "step": 19682 }, { "epoch": 1.462876254180602, "grad_norm": 1.9623248377361375, "learning_rate": 1.0885455774348086e-05, "loss": 0.6089, "step": 19683 }, { "epoch": 1.4629505759940542, "grad_norm": 2.285246700168325, "learning_rate": 1.0884656554847945e-05, "loss": 0.569, "step": 19684 }, { "epoch": 1.4630248978075064, "grad_norm": 2.576830703468872, "learning_rate": 1.088385732965243e-05, "loss": 0.6864, "step": 19685 }, { "epoch": 1.4630992196209587, "grad_norm": 1.9995501352176346, "learning_rate": 1.0883058098766687e-05, "loss": 0.5402, "step": 19686 }, { "epoch": 1.463173541434411, "grad_norm": 1.9966589903933458, "learning_rate": 1.088225886219586e-05, "loss": 0.6877, "step": 19687 }, { "epoch": 1.4632478632478634, "grad_norm": 1.9301167745435308, "learning_rate": 1.0881459619945098e-05, "loss": 0.5347, "step": 19688 }, { "epoch": 1.4633221850613154, "grad_norm": 2.198075151542652, "learning_rate": 1.0880660372019545e-05, "loss": 0.5829, "step": 19689 }, { "epoch": 1.4633965068747679, "grad_norm": 1.7461131686420965, "learning_rate": 1.0879861118424346e-05, "loss": 0.6315, "step": 19690 }, { "epoch": 1.4634708286882199, "grad_norm": 1.9782655524858619, "learning_rate": 1.0879061859164645e-05, "loss": 0.7094, "step": 19691 }, { "epoch": 1.4635451505016723, "grad_norm": 1.561730449670745, "learning_rate": 1.0878262594245591e-05, "loss": 0.3381, "step": 19692 }, { "epoch": 1.4636194723151246, "grad_norm": 1.9402716052614575, "learning_rate": 1.0877463323672329e-05, "loss": 0.6111, "step": 19693 }, { "epoch": 1.4636937941285768, "grad_norm": 1.4694334653733574, "learning_rate": 1.087666404745e-05, "loss": 0.5041, "step": 19694 }, { "epoch": 1.463768115942029, "grad_norm": 2.765316860511096, "learning_rate": 1.0875864765583757e-05, "loss": 0.6674, "step": 19695 }, { "epoch": 1.4638424377554813, "grad_norm": 2.263442265094093, "learning_rate": 1.087506547807874e-05, "loss": 0.5498, "step": 19696 }, { "epoch": 1.4639167595689335, "grad_norm": 2.8505554641847386, "learning_rate": 1.08742661849401e-05, "loss": 0.6677, "step": 19697 }, { "epoch": 1.4639910813823858, "grad_norm": 3.1346121681831023, "learning_rate": 1.0873466886172978e-05, "loss": 0.4224, "step": 19698 }, { "epoch": 1.464065403195838, "grad_norm": 1.7598181065510492, "learning_rate": 1.087266758178252e-05, "loss": 0.5707, "step": 19699 }, { "epoch": 1.4641397250092902, "grad_norm": 1.7644139953399522, "learning_rate": 1.0871868271773873e-05, "loss": 0.4736, "step": 19700 }, { "epoch": 1.4642140468227425, "grad_norm": 1.8212793503478881, "learning_rate": 1.0871068956152186e-05, "loss": 0.5151, "step": 19701 }, { "epoch": 1.4642883686361947, "grad_norm": 2.1987993427814128, "learning_rate": 1.08702696349226e-05, "loss": 0.6542, "step": 19702 }, { "epoch": 1.464362690449647, "grad_norm": 1.7327074179796107, "learning_rate": 1.0869470308090267e-05, "loss": 0.5129, "step": 19703 }, { "epoch": 1.4644370122630992, "grad_norm": 1.7844427490579098, "learning_rate": 1.0868670975660324e-05, "loss": 0.5333, "step": 19704 }, { "epoch": 1.4645113340765514, "grad_norm": 2.187034297154894, "learning_rate": 1.0867871637637923e-05, "loss": 0.8207, "step": 19705 }, { "epoch": 1.4645856558900037, "grad_norm": 1.6867373527502094, "learning_rate": 1.086707229402821e-05, "loss": 0.6348, "step": 19706 }, { "epoch": 1.464659977703456, "grad_norm": 2.744591957740094, "learning_rate": 1.0866272944836336e-05, "loss": 0.7222, "step": 19707 }, { "epoch": 1.4647342995169081, "grad_norm": 1.43090116018928, "learning_rate": 1.0865473590067436e-05, "loss": 0.3762, "step": 19708 }, { "epoch": 1.4648086213303604, "grad_norm": 2.3131987615899963, "learning_rate": 1.0864674229726664e-05, "loss": 0.5719, "step": 19709 }, { "epoch": 1.4648829431438126, "grad_norm": 1.7097593149451433, "learning_rate": 1.0863874863819163e-05, "loss": 0.5725, "step": 19710 }, { "epoch": 1.464957264957265, "grad_norm": 1.8147324600356793, "learning_rate": 1.0863075492350079e-05, "loss": 0.5717, "step": 19711 }, { "epoch": 1.465031586770717, "grad_norm": 2.584008999171302, "learning_rate": 1.0862276115324564e-05, "loss": 0.6722, "step": 19712 }, { "epoch": 1.4651059085841696, "grad_norm": 2.0560689187078025, "learning_rate": 1.0861476732747757e-05, "loss": 0.6277, "step": 19713 }, { "epoch": 1.4651802303976216, "grad_norm": 1.9098785374471199, "learning_rate": 1.086067734462481e-05, "loss": 0.6012, "step": 19714 }, { "epoch": 1.465254552211074, "grad_norm": 3.882654554936098, "learning_rate": 1.0859877950960864e-05, "loss": 0.6165, "step": 19715 }, { "epoch": 1.4653288740245263, "grad_norm": 2.136537311821033, "learning_rate": 1.085907855176107e-05, "loss": 0.4338, "step": 19716 }, { "epoch": 1.4654031958379785, "grad_norm": 4.220231442631402, "learning_rate": 1.085827914703057e-05, "loss": 0.4862, "step": 19717 }, { "epoch": 1.4654775176514307, "grad_norm": 2.0100056142858893, "learning_rate": 1.085747973677452e-05, "loss": 0.5674, "step": 19718 }, { "epoch": 1.465551839464883, "grad_norm": 2.0709036389872617, "learning_rate": 1.0856680320998056e-05, "loss": 0.6543, "step": 19719 }, { "epoch": 1.4656261612783352, "grad_norm": 1.5156419103942018, "learning_rate": 1.0855880899706329e-05, "loss": 0.4562, "step": 19720 }, { "epoch": 1.4657004830917875, "grad_norm": 1.8527123720308822, "learning_rate": 1.0855081472904483e-05, "loss": 0.5483, "step": 19721 }, { "epoch": 1.4657748049052397, "grad_norm": 2.1129334397607757, "learning_rate": 1.0854282040597669e-05, "loss": 0.6644, "step": 19722 }, { "epoch": 1.465849126718692, "grad_norm": 1.6958589705217515, "learning_rate": 1.0853482602791029e-05, "loss": 0.5696, "step": 19723 }, { "epoch": 1.4659234485321442, "grad_norm": 1.6171213629346195, "learning_rate": 1.0852683159489715e-05, "loss": 0.4673, "step": 19724 }, { "epoch": 1.4659977703455964, "grad_norm": 2.407716812740754, "learning_rate": 1.0851883710698871e-05, "loss": 0.722, "step": 19725 }, { "epoch": 1.4660720921590487, "grad_norm": 2.394018173466377, "learning_rate": 1.0851084256423642e-05, "loss": 0.7668, "step": 19726 }, { "epoch": 1.466146413972501, "grad_norm": 2.1326698276319216, "learning_rate": 1.0850284796669178e-05, "loss": 0.6061, "step": 19727 }, { "epoch": 1.4662207357859531, "grad_norm": 2.2466335635640395, "learning_rate": 1.0849485331440623e-05, "loss": 0.6882, "step": 19728 }, { "epoch": 1.4662950575994054, "grad_norm": 3.5740190579462383, "learning_rate": 1.0848685860743128e-05, "loss": 0.7374, "step": 19729 }, { "epoch": 1.4663693794128576, "grad_norm": 2.05244240809848, "learning_rate": 1.0847886384581838e-05, "loss": 0.5755, "step": 19730 }, { "epoch": 1.4664437012263098, "grad_norm": 1.7849347962671538, "learning_rate": 1.0847086902961899e-05, "loss": 0.613, "step": 19731 }, { "epoch": 1.4665180230397623, "grad_norm": 1.7863284972934304, "learning_rate": 1.0846287415888455e-05, "loss": 0.5833, "step": 19732 }, { "epoch": 1.4665923448532143, "grad_norm": 1.9818525965886538, "learning_rate": 1.0845487923366658e-05, "loss": 0.5491, "step": 19733 }, { "epoch": 1.4666666666666668, "grad_norm": 1.8277200809863947, "learning_rate": 1.0844688425401654e-05, "loss": 0.5809, "step": 19734 }, { "epoch": 1.4667409884801188, "grad_norm": 1.601871263706928, "learning_rate": 1.084388892199859e-05, "loss": 0.5472, "step": 19735 }, { "epoch": 1.4668153102935713, "grad_norm": 2.214467972037173, "learning_rate": 1.0843089413162617e-05, "loss": 0.6457, "step": 19736 }, { "epoch": 1.4668896321070233, "grad_norm": 1.847782803883227, "learning_rate": 1.0842289898898873e-05, "loss": 0.5177, "step": 19737 }, { "epoch": 1.4669639539204757, "grad_norm": 1.8802554136399814, "learning_rate": 1.084149037921251e-05, "loss": 0.5585, "step": 19738 }, { "epoch": 1.467038275733928, "grad_norm": 1.8359840545062756, "learning_rate": 1.0840690854108678e-05, "loss": 0.6062, "step": 19739 }, { "epoch": 1.4671125975473802, "grad_norm": 2.1053206310465393, "learning_rate": 1.0839891323592522e-05, "loss": 0.7136, "step": 19740 }, { "epoch": 1.4671869193608325, "grad_norm": 2.455457229064484, "learning_rate": 1.083909178766919e-05, "loss": 0.5773, "step": 19741 }, { "epoch": 1.4672612411742847, "grad_norm": 2.684648814484046, "learning_rate": 1.0838292246343826e-05, "loss": 0.6412, "step": 19742 }, { "epoch": 1.467335562987737, "grad_norm": 2.2393387580928548, "learning_rate": 1.0837492699621581e-05, "loss": 0.5393, "step": 19743 }, { "epoch": 1.4674098848011892, "grad_norm": 2.1357112196759913, "learning_rate": 1.08366931475076e-05, "loss": 0.6714, "step": 19744 }, { "epoch": 1.4674842066146414, "grad_norm": 2.1451045589290603, "learning_rate": 1.0835893590007032e-05, "loss": 0.7037, "step": 19745 }, { "epoch": 1.4675585284280936, "grad_norm": 2.4300021589913015, "learning_rate": 1.0835094027125028e-05, "loss": 0.7623, "step": 19746 }, { "epoch": 1.4676328502415459, "grad_norm": 1.9504270181166001, "learning_rate": 1.083429445886673e-05, "loss": 0.5575, "step": 19747 }, { "epoch": 1.4677071720549981, "grad_norm": 1.8699805973197725, "learning_rate": 1.0833494885237288e-05, "loss": 0.569, "step": 19748 }, { "epoch": 1.4677814938684504, "grad_norm": 1.7778864119788014, "learning_rate": 1.0832695306241847e-05, "loss": 0.5809, "step": 19749 }, { "epoch": 1.4678558156819026, "grad_norm": 3.60386184429489, "learning_rate": 1.0831895721885557e-05, "loss": 0.6791, "step": 19750 }, { "epoch": 1.4679301374953548, "grad_norm": 1.680650980417658, "learning_rate": 1.0831096132173568e-05, "loss": 0.5561, "step": 19751 }, { "epoch": 1.468004459308807, "grad_norm": 1.826625911715403, "learning_rate": 1.0830296537111026e-05, "loss": 0.564, "step": 19752 }, { "epoch": 1.4680787811222593, "grad_norm": 1.9596389069744267, "learning_rate": 1.0829496936703076e-05, "loss": 0.6311, "step": 19753 }, { "epoch": 1.4681531029357116, "grad_norm": 2.2901540753109866, "learning_rate": 1.0828697330954867e-05, "loss": 0.6254, "step": 19754 }, { "epoch": 1.468227424749164, "grad_norm": 1.9418389834871315, "learning_rate": 1.0827897719871547e-05, "loss": 0.6372, "step": 19755 }, { "epoch": 1.468301746562616, "grad_norm": 2.0004789767620554, "learning_rate": 1.0827098103458264e-05, "loss": 0.7615, "step": 19756 }, { "epoch": 1.4683760683760685, "grad_norm": 2.4562189236795864, "learning_rate": 1.082629848172017e-05, "loss": 0.5951, "step": 19757 }, { "epoch": 1.4684503901895205, "grad_norm": 2.223592651943561, "learning_rate": 1.0825498854662411e-05, "loss": 0.6693, "step": 19758 }, { "epoch": 1.468524712002973, "grad_norm": 2.2456948965874868, "learning_rate": 1.0824699222290126e-05, "loss": 0.6855, "step": 19759 }, { "epoch": 1.4685990338164252, "grad_norm": 2.4693357304908394, "learning_rate": 1.0823899584608475e-05, "loss": 0.5985, "step": 19760 }, { "epoch": 1.4686733556298774, "grad_norm": 1.7987747662346552, "learning_rate": 1.08230999416226e-05, "loss": 0.4893, "step": 19761 }, { "epoch": 1.4687476774433297, "grad_norm": 2.043081999510432, "learning_rate": 1.0822300293337652e-05, "loss": 0.66, "step": 19762 }, { "epoch": 1.468821999256782, "grad_norm": 1.7502673930742065, "learning_rate": 1.0821500639758775e-05, "loss": 0.603, "step": 19763 }, { "epoch": 1.4688963210702342, "grad_norm": 2.027034762313076, "learning_rate": 1.0820700980891119e-05, "loss": 0.6056, "step": 19764 }, { "epoch": 1.4689706428836864, "grad_norm": 2.274231552293845, "learning_rate": 1.0819901316739833e-05, "loss": 0.6693, "step": 19765 }, { "epoch": 1.4690449646971386, "grad_norm": 2.1291170102263064, "learning_rate": 1.0819101647310067e-05, "loss": 0.6603, "step": 19766 }, { "epoch": 1.4691192865105909, "grad_norm": 1.6801206685979204, "learning_rate": 1.0818301972606965e-05, "loss": 0.558, "step": 19767 }, { "epoch": 1.469193608324043, "grad_norm": 2.155740145388207, "learning_rate": 1.0817502292635678e-05, "loss": 0.6928, "step": 19768 }, { "epoch": 1.4692679301374953, "grad_norm": 2.3412608222712277, "learning_rate": 1.0816702607401355e-05, "loss": 0.5554, "step": 19769 }, { "epoch": 1.4693422519509476, "grad_norm": 2.1325566007844423, "learning_rate": 1.0815902916909142e-05, "loss": 0.6609, "step": 19770 }, { "epoch": 1.4694165737643998, "grad_norm": 2.0764712788965016, "learning_rate": 1.0815103221164188e-05, "loss": 0.583, "step": 19771 }, { "epoch": 1.469490895577852, "grad_norm": 2.0144029687432305, "learning_rate": 1.0814303520171641e-05, "loss": 0.6567, "step": 19772 }, { "epoch": 1.4695652173913043, "grad_norm": 1.8540110223558173, "learning_rate": 1.0813503813936652e-05, "loss": 0.5243, "step": 19773 }, { "epoch": 1.4696395392047565, "grad_norm": 2.3037065904502705, "learning_rate": 1.0812704102464366e-05, "loss": 0.7057, "step": 19774 }, { "epoch": 1.4697138610182088, "grad_norm": 1.839004764637316, "learning_rate": 1.0811904385759934e-05, "loss": 0.6206, "step": 19775 }, { "epoch": 1.469788182831661, "grad_norm": 2.6569851359421484, "learning_rate": 1.0811104663828504e-05, "loss": 0.7043, "step": 19776 }, { "epoch": 1.4698625046451133, "grad_norm": 2.3939656931986377, "learning_rate": 1.0810304936675221e-05, "loss": 0.7589, "step": 19777 }, { "epoch": 1.4699368264585657, "grad_norm": 2.077633919188165, "learning_rate": 1.0809505204305242e-05, "loss": 0.5627, "step": 19778 }, { "epoch": 1.4700111482720177, "grad_norm": 2.2308007205835545, "learning_rate": 1.0808705466723708e-05, "loss": 0.5777, "step": 19779 }, { "epoch": 1.4700854700854702, "grad_norm": 2.7613883520591367, "learning_rate": 1.0807905723935772e-05, "loss": 0.6313, "step": 19780 }, { "epoch": 1.4701597918989222, "grad_norm": 1.706168858929013, "learning_rate": 1.0807105975946578e-05, "loss": 0.4553, "step": 19781 }, { "epoch": 1.4702341137123747, "grad_norm": 5.403773081353141, "learning_rate": 1.080630622276128e-05, "loss": 0.571, "step": 19782 }, { "epoch": 1.470308435525827, "grad_norm": 1.982186755036837, "learning_rate": 1.0805506464385022e-05, "loss": 0.4464, "step": 19783 }, { "epoch": 1.4703827573392791, "grad_norm": 1.7600256327216504, "learning_rate": 1.0804706700822958e-05, "loss": 0.5964, "step": 19784 }, { "epoch": 1.4704570791527314, "grad_norm": 2.00709594743143, "learning_rate": 1.080390693208023e-05, "loss": 0.6414, "step": 19785 }, { "epoch": 1.4705314009661836, "grad_norm": 1.7820140351857061, "learning_rate": 1.0803107158161996e-05, "loss": 0.5231, "step": 19786 }, { "epoch": 1.4706057227796359, "grad_norm": 2.3060246359564993, "learning_rate": 1.0802307379073396e-05, "loss": 0.6876, "step": 19787 }, { "epoch": 1.470680044593088, "grad_norm": 1.9461580601824657, "learning_rate": 1.0801507594819582e-05, "loss": 0.4574, "step": 19788 }, { "epoch": 1.4707543664065403, "grad_norm": 2.153096340712843, "learning_rate": 1.0800707805405705e-05, "loss": 0.6498, "step": 19789 }, { "epoch": 1.4708286882199926, "grad_norm": 1.884803744006202, "learning_rate": 1.0799908010836914e-05, "loss": 0.5701, "step": 19790 }, { "epoch": 1.4709030100334448, "grad_norm": 2.0796055366928026, "learning_rate": 1.0799108211118356e-05, "loss": 0.7041, "step": 19791 }, { "epoch": 1.470977331846897, "grad_norm": 2.0836606384223715, "learning_rate": 1.0798308406255182e-05, "loss": 0.6251, "step": 19792 }, { "epoch": 1.4710516536603493, "grad_norm": 1.7899247446756814, "learning_rate": 1.079750859625254e-05, "loss": 0.5726, "step": 19793 }, { "epoch": 1.4711259754738015, "grad_norm": 1.8593129845910714, "learning_rate": 1.0796708781115578e-05, "loss": 0.5636, "step": 19794 }, { "epoch": 1.4712002972872538, "grad_norm": 2.1535785719777225, "learning_rate": 1.0795908960849447e-05, "loss": 0.6895, "step": 19795 }, { "epoch": 1.471274619100706, "grad_norm": 1.9387975138727598, "learning_rate": 1.0795109135459292e-05, "loss": 0.636, "step": 19796 }, { "epoch": 1.4713489409141582, "grad_norm": 2.179022313584475, "learning_rate": 1.079430930495027e-05, "loss": 0.6068, "step": 19797 }, { "epoch": 1.4714232627276105, "grad_norm": 1.7554493124107426, "learning_rate": 1.0793509469327523e-05, "loss": 0.5833, "step": 19798 }, { "epoch": 1.4714975845410627, "grad_norm": 3.244215273349348, "learning_rate": 1.0792709628596203e-05, "loss": 0.5867, "step": 19799 }, { "epoch": 1.471571906354515, "grad_norm": 1.992887692894084, "learning_rate": 1.079190978276146e-05, "loss": 0.6139, "step": 19800 }, { "epoch": 1.4716462281679674, "grad_norm": 1.8083716043650189, "learning_rate": 1.0791109931828445e-05, "loss": 0.5303, "step": 19801 }, { "epoch": 1.4717205499814194, "grad_norm": 2.003832824737298, "learning_rate": 1.0790310075802301e-05, "loss": 0.6336, "step": 19802 }, { "epoch": 1.471794871794872, "grad_norm": 2.153493658084475, "learning_rate": 1.0789510214688188e-05, "loss": 0.6223, "step": 19803 }, { "epoch": 1.471869193608324, "grad_norm": 2.1328808654426696, "learning_rate": 1.0788710348491245e-05, "loss": 0.6166, "step": 19804 }, { "epoch": 1.4719435154217764, "grad_norm": 1.60598544369045, "learning_rate": 1.0787910477216626e-05, "loss": 0.528, "step": 19805 }, { "epoch": 1.4720178372352286, "grad_norm": 3.1731974687843407, "learning_rate": 1.0787110600869482e-05, "loss": 0.7242, "step": 19806 }, { "epoch": 1.4720921590486808, "grad_norm": 2.380492362012744, "learning_rate": 1.0786310719454958e-05, "loss": 0.6148, "step": 19807 }, { "epoch": 1.472166480862133, "grad_norm": 1.8276737657217559, "learning_rate": 1.0785510832978209e-05, "loss": 0.4751, "step": 19808 }, { "epoch": 1.4722408026755853, "grad_norm": 2.1389678990357925, "learning_rate": 1.078471094144438e-05, "loss": 0.7064, "step": 19809 }, { "epoch": 1.4723151244890376, "grad_norm": 2.0028102685598097, "learning_rate": 1.0783911044858623e-05, "loss": 0.4992, "step": 19810 }, { "epoch": 1.4723894463024898, "grad_norm": 2.1061536661752194, "learning_rate": 1.0783111143226085e-05, "loss": 0.6585, "step": 19811 }, { "epoch": 1.472463768115942, "grad_norm": 1.9095977935067854, "learning_rate": 1.0782311236551925e-05, "loss": 0.6013, "step": 19812 }, { "epoch": 1.4725380899293943, "grad_norm": 2.0856529140294433, "learning_rate": 1.078151132484128e-05, "loss": 0.6231, "step": 19813 }, { "epoch": 1.4726124117428465, "grad_norm": 2.2561295851730137, "learning_rate": 1.0780711408099309e-05, "loss": 0.5929, "step": 19814 }, { "epoch": 1.4726867335562988, "grad_norm": 2.0974769063585668, "learning_rate": 1.0779911486331156e-05, "loss": 0.572, "step": 19815 }, { "epoch": 1.472761055369751, "grad_norm": 2.024895495642575, "learning_rate": 1.0779111559541976e-05, "loss": 0.4911, "step": 19816 }, { "epoch": 1.4728353771832032, "grad_norm": 2.1884757146368616, "learning_rate": 1.0778311627736915e-05, "loss": 0.6369, "step": 19817 }, { "epoch": 1.4729096989966555, "grad_norm": 2.0194076242455252, "learning_rate": 1.0777511690921122e-05, "loss": 0.669, "step": 19818 }, { "epoch": 1.4729840208101077, "grad_norm": 2.0347856301077916, "learning_rate": 1.0776711749099752e-05, "loss": 0.6054, "step": 19819 }, { "epoch": 1.47305834262356, "grad_norm": 2.1806879151493166, "learning_rate": 1.077591180227795e-05, "loss": 0.6689, "step": 19820 }, { "epoch": 1.4731326644370122, "grad_norm": 1.9724324404416143, "learning_rate": 1.0775111850460869e-05, "loss": 0.6044, "step": 19821 }, { "epoch": 1.4732069862504646, "grad_norm": 2.2498852129699913, "learning_rate": 1.0774311893653659e-05, "loss": 0.6904, "step": 19822 }, { "epoch": 1.4732813080639167, "grad_norm": 2.3725828078385374, "learning_rate": 1.0773511931861467e-05, "loss": 0.6564, "step": 19823 }, { "epoch": 1.4733556298773691, "grad_norm": 1.6674752868672242, "learning_rate": 1.0772711965089446e-05, "loss": 0.5382, "step": 19824 }, { "epoch": 1.4734299516908211, "grad_norm": 1.973773754698569, "learning_rate": 1.0771911993342747e-05, "loss": 0.5894, "step": 19825 }, { "epoch": 1.4735042735042736, "grad_norm": 1.562647561856306, "learning_rate": 1.0771112016626517e-05, "loss": 0.4999, "step": 19826 }, { "epoch": 1.4735785953177256, "grad_norm": 1.8959989973777012, "learning_rate": 1.0770312034945907e-05, "loss": 0.7366, "step": 19827 }, { "epoch": 1.473652917131178, "grad_norm": 2.197259673243275, "learning_rate": 1.0769512048306072e-05, "loss": 0.5586, "step": 19828 }, { "epoch": 1.4737272389446303, "grad_norm": 1.953426553324752, "learning_rate": 1.0768712056712154e-05, "loss": 0.7345, "step": 19829 }, { "epoch": 1.4738015607580826, "grad_norm": 2.343989428909076, "learning_rate": 1.076791206016931e-05, "loss": 0.6616, "step": 19830 }, { "epoch": 1.4738758825715348, "grad_norm": 2.6956662980707224, "learning_rate": 1.0767112058682687e-05, "loss": 0.8466, "step": 19831 }, { "epoch": 1.473950204384987, "grad_norm": 1.9623955049567223, "learning_rate": 1.0766312052257437e-05, "loss": 0.5534, "step": 19832 }, { "epoch": 1.4740245261984393, "grad_norm": 2.4017807420212924, "learning_rate": 1.0765512040898711e-05, "loss": 0.6614, "step": 19833 }, { "epoch": 1.4740988480118915, "grad_norm": 2.040275150922398, "learning_rate": 1.0764712024611657e-05, "loss": 0.7373, "step": 19834 }, { "epoch": 1.4741731698253437, "grad_norm": 2.013597666944971, "learning_rate": 1.0763912003401425e-05, "loss": 0.7109, "step": 19835 }, { "epoch": 1.474247491638796, "grad_norm": 1.8580511364368004, "learning_rate": 1.0763111977273169e-05, "loss": 0.6222, "step": 19836 }, { "epoch": 1.4743218134522482, "grad_norm": 1.9474966987852689, "learning_rate": 1.0762311946232037e-05, "loss": 0.6037, "step": 19837 }, { "epoch": 1.4743961352657005, "grad_norm": 1.9789436653447492, "learning_rate": 1.076151191028318e-05, "loss": 0.5468, "step": 19838 }, { "epoch": 1.4744704570791527, "grad_norm": 3.049355683788399, "learning_rate": 1.0760711869431753e-05, "loss": 0.7215, "step": 19839 }, { "epoch": 1.474544778892605, "grad_norm": 2.155072307759853, "learning_rate": 1.0759911823682899e-05, "loss": 0.6843, "step": 19840 }, { "epoch": 1.4746191007060572, "grad_norm": 2.132753149716104, "learning_rate": 1.0759111773041768e-05, "loss": 0.6067, "step": 19841 }, { "epoch": 1.4746934225195094, "grad_norm": 2.060150561346734, "learning_rate": 1.075831171751352e-05, "loss": 0.7233, "step": 19842 }, { "epoch": 1.4747677443329616, "grad_norm": 1.8719799683374492, "learning_rate": 1.0757511657103302e-05, "loss": 0.6197, "step": 19843 }, { "epoch": 1.4748420661464139, "grad_norm": 2.091127929061441, "learning_rate": 1.075671159181626e-05, "loss": 0.5877, "step": 19844 }, { "epoch": 1.4749163879598663, "grad_norm": 1.756549183598111, "learning_rate": 1.0755911521657548e-05, "loss": 0.5499, "step": 19845 }, { "epoch": 1.4749907097733184, "grad_norm": 2.028455951109806, "learning_rate": 1.0755111446632316e-05, "loss": 0.5975, "step": 19846 }, { "epoch": 1.4750650315867708, "grad_norm": 1.9193647386711328, "learning_rate": 1.0754311366745718e-05, "loss": 0.4874, "step": 19847 }, { "epoch": 1.4751393534002228, "grad_norm": 2.0023798033953946, "learning_rate": 1.0753511282002902e-05, "loss": 0.5489, "step": 19848 }, { "epoch": 1.4752136752136753, "grad_norm": 1.9611191068306273, "learning_rate": 1.0752711192409017e-05, "loss": 0.5813, "step": 19849 }, { "epoch": 1.4752879970271275, "grad_norm": 2.0320838113252035, "learning_rate": 1.0751911097969221e-05, "loss": 0.5607, "step": 19850 }, { "epoch": 1.4753623188405798, "grad_norm": 1.7471231275631793, "learning_rate": 1.0751110998688659e-05, "loss": 0.5323, "step": 19851 }, { "epoch": 1.475436640654032, "grad_norm": 2.4679638200686647, "learning_rate": 1.075031089457248e-05, "loss": 0.6154, "step": 19852 }, { "epoch": 1.4755109624674843, "grad_norm": 1.7267403079241224, "learning_rate": 1.074951078562584e-05, "loss": 0.5469, "step": 19853 }, { "epoch": 1.4755852842809365, "grad_norm": 1.5813844879048842, "learning_rate": 1.0748710671853891e-05, "loss": 0.4508, "step": 19854 }, { "epoch": 1.4756596060943887, "grad_norm": 1.9815383868078997, "learning_rate": 1.074791055326178e-05, "loss": 0.5138, "step": 19855 }, { "epoch": 1.475733927907841, "grad_norm": 2.0031776079321486, "learning_rate": 1.0747110429854658e-05, "loss": 0.7011, "step": 19856 }, { "epoch": 1.4758082497212932, "grad_norm": 2.2453513771737947, "learning_rate": 1.0746310301637679e-05, "loss": 0.6365, "step": 19857 }, { "epoch": 1.4758825715347454, "grad_norm": 2.0723424854757053, "learning_rate": 1.0745510168615994e-05, "loss": 0.5806, "step": 19858 }, { "epoch": 1.4759568933481977, "grad_norm": 1.7086413916302368, "learning_rate": 1.0744710030794753e-05, "loss": 0.5451, "step": 19859 }, { "epoch": 1.47603121516165, "grad_norm": 1.9040555642049835, "learning_rate": 1.0743909888179108e-05, "loss": 0.6667, "step": 19860 }, { "epoch": 1.4761055369751022, "grad_norm": 2.05402676972538, "learning_rate": 1.0743109740774209e-05, "loss": 0.5572, "step": 19861 }, { "epoch": 1.4761798587885544, "grad_norm": 1.9147455613138356, "learning_rate": 1.0742309588585208e-05, "loss": 0.5405, "step": 19862 }, { "epoch": 1.4762541806020066, "grad_norm": 1.8494475163462556, "learning_rate": 1.0741509431617255e-05, "loss": 0.5596, "step": 19863 }, { "epoch": 1.4763285024154589, "grad_norm": 1.9536488310511906, "learning_rate": 1.0740709269875502e-05, "loss": 0.6293, "step": 19864 }, { "epoch": 1.4764028242289111, "grad_norm": 2.1042081422889267, "learning_rate": 1.0739909103365102e-05, "loss": 0.7011, "step": 19865 }, { "epoch": 1.4764771460423634, "grad_norm": 1.7267297541367537, "learning_rate": 1.0739108932091209e-05, "loss": 0.4628, "step": 19866 }, { "epoch": 1.4765514678558156, "grad_norm": 2.0513979869479666, "learning_rate": 1.0738308756058968e-05, "loss": 0.7134, "step": 19867 }, { "epoch": 1.476625789669268, "grad_norm": 2.207874093945076, "learning_rate": 1.0737508575273534e-05, "loss": 0.7069, "step": 19868 }, { "epoch": 1.47670011148272, "grad_norm": 2.231098293301312, "learning_rate": 1.0736708389740058e-05, "loss": 0.7576, "step": 19869 }, { "epoch": 1.4767744332961725, "grad_norm": 2.109771334511636, "learning_rate": 1.073590819946369e-05, "loss": 0.6192, "step": 19870 }, { "epoch": 1.4768487551096245, "grad_norm": 1.8724079065441233, "learning_rate": 1.0735108004449586e-05, "loss": 0.5463, "step": 19871 }, { "epoch": 1.476923076923077, "grad_norm": 1.889306529070247, "learning_rate": 1.0734307804702895e-05, "loss": 0.69, "step": 19872 }, { "epoch": 1.4769973987365292, "grad_norm": 1.875848051815228, "learning_rate": 1.0733507600228767e-05, "loss": 0.5954, "step": 19873 }, { "epoch": 1.4770717205499815, "grad_norm": 1.5924614428364838, "learning_rate": 1.0732707391032356e-05, "loss": 0.5352, "step": 19874 }, { "epoch": 1.4771460423634337, "grad_norm": 2.2102254526329075, "learning_rate": 1.073190717711881e-05, "loss": 0.6365, "step": 19875 }, { "epoch": 1.477220364176886, "grad_norm": 2.351629557178751, "learning_rate": 1.0731106958493287e-05, "loss": 0.6024, "step": 19876 }, { "epoch": 1.4772946859903382, "grad_norm": 1.652367526725989, "learning_rate": 1.0730306735160936e-05, "loss": 0.5757, "step": 19877 }, { "epoch": 1.4773690078037904, "grad_norm": 2.1371620677969836, "learning_rate": 1.0729506507126905e-05, "loss": 0.6448, "step": 19878 }, { "epoch": 1.4774433296172427, "grad_norm": 1.7561496324747232, "learning_rate": 1.072870627439635e-05, "loss": 0.5448, "step": 19879 }, { "epoch": 1.477517651430695, "grad_norm": 1.9848495007681677, "learning_rate": 1.072790603697442e-05, "loss": 0.6478, "step": 19880 }, { "epoch": 1.4775919732441471, "grad_norm": 1.716847406285589, "learning_rate": 1.072710579486627e-05, "loss": 0.5981, "step": 19881 }, { "epoch": 1.4776662950575994, "grad_norm": 1.978024404270801, "learning_rate": 1.0726305548077054e-05, "loss": 0.5672, "step": 19882 }, { "epoch": 1.4777406168710516, "grad_norm": 1.6831080352860521, "learning_rate": 1.0725505296611918e-05, "loss": 0.5976, "step": 19883 }, { "epoch": 1.4778149386845039, "grad_norm": 1.6968123103521346, "learning_rate": 1.0724705040476016e-05, "loss": 0.5763, "step": 19884 }, { "epoch": 1.477889260497956, "grad_norm": 2.2118232388316943, "learning_rate": 1.07239047796745e-05, "loss": 0.7902, "step": 19885 }, { "epoch": 1.4779635823114083, "grad_norm": 1.8126996522071688, "learning_rate": 1.0723104514212523e-05, "loss": 0.5676, "step": 19886 }, { "epoch": 1.4780379041248606, "grad_norm": 2.876172127315013, "learning_rate": 1.0722304244095239e-05, "loss": 0.6275, "step": 19887 }, { "epoch": 1.4781122259383128, "grad_norm": 2.589456616647272, "learning_rate": 1.0721503969327798e-05, "loss": 0.6808, "step": 19888 }, { "epoch": 1.4781865477517653, "grad_norm": 1.9402831129815963, "learning_rate": 1.0720703689915348e-05, "loss": 0.6147, "step": 19889 }, { "epoch": 1.4782608695652173, "grad_norm": 1.7753038761587505, "learning_rate": 1.0719903405863045e-05, "loss": 0.435, "step": 19890 }, { "epoch": 1.4783351913786698, "grad_norm": 2.179690885603733, "learning_rate": 1.0719103117176045e-05, "loss": 0.6434, "step": 19891 }, { "epoch": 1.4784095131921218, "grad_norm": 1.7534079878590576, "learning_rate": 1.0718302823859492e-05, "loss": 0.4572, "step": 19892 }, { "epoch": 1.4784838350055742, "grad_norm": 2.13533207436006, "learning_rate": 1.0717502525918548e-05, "loss": 0.589, "step": 19893 }, { "epoch": 1.4785581568190262, "grad_norm": 1.960104843062765, "learning_rate": 1.0716702223358357e-05, "loss": 0.5402, "step": 19894 }, { "epoch": 1.4786324786324787, "grad_norm": 1.7328860478942247, "learning_rate": 1.0715901916184075e-05, "loss": 0.5299, "step": 19895 }, { "epoch": 1.478706800445931, "grad_norm": 1.9392628056823042, "learning_rate": 1.0715101604400852e-05, "loss": 0.7562, "step": 19896 }, { "epoch": 1.4787811222593832, "grad_norm": 2.0182289256110177, "learning_rate": 1.0714301288013843e-05, "loss": 0.5934, "step": 19897 }, { "epoch": 1.4788554440728354, "grad_norm": 1.9574879602733213, "learning_rate": 1.07135009670282e-05, "loss": 0.6458, "step": 19898 }, { "epoch": 1.4789297658862877, "grad_norm": 1.8917885455749492, "learning_rate": 1.0712700641449076e-05, "loss": 0.5548, "step": 19899 }, { "epoch": 1.47900408769974, "grad_norm": 2.082988034293047, "learning_rate": 1.071190031128162e-05, "loss": 0.5424, "step": 19900 }, { "epoch": 1.4790784095131921, "grad_norm": 1.9037944795308028, "learning_rate": 1.0711099976530985e-05, "loss": 0.5951, "step": 19901 }, { "epoch": 1.4791527313266444, "grad_norm": 2.3461834614561865, "learning_rate": 1.0710299637202327e-05, "loss": 0.7482, "step": 19902 }, { "epoch": 1.4792270531400966, "grad_norm": 2.2741538955069416, "learning_rate": 1.0709499293300798e-05, "loss": 0.6457, "step": 19903 }, { "epoch": 1.4793013749535489, "grad_norm": 2.138046584131524, "learning_rate": 1.070869894483155e-05, "loss": 0.7991, "step": 19904 }, { "epoch": 1.479375696767001, "grad_norm": 1.6993290661246703, "learning_rate": 1.0707898591799734e-05, "loss": 0.6497, "step": 19905 }, { "epoch": 1.4794500185804533, "grad_norm": 1.8528275865533443, "learning_rate": 1.0707098234210503e-05, "loss": 0.5752, "step": 19906 }, { "epoch": 1.4795243403939056, "grad_norm": 1.6990580663347261, "learning_rate": 1.070629787206901e-05, "loss": 0.538, "step": 19907 }, { "epoch": 1.4795986622073578, "grad_norm": 2.8077637807033393, "learning_rate": 1.0705497505380407e-05, "loss": 0.8244, "step": 19908 }, { "epoch": 1.47967298402081, "grad_norm": 1.8234575032444331, "learning_rate": 1.070469713414985e-05, "loss": 0.478, "step": 19909 }, { "epoch": 1.4797473058342623, "grad_norm": 2.009109658422577, "learning_rate": 1.0703896758382491e-05, "loss": 0.6624, "step": 19910 }, { "epoch": 1.4798216276477145, "grad_norm": 1.9697982956656102, "learning_rate": 1.0703096378083478e-05, "loss": 0.709, "step": 19911 }, { "epoch": 1.479895949461167, "grad_norm": 2.019085216788891, "learning_rate": 1.0702295993257967e-05, "loss": 0.7753, "step": 19912 }, { "epoch": 1.479970271274619, "grad_norm": 2.5296377852441654, "learning_rate": 1.070149560391111e-05, "loss": 0.6221, "step": 19913 }, { "epoch": 1.4800445930880715, "grad_norm": 2.0369560427276774, "learning_rate": 1.0700695210048062e-05, "loss": 0.6732, "step": 19914 }, { "epoch": 1.4801189149015235, "grad_norm": 2.194989584185281, "learning_rate": 1.0699894811673976e-05, "loss": 0.5117, "step": 19915 }, { "epoch": 1.480193236714976, "grad_norm": 1.483004217706141, "learning_rate": 1.0699094408794003e-05, "loss": 0.5132, "step": 19916 }, { "epoch": 1.4802675585284282, "grad_norm": 1.5191988976019353, "learning_rate": 1.0698294001413296e-05, "loss": 0.5028, "step": 19917 }, { "epoch": 1.4803418803418804, "grad_norm": 1.6078245206293698, "learning_rate": 1.0697493589537006e-05, "loss": 0.4491, "step": 19918 }, { "epoch": 1.4804162021553327, "grad_norm": 3.549388914997074, "learning_rate": 1.0696693173170292e-05, "loss": 0.6423, "step": 19919 }, { "epoch": 1.4804905239687849, "grad_norm": 2.0933022605209706, "learning_rate": 1.0695892752318303e-05, "loss": 0.7117, "step": 19920 }, { "epoch": 1.4805648457822371, "grad_norm": 1.8434614646496323, "learning_rate": 1.0695092326986193e-05, "loss": 0.5749, "step": 19921 }, { "epoch": 1.4806391675956894, "grad_norm": 2.068562407998492, "learning_rate": 1.0694291897179113e-05, "loss": 0.5332, "step": 19922 }, { "epoch": 1.4807134894091416, "grad_norm": 2.000567460039245, "learning_rate": 1.0693491462902217e-05, "loss": 0.7597, "step": 19923 }, { "epoch": 1.4807878112225938, "grad_norm": 2.370457129877112, "learning_rate": 1.069269102416066e-05, "loss": 0.6976, "step": 19924 }, { "epoch": 1.480862133036046, "grad_norm": 2.434051987423209, "learning_rate": 1.0691890580959595e-05, "loss": 0.6469, "step": 19925 }, { "epoch": 1.4809364548494983, "grad_norm": 1.9846861516574565, "learning_rate": 1.0691090133304174e-05, "loss": 0.6065, "step": 19926 }, { "epoch": 1.4810107766629506, "grad_norm": 1.9802920712251697, "learning_rate": 1.0690289681199551e-05, "loss": 0.5542, "step": 19927 }, { "epoch": 1.4810850984764028, "grad_norm": 2.161259910189615, "learning_rate": 1.0689489224650878e-05, "loss": 0.5919, "step": 19928 }, { "epoch": 1.481159420289855, "grad_norm": 2.480455622877294, "learning_rate": 1.0688688763663308e-05, "loss": 0.563, "step": 19929 }, { "epoch": 1.4812337421033073, "grad_norm": 1.6565031834865098, "learning_rate": 1.0687888298241995e-05, "loss": 0.4837, "step": 19930 }, { "epoch": 1.4813080639167595, "grad_norm": 1.7402532577857448, "learning_rate": 1.0687087828392096e-05, "loss": 0.6441, "step": 19931 }, { "epoch": 1.4813823857302117, "grad_norm": 1.7188600304038446, "learning_rate": 1.0686287354118758e-05, "loss": 0.573, "step": 19932 }, { "epoch": 1.481456707543664, "grad_norm": 2.32862007630724, "learning_rate": 1.0685486875427143e-05, "loss": 0.794, "step": 19933 }, { "epoch": 1.4815310293571162, "grad_norm": 1.9252632152552522, "learning_rate": 1.0684686392322394e-05, "loss": 0.5709, "step": 19934 }, { "epoch": 1.4816053511705687, "grad_norm": 1.8264821534098967, "learning_rate": 1.068388590480967e-05, "loss": 0.5031, "step": 19935 }, { "epoch": 1.4816796729840207, "grad_norm": 1.861957534320405, "learning_rate": 1.0683085412894123e-05, "loss": 0.5424, "step": 19936 }, { "epoch": 1.4817539947974732, "grad_norm": 2.078776885881845, "learning_rate": 1.0682284916580912e-05, "loss": 0.5797, "step": 19937 }, { "epoch": 1.4818283166109252, "grad_norm": 2.2916406488207777, "learning_rate": 1.0681484415875185e-05, "loss": 0.638, "step": 19938 }, { "epoch": 1.4819026384243776, "grad_norm": 2.1227733640115525, "learning_rate": 1.0680683910782095e-05, "loss": 0.6609, "step": 19939 }, { "epoch": 1.4819769602378299, "grad_norm": 1.6459116526717756, "learning_rate": 1.0679883401306796e-05, "loss": 0.5411, "step": 19940 }, { "epoch": 1.4820512820512821, "grad_norm": 1.9134756865511116, "learning_rate": 1.0679082887454444e-05, "loss": 0.5809, "step": 19941 }, { "epoch": 1.4821256038647344, "grad_norm": 1.7317970215049183, "learning_rate": 1.0678282369230193e-05, "loss": 0.5083, "step": 19942 }, { "epoch": 1.4821999256781866, "grad_norm": 2.323116015686219, "learning_rate": 1.0677481846639194e-05, "loss": 0.6702, "step": 19943 }, { "epoch": 1.4822742474916388, "grad_norm": 1.6588875460300476, "learning_rate": 1.0676681319686604e-05, "loss": 0.5558, "step": 19944 }, { "epoch": 1.482348569305091, "grad_norm": 1.818474996825143, "learning_rate": 1.0675880788377573e-05, "loss": 0.5012, "step": 19945 }, { "epoch": 1.4824228911185433, "grad_norm": 1.7107474312429973, "learning_rate": 1.0675080252717256e-05, "loss": 0.4657, "step": 19946 }, { "epoch": 1.4824972129319955, "grad_norm": 1.5727599610636855, "learning_rate": 1.067427971271081e-05, "loss": 0.551, "step": 19947 }, { "epoch": 1.4825715347454478, "grad_norm": 2.2108881326916454, "learning_rate": 1.0673479168363383e-05, "loss": 0.7021, "step": 19948 }, { "epoch": 1.4826458565589, "grad_norm": 2.2872854428687552, "learning_rate": 1.0672678619680133e-05, "loss": 0.7882, "step": 19949 }, { "epoch": 1.4827201783723523, "grad_norm": 2.4301452357495776, "learning_rate": 1.0671878066666214e-05, "loss": 0.5409, "step": 19950 }, { "epoch": 1.4827945001858045, "grad_norm": 2.144139691498366, "learning_rate": 1.067107750932678e-05, "loss": 0.6314, "step": 19951 }, { "epoch": 1.4828688219992567, "grad_norm": 2.1362216701831427, "learning_rate": 1.067027694766698e-05, "loss": 0.6452, "step": 19952 }, { "epoch": 1.482943143812709, "grad_norm": 1.9803323746812926, "learning_rate": 1.0669476381691975e-05, "loss": 0.5906, "step": 19953 }, { "epoch": 1.4830174656261612, "grad_norm": 2.0421762737078493, "learning_rate": 1.0668675811406914e-05, "loss": 0.6527, "step": 19954 }, { "epoch": 1.4830917874396135, "grad_norm": 2.144189469534425, "learning_rate": 1.0667875236816954e-05, "loss": 0.5724, "step": 19955 }, { "epoch": 1.483166109253066, "grad_norm": 2.0518035812282984, "learning_rate": 1.0667074657927248e-05, "loss": 0.6138, "step": 19956 }, { "epoch": 1.483240431066518, "grad_norm": 2.037052552712668, "learning_rate": 1.0666274074742948e-05, "loss": 0.7729, "step": 19957 }, { "epoch": 1.4833147528799704, "grad_norm": 2.388052218582714, "learning_rate": 1.0665473487269214e-05, "loss": 0.6739, "step": 19958 }, { "epoch": 1.4833890746934224, "grad_norm": 2.1081428281330563, "learning_rate": 1.0664672895511192e-05, "loss": 0.7102, "step": 19959 }, { "epoch": 1.4834633965068749, "grad_norm": 2.1474740678416717, "learning_rate": 1.0663872299474041e-05, "loss": 0.7617, "step": 19960 }, { "epoch": 1.4835377183203269, "grad_norm": 1.9078590143968248, "learning_rate": 1.0663071699162916e-05, "loss": 0.6608, "step": 19961 }, { "epoch": 1.4836120401337793, "grad_norm": 1.8307537639802651, "learning_rate": 1.066227109458297e-05, "loss": 0.6237, "step": 19962 }, { "epoch": 1.4836863619472316, "grad_norm": 1.861942135871388, "learning_rate": 1.0661470485739355e-05, "loss": 0.605, "step": 19963 }, { "epoch": 1.4837606837606838, "grad_norm": 2.0129372825547787, "learning_rate": 1.066066987263723e-05, "loss": 0.5882, "step": 19964 }, { "epoch": 1.483835005574136, "grad_norm": 1.7190390374657272, "learning_rate": 1.0659869255281745e-05, "loss": 0.6072, "step": 19965 }, { "epoch": 1.4839093273875883, "grad_norm": 1.882971140291135, "learning_rate": 1.0659068633678055e-05, "loss": 0.6101, "step": 19966 }, { "epoch": 1.4839836492010405, "grad_norm": 1.6468950182224478, "learning_rate": 1.0658268007831318e-05, "loss": 0.5527, "step": 19967 }, { "epoch": 1.4840579710144928, "grad_norm": 1.849019583171099, "learning_rate": 1.0657467377746685e-05, "loss": 0.6466, "step": 19968 }, { "epoch": 1.484132292827945, "grad_norm": 2.0312367959012416, "learning_rate": 1.0656666743429311e-05, "loss": 0.5491, "step": 19969 }, { "epoch": 1.4842066146413972, "grad_norm": 1.9680831687930511, "learning_rate": 1.0655866104884349e-05, "loss": 0.5762, "step": 19970 }, { "epoch": 1.4842809364548495, "grad_norm": 1.8307239030684177, "learning_rate": 1.0655065462116956e-05, "loss": 0.7034, "step": 19971 }, { "epoch": 1.4843552582683017, "grad_norm": 2.2627893965998434, "learning_rate": 1.0654264815132288e-05, "loss": 0.6098, "step": 19972 }, { "epoch": 1.484429580081754, "grad_norm": 1.8681319251722508, "learning_rate": 1.0653464163935494e-05, "loss": 0.4162, "step": 19973 }, { "epoch": 1.4845039018952062, "grad_norm": 1.9879123477766092, "learning_rate": 1.0652663508531732e-05, "loss": 0.6904, "step": 19974 }, { "epoch": 1.4845782237086584, "grad_norm": 1.8271255471535026, "learning_rate": 1.0651862848926157e-05, "loss": 0.5362, "step": 19975 }, { "epoch": 1.4846525455221107, "grad_norm": 2.04528303088486, "learning_rate": 1.065106218512392e-05, "loss": 0.6159, "step": 19976 }, { "epoch": 1.484726867335563, "grad_norm": 2.9990328594510696, "learning_rate": 1.0650261517130183e-05, "loss": 0.6152, "step": 19977 }, { "epoch": 1.4848011891490152, "grad_norm": 1.763421625035501, "learning_rate": 1.0649460844950094e-05, "loss": 0.4724, "step": 19978 }, { "epoch": 1.4848755109624676, "grad_norm": 1.4233280935917396, "learning_rate": 1.0648660168588809e-05, "loss": 0.4436, "step": 19979 }, { "epoch": 1.4849498327759196, "grad_norm": 2.1215321130259337, "learning_rate": 1.0647859488051487e-05, "loss": 0.6019, "step": 19980 }, { "epoch": 1.485024154589372, "grad_norm": 2.129515445271217, "learning_rate": 1.0647058803343276e-05, "loss": 0.6803, "step": 19981 }, { "epoch": 1.485098476402824, "grad_norm": 5.001054202093001, "learning_rate": 1.0646258114469332e-05, "loss": 0.5886, "step": 19982 }, { "epoch": 1.4851727982162766, "grad_norm": 2.3058253990371167, "learning_rate": 1.0645457421434813e-05, "loss": 0.7028, "step": 19983 }, { "epoch": 1.4852471200297288, "grad_norm": 2.2105621900440684, "learning_rate": 1.0644656724244876e-05, "loss": 0.5754, "step": 19984 }, { "epoch": 1.485321441843181, "grad_norm": 1.9642321880945495, "learning_rate": 1.0643856022904668e-05, "loss": 0.6998, "step": 19985 }, { "epoch": 1.4853957636566333, "grad_norm": 2.1328922014552085, "learning_rate": 1.0643055317419353e-05, "loss": 0.6703, "step": 19986 }, { "epoch": 1.4854700854700855, "grad_norm": 1.5776488450937471, "learning_rate": 1.0642254607794077e-05, "loss": 0.5106, "step": 19987 }, { "epoch": 1.4855444072835378, "grad_norm": 1.822626339515691, "learning_rate": 1.0641453894034002e-05, "loss": 0.6189, "step": 19988 }, { "epoch": 1.48561872909699, "grad_norm": 1.7572870587911835, "learning_rate": 1.064065317614428e-05, "loss": 0.5106, "step": 19989 }, { "epoch": 1.4856930509104422, "grad_norm": 2.992504169752569, "learning_rate": 1.0639852454130064e-05, "loss": 0.7427, "step": 19990 }, { "epoch": 1.4857673727238945, "grad_norm": 1.7859397165871886, "learning_rate": 1.0639051727996514e-05, "loss": 0.556, "step": 19991 }, { "epoch": 1.4858416945373467, "grad_norm": 2.2356160036320243, "learning_rate": 1.063825099774878e-05, "loss": 0.7151, "step": 19992 }, { "epoch": 1.485916016350799, "grad_norm": 1.9531340052642894, "learning_rate": 1.063745026339202e-05, "loss": 0.6574, "step": 19993 }, { "epoch": 1.4859903381642512, "grad_norm": 2.0051977772329574, "learning_rate": 1.0636649524931386e-05, "loss": 0.7179, "step": 19994 }, { "epoch": 1.4860646599777034, "grad_norm": 1.9706132387679527, "learning_rate": 1.063584878237204e-05, "loss": 0.6797, "step": 19995 }, { "epoch": 1.4861389817911557, "grad_norm": 2.021697447325224, "learning_rate": 1.0635048035719129e-05, "loss": 0.6191, "step": 19996 }, { "epoch": 1.486213303604608, "grad_norm": 1.8026739276085038, "learning_rate": 1.0634247284977812e-05, "loss": 0.5653, "step": 19997 }, { "epoch": 1.4862876254180601, "grad_norm": 1.8554910005446295, "learning_rate": 1.0633446530153244e-05, "loss": 0.6169, "step": 19998 }, { "epoch": 1.4863619472315124, "grad_norm": 1.9573844975340258, "learning_rate": 1.063264577125058e-05, "loss": 0.6338, "step": 19999 }, { "epoch": 1.4864362690449646, "grad_norm": 1.7519942297677726, "learning_rate": 1.0631845008274977e-05, "loss": 0.5902, "step": 20000 }, { "epoch": 1.4865105908584169, "grad_norm": 1.4201676535635028, "learning_rate": 1.0631044241231588e-05, "loss": 0.3718, "step": 20001 }, { "epoch": 1.4865849126718693, "grad_norm": 2.8856936481888105, "learning_rate": 1.0630243470125569e-05, "loss": 0.5393, "step": 20002 }, { "epoch": 1.4866592344853213, "grad_norm": 1.8643815515911495, "learning_rate": 1.0629442694962075e-05, "loss": 0.5376, "step": 20003 }, { "epoch": 1.4867335562987738, "grad_norm": 2.4246920084355326, "learning_rate": 1.0628641915746261e-05, "loss": 0.5195, "step": 20004 }, { "epoch": 1.4868078781122258, "grad_norm": 1.8077766437808518, "learning_rate": 1.062784113248328e-05, "loss": 0.636, "step": 20005 }, { "epoch": 1.4868821999256783, "grad_norm": 2.0894563629613905, "learning_rate": 1.0627040345178297e-05, "loss": 0.5993, "step": 20006 }, { "epoch": 1.4869565217391305, "grad_norm": 1.8532680529945567, "learning_rate": 1.0626239553836457e-05, "loss": 0.5753, "step": 20007 }, { "epoch": 1.4870308435525827, "grad_norm": 1.5588391863843112, "learning_rate": 1.062543875846292e-05, "loss": 0.4789, "step": 20008 }, { "epoch": 1.487105165366035, "grad_norm": 2.0206971984293727, "learning_rate": 1.062463795906284e-05, "loss": 0.6972, "step": 20009 }, { "epoch": 1.4871794871794872, "grad_norm": 1.8334298616779838, "learning_rate": 1.0623837155641372e-05, "loss": 0.5993, "step": 20010 }, { "epoch": 1.4872538089929395, "grad_norm": 2.0171989216254773, "learning_rate": 1.0623036348203674e-05, "loss": 0.7006, "step": 20011 }, { "epoch": 1.4873281308063917, "grad_norm": 2.1652084004560366, "learning_rate": 1.0622235536754903e-05, "loss": 0.5955, "step": 20012 }, { "epoch": 1.487402452619844, "grad_norm": 2.3952009368797604, "learning_rate": 1.062143472130021e-05, "loss": 0.7155, "step": 20013 }, { "epoch": 1.4874767744332962, "grad_norm": 2.229719828775684, "learning_rate": 1.0620633901844752e-05, "loss": 0.5204, "step": 20014 }, { "epoch": 1.4875510962467484, "grad_norm": 1.8714781102557774, "learning_rate": 1.0619833078393685e-05, "loss": 0.4623, "step": 20015 }, { "epoch": 1.4876254180602007, "grad_norm": 1.8881590607489902, "learning_rate": 1.0619032250952164e-05, "loss": 0.5286, "step": 20016 }, { "epoch": 1.487699739873653, "grad_norm": 2.0157236394538547, "learning_rate": 1.0618231419525345e-05, "loss": 0.6115, "step": 20017 }, { "epoch": 1.4877740616871051, "grad_norm": 2.1265444386916865, "learning_rate": 1.0617430584118387e-05, "loss": 0.7185, "step": 20018 }, { "epoch": 1.4878483835005574, "grad_norm": 1.819653066360309, "learning_rate": 1.0616629744736445e-05, "loss": 0.4877, "step": 20019 }, { "epoch": 1.4879227053140096, "grad_norm": 1.9403003482275996, "learning_rate": 1.0615828901384669e-05, "loss": 0.6441, "step": 20020 }, { "epoch": 1.4879970271274618, "grad_norm": 1.8395124420523745, "learning_rate": 1.0615028054068218e-05, "loss": 0.6026, "step": 20021 }, { "epoch": 1.488071348940914, "grad_norm": 1.876833072867352, "learning_rate": 1.0614227202792247e-05, "loss": 0.7533, "step": 20022 }, { "epoch": 1.4881456707543665, "grad_norm": 1.6210350984295148, "learning_rate": 1.0613426347561916e-05, "loss": 0.4533, "step": 20023 }, { "epoch": 1.4882199925678186, "grad_norm": 1.7131791685587716, "learning_rate": 1.061262548838238e-05, "loss": 0.4508, "step": 20024 }, { "epoch": 1.488294314381271, "grad_norm": 2.1009819288233293, "learning_rate": 1.061182462525879e-05, "loss": 0.5578, "step": 20025 }, { "epoch": 1.488368636194723, "grad_norm": 2.301918385277461, "learning_rate": 1.0611023758196301e-05, "loss": 0.5559, "step": 20026 }, { "epoch": 1.4884429580081755, "grad_norm": 2.1690404906772844, "learning_rate": 1.0610222887200077e-05, "loss": 0.5611, "step": 20027 }, { "epoch": 1.4885172798216275, "grad_norm": 2.371507254290406, "learning_rate": 1.060942201227527e-05, "loss": 0.6065, "step": 20028 }, { "epoch": 1.48859160163508, "grad_norm": 2.219773858514378, "learning_rate": 1.0608621133427035e-05, "loss": 0.6775, "step": 20029 }, { "epoch": 1.4886659234485322, "grad_norm": 1.9350101593659819, "learning_rate": 1.0607820250660532e-05, "loss": 0.5867, "step": 20030 }, { "epoch": 1.4887402452619845, "grad_norm": 3.018842709264651, "learning_rate": 1.0607019363980908e-05, "loss": 0.6868, "step": 20031 }, { "epoch": 1.4888145670754367, "grad_norm": 1.7156696414575505, "learning_rate": 1.0606218473393329e-05, "loss": 0.5666, "step": 20032 }, { "epoch": 1.488888888888889, "grad_norm": 2.5272391806410517, "learning_rate": 1.0605417578902943e-05, "loss": 0.7487, "step": 20033 }, { "epoch": 1.4889632107023412, "grad_norm": 2.101856716318656, "learning_rate": 1.0604616680514913e-05, "loss": 0.5218, "step": 20034 }, { "epoch": 1.4890375325157934, "grad_norm": 1.5117258873526058, "learning_rate": 1.0603815778234395e-05, "loss": 0.4284, "step": 20035 }, { "epoch": 1.4891118543292456, "grad_norm": 2.1337442701745024, "learning_rate": 1.0603014872066537e-05, "loss": 0.6804, "step": 20036 }, { "epoch": 1.4891861761426979, "grad_norm": 1.9892008308286626, "learning_rate": 1.0602213962016503e-05, "loss": 0.5812, "step": 20037 }, { "epoch": 1.4892604979561501, "grad_norm": 1.65978537308322, "learning_rate": 1.0601413048089444e-05, "loss": 0.4678, "step": 20038 }, { "epoch": 1.4893348197696024, "grad_norm": 1.8706848199285722, "learning_rate": 1.0600612130290523e-05, "loss": 0.6281, "step": 20039 }, { "epoch": 1.4894091415830546, "grad_norm": 2.554984673208274, "learning_rate": 1.059981120862489e-05, "loss": 0.6265, "step": 20040 }, { "epoch": 1.4894834633965068, "grad_norm": 1.8379284259635367, "learning_rate": 1.0599010283097707e-05, "loss": 0.5079, "step": 20041 }, { "epoch": 1.489557785209959, "grad_norm": 1.9147372734075419, "learning_rate": 1.0598209353714122e-05, "loss": 0.553, "step": 20042 }, { "epoch": 1.4896321070234113, "grad_norm": 2.181973967277609, "learning_rate": 1.0597408420479298e-05, "loss": 0.6517, "step": 20043 }, { "epoch": 1.4897064288368635, "grad_norm": 4.175871376098847, "learning_rate": 1.059660748339839e-05, "loss": 0.692, "step": 20044 }, { "epoch": 1.4897807506503158, "grad_norm": 1.9855434355363648, "learning_rate": 1.0595806542476555e-05, "loss": 0.613, "step": 20045 }, { "epoch": 1.4898550724637682, "grad_norm": 2.0998434827449204, "learning_rate": 1.0595005597718948e-05, "loss": 0.7674, "step": 20046 }, { "epoch": 1.4899293942772203, "grad_norm": 1.7214251499563955, "learning_rate": 1.0594204649130723e-05, "loss": 0.5543, "step": 20047 }, { "epoch": 1.4900037160906727, "grad_norm": 1.8945025443339916, "learning_rate": 1.0593403696717041e-05, "loss": 0.5844, "step": 20048 }, { "epoch": 1.4900780379041247, "grad_norm": 1.474155832474279, "learning_rate": 1.0592602740483056e-05, "loss": 0.5373, "step": 20049 }, { "epoch": 1.4901523597175772, "grad_norm": 1.85250035187644, "learning_rate": 1.0591801780433925e-05, "loss": 0.603, "step": 20050 }, { "epoch": 1.4902266815310294, "grad_norm": 2.561826926448025, "learning_rate": 1.0591000816574809e-05, "loss": 0.6968, "step": 20051 }, { "epoch": 1.4903010033444817, "grad_norm": 2.138621660403447, "learning_rate": 1.0590199848910858e-05, "loss": 0.7103, "step": 20052 }, { "epoch": 1.490375325157934, "grad_norm": 2.1268490777630205, "learning_rate": 1.058939887744723e-05, "loss": 0.5702, "step": 20053 }, { "epoch": 1.4904496469713862, "grad_norm": 1.8598778133242557, "learning_rate": 1.0588597902189084e-05, "loss": 0.6546, "step": 20054 }, { "epoch": 1.4905239687848384, "grad_norm": 1.9690937980841738, "learning_rate": 1.0587796923141573e-05, "loss": 0.6598, "step": 20055 }, { "epoch": 1.4905982905982906, "grad_norm": 1.897481377336787, "learning_rate": 1.0586995940309859e-05, "loss": 0.6337, "step": 20056 }, { "epoch": 1.4906726124117429, "grad_norm": 1.7631230598070993, "learning_rate": 1.0586194953699094e-05, "loss": 0.578, "step": 20057 }, { "epoch": 1.490746934225195, "grad_norm": 1.9962466550785332, "learning_rate": 1.0585393963314436e-05, "loss": 0.6899, "step": 20058 }, { "epoch": 1.4908212560386473, "grad_norm": 2.0124980662759926, "learning_rate": 1.0584592969161043e-05, "loss": 0.6404, "step": 20059 }, { "epoch": 1.4908955778520996, "grad_norm": 2.4951151148703308, "learning_rate": 1.0583791971244068e-05, "loss": 0.5907, "step": 20060 }, { "epoch": 1.4909698996655518, "grad_norm": 1.8369481974356328, "learning_rate": 1.0582990969568672e-05, "loss": 0.5777, "step": 20061 }, { "epoch": 1.491044221479004, "grad_norm": 1.7593381339794636, "learning_rate": 1.0582189964140013e-05, "loss": 0.6325, "step": 20062 }, { "epoch": 1.4911185432924563, "grad_norm": 2.444353679073105, "learning_rate": 1.0581388954963245e-05, "loss": 0.7646, "step": 20063 }, { "epoch": 1.4911928651059085, "grad_norm": 2.058629781677928, "learning_rate": 1.0580587942043523e-05, "loss": 0.5783, "step": 20064 }, { "epoch": 1.4912671869193608, "grad_norm": 2.058783403182805, "learning_rate": 1.0579786925386004e-05, "loss": 0.5646, "step": 20065 }, { "epoch": 1.491341508732813, "grad_norm": 1.9165224327943724, "learning_rate": 1.057898590499585e-05, "loss": 0.5323, "step": 20066 }, { "epoch": 1.4914158305462653, "grad_norm": 2.262681308677972, "learning_rate": 1.0578184880878215e-05, "loss": 0.641, "step": 20067 }, { "epoch": 1.4914901523597175, "grad_norm": 3.9708952223812264, "learning_rate": 1.0577383853038257e-05, "loss": 0.5394, "step": 20068 }, { "epoch": 1.49156447417317, "grad_norm": 1.9081299733363983, "learning_rate": 1.0576582821481126e-05, "loss": 0.5761, "step": 20069 }, { "epoch": 1.491638795986622, "grad_norm": 2.0600587454883326, "learning_rate": 1.057578178621199e-05, "loss": 0.8016, "step": 20070 }, { "epoch": 1.4917131178000744, "grad_norm": 2.246621304657688, "learning_rate": 1.0574980747235996e-05, "loss": 0.6368, "step": 20071 }, { "epoch": 1.4917874396135264, "grad_norm": 2.018932433941309, "learning_rate": 1.057417970455831e-05, "loss": 0.6396, "step": 20072 }, { "epoch": 1.491861761426979, "grad_norm": 2.1353608090012317, "learning_rate": 1.0573378658184083e-05, "loss": 0.6474, "step": 20073 }, { "epoch": 1.4919360832404311, "grad_norm": 2.504415914680115, "learning_rate": 1.0572577608118476e-05, "loss": 0.743, "step": 20074 }, { "epoch": 1.4920104050538834, "grad_norm": 2.2131091437480883, "learning_rate": 1.0571776554366642e-05, "loss": 0.7411, "step": 20075 }, { "epoch": 1.4920847268673356, "grad_norm": 2.2352929243736903, "learning_rate": 1.057097549693374e-05, "loss": 0.6437, "step": 20076 }, { "epoch": 1.4921590486807879, "grad_norm": 2.205818739762947, "learning_rate": 1.0570174435824928e-05, "loss": 0.7067, "step": 20077 }, { "epoch": 1.49223337049424, "grad_norm": 1.9816660039681346, "learning_rate": 1.0569373371045365e-05, "loss": 0.6586, "step": 20078 }, { "epoch": 1.4923076923076923, "grad_norm": 1.5475329928485329, "learning_rate": 1.0568572302600202e-05, "loss": 0.4632, "step": 20079 }, { "epoch": 1.4923820141211446, "grad_norm": 2.011919064650239, "learning_rate": 1.0567771230494604e-05, "loss": 0.4998, "step": 20080 }, { "epoch": 1.4924563359345968, "grad_norm": 1.6913476787848711, "learning_rate": 1.0566970154733721e-05, "loss": 0.6019, "step": 20081 }, { "epoch": 1.492530657748049, "grad_norm": 1.954894381805705, "learning_rate": 1.0566169075322712e-05, "loss": 0.5318, "step": 20082 }, { "epoch": 1.4926049795615013, "grad_norm": 5.0140709653967726, "learning_rate": 1.056536799226674e-05, "loss": 0.5611, "step": 20083 }, { "epoch": 1.4926793013749535, "grad_norm": 2.1247483184127924, "learning_rate": 1.0564566905570955e-05, "loss": 0.6427, "step": 20084 }, { "epoch": 1.4927536231884058, "grad_norm": 2.0776401817015153, "learning_rate": 1.056376581524052e-05, "loss": 0.6862, "step": 20085 }, { "epoch": 1.492827945001858, "grad_norm": 1.6894019128966211, "learning_rate": 1.0562964721280588e-05, "loss": 0.6218, "step": 20086 }, { "epoch": 1.4929022668153102, "grad_norm": 1.9210457507033114, "learning_rate": 1.056216362369632e-05, "loss": 0.5859, "step": 20087 }, { "epoch": 1.4929765886287625, "grad_norm": 2.434880017628337, "learning_rate": 1.0561362522492868e-05, "loss": 0.6876, "step": 20088 }, { "epoch": 1.4930509104422147, "grad_norm": 2.043559836054301, "learning_rate": 1.0560561417675399e-05, "loss": 0.6318, "step": 20089 }, { "epoch": 1.493125232255667, "grad_norm": 2.045675693799369, "learning_rate": 1.0559760309249062e-05, "loss": 0.5906, "step": 20090 }, { "epoch": 1.4931995540691192, "grad_norm": 1.9853943423460245, "learning_rate": 1.055895919721902e-05, "loss": 0.6277, "step": 20091 }, { "epoch": 1.4932738758825717, "grad_norm": 2.6632809562914024, "learning_rate": 1.0558158081590422e-05, "loss": 0.8578, "step": 20092 }, { "epoch": 1.4933481976960237, "grad_norm": 2.1119750246210995, "learning_rate": 1.0557356962368433e-05, "loss": 0.7286, "step": 20093 }, { "epoch": 1.4934225195094761, "grad_norm": 2.143333134304239, "learning_rate": 1.0556555839558212e-05, "loss": 0.6071, "step": 20094 }, { "epoch": 1.4934968413229281, "grad_norm": 1.623034645777809, "learning_rate": 1.055575471316491e-05, "loss": 0.4462, "step": 20095 }, { "epoch": 1.4935711631363806, "grad_norm": 1.5469469187431721, "learning_rate": 1.0554953583193688e-05, "loss": 0.5596, "step": 20096 }, { "epoch": 1.4936454849498328, "grad_norm": 1.7184018019374039, "learning_rate": 1.0554152449649707e-05, "loss": 0.5577, "step": 20097 }, { "epoch": 1.493719806763285, "grad_norm": 2.121565607299953, "learning_rate": 1.0553351312538116e-05, "loss": 0.64, "step": 20098 }, { "epoch": 1.4937941285767373, "grad_norm": 2.3348182765113923, "learning_rate": 1.0552550171864082e-05, "loss": 0.7028, "step": 20099 }, { "epoch": 1.4938684503901896, "grad_norm": 1.983057581782185, "learning_rate": 1.0551749027632758e-05, "loss": 0.5566, "step": 20100 }, { "epoch": 1.4939427722036418, "grad_norm": 2.24372354506336, "learning_rate": 1.0550947879849302e-05, "loss": 0.6586, "step": 20101 }, { "epoch": 1.494017094017094, "grad_norm": 1.6902387569570827, "learning_rate": 1.0550146728518872e-05, "loss": 0.6038, "step": 20102 }, { "epoch": 1.4940914158305463, "grad_norm": 1.9011141746895432, "learning_rate": 1.0549345573646625e-05, "loss": 0.4947, "step": 20103 }, { "epoch": 1.4941657376439985, "grad_norm": 2.2750520368929683, "learning_rate": 1.0548544415237722e-05, "loss": 0.6892, "step": 20104 }, { "epoch": 1.4942400594574508, "grad_norm": 2.1068163621141407, "learning_rate": 1.0547743253297319e-05, "loss": 0.5777, "step": 20105 }, { "epoch": 1.494314381270903, "grad_norm": 1.4068254097355766, "learning_rate": 1.054694208783057e-05, "loss": 0.4453, "step": 20106 }, { "epoch": 1.4943887030843552, "grad_norm": 1.9946602311035841, "learning_rate": 1.0546140918842637e-05, "loss": 0.6766, "step": 20107 }, { "epoch": 1.4944630248978075, "grad_norm": 1.9364599276870302, "learning_rate": 1.054533974633868e-05, "loss": 0.4944, "step": 20108 }, { "epoch": 1.4945373467112597, "grad_norm": 2.20787232713601, "learning_rate": 1.054453857032385e-05, "loss": 0.6878, "step": 20109 }, { "epoch": 1.494611668524712, "grad_norm": 1.6724784526405017, "learning_rate": 1.054373739080331e-05, "loss": 0.5647, "step": 20110 }, { "epoch": 1.4946859903381642, "grad_norm": 2.2679377647622725, "learning_rate": 1.0542936207782219e-05, "loss": 0.6902, "step": 20111 }, { "epoch": 1.4947603121516164, "grad_norm": 2.0374693713411713, "learning_rate": 1.0542135021265732e-05, "loss": 0.6277, "step": 20112 }, { "epoch": 1.4948346339650689, "grad_norm": 2.018855115904062, "learning_rate": 1.0541333831259006e-05, "loss": 0.6022, "step": 20113 }, { "epoch": 1.494908955778521, "grad_norm": 1.9001444685201276, "learning_rate": 1.0540532637767204e-05, "loss": 0.5729, "step": 20114 }, { "epoch": 1.4949832775919734, "grad_norm": 1.804317543940352, "learning_rate": 1.053973144079548e-05, "loss": 0.5093, "step": 20115 }, { "epoch": 1.4950575994054254, "grad_norm": 2.036336075571698, "learning_rate": 1.0538930240348992e-05, "loss": 0.5771, "step": 20116 }, { "epoch": 1.4951319212188778, "grad_norm": 1.787739253380489, "learning_rate": 1.05381290364329e-05, "loss": 0.4942, "step": 20117 }, { "epoch": 1.4952062430323299, "grad_norm": 2.038074896297813, "learning_rate": 1.053732782905236e-05, "loss": 0.6338, "step": 20118 }, { "epoch": 1.4952805648457823, "grad_norm": 2.353745712912912, "learning_rate": 1.0536526618212532e-05, "loss": 0.6494, "step": 20119 }, { "epoch": 1.4953548866592346, "grad_norm": 2.1111123986604814, "learning_rate": 1.0535725403918572e-05, "loss": 0.5993, "step": 20120 }, { "epoch": 1.4954292084726868, "grad_norm": 2.216513171702624, "learning_rate": 1.0534924186175642e-05, "loss": 0.6226, "step": 20121 }, { "epoch": 1.495503530286139, "grad_norm": 1.8786402903636026, "learning_rate": 1.0534122964988897e-05, "loss": 0.5563, "step": 20122 }, { "epoch": 1.4955778520995913, "grad_norm": 1.7852743025227433, "learning_rate": 1.0533321740363495e-05, "loss": 0.4201, "step": 20123 }, { "epoch": 1.4956521739130435, "grad_norm": 2.4000287933932674, "learning_rate": 1.0532520512304596e-05, "loss": 0.5984, "step": 20124 }, { "epoch": 1.4957264957264957, "grad_norm": 2.480307344656133, "learning_rate": 1.0531719280817361e-05, "loss": 0.6658, "step": 20125 }, { "epoch": 1.495800817539948, "grad_norm": 1.9265771291272473, "learning_rate": 1.053091804590694e-05, "loss": 0.5393, "step": 20126 }, { "epoch": 1.4958751393534002, "grad_norm": 1.9612220650940964, "learning_rate": 1.0530116807578498e-05, "loss": 0.556, "step": 20127 }, { "epoch": 1.4959494611668525, "grad_norm": 1.947993008330489, "learning_rate": 1.0529315565837193e-05, "loss": 0.6636, "step": 20128 }, { "epoch": 1.4960237829803047, "grad_norm": 1.884341029017746, "learning_rate": 1.0528514320688179e-05, "loss": 0.6121, "step": 20129 }, { "epoch": 1.496098104793757, "grad_norm": 1.8522092118588915, "learning_rate": 1.052771307213662e-05, "loss": 0.5318, "step": 20130 }, { "epoch": 1.4961724266072092, "grad_norm": 2.001278430387235, "learning_rate": 1.0526911820187669e-05, "loss": 0.6102, "step": 20131 }, { "epoch": 1.4962467484206614, "grad_norm": 2.3802597680385182, "learning_rate": 1.0526110564846487e-05, "loss": 0.7352, "step": 20132 }, { "epoch": 1.4963210702341136, "grad_norm": 1.7405171641092942, "learning_rate": 1.0525309306118234e-05, "loss": 0.5903, "step": 20133 }, { "epoch": 1.4963953920475659, "grad_norm": 1.6972775098604846, "learning_rate": 1.0524508044008065e-05, "loss": 0.4841, "step": 20134 }, { "epoch": 1.4964697138610181, "grad_norm": 1.753534335013393, "learning_rate": 1.0523706778521143e-05, "loss": 0.5114, "step": 20135 }, { "epoch": 1.4965440356744706, "grad_norm": 2.0637836624657795, "learning_rate": 1.0522905509662624e-05, "loss": 0.6675, "step": 20136 }, { "epoch": 1.4966183574879226, "grad_norm": 2.677904575722304, "learning_rate": 1.0522104237437665e-05, "loss": 0.4826, "step": 20137 }, { "epoch": 1.496692679301375, "grad_norm": 2.376579038536748, "learning_rate": 1.0521302961851427e-05, "loss": 0.617, "step": 20138 }, { "epoch": 1.496767001114827, "grad_norm": 2.2384802648072357, "learning_rate": 1.0520501682909068e-05, "loss": 0.681, "step": 20139 }, { "epoch": 1.4968413229282795, "grad_norm": 2.2912143716662037, "learning_rate": 1.0519700400615742e-05, "loss": 0.6322, "step": 20140 }, { "epoch": 1.4969156447417318, "grad_norm": 1.6549889349029256, "learning_rate": 1.0518899114976616e-05, "loss": 0.4912, "step": 20141 }, { "epoch": 1.496989966555184, "grad_norm": 1.8197397435983205, "learning_rate": 1.0518097825996845e-05, "loss": 0.4425, "step": 20142 }, { "epoch": 1.4970642883686363, "grad_norm": 1.9777106025551194, "learning_rate": 1.0517296533681585e-05, "loss": 0.5918, "step": 20143 }, { "epoch": 1.4971386101820885, "grad_norm": 2.117824952196721, "learning_rate": 1.0516495238035997e-05, "loss": 0.7273, "step": 20144 }, { "epoch": 1.4972129319955407, "grad_norm": 1.6461999709496964, "learning_rate": 1.051569393906524e-05, "loss": 0.466, "step": 20145 }, { "epoch": 1.497287253808993, "grad_norm": 1.689749938945189, "learning_rate": 1.051489263677447e-05, "loss": 0.4743, "step": 20146 }, { "epoch": 1.4973615756224452, "grad_norm": 1.821708873444145, "learning_rate": 1.0514091331168852e-05, "loss": 0.6338, "step": 20147 }, { "epoch": 1.4974358974358974, "grad_norm": 1.9927255618776987, "learning_rate": 1.0513290022253537e-05, "loss": 0.5971, "step": 20148 }, { "epoch": 1.4975102192493497, "grad_norm": 2.03364227195886, "learning_rate": 1.051248871003369e-05, "loss": 0.6825, "step": 20149 }, { "epoch": 1.497584541062802, "grad_norm": 4.752687350712248, "learning_rate": 1.0511687394514464e-05, "loss": 0.6474, "step": 20150 }, { "epoch": 1.4976588628762542, "grad_norm": 1.7501189717605588, "learning_rate": 1.0510886075701023e-05, "loss": 0.5703, "step": 20151 }, { "epoch": 1.4977331846897064, "grad_norm": 2.191799064215819, "learning_rate": 1.0510084753598524e-05, "loss": 0.7574, "step": 20152 }, { "epoch": 1.4978075065031586, "grad_norm": 1.8613395752089779, "learning_rate": 1.0509283428212127e-05, "loss": 0.6155, "step": 20153 }, { "epoch": 1.4978818283166109, "grad_norm": 1.742906291714665, "learning_rate": 1.0508482099546988e-05, "loss": 0.6251, "step": 20154 }, { "epoch": 1.4979561501300631, "grad_norm": 1.6444730975975341, "learning_rate": 1.050768076760827e-05, "loss": 0.552, "step": 20155 }, { "epoch": 1.4980304719435154, "grad_norm": 2.1425788084625847, "learning_rate": 1.0506879432401128e-05, "loss": 0.4962, "step": 20156 }, { "epoch": 1.4981047937569676, "grad_norm": 1.9148490963274916, "learning_rate": 1.050607809393072e-05, "loss": 0.5976, "step": 20157 }, { "epoch": 1.4981791155704198, "grad_norm": 1.843461301850863, "learning_rate": 1.0505276752202212e-05, "loss": 0.5326, "step": 20158 }, { "epoch": 1.4982534373838723, "grad_norm": 1.8050414414521245, "learning_rate": 1.0504475407220755e-05, "loss": 0.3899, "step": 20159 }, { "epoch": 1.4983277591973243, "grad_norm": 1.670140788399905, "learning_rate": 1.0503674058991516e-05, "loss": 0.5556, "step": 20160 }, { "epoch": 1.4984020810107768, "grad_norm": 2.479258063237877, "learning_rate": 1.0502872707519645e-05, "loss": 0.5901, "step": 20161 }, { "epoch": 1.4984764028242288, "grad_norm": 2.0612841224512533, "learning_rate": 1.0502071352810306e-05, "loss": 0.6145, "step": 20162 }, { "epoch": 1.4985507246376812, "grad_norm": 1.8238834301863676, "learning_rate": 1.050126999486866e-05, "loss": 0.504, "step": 20163 }, { "epoch": 1.4986250464511335, "grad_norm": 2.1321213883391463, "learning_rate": 1.0500468633699862e-05, "loss": 0.6021, "step": 20164 }, { "epoch": 1.4986993682645857, "grad_norm": 2.512896647736914, "learning_rate": 1.0499667269309072e-05, "loss": 0.6621, "step": 20165 }, { "epoch": 1.498773690078038, "grad_norm": 1.895937382349013, "learning_rate": 1.0498865901701455e-05, "loss": 0.5546, "step": 20166 }, { "epoch": 1.4988480118914902, "grad_norm": 1.7113643681043158, "learning_rate": 1.0498064530882159e-05, "loss": 0.6962, "step": 20167 }, { "epoch": 1.4989223337049424, "grad_norm": 2.2117230158759593, "learning_rate": 1.0497263156856352e-05, "loss": 0.573, "step": 20168 }, { "epoch": 1.4989966555183947, "grad_norm": 1.5964507010618003, "learning_rate": 1.049646177962919e-05, "loss": 0.3897, "step": 20169 }, { "epoch": 1.499070977331847, "grad_norm": 2.0371018631049957, "learning_rate": 1.0495660399205836e-05, "loss": 0.5988, "step": 20170 }, { "epoch": 1.4991452991452991, "grad_norm": 1.497771687169605, "learning_rate": 1.0494859015591443e-05, "loss": 0.4653, "step": 20171 }, { "epoch": 1.4992196209587514, "grad_norm": 2.188452314260586, "learning_rate": 1.0494057628791174e-05, "loss": 0.653, "step": 20172 }, { "epoch": 1.4992939427722036, "grad_norm": 1.8804158519258471, "learning_rate": 1.0493256238810189e-05, "loss": 0.5587, "step": 20173 }, { "epoch": 1.4993682645856559, "grad_norm": 2.378673871115605, "learning_rate": 1.0492454845653641e-05, "loss": 0.5875, "step": 20174 }, { "epoch": 1.499442586399108, "grad_norm": 1.877315378277608, "learning_rate": 1.0491653449326697e-05, "loss": 0.5237, "step": 20175 }, { "epoch": 1.4995169082125603, "grad_norm": 1.699786401647662, "learning_rate": 1.0490852049834516e-05, "loss": 0.6264, "step": 20176 }, { "epoch": 1.4995912300260126, "grad_norm": 2.459778511916302, "learning_rate": 1.0490050647182255e-05, "loss": 0.803, "step": 20177 }, { "epoch": 1.4996655518394648, "grad_norm": 1.8847197834760427, "learning_rate": 1.0489249241375071e-05, "loss": 0.6669, "step": 20178 }, { "epoch": 1.499739873652917, "grad_norm": 1.6607780884734027, "learning_rate": 1.0488447832418125e-05, "loss": 0.5918, "step": 20179 }, { "epoch": 1.4998141954663695, "grad_norm": 1.696243401173749, "learning_rate": 1.0487646420316578e-05, "loss": 0.5105, "step": 20180 }, { "epoch": 1.4998885172798215, "grad_norm": 2.366009148339107, "learning_rate": 1.0486845005075592e-05, "loss": 0.6378, "step": 20181 }, { "epoch": 1.499962839093274, "grad_norm": 1.8406171853137674, "learning_rate": 1.048604358670032e-05, "loss": 0.6282, "step": 20182 }, { "epoch": 1.500037160906726, "grad_norm": 1.622822117736469, "learning_rate": 1.0485242165195925e-05, "loss": 0.5219, "step": 20183 }, { "epoch": 1.5001114827201785, "grad_norm": 1.647953649589093, "learning_rate": 1.0484440740567565e-05, "loss": 0.5176, "step": 20184 }, { "epoch": 1.5001858045336305, "grad_norm": 1.5615985422828618, "learning_rate": 1.0483639312820402e-05, "loss": 0.5, "step": 20185 }, { "epoch": 1.500260126347083, "grad_norm": 2.0653131179890196, "learning_rate": 1.0482837881959594e-05, "loss": 0.7491, "step": 20186 }, { "epoch": 1.500334448160535, "grad_norm": 2.06466318041116, "learning_rate": 1.04820364479903e-05, "loss": 0.545, "step": 20187 }, { "epoch": 1.5004087699739874, "grad_norm": 1.6238946803144845, "learning_rate": 1.0481235010917683e-05, "loss": 0.3926, "step": 20188 }, { "epoch": 1.5004830917874397, "grad_norm": 2.0742560352701926, "learning_rate": 1.0480433570746898e-05, "loss": 0.6319, "step": 20189 }, { "epoch": 1.500557413600892, "grad_norm": 2.1429172536945345, "learning_rate": 1.0479632127483107e-05, "loss": 0.6674, "step": 20190 }, { "epoch": 1.5006317354143441, "grad_norm": 1.8739477188980767, "learning_rate": 1.0478830681131467e-05, "loss": 0.5289, "step": 20191 }, { "epoch": 1.5007060572277964, "grad_norm": 1.7201888511821515, "learning_rate": 1.0478029231697143e-05, "loss": 0.6121, "step": 20192 }, { "epoch": 1.5007803790412486, "grad_norm": 1.9444591611638327, "learning_rate": 1.047722777918529e-05, "loss": 0.5208, "step": 20193 }, { "epoch": 1.5008547008547009, "grad_norm": 1.9560528100365562, "learning_rate": 1.047642632360107e-05, "loss": 0.6896, "step": 20194 }, { "epoch": 1.500929022668153, "grad_norm": 2.98511459258977, "learning_rate": 1.0475624864949641e-05, "loss": 0.7232, "step": 20195 }, { "epoch": 1.5010033444816053, "grad_norm": 2.257126262379232, "learning_rate": 1.0474823403236165e-05, "loss": 0.6137, "step": 20196 }, { "epoch": 1.5010776662950576, "grad_norm": 2.065179880044061, "learning_rate": 1.04740219384658e-05, "loss": 0.599, "step": 20197 }, { "epoch": 1.5011519881085098, "grad_norm": 1.94028198027622, "learning_rate": 1.0473220470643706e-05, "loss": 0.6527, "step": 20198 }, { "epoch": 1.501226309921962, "grad_norm": 1.4999707265172912, "learning_rate": 1.0472418999775045e-05, "loss": 0.4499, "step": 20199 }, { "epoch": 1.5013006317354143, "grad_norm": 1.637651846960631, "learning_rate": 1.0471617525864974e-05, "loss": 0.4414, "step": 20200 }, { "epoch": 1.5013749535488667, "grad_norm": 1.8903028646198576, "learning_rate": 1.0470816048918653e-05, "loss": 0.3869, "step": 20201 }, { "epoch": 1.5014492753623188, "grad_norm": 1.9460717163930845, "learning_rate": 1.0470014568941243e-05, "loss": 0.6707, "step": 20202 }, { "epoch": 1.5015235971757712, "grad_norm": 2.316038046780081, "learning_rate": 1.0469213085937903e-05, "loss": 0.4754, "step": 20203 }, { "epoch": 1.5015979189892232, "grad_norm": 1.6978921582243756, "learning_rate": 1.0468411599913797e-05, "loss": 0.6324, "step": 20204 }, { "epoch": 1.5016722408026757, "grad_norm": 2.3553185382369364, "learning_rate": 1.0467610110874079e-05, "loss": 0.8829, "step": 20205 }, { "epoch": 1.5017465626161277, "grad_norm": 2.10367017965797, "learning_rate": 1.0466808618823909e-05, "loss": 0.6641, "step": 20206 }, { "epoch": 1.5018208844295802, "grad_norm": 1.8705770863634323, "learning_rate": 1.0466007123768451e-05, "loss": 0.5761, "step": 20207 }, { "epoch": 1.5018952062430322, "grad_norm": 1.7864424249041961, "learning_rate": 1.0465205625712864e-05, "loss": 0.4139, "step": 20208 }, { "epoch": 1.5019695280564846, "grad_norm": 1.8324042514221786, "learning_rate": 1.0464404124662306e-05, "loss": 0.5561, "step": 20209 }, { "epoch": 1.5020438498699367, "grad_norm": 2.4679208230939103, "learning_rate": 1.0463602620621941e-05, "loss": 0.7382, "step": 20210 }, { "epoch": 1.5021181716833891, "grad_norm": 1.7624724956299478, "learning_rate": 1.0462801113596925e-05, "loss": 0.5121, "step": 20211 }, { "epoch": 1.5021924934968414, "grad_norm": 1.7212030545553656, "learning_rate": 1.0461999603592419e-05, "loss": 0.4848, "step": 20212 }, { "epoch": 1.5022668153102936, "grad_norm": 2.1893458907879966, "learning_rate": 1.0461198090613583e-05, "loss": 0.6102, "step": 20213 }, { "epoch": 1.5023411371237458, "grad_norm": 2.274423008618923, "learning_rate": 1.046039657466558e-05, "loss": 0.6049, "step": 20214 }, { "epoch": 1.502415458937198, "grad_norm": 2.012879580181633, "learning_rate": 1.0459595055753566e-05, "loss": 0.5327, "step": 20215 }, { "epoch": 1.5024897807506503, "grad_norm": 1.8848414829688833, "learning_rate": 1.0458793533882703e-05, "loss": 0.4928, "step": 20216 }, { "epoch": 1.5025641025641026, "grad_norm": 3.0506361805930737, "learning_rate": 1.045799200905815e-05, "loss": 0.5741, "step": 20217 }, { "epoch": 1.5026384243775548, "grad_norm": 2.2159390615038075, "learning_rate": 1.0457190481285069e-05, "loss": 0.6954, "step": 20218 }, { "epoch": 1.502712746191007, "grad_norm": 1.5783542664297667, "learning_rate": 1.0456388950568621e-05, "loss": 0.5042, "step": 20219 }, { "epoch": 1.5027870680044593, "grad_norm": 1.6764552466170144, "learning_rate": 1.0455587416913962e-05, "loss": 0.5132, "step": 20220 }, { "epoch": 1.5028613898179115, "grad_norm": 2.0009374185098276, "learning_rate": 1.0454785880326257e-05, "loss": 0.6358, "step": 20221 }, { "epoch": 1.502935711631364, "grad_norm": 2.172771701588, "learning_rate": 1.0453984340810664e-05, "loss": 0.6728, "step": 20222 }, { "epoch": 1.503010033444816, "grad_norm": 1.7976976026915223, "learning_rate": 1.045318279837234e-05, "loss": 0.4677, "step": 20223 }, { "epoch": 1.5030843552582684, "grad_norm": 2.104358148948427, "learning_rate": 1.045238125301645e-05, "loss": 0.6688, "step": 20224 }, { "epoch": 1.5031586770717205, "grad_norm": 1.8256735994377775, "learning_rate": 1.0451579704748155e-05, "loss": 0.5766, "step": 20225 }, { "epoch": 1.503232998885173, "grad_norm": 1.9899418680171912, "learning_rate": 1.0450778153572616e-05, "loss": 0.5919, "step": 20226 }, { "epoch": 1.503307320698625, "grad_norm": 1.9631429845977175, "learning_rate": 1.0449976599494986e-05, "loss": 0.7414, "step": 20227 }, { "epoch": 1.5033816425120774, "grad_norm": 1.815358377528544, "learning_rate": 1.044917504252043e-05, "loss": 0.592, "step": 20228 }, { "epoch": 1.5034559643255294, "grad_norm": 1.858141939570428, "learning_rate": 1.0448373482654108e-05, "loss": 0.5572, "step": 20229 }, { "epoch": 1.5035302861389819, "grad_norm": 1.924573631842948, "learning_rate": 1.0447571919901185e-05, "loss": 0.62, "step": 20230 }, { "epoch": 1.503604607952434, "grad_norm": 1.6857053208639503, "learning_rate": 1.0446770354266813e-05, "loss": 0.4608, "step": 20231 }, { "epoch": 1.5036789297658864, "grad_norm": 2.0132861995043294, "learning_rate": 1.044596878575616e-05, "loss": 0.5458, "step": 20232 }, { "epoch": 1.5037532515793386, "grad_norm": 2.1222937104398443, "learning_rate": 1.044516721437438e-05, "loss": 0.5702, "step": 20233 }, { "epoch": 1.5038275733927908, "grad_norm": 1.992012912708547, "learning_rate": 1.0444365640126638e-05, "loss": 0.6004, "step": 20234 }, { "epoch": 1.503901895206243, "grad_norm": 1.9818009764520494, "learning_rate": 1.0443564063018092e-05, "loss": 0.6091, "step": 20235 }, { "epoch": 1.5039762170196953, "grad_norm": 2.296678649736182, "learning_rate": 1.0442762483053905e-05, "loss": 0.7529, "step": 20236 }, { "epoch": 1.5040505388331475, "grad_norm": 1.8387741296815538, "learning_rate": 1.0441960900239237e-05, "loss": 0.5185, "step": 20237 }, { "epoch": 1.5041248606465998, "grad_norm": 2.159395670868334, "learning_rate": 1.0441159314579245e-05, "loss": 0.615, "step": 20238 }, { "epoch": 1.504199182460052, "grad_norm": 1.5018412116218671, "learning_rate": 1.0440357726079094e-05, "loss": 0.5314, "step": 20239 }, { "epoch": 1.5042735042735043, "grad_norm": 1.7773565297350877, "learning_rate": 1.0439556134743942e-05, "loss": 0.6105, "step": 20240 }, { "epoch": 1.5043478260869565, "grad_norm": 1.9980650628465018, "learning_rate": 1.0438754540578951e-05, "loss": 0.6485, "step": 20241 }, { "epoch": 1.5044221479004087, "grad_norm": 1.992989261488744, "learning_rate": 1.043795294358928e-05, "loss": 0.584, "step": 20242 }, { "epoch": 1.504496469713861, "grad_norm": 2.1979229030264107, "learning_rate": 1.0437151343780092e-05, "loss": 0.6344, "step": 20243 }, { "epoch": 1.5045707915273132, "grad_norm": 1.8594874572918987, "learning_rate": 1.0436349741156546e-05, "loss": 0.6023, "step": 20244 }, { "epoch": 1.5046451133407657, "grad_norm": 2.407442720902496, "learning_rate": 1.04355481357238e-05, "loss": 0.577, "step": 20245 }, { "epoch": 1.5047194351542177, "grad_norm": 2.1557391767129293, "learning_rate": 1.0434746527487021e-05, "loss": 0.5482, "step": 20246 }, { "epoch": 1.5047937569676701, "grad_norm": 1.6554482864444413, "learning_rate": 1.0433944916451369e-05, "loss": 0.5462, "step": 20247 }, { "epoch": 1.5048680787811222, "grad_norm": 1.7348069341773946, "learning_rate": 1.0433143302621998e-05, "loss": 0.4412, "step": 20248 }, { "epoch": 1.5049424005945746, "grad_norm": 2.144007982600762, "learning_rate": 1.0432341686004075e-05, "loss": 0.6696, "step": 20249 }, { "epoch": 1.5050167224080266, "grad_norm": 2.1755025693180863, "learning_rate": 1.0431540066602757e-05, "loss": 0.7094, "step": 20250 }, { "epoch": 1.505091044221479, "grad_norm": 2.2844761559839535, "learning_rate": 1.0430738444423206e-05, "loss": 0.6487, "step": 20251 }, { "epoch": 1.5051653660349311, "grad_norm": 1.797327005503, "learning_rate": 1.0429936819470585e-05, "loss": 0.5206, "step": 20252 }, { "epoch": 1.5052396878483836, "grad_norm": 1.9124414536067194, "learning_rate": 1.042913519175005e-05, "loss": 0.5559, "step": 20253 }, { "epoch": 1.5053140096618356, "grad_norm": 2.2849902259082886, "learning_rate": 1.0428333561266769e-05, "loss": 0.5768, "step": 20254 }, { "epoch": 1.505388331475288, "grad_norm": 1.6381915297056302, "learning_rate": 1.0427531928025898e-05, "loss": 0.4358, "step": 20255 }, { "epoch": 1.5054626532887403, "grad_norm": 1.8356342082297585, "learning_rate": 1.0426730292032596e-05, "loss": 0.5999, "step": 20256 }, { "epoch": 1.5055369751021925, "grad_norm": 1.824706277606424, "learning_rate": 1.0425928653292027e-05, "loss": 0.598, "step": 20257 }, { "epoch": 1.5056112969156448, "grad_norm": 1.8845316175458224, "learning_rate": 1.0425127011809355e-05, "loss": 0.4129, "step": 20258 }, { "epoch": 1.505685618729097, "grad_norm": 1.8012811664308823, "learning_rate": 1.0424325367589734e-05, "loss": 0.4606, "step": 20259 }, { "epoch": 1.5057599405425492, "grad_norm": 1.7727640208402, "learning_rate": 1.0423523720638329e-05, "loss": 0.5653, "step": 20260 }, { "epoch": 1.5058342623560015, "grad_norm": 3.407984851657093, "learning_rate": 1.04227220709603e-05, "loss": 0.873, "step": 20261 }, { "epoch": 1.5059085841694537, "grad_norm": 2.1614892148688525, "learning_rate": 1.042192041856081e-05, "loss": 0.6772, "step": 20262 }, { "epoch": 1.505982905982906, "grad_norm": 2.082760601563865, "learning_rate": 1.0421118763445017e-05, "loss": 0.7018, "step": 20263 }, { "epoch": 1.5060572277963582, "grad_norm": 2.0612278836360334, "learning_rate": 1.042031710561808e-05, "loss": 0.5392, "step": 20264 }, { "epoch": 1.5061315496098104, "grad_norm": 2.0691658954558125, "learning_rate": 1.0419515445085167e-05, "loss": 0.607, "step": 20265 }, { "epoch": 1.5062058714232627, "grad_norm": 1.9232487642708773, "learning_rate": 1.0418713781851437e-05, "loss": 0.495, "step": 20266 }, { "epoch": 1.506280193236715, "grad_norm": 1.7310639259452374, "learning_rate": 1.0417912115922046e-05, "loss": 0.563, "step": 20267 }, { "epoch": 1.5063545150501674, "grad_norm": 2.0595545894054617, "learning_rate": 1.041711044730216e-05, "loss": 0.5583, "step": 20268 }, { "epoch": 1.5064288368636194, "grad_norm": 3.54370990964, "learning_rate": 1.041630877599694e-05, "loss": 0.6174, "step": 20269 }, { "epoch": 1.5065031586770719, "grad_norm": 1.9030472254055921, "learning_rate": 1.0415507102011542e-05, "loss": 0.7535, "step": 20270 }, { "epoch": 1.5065774804905239, "grad_norm": 1.840136895215782, "learning_rate": 1.0414705425351133e-05, "loss": 0.4412, "step": 20271 }, { "epoch": 1.5066518023039763, "grad_norm": 1.9989726908516074, "learning_rate": 1.0413903746020872e-05, "loss": 0.6053, "step": 20272 }, { "epoch": 1.5067261241174283, "grad_norm": 1.7948229584698123, "learning_rate": 1.041310206402592e-05, "loss": 0.5622, "step": 20273 }, { "epoch": 1.5068004459308808, "grad_norm": 2.145694407362821, "learning_rate": 1.0412300379371438e-05, "loss": 0.5453, "step": 20274 }, { "epoch": 1.5068747677443328, "grad_norm": 2.0454113542824817, "learning_rate": 1.0411498692062587e-05, "loss": 0.5362, "step": 20275 }, { "epoch": 1.5069490895577853, "grad_norm": 2.83257734938394, "learning_rate": 1.041069700210453e-05, "loss": 0.5001, "step": 20276 }, { "epoch": 1.5070234113712373, "grad_norm": 1.9953120193832374, "learning_rate": 1.0409895309502427e-05, "loss": 0.6722, "step": 20277 }, { "epoch": 1.5070977331846898, "grad_norm": 2.2185444296067396, "learning_rate": 1.0409093614261438e-05, "loss": 0.5666, "step": 20278 }, { "epoch": 1.507172054998142, "grad_norm": 1.584774758308757, "learning_rate": 1.0408291916386726e-05, "loss": 0.4874, "step": 20279 }, { "epoch": 1.5072463768115942, "grad_norm": 1.8316479889474926, "learning_rate": 1.0407490215883454e-05, "loss": 0.5538, "step": 20280 }, { "epoch": 1.5073206986250465, "grad_norm": 1.8253829032933129, "learning_rate": 1.0406688512756777e-05, "loss": 0.5572, "step": 20281 }, { "epoch": 1.5073950204384987, "grad_norm": 4.289516431608285, "learning_rate": 1.0405886807011862e-05, "loss": 0.6529, "step": 20282 }, { "epoch": 1.507469342251951, "grad_norm": 2.825851622970629, "learning_rate": 1.0405085098653874e-05, "loss": 0.6191, "step": 20283 }, { "epoch": 1.5075436640654032, "grad_norm": 2.218983031794299, "learning_rate": 1.0404283387687964e-05, "loss": 0.5753, "step": 20284 }, { "epoch": 1.5076179858788554, "grad_norm": 2.0440426092447823, "learning_rate": 1.04034816741193e-05, "loss": 0.66, "step": 20285 }, { "epoch": 1.5076923076923077, "grad_norm": 2.1023383448405033, "learning_rate": 1.0402679957953039e-05, "loss": 0.6814, "step": 20286 }, { "epoch": 1.50776662950576, "grad_norm": 2.108663406931404, "learning_rate": 1.0401878239194348e-05, "loss": 0.638, "step": 20287 }, { "epoch": 1.5078409513192121, "grad_norm": 1.96767373933977, "learning_rate": 1.0401076517848384e-05, "loss": 0.4288, "step": 20288 }, { "epoch": 1.5079152731326646, "grad_norm": 2.0521951768671767, "learning_rate": 1.0400274793920314e-05, "loss": 0.7041, "step": 20289 }, { "epoch": 1.5079895949461166, "grad_norm": 1.9741696495111871, "learning_rate": 1.0399473067415292e-05, "loss": 0.543, "step": 20290 }, { "epoch": 1.508063916759569, "grad_norm": 1.7192506842489683, "learning_rate": 1.0398671338338485e-05, "loss": 0.4693, "step": 20291 }, { "epoch": 1.508138238573021, "grad_norm": 2.0460522901942, "learning_rate": 1.039786960669505e-05, "loss": 0.6362, "step": 20292 }, { "epoch": 1.5082125603864736, "grad_norm": 1.8776119795920658, "learning_rate": 1.0397067872490152e-05, "loss": 0.6063, "step": 20293 }, { "epoch": 1.5082868821999256, "grad_norm": 2.0209535465839568, "learning_rate": 1.0396266135728954e-05, "loss": 0.5833, "step": 20294 }, { "epoch": 1.508361204013378, "grad_norm": 2.079943479553574, "learning_rate": 1.0395464396416614e-05, "loss": 0.6477, "step": 20295 }, { "epoch": 1.50843552582683, "grad_norm": 1.7725622760113213, "learning_rate": 1.0394662654558296e-05, "loss": 0.6238, "step": 20296 }, { "epoch": 1.5085098476402825, "grad_norm": 2.287966805526027, "learning_rate": 1.0393860910159156e-05, "loss": 0.7922, "step": 20297 }, { "epoch": 1.5085841694537345, "grad_norm": 2.0356963112788957, "learning_rate": 1.0393059163224364e-05, "loss": 0.5713, "step": 20298 }, { "epoch": 1.508658491267187, "grad_norm": 1.8883899325183133, "learning_rate": 1.0392257413759075e-05, "loss": 0.6098, "step": 20299 }, { "epoch": 1.508732813080639, "grad_norm": 2.0829225026253164, "learning_rate": 1.0391455661768457e-05, "loss": 0.6354, "step": 20300 }, { "epoch": 1.5088071348940915, "grad_norm": 1.6964983679488566, "learning_rate": 1.0390653907257664e-05, "loss": 0.5212, "step": 20301 }, { "epoch": 1.5088814567075437, "grad_norm": 1.8663649368032105, "learning_rate": 1.0389852150231863e-05, "loss": 0.645, "step": 20302 }, { "epoch": 1.508955778520996, "grad_norm": 2.0452377039859897, "learning_rate": 1.0389050390696214e-05, "loss": 0.7282, "step": 20303 }, { "epoch": 1.5090301003344482, "grad_norm": 1.8765991690637365, "learning_rate": 1.0388248628655878e-05, "loss": 0.5963, "step": 20304 }, { "epoch": 1.5091044221479004, "grad_norm": 1.7895126046439376, "learning_rate": 1.0387446864116015e-05, "loss": 0.5734, "step": 20305 }, { "epoch": 1.5091787439613527, "grad_norm": 2.5587732450231613, "learning_rate": 1.0386645097081793e-05, "loss": 0.6434, "step": 20306 }, { "epoch": 1.509253065774805, "grad_norm": 1.9058204318074792, "learning_rate": 1.0385843327558372e-05, "loss": 0.5539, "step": 20307 }, { "epoch": 1.5093273875882571, "grad_norm": 1.9137709071743148, "learning_rate": 1.0385041555550907e-05, "loss": 0.595, "step": 20308 }, { "epoch": 1.5094017094017094, "grad_norm": 1.6685553838420335, "learning_rate": 1.0384239781064567e-05, "loss": 0.6457, "step": 20309 }, { "epoch": 1.5094760312151616, "grad_norm": 1.8193515000059324, "learning_rate": 1.0383438004104508e-05, "loss": 0.5285, "step": 20310 }, { "epoch": 1.5095503530286138, "grad_norm": 1.7550349784078705, "learning_rate": 1.0382636224675899e-05, "loss": 0.5742, "step": 20311 }, { "epoch": 1.5096246748420663, "grad_norm": 1.9420308656657463, "learning_rate": 1.0381834442783896e-05, "loss": 0.6553, "step": 20312 }, { "epoch": 1.5096989966555183, "grad_norm": 1.9928172444050312, "learning_rate": 1.0381032658433662e-05, "loss": 0.6207, "step": 20313 }, { "epoch": 1.5097733184689708, "grad_norm": 1.8160642469784032, "learning_rate": 1.0380230871630362e-05, "loss": 0.5381, "step": 20314 }, { "epoch": 1.5098476402824228, "grad_norm": 1.9987498897678873, "learning_rate": 1.0379429082379153e-05, "loss": 0.5598, "step": 20315 }, { "epoch": 1.5099219620958753, "grad_norm": 1.9088703915880607, "learning_rate": 1.0378627290685199e-05, "loss": 0.5662, "step": 20316 }, { "epoch": 1.5099962839093273, "grad_norm": 1.6882221848308927, "learning_rate": 1.0377825496553664e-05, "loss": 0.6488, "step": 20317 }, { "epoch": 1.5100706057227797, "grad_norm": 2.478712388245633, "learning_rate": 1.0377023699989708e-05, "loss": 0.5205, "step": 20318 }, { "epoch": 1.5101449275362318, "grad_norm": 1.5420890723652998, "learning_rate": 1.0376221900998492e-05, "loss": 0.4068, "step": 20319 }, { "epoch": 1.5102192493496842, "grad_norm": 2.4767133756261064, "learning_rate": 1.0375420099585178e-05, "loss": 0.604, "step": 20320 }, { "epoch": 1.5102935711631362, "grad_norm": 2.6684594761919462, "learning_rate": 1.037461829575493e-05, "loss": 0.692, "step": 20321 }, { "epoch": 1.5103678929765887, "grad_norm": 2.0060347026560494, "learning_rate": 1.0373816489512906e-05, "loss": 0.594, "step": 20322 }, { "epoch": 1.510442214790041, "grad_norm": 2.935842379811992, "learning_rate": 1.0373014680864277e-05, "loss": 0.7129, "step": 20323 }, { "epoch": 1.5105165366034932, "grad_norm": 2.2248892311993713, "learning_rate": 1.0372212869814198e-05, "loss": 0.6633, "step": 20324 }, { "epoch": 1.5105908584169454, "grad_norm": 1.419233151291877, "learning_rate": 1.0371411056367826e-05, "loss": 0.4124, "step": 20325 }, { "epoch": 1.5106651802303976, "grad_norm": 2.3896466463204034, "learning_rate": 1.0370609240530331e-05, "loss": 0.6049, "step": 20326 }, { "epoch": 1.5107395020438499, "grad_norm": 1.9252284373185986, "learning_rate": 1.0369807422306874e-05, "loss": 0.5659, "step": 20327 }, { "epoch": 1.5108138238573021, "grad_norm": 1.6862763014680315, "learning_rate": 1.036900560170262e-05, "loss": 0.4881, "step": 20328 }, { "epoch": 1.5108881456707544, "grad_norm": 2.7926392470553494, "learning_rate": 1.0368203778722723e-05, "loss": 0.479, "step": 20329 }, { "epoch": 1.5109624674842066, "grad_norm": 2.136228462151452, "learning_rate": 1.036740195337235e-05, "loss": 0.7541, "step": 20330 }, { "epoch": 1.5110367892976588, "grad_norm": 2.271023653576516, "learning_rate": 1.036660012565666e-05, "loss": 0.6631, "step": 20331 }, { "epoch": 1.511111111111111, "grad_norm": 2.043315737165016, "learning_rate": 1.0365798295580818e-05, "loss": 0.5511, "step": 20332 }, { "epoch": 1.5111854329245633, "grad_norm": 1.6672232661771635, "learning_rate": 1.0364996463149986e-05, "loss": 0.5793, "step": 20333 }, { "epoch": 1.5112597547380155, "grad_norm": 2.2143941253622432, "learning_rate": 1.036419462836933e-05, "loss": 0.8208, "step": 20334 }, { "epoch": 1.511334076551468, "grad_norm": 2.0885375623421663, "learning_rate": 1.0363392791244004e-05, "loss": 0.7323, "step": 20335 }, { "epoch": 1.51140839836492, "grad_norm": 2.581942550077266, "learning_rate": 1.0362590951779175e-05, "loss": 0.6157, "step": 20336 }, { "epoch": 1.5114827201783725, "grad_norm": 1.9047288460718301, "learning_rate": 1.0361789109980001e-05, "loss": 0.6253, "step": 20337 }, { "epoch": 1.5115570419918245, "grad_norm": 2.0890943607791876, "learning_rate": 1.036098726585165e-05, "loss": 0.6108, "step": 20338 }, { "epoch": 1.511631363805277, "grad_norm": 1.526637696669024, "learning_rate": 1.0360185419399282e-05, "loss": 0.5184, "step": 20339 }, { "epoch": 1.511705685618729, "grad_norm": 1.9640422002652416, "learning_rate": 1.0359383570628061e-05, "loss": 0.6608, "step": 20340 }, { "epoch": 1.5117800074321814, "grad_norm": 1.8764313712824843, "learning_rate": 1.0358581719543144e-05, "loss": 0.5199, "step": 20341 }, { "epoch": 1.5118543292456335, "grad_norm": 1.704678790644112, "learning_rate": 1.0357779866149697e-05, "loss": 0.4938, "step": 20342 }, { "epoch": 1.511928651059086, "grad_norm": 1.6633553939317465, "learning_rate": 1.035697801045288e-05, "loss": 0.4652, "step": 20343 }, { "epoch": 1.512002972872538, "grad_norm": 1.7625036875359568, "learning_rate": 1.0356176152457862e-05, "loss": 0.4398, "step": 20344 }, { "epoch": 1.5120772946859904, "grad_norm": 1.801659956567178, "learning_rate": 1.03553742921698e-05, "loss": 0.5167, "step": 20345 }, { "epoch": 1.5121516164994426, "grad_norm": 2.0470249357299486, "learning_rate": 1.0354572429593853e-05, "loss": 0.6595, "step": 20346 }, { "epoch": 1.5122259383128949, "grad_norm": 1.7973542221502226, "learning_rate": 1.0353770564735188e-05, "loss": 0.6103, "step": 20347 }, { "epoch": 1.512300260126347, "grad_norm": 1.8629907196582356, "learning_rate": 1.0352968697598966e-05, "loss": 0.705, "step": 20348 }, { "epoch": 1.5123745819397993, "grad_norm": 2.1595207455045524, "learning_rate": 1.0352166828190351e-05, "loss": 0.7022, "step": 20349 }, { "epoch": 1.5124489037532516, "grad_norm": 2.0368352446930684, "learning_rate": 1.0351364956514504e-05, "loss": 0.6681, "step": 20350 }, { "epoch": 1.5125232255667038, "grad_norm": 2.4089128293944886, "learning_rate": 1.035056308257659e-05, "loss": 0.6502, "step": 20351 }, { "epoch": 1.512597547380156, "grad_norm": 1.9518935442254397, "learning_rate": 1.0349761206381765e-05, "loss": 0.7263, "step": 20352 }, { "epoch": 1.5126718691936083, "grad_norm": 2.2488212509914223, "learning_rate": 1.0348959327935197e-05, "loss": 0.6024, "step": 20353 }, { "epoch": 1.5127461910070605, "grad_norm": 2.0340097760549107, "learning_rate": 1.0348157447242047e-05, "loss": 0.6344, "step": 20354 }, { "epoch": 1.5128205128205128, "grad_norm": 2.7013069733204285, "learning_rate": 1.034735556430748e-05, "loss": 0.6943, "step": 20355 }, { "epoch": 1.512894834633965, "grad_norm": 1.73907007749531, "learning_rate": 1.0346553679136655e-05, "loss": 0.5397, "step": 20356 }, { "epoch": 1.5129691564474173, "grad_norm": 1.8941844751294739, "learning_rate": 1.0345751791734733e-05, "loss": 0.5183, "step": 20357 }, { "epoch": 1.5130434782608697, "grad_norm": 2.1374374063570185, "learning_rate": 1.034494990210688e-05, "loss": 0.7099, "step": 20358 }, { "epoch": 1.5131178000743217, "grad_norm": 1.9761864882037665, "learning_rate": 1.0344148010258254e-05, "loss": 0.642, "step": 20359 }, { "epoch": 1.5131921218877742, "grad_norm": 1.990870388151715, "learning_rate": 1.0343346116194027e-05, "loss": 0.7355, "step": 20360 }, { "epoch": 1.5132664437012262, "grad_norm": 2.1759719596675557, "learning_rate": 1.0342544219919352e-05, "loss": 0.4947, "step": 20361 }, { "epoch": 1.5133407655146787, "grad_norm": 1.7032257320878796, "learning_rate": 1.03417423214394e-05, "loss": 0.5451, "step": 20362 }, { "epoch": 1.5134150873281307, "grad_norm": 1.5852639880853576, "learning_rate": 1.0340940420759323e-05, "loss": 0.5434, "step": 20363 }, { "epoch": 1.5134894091415831, "grad_norm": 1.7371381144779154, "learning_rate": 1.0340138517884289e-05, "loss": 0.6285, "step": 20364 }, { "epoch": 1.5135637309550352, "grad_norm": 2.1975772783693643, "learning_rate": 1.0339336612819462e-05, "loss": 0.724, "step": 20365 }, { "epoch": 1.5136380527684876, "grad_norm": 1.9821452813135743, "learning_rate": 1.0338534705570004e-05, "loss": 0.6799, "step": 20366 }, { "epoch": 1.5137123745819396, "grad_norm": 2.351352461608297, "learning_rate": 1.0337732796141079e-05, "loss": 0.7068, "step": 20367 }, { "epoch": 1.513786696395392, "grad_norm": 1.8014774881783973, "learning_rate": 1.0336930884537846e-05, "loss": 0.5813, "step": 20368 }, { "epoch": 1.5138610182088443, "grad_norm": 2.2249069633661245, "learning_rate": 1.033612897076547e-05, "loss": 0.4882, "step": 20369 }, { "epoch": 1.5139353400222966, "grad_norm": 1.779833650689075, "learning_rate": 1.0335327054829111e-05, "loss": 0.6213, "step": 20370 }, { "epoch": 1.5140096618357488, "grad_norm": 2.2020428828996814, "learning_rate": 1.0334525136733934e-05, "loss": 0.6917, "step": 20371 }, { "epoch": 1.514083983649201, "grad_norm": 1.7311000347153227, "learning_rate": 1.0333723216485106e-05, "loss": 0.5482, "step": 20372 }, { "epoch": 1.5141583054626533, "grad_norm": 1.8514814281281187, "learning_rate": 1.0332921294087782e-05, "loss": 0.5788, "step": 20373 }, { "epoch": 1.5142326272761055, "grad_norm": 1.7629465441975265, "learning_rate": 1.0332119369547128e-05, "loss": 0.716, "step": 20374 }, { "epoch": 1.5143069490895578, "grad_norm": 1.897135307293448, "learning_rate": 1.0331317442868307e-05, "loss": 0.6023, "step": 20375 }, { "epoch": 1.51438127090301, "grad_norm": 2.3051579097622934, "learning_rate": 1.033051551405648e-05, "loss": 0.7125, "step": 20376 }, { "epoch": 1.5144555927164622, "grad_norm": 2.1305770465159592, "learning_rate": 1.0329713583116816e-05, "loss": 0.6115, "step": 20377 }, { "epoch": 1.5145299145299145, "grad_norm": 1.8671735639400047, "learning_rate": 1.0328911650054467e-05, "loss": 0.4699, "step": 20378 }, { "epoch": 1.514604236343367, "grad_norm": 1.664785651627425, "learning_rate": 1.0328109714874608e-05, "loss": 0.4286, "step": 20379 }, { "epoch": 1.514678558156819, "grad_norm": 1.7923489648508881, "learning_rate": 1.0327307777582391e-05, "loss": 0.5146, "step": 20380 }, { "epoch": 1.5147528799702714, "grad_norm": 1.9318565238119134, "learning_rate": 1.0326505838182983e-05, "loss": 0.6098, "step": 20381 }, { "epoch": 1.5148272017837234, "grad_norm": 2.020339386956935, "learning_rate": 1.032570389668155e-05, "loss": 0.6022, "step": 20382 }, { "epoch": 1.514901523597176, "grad_norm": 1.927113287317901, "learning_rate": 1.0324901953083252e-05, "loss": 0.528, "step": 20383 }, { "epoch": 1.514975845410628, "grad_norm": 2.1726608860138907, "learning_rate": 1.0324100007393253e-05, "loss": 0.5615, "step": 20384 }, { "epoch": 1.5150501672240804, "grad_norm": 1.8629613971767012, "learning_rate": 1.0323298059616713e-05, "loss": 0.6221, "step": 20385 }, { "epoch": 1.5151244890375324, "grad_norm": 2.5368090781342887, "learning_rate": 1.0322496109758798e-05, "loss": 0.5123, "step": 20386 }, { "epoch": 1.5151988108509848, "grad_norm": 1.8036052717298436, "learning_rate": 1.0321694157824666e-05, "loss": 0.5966, "step": 20387 }, { "epoch": 1.5152731326644369, "grad_norm": 1.6624476559969938, "learning_rate": 1.0320892203819488e-05, "loss": 0.5054, "step": 20388 }, { "epoch": 1.5153474544778893, "grad_norm": 1.6116959932915806, "learning_rate": 1.0320090247748422e-05, "loss": 0.5276, "step": 20389 }, { "epoch": 1.5154217762913416, "grad_norm": 2.129566804742407, "learning_rate": 1.0319288289616631e-05, "loss": 0.6278, "step": 20390 }, { "epoch": 1.5154960981047938, "grad_norm": 1.7604839362616092, "learning_rate": 1.031848632942928e-05, "loss": 0.5583, "step": 20391 }, { "epoch": 1.515570419918246, "grad_norm": 4.694082773527387, "learning_rate": 1.0317684367191528e-05, "loss": 0.6802, "step": 20392 }, { "epoch": 1.5156447417316983, "grad_norm": 1.8163408231327138, "learning_rate": 1.0316882402908542e-05, "loss": 0.6326, "step": 20393 }, { "epoch": 1.5157190635451505, "grad_norm": 2.0517600672877614, "learning_rate": 1.0316080436585484e-05, "loss": 0.4375, "step": 20394 }, { "epoch": 1.5157933853586028, "grad_norm": 1.8779444810834385, "learning_rate": 1.0315278468227513e-05, "loss": 0.5494, "step": 20395 }, { "epoch": 1.515867707172055, "grad_norm": 2.145188694317854, "learning_rate": 1.03144764978398e-05, "loss": 0.6666, "step": 20396 }, { "epoch": 1.5159420289855072, "grad_norm": 2.1339064802365177, "learning_rate": 1.0313674525427502e-05, "loss": 0.4927, "step": 20397 }, { "epoch": 1.5160163507989595, "grad_norm": 1.8701654032934711, "learning_rate": 1.0312872550995784e-05, "loss": 0.4879, "step": 20398 }, { "epoch": 1.5160906726124117, "grad_norm": 1.8429008974468857, "learning_rate": 1.031207057454981e-05, "loss": 0.4272, "step": 20399 }, { "epoch": 1.516164994425864, "grad_norm": 2.102916766837717, "learning_rate": 1.0311268596094739e-05, "loss": 0.6379, "step": 20400 }, { "epoch": 1.5162393162393162, "grad_norm": 2.0458447084948284, "learning_rate": 1.0310466615635738e-05, "loss": 0.579, "step": 20401 }, { "epoch": 1.5163136380527686, "grad_norm": 2.224679160045663, "learning_rate": 1.0309664633177969e-05, "loss": 0.7249, "step": 20402 }, { "epoch": 1.5163879598662207, "grad_norm": 1.8109715196152536, "learning_rate": 1.0308862648726596e-05, "loss": 0.5745, "step": 20403 }, { "epoch": 1.5164622816796731, "grad_norm": 1.862243611300639, "learning_rate": 1.0308060662286779e-05, "loss": 0.5171, "step": 20404 }, { "epoch": 1.5165366034931251, "grad_norm": 1.9095482667845982, "learning_rate": 1.0307258673863687e-05, "loss": 0.5384, "step": 20405 }, { "epoch": 1.5166109253065776, "grad_norm": 2.1111807606863597, "learning_rate": 1.0306456683462476e-05, "loss": 0.6196, "step": 20406 }, { "epoch": 1.5166852471200296, "grad_norm": 1.9736937184963659, "learning_rate": 1.0305654691088316e-05, "loss": 0.5556, "step": 20407 }, { "epoch": 1.516759568933482, "grad_norm": 1.9834781619820199, "learning_rate": 1.0304852696746365e-05, "loss": 0.676, "step": 20408 }, { "epoch": 1.516833890746934, "grad_norm": 1.7000135995182448, "learning_rate": 1.0304050700441788e-05, "loss": 0.5032, "step": 20409 }, { "epoch": 1.5169082125603865, "grad_norm": 2.445652016284369, "learning_rate": 1.0303248702179752e-05, "loss": 0.5587, "step": 20410 }, { "epoch": 1.5169825343738386, "grad_norm": 1.8030767281294586, "learning_rate": 1.0302446701965411e-05, "loss": 0.6131, "step": 20411 }, { "epoch": 1.517056856187291, "grad_norm": 2.2340567374339666, "learning_rate": 1.0301644699803937e-05, "loss": 0.6199, "step": 20412 }, { "epoch": 1.5171311780007433, "grad_norm": 1.6782909435996742, "learning_rate": 1.030084269570049e-05, "loss": 0.4757, "step": 20413 }, { "epoch": 1.5172054998141955, "grad_norm": 2.075942056483379, "learning_rate": 1.0300040689660232e-05, "loss": 0.6703, "step": 20414 }, { "epoch": 1.5172798216276477, "grad_norm": 1.714932029184843, "learning_rate": 1.0299238681688328e-05, "loss": 0.6478, "step": 20415 }, { "epoch": 1.5173541434411, "grad_norm": 1.7289693719173913, "learning_rate": 1.0298436671789941e-05, "loss": 0.4374, "step": 20416 }, { "epoch": 1.5174284652545522, "grad_norm": 2.0580812383532603, "learning_rate": 1.0297634659970235e-05, "loss": 0.5542, "step": 20417 }, { "epoch": 1.5175027870680045, "grad_norm": 1.8586723266939063, "learning_rate": 1.0296832646234372e-05, "loss": 0.5702, "step": 20418 }, { "epoch": 1.5175771088814567, "grad_norm": 1.7582007432709428, "learning_rate": 1.0296030630587516e-05, "loss": 0.4577, "step": 20419 }, { "epoch": 1.517651430694909, "grad_norm": 1.8819993341894332, "learning_rate": 1.029522861303483e-05, "loss": 0.6013, "step": 20420 }, { "epoch": 1.5177257525083612, "grad_norm": 1.9301680013163192, "learning_rate": 1.0294426593581479e-05, "loss": 0.546, "step": 20421 }, { "epoch": 1.5178000743218134, "grad_norm": 2.0103084456832563, "learning_rate": 1.0293624572232623e-05, "loss": 0.6749, "step": 20422 }, { "epoch": 1.5178743961352656, "grad_norm": 1.8570625947086632, "learning_rate": 1.0292822548993426e-05, "loss": 0.5875, "step": 20423 }, { "epoch": 1.5179487179487179, "grad_norm": 2.4905043641870215, "learning_rate": 1.0292020523869055e-05, "loss": 0.8017, "step": 20424 }, { "epoch": 1.5180230397621703, "grad_norm": 1.8431924450627084, "learning_rate": 1.029121849686467e-05, "loss": 0.5494, "step": 20425 }, { "epoch": 1.5180973615756224, "grad_norm": 3.0970223012766573, "learning_rate": 1.0290416467985435e-05, "loss": 0.807, "step": 20426 }, { "epoch": 1.5181716833890748, "grad_norm": 1.5274037482203806, "learning_rate": 1.0289614437236517e-05, "loss": 0.481, "step": 20427 }, { "epoch": 1.5182460052025268, "grad_norm": 2.3332110015542478, "learning_rate": 1.0288812404623072e-05, "loss": 0.5849, "step": 20428 }, { "epoch": 1.5183203270159793, "grad_norm": 2.2797889846855135, "learning_rate": 1.028801037015027e-05, "loss": 0.7014, "step": 20429 }, { "epoch": 1.5183946488294313, "grad_norm": 2.085120100744492, "learning_rate": 1.0287208333823273e-05, "loss": 0.6583, "step": 20430 }, { "epoch": 1.5184689706428838, "grad_norm": 1.9343574948406428, "learning_rate": 1.028640629564724e-05, "loss": 0.6745, "step": 20431 }, { "epoch": 1.5185432924563358, "grad_norm": 1.619180966487962, "learning_rate": 1.0285604255627342e-05, "loss": 0.4642, "step": 20432 }, { "epoch": 1.5186176142697883, "grad_norm": 1.9145839283519714, "learning_rate": 1.0284802213768736e-05, "loss": 0.5814, "step": 20433 }, { "epoch": 1.5186919360832403, "grad_norm": 1.6770165528063683, "learning_rate": 1.028400017007659e-05, "loss": 0.5678, "step": 20434 }, { "epoch": 1.5187662578966927, "grad_norm": 2.1737218487370478, "learning_rate": 1.0283198124556066e-05, "loss": 0.5969, "step": 20435 }, { "epoch": 1.518840579710145, "grad_norm": 1.939681002016173, "learning_rate": 1.0282396077212324e-05, "loss": 0.5367, "step": 20436 }, { "epoch": 1.5189149015235972, "grad_norm": 2.3233509198613236, "learning_rate": 1.0281594028050533e-05, "loss": 0.6593, "step": 20437 }, { "epoch": 1.5189892233370494, "grad_norm": 2.3071691226142272, "learning_rate": 1.0280791977075854e-05, "loss": 0.5214, "step": 20438 }, { "epoch": 1.5190635451505017, "grad_norm": 2.08499015679223, "learning_rate": 1.0279989924293451e-05, "loss": 0.7471, "step": 20439 }, { "epoch": 1.519137866963954, "grad_norm": 2.3008389967161924, "learning_rate": 1.0279187869708486e-05, "loss": 0.6598, "step": 20440 }, { "epoch": 1.5192121887774062, "grad_norm": 3.624086815881795, "learning_rate": 1.0278385813326129e-05, "loss": 0.7045, "step": 20441 }, { "epoch": 1.5192865105908584, "grad_norm": 2.245969491553126, "learning_rate": 1.0277583755151533e-05, "loss": 0.472, "step": 20442 }, { "epoch": 1.5193608324043106, "grad_norm": 1.8511872699049443, "learning_rate": 1.027678169518987e-05, "loss": 0.5343, "step": 20443 }, { "epoch": 1.5194351542177629, "grad_norm": 1.7124344168973578, "learning_rate": 1.0275979633446299e-05, "loss": 0.4321, "step": 20444 }, { "epoch": 1.5195094760312151, "grad_norm": 1.7925809130780412, "learning_rate": 1.0275177569925985e-05, "loss": 0.5296, "step": 20445 }, { "epoch": 1.5195837978446676, "grad_norm": 1.9598496928323503, "learning_rate": 1.0274375504634093e-05, "loss": 0.6027, "step": 20446 }, { "epoch": 1.5196581196581196, "grad_norm": 1.8092833942716653, "learning_rate": 1.0273573437575788e-05, "loss": 0.5447, "step": 20447 }, { "epoch": 1.519732441471572, "grad_norm": 2.0184273491531317, "learning_rate": 1.0272771368756227e-05, "loss": 0.5682, "step": 20448 }, { "epoch": 1.519806763285024, "grad_norm": 1.9680360223522313, "learning_rate": 1.0271969298180583e-05, "loss": 0.6582, "step": 20449 }, { "epoch": 1.5198810850984765, "grad_norm": 1.8507170330213143, "learning_rate": 1.027116722585401e-05, "loss": 0.5801, "step": 20450 }, { "epoch": 1.5199554069119285, "grad_norm": 1.8604512241698874, "learning_rate": 1.027036515178168e-05, "loss": 0.5766, "step": 20451 }, { "epoch": 1.520029728725381, "grad_norm": 1.9957926972852982, "learning_rate": 1.0269563075968753e-05, "loss": 0.6675, "step": 20452 }, { "epoch": 1.520104050538833, "grad_norm": 2.0470126143639362, "learning_rate": 1.0268760998420391e-05, "loss": 0.5832, "step": 20453 }, { "epoch": 1.5201783723522855, "grad_norm": 1.635262820406922, "learning_rate": 1.0267958919141762e-05, "loss": 0.5328, "step": 20454 }, { "epoch": 1.5202526941657375, "grad_norm": 2.132722415237164, "learning_rate": 1.0267156838138024e-05, "loss": 0.6815, "step": 20455 }, { "epoch": 1.52032701597919, "grad_norm": 2.052722159432401, "learning_rate": 1.0266354755414345e-05, "loss": 0.7063, "step": 20456 }, { "epoch": 1.5204013377926422, "grad_norm": 1.9311365910554263, "learning_rate": 1.026555267097589e-05, "loss": 0.6836, "step": 20457 }, { "epoch": 1.5204756596060944, "grad_norm": 2.1027598302416783, "learning_rate": 1.026475058482782e-05, "loss": 0.6785, "step": 20458 }, { "epoch": 1.5205499814195467, "grad_norm": 1.8115794314680485, "learning_rate": 1.0263948496975297e-05, "loss": 0.5296, "step": 20459 }, { "epoch": 1.520624303232999, "grad_norm": 2.6507047875348073, "learning_rate": 1.0263146407423492e-05, "loss": 0.7799, "step": 20460 }, { "epoch": 1.5206986250464511, "grad_norm": 2.0903118374856335, "learning_rate": 1.0262344316177562e-05, "loss": 0.618, "step": 20461 }, { "epoch": 1.5207729468599034, "grad_norm": 2.527111794781799, "learning_rate": 1.026154222324267e-05, "loss": 0.5874, "step": 20462 }, { "epoch": 1.5208472686733556, "grad_norm": 1.6750925708308517, "learning_rate": 1.0260740128623985e-05, "loss": 0.4601, "step": 20463 }, { "epoch": 1.5209215904868079, "grad_norm": 2.1109411721217137, "learning_rate": 1.025993803232667e-05, "loss": 0.6769, "step": 20464 }, { "epoch": 1.52099591230026, "grad_norm": 2.8801039268665902, "learning_rate": 1.0259135934355888e-05, "loss": 0.5229, "step": 20465 }, { "epoch": 1.5210702341137123, "grad_norm": 1.8374579020962802, "learning_rate": 1.0258333834716798e-05, "loss": 0.4706, "step": 20466 }, { "epoch": 1.5211445559271646, "grad_norm": 1.8224595801269674, "learning_rate": 1.025753173341457e-05, "loss": 0.4603, "step": 20467 }, { "epoch": 1.5212188777406168, "grad_norm": 2.830092915210322, "learning_rate": 1.0256729630454367e-05, "loss": 0.618, "step": 20468 }, { "epoch": 1.5212931995540693, "grad_norm": 2.081749508166952, "learning_rate": 1.0255927525841352e-05, "loss": 0.7012, "step": 20469 }, { "epoch": 1.5213675213675213, "grad_norm": 2.0811365032262805, "learning_rate": 1.0255125419580689e-05, "loss": 0.7146, "step": 20470 }, { "epoch": 1.5214418431809738, "grad_norm": 2.113586507449277, "learning_rate": 1.025432331167754e-05, "loss": 0.5309, "step": 20471 }, { "epoch": 1.5215161649944258, "grad_norm": 2.1582617990196664, "learning_rate": 1.0253521202137073e-05, "loss": 0.6892, "step": 20472 }, { "epoch": 1.5215904868078782, "grad_norm": 2.5203581469078484, "learning_rate": 1.0252719090964447e-05, "loss": 0.779, "step": 20473 }, { "epoch": 1.5216648086213302, "grad_norm": 2.3686632496291615, "learning_rate": 1.0251916978164831e-05, "loss": 0.7449, "step": 20474 }, { "epoch": 1.5217391304347827, "grad_norm": 1.820011657646686, "learning_rate": 1.0251114863743385e-05, "loss": 0.6294, "step": 20475 }, { "epoch": 1.5218134522482347, "grad_norm": 1.9226521504599405, "learning_rate": 1.0250312747705278e-05, "loss": 0.6312, "step": 20476 }, { "epoch": 1.5218877740616872, "grad_norm": 1.7029261861230027, "learning_rate": 1.0249510630055668e-05, "loss": 0.4882, "step": 20477 }, { "epoch": 1.5219620958751392, "grad_norm": 2.1448988401340046, "learning_rate": 1.0248708510799721e-05, "loss": 0.6037, "step": 20478 }, { "epoch": 1.5220364176885917, "grad_norm": 2.2327631461910413, "learning_rate": 1.0247906389942599e-05, "loss": 0.5505, "step": 20479 }, { "epoch": 1.522110739502044, "grad_norm": 2.163841542984568, "learning_rate": 1.0247104267489474e-05, "loss": 0.6985, "step": 20480 }, { "epoch": 1.5221850613154961, "grad_norm": 1.708427394517908, "learning_rate": 1.0246302143445503e-05, "loss": 0.5271, "step": 20481 }, { "epoch": 1.5222593831289484, "grad_norm": 1.8008110653930784, "learning_rate": 1.0245500017815851e-05, "loss": 0.5334, "step": 20482 }, { "epoch": 1.5223337049424006, "grad_norm": 1.71048513366704, "learning_rate": 1.024469789060568e-05, "loss": 0.6266, "step": 20483 }, { "epoch": 1.5224080267558529, "grad_norm": 1.6451041408097873, "learning_rate": 1.0243895761820157e-05, "loss": 0.5729, "step": 20484 }, { "epoch": 1.522482348569305, "grad_norm": 2.123033727366927, "learning_rate": 1.0243093631464447e-05, "loss": 0.7092, "step": 20485 }, { "epoch": 1.5225566703827573, "grad_norm": 2.1196375130620657, "learning_rate": 1.0242291499543715e-05, "loss": 0.5739, "step": 20486 }, { "epoch": 1.5226309921962096, "grad_norm": 1.8622986964354398, "learning_rate": 1.0241489366063125e-05, "loss": 0.4061, "step": 20487 }, { "epoch": 1.5227053140096618, "grad_norm": 1.8672146979335693, "learning_rate": 1.0240687231027833e-05, "loss": 0.6268, "step": 20488 }, { "epoch": 1.522779635823114, "grad_norm": 1.7910057791117733, "learning_rate": 1.0239885094443009e-05, "loss": 0.5173, "step": 20489 }, { "epoch": 1.5228539576365663, "grad_norm": 1.5901822072053753, "learning_rate": 1.023908295631382e-05, "loss": 0.4861, "step": 20490 }, { "epoch": 1.5229282794500185, "grad_norm": 2.4903090731545725, "learning_rate": 1.0238280816645427e-05, "loss": 0.6689, "step": 20491 }, { "epoch": 1.523002601263471, "grad_norm": 2.020809287134568, "learning_rate": 1.0237478675442998e-05, "loss": 0.6646, "step": 20492 }, { "epoch": 1.523076923076923, "grad_norm": 1.54317240611037, "learning_rate": 1.023667653271169e-05, "loss": 0.5942, "step": 20493 }, { "epoch": 1.5231512448903755, "grad_norm": 1.7893752633802171, "learning_rate": 1.0235874388456667e-05, "loss": 0.6053, "step": 20494 }, { "epoch": 1.5232255667038275, "grad_norm": 2.1055321853773417, "learning_rate": 1.0235072242683101e-05, "loss": 0.615, "step": 20495 }, { "epoch": 1.52329988851728, "grad_norm": 1.6559287728313754, "learning_rate": 1.023427009539615e-05, "loss": 0.5878, "step": 20496 }, { "epoch": 1.523374210330732, "grad_norm": 1.9043053852338772, "learning_rate": 1.0233467946600985e-05, "loss": 0.634, "step": 20497 }, { "epoch": 1.5234485321441844, "grad_norm": 2.2924773520155433, "learning_rate": 1.0232665796302764e-05, "loss": 0.7248, "step": 20498 }, { "epoch": 1.5235228539576364, "grad_norm": 3.2174574829300044, "learning_rate": 1.023186364450665e-05, "loss": 0.604, "step": 20499 }, { "epoch": 1.5235971757710889, "grad_norm": 1.65351727152727, "learning_rate": 1.023106149121781e-05, "loss": 0.5898, "step": 20500 }, { "epoch": 1.523671497584541, "grad_norm": 1.8926496428202653, "learning_rate": 1.0230259336441407e-05, "loss": 0.4766, "step": 20501 }, { "epoch": 1.5237458193979934, "grad_norm": 1.9136692968890847, "learning_rate": 1.0229457180182612e-05, "loss": 0.52, "step": 20502 }, { "epoch": 1.5238201412114456, "grad_norm": 2.0753407576389957, "learning_rate": 1.0228655022446581e-05, "loss": 0.6094, "step": 20503 }, { "epoch": 1.5238944630248978, "grad_norm": 1.9619279238936396, "learning_rate": 1.022785286323848e-05, "loss": 0.6624, "step": 20504 }, { "epoch": 1.52396878483835, "grad_norm": 2.143748179650668, "learning_rate": 1.0227050702563473e-05, "loss": 0.5501, "step": 20505 }, { "epoch": 1.5240431066518023, "grad_norm": 1.9076959231365085, "learning_rate": 1.0226248540426724e-05, "loss": 0.6486, "step": 20506 }, { "epoch": 1.5241174284652546, "grad_norm": 1.6044725439465286, "learning_rate": 1.0225446376833401e-05, "loss": 0.4396, "step": 20507 }, { "epoch": 1.5241917502787068, "grad_norm": 2.634132900185685, "learning_rate": 1.0224644211788668e-05, "loss": 0.7423, "step": 20508 }, { "epoch": 1.524266072092159, "grad_norm": 1.7040741448650258, "learning_rate": 1.0223842045297686e-05, "loss": 0.6131, "step": 20509 }, { "epoch": 1.5243403939056113, "grad_norm": 1.8145243372446678, "learning_rate": 1.0223039877365619e-05, "loss": 0.6105, "step": 20510 }, { "epoch": 1.5244147157190635, "grad_norm": 2.1041608247027828, "learning_rate": 1.0222237707997634e-05, "loss": 0.5868, "step": 20511 }, { "epoch": 1.5244890375325157, "grad_norm": 2.3438210932662216, "learning_rate": 1.0221435537198892e-05, "loss": 0.611, "step": 20512 }, { "epoch": 1.5245633593459682, "grad_norm": 1.582847035636372, "learning_rate": 1.0220633364974561e-05, "loss": 0.5491, "step": 20513 }, { "epoch": 1.5246376811594202, "grad_norm": 2.284887709547165, "learning_rate": 1.0219831191329806e-05, "loss": 0.7873, "step": 20514 }, { "epoch": 1.5247120029728727, "grad_norm": 1.8180065591029424, "learning_rate": 1.0219029016269788e-05, "loss": 0.5181, "step": 20515 }, { "epoch": 1.5247863247863247, "grad_norm": 1.8851671003820143, "learning_rate": 1.021822683979967e-05, "loss": 0.6856, "step": 20516 }, { "epoch": 1.5248606465997772, "grad_norm": 2.128137071566925, "learning_rate": 1.021742466192462e-05, "loss": 0.6101, "step": 20517 }, { "epoch": 1.5249349684132292, "grad_norm": 2.1968113640538456, "learning_rate": 1.02166224826498e-05, "loss": 0.608, "step": 20518 }, { "epoch": 1.5250092902266816, "grad_norm": 1.7736305630320275, "learning_rate": 1.0215820301980381e-05, "loss": 0.6455, "step": 20519 }, { "epoch": 1.5250836120401337, "grad_norm": 1.9951307949274288, "learning_rate": 1.021501811992152e-05, "loss": 0.6135, "step": 20520 }, { "epoch": 1.5251579338535861, "grad_norm": 1.7652611146068367, "learning_rate": 1.0214215936478382e-05, "loss": 0.5055, "step": 20521 }, { "epoch": 1.5252322556670381, "grad_norm": 1.8222428709468879, "learning_rate": 1.0213413751656131e-05, "loss": 0.6159, "step": 20522 }, { "epoch": 1.5253065774804906, "grad_norm": 1.9558130717400892, "learning_rate": 1.0212611565459935e-05, "loss": 0.5244, "step": 20523 }, { "epoch": 1.5253808992939428, "grad_norm": 2.2477857568416124, "learning_rate": 1.021180937789496e-05, "loss": 0.5415, "step": 20524 }, { "epoch": 1.525455221107395, "grad_norm": 1.8800574717742697, "learning_rate": 1.0211007188966362e-05, "loss": 0.535, "step": 20525 }, { "epoch": 1.5255295429208473, "grad_norm": 1.7446222705683598, "learning_rate": 1.0210204998679316e-05, "loss": 0.6008, "step": 20526 }, { "epoch": 1.5256038647342995, "grad_norm": 1.8968424548909582, "learning_rate": 1.0209402807038977e-05, "loss": 0.6342, "step": 20527 }, { "epoch": 1.5256781865477518, "grad_norm": 5.966611571365937, "learning_rate": 1.0208600614050515e-05, "loss": 0.5177, "step": 20528 }, { "epoch": 1.525752508361204, "grad_norm": 2.3098056666227995, "learning_rate": 1.020779841971909e-05, "loss": 0.7096, "step": 20529 }, { "epoch": 1.5258268301746563, "grad_norm": 1.68331205467217, "learning_rate": 1.0206996224049877e-05, "loss": 0.5045, "step": 20530 }, { "epoch": 1.5259011519881085, "grad_norm": 2.241202982263017, "learning_rate": 1.0206194027048026e-05, "loss": 0.6241, "step": 20531 }, { "epoch": 1.5259754738015607, "grad_norm": 2.6021562431944183, "learning_rate": 1.0205391828718714e-05, "loss": 0.7631, "step": 20532 }, { "epoch": 1.526049795615013, "grad_norm": 2.1854184378350903, "learning_rate": 1.0204589629067096e-05, "loss": 0.6543, "step": 20533 }, { "epoch": 1.5261241174284652, "grad_norm": 2.034588894220963, "learning_rate": 1.0203787428098339e-05, "loss": 0.6253, "step": 20534 }, { "epoch": 1.5261984392419174, "grad_norm": 4.536350080602412, "learning_rate": 1.0202985225817614e-05, "loss": 0.7106, "step": 20535 }, { "epoch": 1.52627276105537, "grad_norm": 1.788468572008704, "learning_rate": 1.0202183022230079e-05, "loss": 0.5705, "step": 20536 }, { "epoch": 1.526347082868822, "grad_norm": 2.291137216564409, "learning_rate": 1.02013808173409e-05, "loss": 0.6806, "step": 20537 }, { "epoch": 1.5264214046822744, "grad_norm": 1.6489711177535369, "learning_rate": 1.020057861115524e-05, "loss": 0.5493, "step": 20538 }, { "epoch": 1.5264957264957264, "grad_norm": 1.8327062364367948, "learning_rate": 1.0199776403678267e-05, "loss": 0.6193, "step": 20539 }, { "epoch": 1.5265700483091789, "grad_norm": 2.07397539468198, "learning_rate": 1.0198974194915143e-05, "loss": 0.716, "step": 20540 }, { "epoch": 1.5266443701226309, "grad_norm": 1.6337626501980704, "learning_rate": 1.0198171984871036e-05, "loss": 0.5083, "step": 20541 }, { "epoch": 1.5267186919360833, "grad_norm": 1.303772949936672, "learning_rate": 1.0197369773551105e-05, "loss": 0.4121, "step": 20542 }, { "epoch": 1.5267930137495354, "grad_norm": 2.1362564293557704, "learning_rate": 1.019656756096052e-05, "loss": 0.6464, "step": 20543 }, { "epoch": 1.5268673355629878, "grad_norm": 2.053140956716356, "learning_rate": 1.0195765347104443e-05, "loss": 0.6383, "step": 20544 }, { "epoch": 1.5269416573764398, "grad_norm": 1.943463001802575, "learning_rate": 1.0194963131988037e-05, "loss": 0.6158, "step": 20545 }, { "epoch": 1.5270159791898923, "grad_norm": 2.0275907607955435, "learning_rate": 1.0194160915616468e-05, "loss": 0.6546, "step": 20546 }, { "epoch": 1.5270903010033445, "grad_norm": 2.3643278251538296, "learning_rate": 1.0193358697994905e-05, "loss": 0.5843, "step": 20547 }, { "epoch": 1.5271646228167968, "grad_norm": 1.6770702188358, "learning_rate": 1.0192556479128506e-05, "loss": 0.5313, "step": 20548 }, { "epoch": 1.527238944630249, "grad_norm": 1.8477979312098483, "learning_rate": 1.0191754259022437e-05, "loss": 0.6248, "step": 20549 }, { "epoch": 1.5273132664437012, "grad_norm": 1.631421287243916, "learning_rate": 1.0190952037681866e-05, "loss": 0.615, "step": 20550 }, { "epoch": 1.5273875882571535, "grad_norm": 2.0360239827034654, "learning_rate": 1.0190149815111953e-05, "loss": 0.5169, "step": 20551 }, { "epoch": 1.5274619100706057, "grad_norm": 2.5805060227120586, "learning_rate": 1.018934759131787e-05, "loss": 0.6071, "step": 20552 }, { "epoch": 1.527536231884058, "grad_norm": 1.6530643047173144, "learning_rate": 1.0188545366304774e-05, "loss": 0.5975, "step": 20553 }, { "epoch": 1.5276105536975102, "grad_norm": 1.841705793845227, "learning_rate": 1.0187743140077835e-05, "loss": 0.5293, "step": 20554 }, { "epoch": 1.5276848755109624, "grad_norm": 1.8274991158485085, "learning_rate": 1.0186940912642214e-05, "loss": 0.5225, "step": 20555 }, { "epoch": 1.5277591973244147, "grad_norm": 2.346822162443492, "learning_rate": 1.0186138684003075e-05, "loss": 0.7042, "step": 20556 }, { "epoch": 1.527833519137867, "grad_norm": 2.2209113024251583, "learning_rate": 1.0185336454165589e-05, "loss": 0.601, "step": 20557 }, { "epoch": 1.5279078409513192, "grad_norm": 1.7187508215399405, "learning_rate": 1.0184534223134914e-05, "loss": 0.4456, "step": 20558 }, { "epoch": 1.5279821627647716, "grad_norm": 1.7312222242464215, "learning_rate": 1.0183731990916217e-05, "loss": 0.7023, "step": 20559 }, { "epoch": 1.5280564845782236, "grad_norm": 1.7551585992199041, "learning_rate": 1.0182929757514663e-05, "loss": 0.4129, "step": 20560 }, { "epoch": 1.528130806391676, "grad_norm": 1.7009724862074198, "learning_rate": 1.0182127522935417e-05, "loss": 0.4757, "step": 20561 }, { "epoch": 1.528205128205128, "grad_norm": 2.960280950608619, "learning_rate": 1.0181325287183643e-05, "loss": 0.6578, "step": 20562 }, { "epoch": 1.5282794500185806, "grad_norm": 2.6293556237275157, "learning_rate": 1.0180523050264509e-05, "loss": 0.6242, "step": 20563 }, { "epoch": 1.5283537718320326, "grad_norm": 1.865669828188069, "learning_rate": 1.0179720812183174e-05, "loss": 0.5863, "step": 20564 }, { "epoch": 1.528428093645485, "grad_norm": 1.6418603091139048, "learning_rate": 1.0178918572944808e-05, "loss": 0.4024, "step": 20565 }, { "epoch": 1.528502415458937, "grad_norm": 11.203245655779208, "learning_rate": 1.0178116332554571e-05, "loss": 0.8517, "step": 20566 }, { "epoch": 1.5285767372723895, "grad_norm": 2.135517889632374, "learning_rate": 1.0177314091017632e-05, "loss": 0.639, "step": 20567 }, { "epoch": 1.5286510590858415, "grad_norm": 1.962321904316837, "learning_rate": 1.0176511848339155e-05, "loss": 0.6772, "step": 20568 }, { "epoch": 1.528725380899294, "grad_norm": 1.683567128317213, "learning_rate": 1.01757096045243e-05, "loss": 0.5744, "step": 20569 }, { "epoch": 1.5287997027127462, "grad_norm": 1.3849508586147632, "learning_rate": 1.0174907359578236e-05, "loss": 0.4638, "step": 20570 }, { "epoch": 1.5288740245261985, "grad_norm": 1.9917302690414, "learning_rate": 1.0174105113506132e-05, "loss": 0.6986, "step": 20571 }, { "epoch": 1.5289483463396507, "grad_norm": 2.005442544972643, "learning_rate": 1.0173302866313147e-05, "loss": 0.6397, "step": 20572 }, { "epoch": 1.529022668153103, "grad_norm": 2.226666181869224, "learning_rate": 1.0172500618004443e-05, "loss": 0.7811, "step": 20573 }, { "epoch": 1.5290969899665552, "grad_norm": 2.265795440064733, "learning_rate": 1.0171698368585195e-05, "loss": 0.7592, "step": 20574 }, { "epoch": 1.5291713117800074, "grad_norm": 1.7161721710244007, "learning_rate": 1.0170896118060559e-05, "loss": 0.4417, "step": 20575 }, { "epoch": 1.5292456335934597, "grad_norm": 1.620519237859588, "learning_rate": 1.01700938664357e-05, "loss": 0.5265, "step": 20576 }, { "epoch": 1.529319955406912, "grad_norm": 1.6692935181505237, "learning_rate": 1.0169291613715789e-05, "loss": 0.5345, "step": 20577 }, { "epoch": 1.5293942772203641, "grad_norm": 1.7001405927571756, "learning_rate": 1.0168489359905986e-05, "loss": 0.5654, "step": 20578 }, { "epoch": 1.5294685990338164, "grad_norm": 1.860260029403743, "learning_rate": 1.0167687105011459e-05, "loss": 0.4866, "step": 20579 }, { "epoch": 1.5295429208472688, "grad_norm": 2.091816243158389, "learning_rate": 1.0166884849037368e-05, "loss": 0.7096, "step": 20580 }, { "epoch": 1.5296172426607209, "grad_norm": 2.289610583576231, "learning_rate": 1.0166082591988882e-05, "loss": 0.6214, "step": 20581 }, { "epoch": 1.5296915644741733, "grad_norm": 1.9890934537600833, "learning_rate": 1.0165280333871168e-05, "loss": 0.4239, "step": 20582 }, { "epoch": 1.5297658862876253, "grad_norm": 2.233270381630271, "learning_rate": 1.0164478074689384e-05, "loss": 0.6944, "step": 20583 }, { "epoch": 1.5298402081010778, "grad_norm": 2.901420712556221, "learning_rate": 1.0163675814448698e-05, "loss": 0.587, "step": 20584 }, { "epoch": 1.5299145299145298, "grad_norm": 2.184914043236997, "learning_rate": 1.016287355315428e-05, "loss": 0.5483, "step": 20585 }, { "epoch": 1.5299888517279823, "grad_norm": 1.7141813827510437, "learning_rate": 1.0162071290811288e-05, "loss": 0.6033, "step": 20586 }, { "epoch": 1.5300631735414343, "grad_norm": 2.3945262934727585, "learning_rate": 1.016126902742489e-05, "loss": 0.7062, "step": 20587 }, { "epoch": 1.5301374953548867, "grad_norm": 2.03428477455325, "learning_rate": 1.016046676300025e-05, "loss": 0.736, "step": 20588 }, { "epoch": 1.5302118171683388, "grad_norm": 1.908114838853182, "learning_rate": 1.0159664497542532e-05, "loss": 0.6794, "step": 20589 }, { "epoch": 1.5302861389817912, "grad_norm": 1.891236827083734, "learning_rate": 1.0158862231056905e-05, "loss": 0.5812, "step": 20590 }, { "epoch": 1.5303604607952432, "grad_norm": 2.5209962768534617, "learning_rate": 1.0158059963548529e-05, "loss": 0.5597, "step": 20591 }, { "epoch": 1.5304347826086957, "grad_norm": 2.101845071866259, "learning_rate": 1.015725769502257e-05, "loss": 0.5147, "step": 20592 }, { "epoch": 1.530509104422148, "grad_norm": 2.1249177740839458, "learning_rate": 1.0156455425484198e-05, "loss": 0.6628, "step": 20593 }, { "epoch": 1.5305834262356002, "grad_norm": 2.569000672156306, "learning_rate": 1.0155653154938572e-05, "loss": 0.7049, "step": 20594 }, { "epoch": 1.5306577480490524, "grad_norm": 2.141043418824352, "learning_rate": 1.015485088339086e-05, "loss": 0.7085, "step": 20595 }, { "epoch": 1.5307320698625047, "grad_norm": 2.3289639370649184, "learning_rate": 1.0154048610846225e-05, "loss": 0.7947, "step": 20596 }, { "epoch": 1.530806391675957, "grad_norm": 1.659252861886191, "learning_rate": 1.0153246337309831e-05, "loss": 0.5469, "step": 20597 }, { "epoch": 1.5308807134894091, "grad_norm": 2.186369423484918, "learning_rate": 1.0152444062786847e-05, "loss": 0.6843, "step": 20598 }, { "epoch": 1.5309550353028614, "grad_norm": 1.673645762546052, "learning_rate": 1.0151641787282438e-05, "loss": 0.4538, "step": 20599 }, { "epoch": 1.5310293571163136, "grad_norm": 1.7674506072430929, "learning_rate": 1.0150839510801766e-05, "loss": 0.6307, "step": 20600 }, { "epoch": 1.5311036789297658, "grad_norm": 2.00502589327087, "learning_rate": 1.0150037233349997e-05, "loss": 0.6128, "step": 20601 }, { "epoch": 1.531178000743218, "grad_norm": 1.9098040690540508, "learning_rate": 1.0149234954932294e-05, "loss": 0.6602, "step": 20602 }, { "epoch": 1.5312523225566705, "grad_norm": 2.3972333312150527, "learning_rate": 1.0148432675553826e-05, "loss": 0.761, "step": 20603 }, { "epoch": 1.5313266443701226, "grad_norm": 2.054573779252189, "learning_rate": 1.0147630395219754e-05, "loss": 0.5563, "step": 20604 }, { "epoch": 1.531400966183575, "grad_norm": 1.7895578901067104, "learning_rate": 1.014682811393525e-05, "loss": 0.3973, "step": 20605 }, { "epoch": 1.531475287997027, "grad_norm": 2.2409148020749416, "learning_rate": 1.0146025831705474e-05, "loss": 0.6918, "step": 20606 }, { "epoch": 1.5315496098104795, "grad_norm": 1.9852513659908737, "learning_rate": 1.0145223548535586e-05, "loss": 0.5582, "step": 20607 }, { "epoch": 1.5316239316239315, "grad_norm": 2.4284787541374055, "learning_rate": 1.0144421264430757e-05, "loss": 0.6943, "step": 20608 }, { "epoch": 1.531698253437384, "grad_norm": 1.858812090905837, "learning_rate": 1.0143618979396153e-05, "loss": 0.6313, "step": 20609 }, { "epoch": 1.531772575250836, "grad_norm": 2.005677038597384, "learning_rate": 1.0142816693436937e-05, "loss": 0.5809, "step": 20610 }, { "epoch": 1.5318468970642884, "grad_norm": 2.1691731987834073, "learning_rate": 1.0142014406558276e-05, "loss": 0.6571, "step": 20611 }, { "epoch": 1.5319212188777405, "grad_norm": 2.1812825564839593, "learning_rate": 1.0141212118765334e-05, "loss": 0.7228, "step": 20612 }, { "epoch": 1.531995540691193, "grad_norm": 1.6624586769597767, "learning_rate": 1.0140409830063274e-05, "loss": 0.5508, "step": 20613 }, { "epoch": 1.5320698625046452, "grad_norm": 1.8567137323568828, "learning_rate": 1.013960754045726e-05, "loss": 0.7023, "step": 20614 }, { "epoch": 1.5321441843180974, "grad_norm": 1.9979174997496956, "learning_rate": 1.0138805249952464e-05, "loss": 0.6597, "step": 20615 }, { "epoch": 1.5322185061315496, "grad_norm": 1.91149270802363, "learning_rate": 1.0138002958554048e-05, "loss": 0.6242, "step": 20616 }, { "epoch": 1.5322928279450019, "grad_norm": 1.9291206550919793, "learning_rate": 1.0137200666267175e-05, "loss": 0.7053, "step": 20617 }, { "epoch": 1.5323671497584541, "grad_norm": 1.8317042444871656, "learning_rate": 1.013639837309701e-05, "loss": 0.4979, "step": 20618 }, { "epoch": 1.5324414715719064, "grad_norm": 1.9763945605207813, "learning_rate": 1.013559607904872e-05, "loss": 0.7261, "step": 20619 }, { "epoch": 1.5325157933853586, "grad_norm": 2.659416182034713, "learning_rate": 1.0134793784127469e-05, "loss": 0.5234, "step": 20620 }, { "epoch": 1.5325901151988108, "grad_norm": 2.280176623628557, "learning_rate": 1.0133991488338421e-05, "loss": 0.5659, "step": 20621 }, { "epoch": 1.532664437012263, "grad_norm": 1.9266949634692905, "learning_rate": 1.0133189191686746e-05, "loss": 0.4978, "step": 20622 }, { "epoch": 1.5327387588257153, "grad_norm": 2.4042461245498927, "learning_rate": 1.0132386894177607e-05, "loss": 0.6119, "step": 20623 }, { "epoch": 1.5328130806391675, "grad_norm": 2.318551097559853, "learning_rate": 1.0131584595816164e-05, "loss": 0.6169, "step": 20624 }, { "epoch": 1.5328874024526198, "grad_norm": 2.3868352343779256, "learning_rate": 1.0130782296607586e-05, "loss": 0.776, "step": 20625 }, { "epoch": 1.5329617242660722, "grad_norm": 1.985583313349442, "learning_rate": 1.012997999655704e-05, "loss": 0.7464, "step": 20626 }, { "epoch": 1.5330360460795243, "grad_norm": 2.371676516554065, "learning_rate": 1.0129177695669693e-05, "loss": 0.6347, "step": 20627 }, { "epoch": 1.5331103678929767, "grad_norm": 2.1242806542615793, "learning_rate": 1.0128375393950702e-05, "loss": 0.5733, "step": 20628 }, { "epoch": 1.5331846897064287, "grad_norm": 2.0609956033975445, "learning_rate": 1.0127573091405238e-05, "loss": 0.4954, "step": 20629 }, { "epoch": 1.5332590115198812, "grad_norm": 2.716653753874691, "learning_rate": 1.0126770788038465e-05, "loss": 0.6473, "step": 20630 }, { "epoch": 1.5333333333333332, "grad_norm": 2.6059089722089257, "learning_rate": 1.0125968483855548e-05, "loss": 0.5546, "step": 20631 }, { "epoch": 1.5334076551467857, "grad_norm": 2.056534700915019, "learning_rate": 1.0125166178861652e-05, "loss": 0.502, "step": 20632 }, { "epoch": 1.5334819769602377, "grad_norm": 2.382217418255755, "learning_rate": 1.0124363873061946e-05, "loss": 0.5341, "step": 20633 }, { "epoch": 1.5335562987736902, "grad_norm": 2.2771862081929157, "learning_rate": 1.012356156646159e-05, "loss": 0.7164, "step": 20634 }, { "epoch": 1.5336306205871422, "grad_norm": 1.8899045738720868, "learning_rate": 1.012275925906575e-05, "loss": 0.5711, "step": 20635 }, { "epoch": 1.5337049424005946, "grad_norm": 2.178272598902247, "learning_rate": 1.0121956950879592e-05, "loss": 0.6496, "step": 20636 }, { "epoch": 1.5337792642140469, "grad_norm": 1.5968561424519623, "learning_rate": 1.0121154641908283e-05, "loss": 0.5203, "step": 20637 }, { "epoch": 1.533853586027499, "grad_norm": 2.3326223737256213, "learning_rate": 1.0120352332156987e-05, "loss": 0.6514, "step": 20638 }, { "epoch": 1.5339279078409513, "grad_norm": 1.959295518596457, "learning_rate": 1.0119550021630869e-05, "loss": 0.5808, "step": 20639 }, { "epoch": 1.5340022296544036, "grad_norm": 1.6934582028859266, "learning_rate": 1.0118747710335091e-05, "loss": 0.5394, "step": 20640 }, { "epoch": 1.5340765514678558, "grad_norm": 2.6005503519612514, "learning_rate": 1.0117945398274824e-05, "loss": 0.6496, "step": 20641 }, { "epoch": 1.534150873281308, "grad_norm": 1.9066994933673083, "learning_rate": 1.0117143085455227e-05, "loss": 0.7405, "step": 20642 }, { "epoch": 1.5342251950947603, "grad_norm": 1.6588827911684771, "learning_rate": 1.0116340771881471e-05, "loss": 0.5479, "step": 20643 }, { "epoch": 1.5342995169082125, "grad_norm": 2.137117386993512, "learning_rate": 1.0115538457558722e-05, "loss": 0.646, "step": 20644 }, { "epoch": 1.5343738387216648, "grad_norm": 1.9013901344737614, "learning_rate": 1.0114736142492143e-05, "loss": 0.5711, "step": 20645 }, { "epoch": 1.534448160535117, "grad_norm": 1.7155746461771568, "learning_rate": 1.0113933826686894e-05, "loss": 0.5431, "step": 20646 }, { "epoch": 1.5345224823485693, "grad_norm": 2.1420315689490943, "learning_rate": 1.0113131510148149e-05, "loss": 0.7114, "step": 20647 }, { "epoch": 1.5345968041620215, "grad_norm": 2.070211895194098, "learning_rate": 1.0112329192881065e-05, "loss": 0.6166, "step": 20648 }, { "epoch": 1.534671125975474, "grad_norm": 1.7722661352907867, "learning_rate": 1.0111526874890816e-05, "loss": 0.5377, "step": 20649 }, { "epoch": 1.534745447788926, "grad_norm": 1.726725151648369, "learning_rate": 1.0110724556182564e-05, "loss": 0.578, "step": 20650 }, { "epoch": 1.5348197696023784, "grad_norm": 1.808426371316106, "learning_rate": 1.0109922236761467e-05, "loss": 0.5193, "step": 20651 }, { "epoch": 1.5348940914158304, "grad_norm": 2.0257741230739876, "learning_rate": 1.0109119916632699e-05, "loss": 0.6078, "step": 20652 }, { "epoch": 1.534968413229283, "grad_norm": 1.9868458244749447, "learning_rate": 1.0108317595801421e-05, "loss": 0.6938, "step": 20653 }, { "epoch": 1.535042735042735, "grad_norm": 1.8760157936539597, "learning_rate": 1.0107515274272801e-05, "loss": 0.5751, "step": 20654 }, { "epoch": 1.5351170568561874, "grad_norm": 1.7567221656428362, "learning_rate": 1.0106712952052004e-05, "loss": 0.5378, "step": 20655 }, { "epoch": 1.5351913786696394, "grad_norm": 2.1812472336462556, "learning_rate": 1.0105910629144198e-05, "loss": 0.5495, "step": 20656 }, { "epoch": 1.5352657004830919, "grad_norm": 1.7669478680219615, "learning_rate": 1.0105108305554538e-05, "loss": 0.6168, "step": 20657 }, { "epoch": 1.5353400222965439, "grad_norm": 1.8298689478832186, "learning_rate": 1.01043059812882e-05, "loss": 0.6262, "step": 20658 }, { "epoch": 1.5354143441099963, "grad_norm": 2.341735037633112, "learning_rate": 1.0103503656350344e-05, "loss": 0.8144, "step": 20659 }, { "epoch": 1.5354886659234486, "grad_norm": 1.8522867822636229, "learning_rate": 1.0102701330746139e-05, "loss": 0.6273, "step": 20660 }, { "epoch": 1.5355629877369008, "grad_norm": 2.247194991987527, "learning_rate": 1.0101899004480748e-05, "loss": 0.7744, "step": 20661 }, { "epoch": 1.535637309550353, "grad_norm": 1.900448105915404, "learning_rate": 1.0101096677559335e-05, "loss": 0.4805, "step": 20662 }, { "epoch": 1.5357116313638053, "grad_norm": 1.8216600414602655, "learning_rate": 1.0100294349987067e-05, "loss": 0.5656, "step": 20663 }, { "epoch": 1.5357859531772575, "grad_norm": 1.6496576025212077, "learning_rate": 1.0099492021769108e-05, "loss": 0.5116, "step": 20664 }, { "epoch": 1.5358602749907098, "grad_norm": 2.1860992419410477, "learning_rate": 1.0098689692910625e-05, "loss": 0.6903, "step": 20665 }, { "epoch": 1.535934596804162, "grad_norm": 2.4839855163251388, "learning_rate": 1.0097887363416785e-05, "loss": 0.7346, "step": 20666 }, { "epoch": 1.5360089186176142, "grad_norm": 1.998028875029509, "learning_rate": 1.0097085033292753e-05, "loss": 0.7431, "step": 20667 }, { "epoch": 1.5360832404310665, "grad_norm": 1.5812044783401638, "learning_rate": 1.009628270254369e-05, "loss": 0.4657, "step": 20668 }, { "epoch": 1.5361575622445187, "grad_norm": 2.329583400651624, "learning_rate": 1.0095480371174763e-05, "loss": 0.6004, "step": 20669 }, { "epoch": 1.5362318840579712, "grad_norm": 1.7294006808887035, "learning_rate": 1.0094678039191137e-05, "loss": 0.666, "step": 20670 }, { "epoch": 1.5363062058714232, "grad_norm": 1.9584649595472858, "learning_rate": 1.0093875706597981e-05, "loss": 0.6115, "step": 20671 }, { "epoch": 1.5363805276848757, "grad_norm": 1.6984058108190898, "learning_rate": 1.0093073373400458e-05, "loss": 0.5075, "step": 20672 }, { "epoch": 1.5364548494983277, "grad_norm": 1.8896375462098012, "learning_rate": 1.0092271039603734e-05, "loss": 0.5966, "step": 20673 }, { "epoch": 1.5365291713117801, "grad_norm": 1.7829974635866588, "learning_rate": 1.0091468705212973e-05, "loss": 0.4147, "step": 20674 }, { "epoch": 1.5366034931252321, "grad_norm": 2.1776582963486995, "learning_rate": 1.009066637023334e-05, "loss": 0.7586, "step": 20675 }, { "epoch": 1.5366778149386846, "grad_norm": 1.6473523660043397, "learning_rate": 1.008986403467e-05, "loss": 0.5435, "step": 20676 }, { "epoch": 1.5367521367521366, "grad_norm": 1.3883544515987072, "learning_rate": 1.0089061698528127e-05, "loss": 0.3939, "step": 20677 }, { "epoch": 1.536826458565589, "grad_norm": 2.360712279412511, "learning_rate": 1.0088259361812875e-05, "loss": 0.5804, "step": 20678 }, { "epoch": 1.536900780379041, "grad_norm": 2.3989059293050055, "learning_rate": 1.0087457024529414e-05, "loss": 0.6355, "step": 20679 }, { "epoch": 1.5369751021924936, "grad_norm": 1.7057935800892108, "learning_rate": 1.0086654686682909e-05, "loss": 0.5663, "step": 20680 }, { "epoch": 1.5370494240059458, "grad_norm": 2.0355768256551214, "learning_rate": 1.0085852348278525e-05, "loss": 0.6277, "step": 20681 }, { "epoch": 1.537123745819398, "grad_norm": 1.8886366451025958, "learning_rate": 1.0085050009321429e-05, "loss": 0.5654, "step": 20682 }, { "epoch": 1.5371980676328503, "grad_norm": 1.924566366693482, "learning_rate": 1.0084247669816785e-05, "loss": 0.6196, "step": 20683 }, { "epoch": 1.5372723894463025, "grad_norm": 2.251844773153987, "learning_rate": 1.0083445329769761e-05, "loss": 0.6252, "step": 20684 }, { "epoch": 1.5373467112597548, "grad_norm": 2.1405808342926274, "learning_rate": 1.0082642989185519e-05, "loss": 0.5985, "step": 20685 }, { "epoch": 1.537421033073207, "grad_norm": 1.6997185106529284, "learning_rate": 1.0081840648069223e-05, "loss": 0.5526, "step": 20686 }, { "epoch": 1.5374953548866592, "grad_norm": 2.030697487095272, "learning_rate": 1.0081038306426044e-05, "loss": 0.7301, "step": 20687 }, { "epoch": 1.5375696767001115, "grad_norm": 2.535909129860151, "learning_rate": 1.0080235964261145e-05, "loss": 0.5193, "step": 20688 }, { "epoch": 1.5376439985135637, "grad_norm": 2.076221347377481, "learning_rate": 1.0079433621579688e-05, "loss": 0.5733, "step": 20689 }, { "epoch": 1.537718320327016, "grad_norm": 1.9901396126761102, "learning_rate": 1.0078631278386846e-05, "loss": 0.736, "step": 20690 }, { "epoch": 1.5377926421404682, "grad_norm": 2.0464148512087084, "learning_rate": 1.0077828934687776e-05, "loss": 0.5971, "step": 20691 }, { "epoch": 1.5378669639539204, "grad_norm": 1.8486452496490864, "learning_rate": 1.0077026590487648e-05, "loss": 0.6125, "step": 20692 }, { "epoch": 1.5379412857673729, "grad_norm": 1.7013923662848556, "learning_rate": 1.007622424579163e-05, "loss": 0.6263, "step": 20693 }, { "epoch": 1.538015607580825, "grad_norm": 2.1350804177881355, "learning_rate": 1.007542190060488e-05, "loss": 0.5459, "step": 20694 }, { "epoch": 1.5380899293942774, "grad_norm": 2.1295948036370773, "learning_rate": 1.0074619554932572e-05, "loss": 0.5567, "step": 20695 }, { "epoch": 1.5381642512077294, "grad_norm": 1.8791932792594226, "learning_rate": 1.0073817208779864e-05, "loss": 0.5573, "step": 20696 }, { "epoch": 1.5382385730211818, "grad_norm": 2.0895828018360234, "learning_rate": 1.0073014862151925e-05, "loss": 0.5415, "step": 20697 }, { "epoch": 1.5383128948346338, "grad_norm": 2.328072040819654, "learning_rate": 1.007221251505392e-05, "loss": 0.6802, "step": 20698 }, { "epoch": 1.5383872166480863, "grad_norm": 1.8996414117532272, "learning_rate": 1.0071410167491016e-05, "loss": 0.6279, "step": 20699 }, { "epoch": 1.5384615384615383, "grad_norm": 1.8585502012155946, "learning_rate": 1.0070607819468375e-05, "loss": 0.6795, "step": 20700 }, { "epoch": 1.5385358602749908, "grad_norm": 2.2795554557514164, "learning_rate": 1.0069805470991168e-05, "loss": 0.7588, "step": 20701 }, { "epoch": 1.5386101820884428, "grad_norm": 1.9660504708218116, "learning_rate": 1.0069003122064554e-05, "loss": 0.5812, "step": 20702 }, { "epoch": 1.5386845039018953, "grad_norm": 1.8611765929432162, "learning_rate": 1.00682007726937e-05, "loss": 0.5741, "step": 20703 }, { "epoch": 1.5387588257153475, "grad_norm": 2.067816700981156, "learning_rate": 1.0067398422883776e-05, "loss": 0.5852, "step": 20704 }, { "epoch": 1.5388331475287997, "grad_norm": 2.5524668072929586, "learning_rate": 1.0066596072639943e-05, "loss": 0.7066, "step": 20705 }, { "epoch": 1.538907469342252, "grad_norm": 1.8540645622176026, "learning_rate": 1.0065793721967367e-05, "loss": 0.6428, "step": 20706 }, { "epoch": 1.5389817911557042, "grad_norm": 1.7893612213822927, "learning_rate": 1.0064991370871217e-05, "loss": 0.5202, "step": 20707 }, { "epoch": 1.5390561129691565, "grad_norm": 2.0709738376297326, "learning_rate": 1.0064189019356655e-05, "loss": 0.588, "step": 20708 }, { "epoch": 1.5391304347826087, "grad_norm": 1.8530496618317884, "learning_rate": 1.0063386667428848e-05, "loss": 0.557, "step": 20709 }, { "epoch": 1.539204756596061, "grad_norm": 2.2334705302180726, "learning_rate": 1.006258431509296e-05, "loss": 0.6158, "step": 20710 }, { "epoch": 1.5392790784095132, "grad_norm": 2.0704944764423687, "learning_rate": 1.0061781962354156e-05, "loss": 0.6316, "step": 20711 }, { "epoch": 1.5393534002229654, "grad_norm": 1.8997374388083006, "learning_rate": 1.0060979609217607e-05, "loss": 0.6287, "step": 20712 }, { "epoch": 1.5394277220364176, "grad_norm": 1.9726998152815678, "learning_rate": 1.006017725568847e-05, "loss": 0.5965, "step": 20713 }, { "epoch": 1.5395020438498699, "grad_norm": 2.4743053172425866, "learning_rate": 1.0059374901771917e-05, "loss": 0.6912, "step": 20714 }, { "epoch": 1.5395763656633221, "grad_norm": 2.093484458500033, "learning_rate": 1.0058572547473114e-05, "loss": 0.664, "step": 20715 }, { "epoch": 1.5396506874767746, "grad_norm": 1.7399007662020327, "learning_rate": 1.005777019279722e-05, "loss": 0.4975, "step": 20716 }, { "epoch": 1.5397250092902266, "grad_norm": 2.5723847609556385, "learning_rate": 1.0056967837749405e-05, "loss": 0.8086, "step": 20717 }, { "epoch": 1.539799331103679, "grad_norm": 1.6037482920380124, "learning_rate": 1.0056165482334836e-05, "loss": 0.5636, "step": 20718 }, { "epoch": 1.539873652917131, "grad_norm": 1.796173333368742, "learning_rate": 1.0055363126558675e-05, "loss": 0.6127, "step": 20719 }, { "epoch": 1.5399479747305835, "grad_norm": 1.9290048971094733, "learning_rate": 1.0054560770426087e-05, "loss": 0.5391, "step": 20720 }, { "epoch": 1.5400222965440356, "grad_norm": 1.951458391036275, "learning_rate": 1.0053758413942245e-05, "loss": 0.6695, "step": 20721 }, { "epoch": 1.540096618357488, "grad_norm": 1.899470821220205, "learning_rate": 1.0052956057112304e-05, "loss": 0.6163, "step": 20722 }, { "epoch": 1.54017094017094, "grad_norm": 1.8970211775577932, "learning_rate": 1.0052153699941439e-05, "loss": 0.5691, "step": 20723 }, { "epoch": 1.5402452619843925, "grad_norm": 1.8107212784549374, "learning_rate": 1.0051351342434808e-05, "loss": 0.5819, "step": 20724 }, { "epoch": 1.5403195837978445, "grad_norm": 1.9525961745164246, "learning_rate": 1.005054898459758e-05, "loss": 0.677, "step": 20725 }, { "epoch": 1.540393905611297, "grad_norm": 1.8986810927998652, "learning_rate": 1.0049746626434923e-05, "loss": 0.6368, "step": 20726 }, { "epoch": 1.5404682274247492, "grad_norm": 2.0356372640394156, "learning_rate": 1.0048944267951997e-05, "loss": 0.7838, "step": 20727 }, { "epoch": 1.5405425492382014, "grad_norm": 1.80567020831885, "learning_rate": 1.004814190915397e-05, "loss": 0.4837, "step": 20728 }, { "epoch": 1.5406168710516537, "grad_norm": 1.6641983050063094, "learning_rate": 1.0047339550046011e-05, "loss": 0.4551, "step": 20729 }, { "epoch": 1.540691192865106, "grad_norm": 1.7140984531336585, "learning_rate": 1.0046537190633278e-05, "loss": 0.5759, "step": 20730 }, { "epoch": 1.5407655146785582, "grad_norm": 1.8604049840746308, "learning_rate": 1.0045734830920946e-05, "loss": 0.5889, "step": 20731 }, { "epoch": 1.5408398364920104, "grad_norm": 2.0562404639839746, "learning_rate": 1.0044932470914173e-05, "loss": 0.7361, "step": 20732 }, { "epoch": 1.5409141583054626, "grad_norm": 2.0166758186183116, "learning_rate": 1.0044130110618127e-05, "loss": 0.6172, "step": 20733 }, { "epoch": 1.5409884801189149, "grad_norm": 2.108454813922482, "learning_rate": 1.0043327750037973e-05, "loss": 0.5626, "step": 20734 }, { "epoch": 1.541062801932367, "grad_norm": 2.029160187902315, "learning_rate": 1.0042525389178879e-05, "loss": 0.7435, "step": 20735 }, { "epoch": 1.5411371237458193, "grad_norm": 2.389186791958878, "learning_rate": 1.0041723028046007e-05, "loss": 0.6048, "step": 20736 }, { "epoch": 1.5412114455592718, "grad_norm": 1.7520652366692566, "learning_rate": 1.0040920666644525e-05, "loss": 0.5992, "step": 20737 }, { "epoch": 1.5412857673727238, "grad_norm": 1.9677552007243004, "learning_rate": 1.0040118304979598e-05, "loss": 0.6457, "step": 20738 }, { "epoch": 1.5413600891861763, "grad_norm": 7.435043818787198, "learning_rate": 1.0039315943056388e-05, "loss": 0.6493, "step": 20739 }, { "epoch": 1.5414344109996283, "grad_norm": 1.8711476443238222, "learning_rate": 1.0038513580880072e-05, "loss": 0.5328, "step": 20740 }, { "epoch": 1.5415087328130808, "grad_norm": 2.365345335701087, "learning_rate": 1.0037711218455801e-05, "loss": 0.7261, "step": 20741 }, { "epoch": 1.5415830546265328, "grad_norm": 1.906239201005883, "learning_rate": 1.003690885578875e-05, "loss": 0.5426, "step": 20742 }, { "epoch": 1.5416573764399852, "grad_norm": 1.5411465643833366, "learning_rate": 1.003610649288408e-05, "loss": 0.4785, "step": 20743 }, { "epoch": 1.5417316982534373, "grad_norm": 1.6839470467037316, "learning_rate": 1.003530412974696e-05, "loss": 0.5165, "step": 20744 }, { "epoch": 1.5418060200668897, "grad_norm": 1.9888736230848323, "learning_rate": 1.0034501766382549e-05, "loss": 0.6477, "step": 20745 }, { "epoch": 1.5418803418803417, "grad_norm": 1.9499431465493708, "learning_rate": 1.0033699402796024e-05, "loss": 0.6275, "step": 20746 }, { "epoch": 1.5419546636937942, "grad_norm": 2.0070440380078374, "learning_rate": 1.003289703899254e-05, "loss": 0.6905, "step": 20747 }, { "epoch": 1.5420289855072464, "grad_norm": 1.9643161743750552, "learning_rate": 1.003209467497727e-05, "loss": 0.6747, "step": 20748 }, { "epoch": 1.5421033073206987, "grad_norm": 1.8235634565306356, "learning_rate": 1.0031292310755372e-05, "loss": 0.567, "step": 20749 }, { "epoch": 1.542177629134151, "grad_norm": 1.6550566645681373, "learning_rate": 1.0030489946332019e-05, "loss": 0.4316, "step": 20750 }, { "epoch": 1.5422519509476031, "grad_norm": 2.1020778261393, "learning_rate": 1.0029687581712368e-05, "loss": 0.5104, "step": 20751 }, { "epoch": 1.5423262727610554, "grad_norm": 2.4194847513793554, "learning_rate": 1.0028885216901596e-05, "loss": 0.4988, "step": 20752 }, { "epoch": 1.5424005945745076, "grad_norm": 1.715462525669484, "learning_rate": 1.0028082851904863e-05, "loss": 0.5354, "step": 20753 }, { "epoch": 1.5424749163879599, "grad_norm": 1.8406019983839774, "learning_rate": 1.002728048672733e-05, "loss": 0.7502, "step": 20754 }, { "epoch": 1.542549238201412, "grad_norm": 2.088860600763226, "learning_rate": 1.0026478121374168e-05, "loss": 0.6299, "step": 20755 }, { "epoch": 1.5426235600148643, "grad_norm": 1.8761439403270164, "learning_rate": 1.002567575585054e-05, "loss": 0.6797, "step": 20756 }, { "epoch": 1.5426978818283166, "grad_norm": 1.5310439380481902, "learning_rate": 1.0024873390161616e-05, "loss": 0.4365, "step": 20757 }, { "epoch": 1.5427722036417688, "grad_norm": 2.3164891014758027, "learning_rate": 1.0024071024312555e-05, "loss": 0.6818, "step": 20758 }, { "epoch": 1.542846525455221, "grad_norm": 2.039838358829804, "learning_rate": 1.002326865830853e-05, "loss": 0.6576, "step": 20759 }, { "epoch": 1.5429208472686735, "grad_norm": 1.691147116205076, "learning_rate": 1.0022466292154697e-05, "loss": 0.6776, "step": 20760 }, { "epoch": 1.5429951690821255, "grad_norm": 2.3703117933245266, "learning_rate": 1.002166392585623e-05, "loss": 0.7298, "step": 20761 }, { "epoch": 1.543069490895578, "grad_norm": 2.0320156341134292, "learning_rate": 1.0020861559418291e-05, "loss": 0.605, "step": 20762 }, { "epoch": 1.54314381270903, "grad_norm": 2.0447615110657984, "learning_rate": 1.002005919284605e-05, "loss": 0.6224, "step": 20763 }, { "epoch": 1.5432181345224825, "grad_norm": 1.8574245959206095, "learning_rate": 1.0019256826144668e-05, "loss": 0.6526, "step": 20764 }, { "epoch": 1.5432924563359345, "grad_norm": 1.6074692447737269, "learning_rate": 1.001845445931931e-05, "loss": 0.5851, "step": 20765 }, { "epoch": 1.543366778149387, "grad_norm": 1.7932934139004229, "learning_rate": 1.001765209237514e-05, "loss": 0.6073, "step": 20766 }, { "epoch": 1.543441099962839, "grad_norm": 2.2461841954340223, "learning_rate": 1.0016849725317331e-05, "loss": 0.53, "step": 20767 }, { "epoch": 1.5435154217762914, "grad_norm": 1.8708668911574575, "learning_rate": 1.001604735815104e-05, "loss": 0.5141, "step": 20768 }, { "epoch": 1.5435897435897434, "grad_norm": 2.0717927751734257, "learning_rate": 1.0015244990881443e-05, "loss": 0.7214, "step": 20769 }, { "epoch": 1.543664065403196, "grad_norm": 2.062445494168662, "learning_rate": 1.0014442623513695e-05, "loss": 0.4634, "step": 20770 }, { "epoch": 1.5437383872166481, "grad_norm": 2.398841606946501, "learning_rate": 1.001364025605297e-05, "loss": 0.7183, "step": 20771 }, { "epoch": 1.5438127090301004, "grad_norm": 1.624806916659625, "learning_rate": 1.0012837888504425e-05, "loss": 0.5038, "step": 20772 }, { "epoch": 1.5438870308435526, "grad_norm": 1.7355786354012004, "learning_rate": 1.0012035520873233e-05, "loss": 0.5603, "step": 20773 }, { "epoch": 1.5439613526570048, "grad_norm": 2.657305423538693, "learning_rate": 1.0011233153164557e-05, "loss": 0.6178, "step": 20774 }, { "epoch": 1.544035674470457, "grad_norm": 2.107353748141702, "learning_rate": 1.0010430785383565e-05, "loss": 0.603, "step": 20775 }, { "epoch": 1.5441099962839093, "grad_norm": 1.838723578592538, "learning_rate": 1.0009628417535415e-05, "loss": 0.5985, "step": 20776 }, { "epoch": 1.5441843180973616, "grad_norm": 1.524648292581953, "learning_rate": 1.000882604962528e-05, "loss": 0.5121, "step": 20777 }, { "epoch": 1.5442586399108138, "grad_norm": 2.2764614608503573, "learning_rate": 1.0008023681658324e-05, "loss": 0.6444, "step": 20778 }, { "epoch": 1.544332961724266, "grad_norm": 3.1148891191023007, "learning_rate": 1.000722131363971e-05, "loss": 0.6598, "step": 20779 }, { "epoch": 1.5444072835377183, "grad_norm": 2.1305527301889873, "learning_rate": 1.0006418945574608e-05, "loss": 0.5482, "step": 20780 }, { "epoch": 1.5444816053511705, "grad_norm": 1.9267253476886388, "learning_rate": 1.0005616577468183e-05, "loss": 0.5676, "step": 20781 }, { "epoch": 1.5445559271646228, "grad_norm": 1.7764710874633052, "learning_rate": 1.0004814209325595e-05, "loss": 0.6522, "step": 20782 }, { "epoch": 1.5446302489780752, "grad_norm": 2.170289042961223, "learning_rate": 1.0004011841152012e-05, "loss": 0.7273, "step": 20783 }, { "epoch": 1.5447045707915272, "grad_norm": 2.560845315955704, "learning_rate": 1.0003209472952604e-05, "loss": 0.8721, "step": 20784 }, { "epoch": 1.5447788926049797, "grad_norm": 1.5907232792871848, "learning_rate": 1.0002407104732534e-05, "loss": 0.5369, "step": 20785 }, { "epoch": 1.5448532144184317, "grad_norm": 2.2345080042454217, "learning_rate": 1.0001604736496966e-05, "loss": 0.5722, "step": 20786 }, { "epoch": 1.5449275362318842, "grad_norm": 2.1740228787520772, "learning_rate": 1.0000802368251068e-05, "loss": 0.4231, "step": 20787 }, { "epoch": 1.5450018580453362, "grad_norm": 2.263880111968161, "learning_rate": 1e-05, "loss": 0.5373, "step": 20788 }, { "epoch": 1.5450761798587886, "grad_norm": 2.082569723559748, "learning_rate": 9.999197631748937e-06, "loss": 0.6385, "step": 20789 }, { "epoch": 1.5451505016722407, "grad_norm": 1.7248227443301103, "learning_rate": 9.998395263503036e-06, "loss": 0.5162, "step": 20790 }, { "epoch": 1.5452248234856931, "grad_norm": 1.600020358026759, "learning_rate": 9.99759289526747e-06, "loss": 0.537, "step": 20791 }, { "epoch": 1.5452991452991451, "grad_norm": 1.725076586607472, "learning_rate": 9.996790527047398e-06, "loss": 0.5266, "step": 20792 }, { "epoch": 1.5453734671125976, "grad_norm": 2.153611179696087, "learning_rate": 9.995988158847988e-06, "loss": 0.7346, "step": 20793 }, { "epoch": 1.5454477889260498, "grad_norm": 1.923941324486852, "learning_rate": 9.995185790674412e-06, "loss": 0.6103, "step": 20794 }, { "epoch": 1.545522110739502, "grad_norm": 1.8852722817245506, "learning_rate": 9.994383422531824e-06, "loss": 0.5835, "step": 20795 }, { "epoch": 1.5455964325529543, "grad_norm": 3.072353440302281, "learning_rate": 9.993581054425394e-06, "loss": 0.7616, "step": 20796 }, { "epoch": 1.5456707543664066, "grad_norm": 2.2104971807795275, "learning_rate": 9.992778686360292e-06, "loss": 0.6693, "step": 20797 }, { "epoch": 1.5457450761798588, "grad_norm": 1.7837691520339545, "learning_rate": 9.99197631834168e-06, "loss": 0.5126, "step": 20798 }, { "epoch": 1.545819397993311, "grad_norm": 1.4332089183813383, "learning_rate": 9.991173950374722e-06, "loss": 0.5043, "step": 20799 }, { "epoch": 1.5458937198067633, "grad_norm": 3.446039337993005, "learning_rate": 9.990371582464587e-06, "loss": 0.6321, "step": 20800 }, { "epoch": 1.5459680416202155, "grad_norm": 2.0236908209447004, "learning_rate": 9.989569214616438e-06, "loss": 0.584, "step": 20801 }, { "epoch": 1.5460423634336677, "grad_norm": 1.903606972857886, "learning_rate": 9.988766846835445e-06, "loss": 0.4998, "step": 20802 }, { "epoch": 1.54611668524712, "grad_norm": 1.4631922141424694, "learning_rate": 9.987964479126768e-06, "loss": 0.3846, "step": 20803 }, { "epoch": 1.5461910070605724, "grad_norm": 1.7243007680786695, "learning_rate": 9.987162111495575e-06, "loss": 0.5641, "step": 20804 }, { "epoch": 1.5462653288740245, "grad_norm": 2.094458217044592, "learning_rate": 9.986359743947036e-06, "loss": 0.6067, "step": 20805 }, { "epoch": 1.546339650687477, "grad_norm": 2.392657367947475, "learning_rate": 9.985557376486308e-06, "loss": 0.75, "step": 20806 }, { "epoch": 1.546413972500929, "grad_norm": 1.7887399388737513, "learning_rate": 9.98475500911856e-06, "loss": 0.5832, "step": 20807 }, { "epoch": 1.5464882943143814, "grad_norm": 2.4908436480365594, "learning_rate": 9.983952641848962e-06, "loss": 0.5987, "step": 20808 }, { "epoch": 1.5465626161278334, "grad_norm": 1.7814470564573417, "learning_rate": 9.983150274682674e-06, "loss": 0.5019, "step": 20809 }, { "epoch": 1.5466369379412859, "grad_norm": 2.299142902294432, "learning_rate": 9.982347907624863e-06, "loss": 0.6796, "step": 20810 }, { "epoch": 1.5467112597547379, "grad_norm": 2.1949006122887385, "learning_rate": 9.981545540680696e-06, "loss": 0.708, "step": 20811 }, { "epoch": 1.5467855815681903, "grad_norm": 1.774647389807926, "learning_rate": 9.980743173855335e-06, "loss": 0.5765, "step": 20812 }, { "epoch": 1.5468599033816424, "grad_norm": 1.7444513192921827, "learning_rate": 9.979940807153954e-06, "loss": 0.5124, "step": 20813 }, { "epoch": 1.5469342251950948, "grad_norm": 1.704029623134053, "learning_rate": 9.979138440581709e-06, "loss": 0.5981, "step": 20814 }, { "epoch": 1.547008547008547, "grad_norm": 2.020859130829092, "learning_rate": 9.978336074143771e-06, "loss": 0.5789, "step": 20815 }, { "epoch": 1.5470828688219993, "grad_norm": 1.9596929271449692, "learning_rate": 9.977533707845303e-06, "loss": 0.53, "step": 20816 }, { "epoch": 1.5471571906354515, "grad_norm": 1.455846072542308, "learning_rate": 9.976731341691477e-06, "loss": 0.3757, "step": 20817 }, { "epoch": 1.5472315124489038, "grad_norm": 2.003150007743658, "learning_rate": 9.975928975687447e-06, "loss": 0.6814, "step": 20818 }, { "epoch": 1.547305834262356, "grad_norm": 2.16216008276638, "learning_rate": 9.97512660983839e-06, "loss": 0.6695, "step": 20819 }, { "epoch": 1.5473801560758083, "grad_norm": 1.6600066787997547, "learning_rate": 9.974324244149464e-06, "loss": 0.4828, "step": 20820 }, { "epoch": 1.5474544778892605, "grad_norm": 2.126774266918549, "learning_rate": 9.973521878625836e-06, "loss": 0.7729, "step": 20821 }, { "epoch": 1.5475287997027127, "grad_norm": 2.0497382554638244, "learning_rate": 9.972719513272673e-06, "loss": 0.7, "step": 20822 }, { "epoch": 1.547603121516165, "grad_norm": 2.7098576916988297, "learning_rate": 9.97191714809514e-06, "loss": 0.7868, "step": 20823 }, { "epoch": 1.5476774433296172, "grad_norm": 1.9807198540708832, "learning_rate": 9.971114783098408e-06, "loss": 0.6362, "step": 20824 }, { "epoch": 1.5477517651430694, "grad_norm": 2.028558000667359, "learning_rate": 9.97031241828763e-06, "loss": 0.6278, "step": 20825 }, { "epoch": 1.5478260869565217, "grad_norm": 2.656424417474462, "learning_rate": 9.969510053667983e-06, "loss": 0.6719, "step": 20826 }, { "epoch": 1.5479004087699741, "grad_norm": 2.4854410998042273, "learning_rate": 9.968707689244628e-06, "loss": 0.5658, "step": 20827 }, { "epoch": 1.5479747305834262, "grad_norm": 2.0436074447533326, "learning_rate": 9.967905325022736e-06, "loss": 0.5692, "step": 20828 }, { "epoch": 1.5480490523968786, "grad_norm": 1.6642190567104935, "learning_rate": 9.967102961007463e-06, "loss": 0.5872, "step": 20829 }, { "epoch": 1.5481233742103306, "grad_norm": 2.0464103364999353, "learning_rate": 9.966300597203981e-06, "loss": 0.5976, "step": 20830 }, { "epoch": 1.548197696023783, "grad_norm": 1.9550691292975093, "learning_rate": 9.965498233617453e-06, "loss": 0.5995, "step": 20831 }, { "epoch": 1.5482720178372351, "grad_norm": 1.5975929590708078, "learning_rate": 9.964695870253045e-06, "loss": 0.4277, "step": 20832 }, { "epoch": 1.5483463396506876, "grad_norm": 2.03762291490815, "learning_rate": 9.963893507115922e-06, "loss": 0.6489, "step": 20833 }, { "epoch": 1.5484206614641396, "grad_norm": 2.6010608697461053, "learning_rate": 9.963091144211254e-06, "loss": 0.7765, "step": 20834 }, { "epoch": 1.548494983277592, "grad_norm": 2.144912374604656, "learning_rate": 9.962288781544202e-06, "loss": 0.7309, "step": 20835 }, { "epoch": 1.548569305091044, "grad_norm": 1.9053145409501988, "learning_rate": 9.961486419119932e-06, "loss": 0.6486, "step": 20836 }, { "epoch": 1.5486436269044965, "grad_norm": 1.870568609854924, "learning_rate": 9.96068405694361e-06, "loss": 0.6108, "step": 20837 }, { "epoch": 1.5487179487179488, "grad_norm": 2.1567182634376674, "learning_rate": 9.959881695020404e-06, "loss": 0.6735, "step": 20838 }, { "epoch": 1.548792270531401, "grad_norm": 2.0923915826856763, "learning_rate": 9.95907933335548e-06, "loss": 0.6067, "step": 20839 }, { "epoch": 1.5488665923448532, "grad_norm": 1.6227920553006623, "learning_rate": 9.958276971953997e-06, "loss": 0.5067, "step": 20840 }, { "epoch": 1.5489409141583055, "grad_norm": 1.8466216725357312, "learning_rate": 9.957474610821128e-06, "loss": 0.6201, "step": 20841 }, { "epoch": 1.5490152359717577, "grad_norm": 10.095986055736383, "learning_rate": 9.95667224996203e-06, "loss": 0.5802, "step": 20842 }, { "epoch": 1.54908955778521, "grad_norm": 2.0966809967168096, "learning_rate": 9.955869889381877e-06, "loss": 0.6359, "step": 20843 }, { "epoch": 1.5491638795986622, "grad_norm": 2.0077847383356118, "learning_rate": 9.95506752908583e-06, "loss": 0.6301, "step": 20844 }, { "epoch": 1.5492382014121144, "grad_norm": 1.9059540205149708, "learning_rate": 9.954265169079055e-06, "loss": 0.6394, "step": 20845 }, { "epoch": 1.5493125232255667, "grad_norm": 4.886500512665148, "learning_rate": 9.953462809366724e-06, "loss": 0.6102, "step": 20846 }, { "epoch": 1.549386845039019, "grad_norm": 2.3035049402300487, "learning_rate": 9.952660449953992e-06, "loss": 0.6458, "step": 20847 }, { "epoch": 1.5494611668524712, "grad_norm": 1.6953056608305532, "learning_rate": 9.95185809084603e-06, "loss": 0.4587, "step": 20848 }, { "epoch": 1.5495354886659234, "grad_norm": 1.637239872668289, "learning_rate": 9.951055732048003e-06, "loss": 0.35, "step": 20849 }, { "epoch": 1.5496098104793758, "grad_norm": 2.2531360722651868, "learning_rate": 9.950253373565079e-06, "loss": 0.6465, "step": 20850 }, { "epoch": 1.5496841322928279, "grad_norm": 2.194831427919236, "learning_rate": 9.949451015402423e-06, "loss": 0.6599, "step": 20851 }, { "epoch": 1.5497584541062803, "grad_norm": 1.5931792700261695, "learning_rate": 9.948648657565197e-06, "loss": 0.4282, "step": 20852 }, { "epoch": 1.5498327759197323, "grad_norm": 4.14165724620688, "learning_rate": 9.947846300058566e-06, "loss": 0.4901, "step": 20853 }, { "epoch": 1.5499070977331848, "grad_norm": 2.4262810472562277, "learning_rate": 9.947043942887697e-06, "loss": 0.7805, "step": 20854 }, { "epoch": 1.5499814195466368, "grad_norm": 2.239174372486282, "learning_rate": 9.946241586057758e-06, "loss": 0.456, "step": 20855 }, { "epoch": 1.5500557413600893, "grad_norm": 1.8502838040905685, "learning_rate": 9.945439229573914e-06, "loss": 0.5841, "step": 20856 }, { "epoch": 1.5501300631735413, "grad_norm": 2.599999536639332, "learning_rate": 9.944636873441329e-06, "loss": 0.6712, "step": 20857 }, { "epoch": 1.5502043849869938, "grad_norm": 1.9040849961307673, "learning_rate": 9.943834517665168e-06, "loss": 0.4488, "step": 20858 }, { "epoch": 1.5502787068004458, "grad_norm": 3.013449272999735, "learning_rate": 9.943032162250596e-06, "loss": 0.6731, "step": 20859 }, { "epoch": 1.5503530286138982, "grad_norm": 1.9554402798184864, "learning_rate": 9.942229807202782e-06, "loss": 0.6383, "step": 20860 }, { "epoch": 1.5504273504273505, "grad_norm": 1.6779292202461324, "learning_rate": 9.94142745252689e-06, "loss": 0.5522, "step": 20861 }, { "epoch": 1.5505016722408027, "grad_norm": 2.2179348084838324, "learning_rate": 9.940625098228086e-06, "loss": 0.5988, "step": 20862 }, { "epoch": 1.550575994054255, "grad_norm": 1.7553700704856234, "learning_rate": 9.939822744311533e-06, "loss": 0.6248, "step": 20863 }, { "epoch": 1.5506503158677072, "grad_norm": 3.0093252235957646, "learning_rate": 9.939020390782398e-06, "loss": 0.6185, "step": 20864 }, { "epoch": 1.5507246376811594, "grad_norm": 1.5785941123810263, "learning_rate": 9.938218037645846e-06, "loss": 0.5974, "step": 20865 }, { "epoch": 1.5507989594946117, "grad_norm": 1.7762640058954011, "learning_rate": 9.937415684907043e-06, "loss": 0.524, "step": 20866 }, { "epoch": 1.550873281308064, "grad_norm": 2.0950842332836106, "learning_rate": 9.936613332571157e-06, "loss": 0.6794, "step": 20867 }, { "epoch": 1.5509476031215161, "grad_norm": 2.2277566448854818, "learning_rate": 9.935810980643348e-06, "loss": 0.7413, "step": 20868 }, { "epoch": 1.5510219249349684, "grad_norm": 2.345655072755888, "learning_rate": 9.935008629128786e-06, "loss": 0.704, "step": 20869 }, { "epoch": 1.5510962467484206, "grad_norm": 1.9455624855837361, "learning_rate": 9.934206278032633e-06, "loss": 0.5875, "step": 20870 }, { "epoch": 1.551170568561873, "grad_norm": 2.195186003454049, "learning_rate": 9.933403927360058e-06, "loss": 0.6845, "step": 20871 }, { "epoch": 1.551244890375325, "grad_norm": 1.8948991630166654, "learning_rate": 9.932601577116227e-06, "loss": 0.6183, "step": 20872 }, { "epoch": 1.5513192121887776, "grad_norm": 1.9395304788687913, "learning_rate": 9.931799227306303e-06, "loss": 0.687, "step": 20873 }, { "epoch": 1.5513935340022296, "grad_norm": 1.9450085641317965, "learning_rate": 9.930996877935453e-06, "loss": 0.5567, "step": 20874 }, { "epoch": 1.551467855815682, "grad_norm": 1.8634167167924673, "learning_rate": 9.930194529008839e-06, "loss": 0.4771, "step": 20875 }, { "epoch": 1.551542177629134, "grad_norm": 1.9264410143003674, "learning_rate": 9.929392180531629e-06, "loss": 0.5897, "step": 20876 }, { "epoch": 1.5516164994425865, "grad_norm": 1.726597077398001, "learning_rate": 9.928589832508989e-06, "loss": 0.584, "step": 20877 }, { "epoch": 1.5516908212560385, "grad_norm": 1.7309324698473516, "learning_rate": 9.927787484946084e-06, "loss": 0.538, "step": 20878 }, { "epoch": 1.551765143069491, "grad_norm": 1.5949610998986945, "learning_rate": 9.926985137848079e-06, "loss": 0.4473, "step": 20879 }, { "epoch": 1.551839464882943, "grad_norm": 2.3399303706513925, "learning_rate": 9.92618279122014e-06, "loss": 0.5595, "step": 20880 }, { "epoch": 1.5519137866963955, "grad_norm": 1.920412950963913, "learning_rate": 9.925380445067431e-06, "loss": 0.7004, "step": 20881 }, { "epoch": 1.5519881085098475, "grad_norm": 2.081105432675946, "learning_rate": 9.92457809939512e-06, "loss": 0.5505, "step": 20882 }, { "epoch": 1.5520624303233, "grad_norm": 2.0678290364425127, "learning_rate": 9.923775754208373e-06, "loss": 0.6586, "step": 20883 }, { "epoch": 1.5521367521367522, "grad_norm": 2.253166663082226, "learning_rate": 9.922973409512352e-06, "loss": 0.7541, "step": 20884 }, { "epoch": 1.5522110739502044, "grad_norm": 2.091778150862817, "learning_rate": 9.922171065312229e-06, "loss": 0.5945, "step": 20885 }, { "epoch": 1.5522853957636567, "grad_norm": 1.8978797987456184, "learning_rate": 9.921368721613159e-06, "loss": 0.5341, "step": 20886 }, { "epoch": 1.5523597175771089, "grad_norm": 2.0151882552828972, "learning_rate": 9.920566378420314e-06, "loss": 0.6094, "step": 20887 }, { "epoch": 1.5524340393905611, "grad_norm": 2.1226889496298402, "learning_rate": 9.919764035738859e-06, "loss": 0.5549, "step": 20888 }, { "epoch": 1.5525083612040134, "grad_norm": 1.8712752680767544, "learning_rate": 9.91896169357396e-06, "loss": 0.5387, "step": 20889 }, { "epoch": 1.5525826830174656, "grad_norm": 2.2672352871465056, "learning_rate": 9.918159351930779e-06, "loss": 0.7178, "step": 20890 }, { "epoch": 1.5526570048309178, "grad_norm": 2.9160045727386756, "learning_rate": 9.917357010814485e-06, "loss": 0.7924, "step": 20891 }, { "epoch": 1.55273132664437, "grad_norm": 2.4775070632634444, "learning_rate": 9.916554670230242e-06, "loss": 0.8447, "step": 20892 }, { "epoch": 1.5528056484578223, "grad_norm": 2.2542917633615116, "learning_rate": 9.915752330183215e-06, "loss": 0.7498, "step": 20893 }, { "epoch": 1.5528799702712748, "grad_norm": 1.7705274526484287, "learning_rate": 9.914949990678572e-06, "loss": 0.5918, "step": 20894 }, { "epoch": 1.5529542920847268, "grad_norm": 1.7159234702711312, "learning_rate": 9.914147651721477e-06, "loss": 0.6589, "step": 20895 }, { "epoch": 1.5530286138981793, "grad_norm": 1.584636021369419, "learning_rate": 9.913345313317096e-06, "loss": 0.5701, "step": 20896 }, { "epoch": 1.5531029357116313, "grad_norm": 2.6730707095962325, "learning_rate": 9.912542975470591e-06, "loss": 0.532, "step": 20897 }, { "epoch": 1.5531772575250837, "grad_norm": 1.7290427753391058, "learning_rate": 9.91174063818713e-06, "loss": 0.4731, "step": 20898 }, { "epoch": 1.5532515793385357, "grad_norm": 1.951244431859953, "learning_rate": 9.910938301471878e-06, "loss": 0.4894, "step": 20899 }, { "epoch": 1.5533259011519882, "grad_norm": 1.499216729284795, "learning_rate": 9.910135965330002e-06, "loss": 0.4525, "step": 20900 }, { "epoch": 1.5534002229654402, "grad_norm": 1.9088720709760882, "learning_rate": 9.909333629766664e-06, "loss": 0.6012, "step": 20901 }, { "epoch": 1.5534745447788927, "grad_norm": 1.8462993961801883, "learning_rate": 9.90853129478703e-06, "loss": 0.6154, "step": 20902 }, { "epoch": 1.5535488665923447, "grad_norm": 1.77643294285065, "learning_rate": 9.90772896039627e-06, "loss": 0.4402, "step": 20903 }, { "epoch": 1.5536231884057972, "grad_norm": 2.28298235862307, "learning_rate": 9.906926626599544e-06, "loss": 0.5601, "step": 20904 }, { "epoch": 1.5536975102192494, "grad_norm": 2.2820078388776754, "learning_rate": 9.906124293402022e-06, "loss": 0.6826, "step": 20905 }, { "epoch": 1.5537718320327016, "grad_norm": 2.405169843030583, "learning_rate": 9.905321960808865e-06, "loss": 0.7129, "step": 20906 }, { "epoch": 1.5538461538461539, "grad_norm": 2.0877158127635296, "learning_rate": 9.904519628825244e-06, "loss": 0.6594, "step": 20907 }, { "epoch": 1.5539204756596061, "grad_norm": 2.0151373205601972, "learning_rate": 9.903717297456317e-06, "loss": 0.587, "step": 20908 }, { "epoch": 1.5539947974730584, "grad_norm": 2.1371862384153584, "learning_rate": 9.902914966707252e-06, "loss": 0.4573, "step": 20909 }, { "epoch": 1.5540691192865106, "grad_norm": 2.0644353836511575, "learning_rate": 9.902112636583216e-06, "loss": 0.5791, "step": 20910 }, { "epoch": 1.5541434410999628, "grad_norm": 2.127936857302711, "learning_rate": 9.901310307089376e-06, "loss": 0.6549, "step": 20911 }, { "epoch": 1.554217762913415, "grad_norm": 2.168662968733147, "learning_rate": 9.900507978230894e-06, "loss": 0.7749, "step": 20912 }, { "epoch": 1.5542920847268673, "grad_norm": 2.1534063995848176, "learning_rate": 9.899705650012936e-06, "loss": 0.6006, "step": 20913 }, { "epoch": 1.5543664065403195, "grad_norm": 1.8212971879068502, "learning_rate": 9.898903322440667e-06, "loss": 0.6754, "step": 20914 }, { "epoch": 1.5544407283537718, "grad_norm": 1.8692537099450375, "learning_rate": 9.898100995519254e-06, "loss": 0.5804, "step": 20915 }, { "epoch": 1.554515050167224, "grad_norm": 2.1944807987005723, "learning_rate": 9.897298669253863e-06, "loss": 0.5628, "step": 20916 }, { "epoch": 1.5545893719806765, "grad_norm": 2.287805609512007, "learning_rate": 9.896496343649656e-06, "loss": 0.6927, "step": 20917 }, { "epoch": 1.5546636937941285, "grad_norm": 2.0475394909314355, "learning_rate": 9.895694018711806e-06, "loss": 0.7111, "step": 20918 }, { "epoch": 1.554738015607581, "grad_norm": 1.7198215285363996, "learning_rate": 9.894891694445465e-06, "loss": 0.4617, "step": 20919 }, { "epoch": 1.554812337421033, "grad_norm": 2.025073685862598, "learning_rate": 9.894089370855809e-06, "loss": 0.7871, "step": 20920 }, { "epoch": 1.5548866592344854, "grad_norm": 2.0514272518146925, "learning_rate": 9.893287047947997e-06, "loss": 0.6136, "step": 20921 }, { "epoch": 1.5549609810479375, "grad_norm": 1.9656433326251805, "learning_rate": 9.892484725727202e-06, "loss": 0.593, "step": 20922 }, { "epoch": 1.55503530286139, "grad_norm": 1.8107450818788946, "learning_rate": 9.89168240419858e-06, "loss": 0.5358, "step": 20923 }, { "epoch": 1.555109624674842, "grad_norm": 2.036663927329797, "learning_rate": 9.890880083367304e-06, "loss": 0.6411, "step": 20924 }, { "epoch": 1.5551839464882944, "grad_norm": 2.039712746909786, "learning_rate": 9.890077763238537e-06, "loss": 0.6984, "step": 20925 }, { "epoch": 1.5552582683017464, "grad_norm": 1.9358545391762474, "learning_rate": 9.889275443817441e-06, "loss": 0.6072, "step": 20926 }, { "epoch": 1.5553325901151989, "grad_norm": 1.9452966009666341, "learning_rate": 9.888473125109187e-06, "loss": 0.6079, "step": 20927 }, { "epoch": 1.555406911928651, "grad_norm": 1.6617195717113433, "learning_rate": 9.887670807118935e-06, "loss": 0.5029, "step": 20928 }, { "epoch": 1.5554812337421033, "grad_norm": 2.097070405697334, "learning_rate": 9.886868489851853e-06, "loss": 0.7585, "step": 20929 }, { "epoch": 1.5555555555555556, "grad_norm": 2.122756076762739, "learning_rate": 9.88606617331311e-06, "loss": 0.5788, "step": 20930 }, { "epoch": 1.5556298773690078, "grad_norm": 1.4272838705319346, "learning_rate": 9.885263857507862e-06, "loss": 0.4252, "step": 20931 }, { "epoch": 1.55570419918246, "grad_norm": 2.0258728316894343, "learning_rate": 9.88446154244128e-06, "loss": 0.5262, "step": 20932 }, { "epoch": 1.5557785209959123, "grad_norm": 1.8578562553295777, "learning_rate": 9.88365922811853e-06, "loss": 0.5369, "step": 20933 }, { "epoch": 1.5558528428093645, "grad_norm": 2.104005705128654, "learning_rate": 9.882856914544774e-06, "loss": 0.5415, "step": 20934 }, { "epoch": 1.5559271646228168, "grad_norm": 1.5837601025753663, "learning_rate": 9.88205460172518e-06, "loss": 0.4383, "step": 20935 }, { "epoch": 1.556001486436269, "grad_norm": 1.575350723844004, "learning_rate": 9.88125228966491e-06, "loss": 0.4989, "step": 20936 }, { "epoch": 1.5560758082497212, "grad_norm": 2.15624172576099, "learning_rate": 9.880449978369135e-06, "loss": 0.6757, "step": 20937 }, { "epoch": 1.5561501300631735, "grad_norm": 1.9000747999572047, "learning_rate": 9.879647667843017e-06, "loss": 0.5622, "step": 20938 }, { "epoch": 1.5562244518766257, "grad_norm": 1.5942929960911008, "learning_rate": 9.878845358091719e-06, "loss": 0.6214, "step": 20939 }, { "epoch": 1.5562987736900782, "grad_norm": 2.1173561088962956, "learning_rate": 9.878043049120408e-06, "loss": 0.7342, "step": 20940 }, { "epoch": 1.5563730955035302, "grad_norm": 2.107928979309512, "learning_rate": 9.877240740934255e-06, "loss": 0.4025, "step": 20941 }, { "epoch": 1.5564474173169827, "grad_norm": 1.9206305558340242, "learning_rate": 9.876438433538415e-06, "loss": 0.665, "step": 20942 }, { "epoch": 1.5565217391304347, "grad_norm": 1.6984803795390149, "learning_rate": 9.875636126938058e-06, "loss": 0.4968, "step": 20943 }, { "epoch": 1.5565960609438871, "grad_norm": 2.1310030104686235, "learning_rate": 9.874833821138351e-06, "loss": 0.5973, "step": 20944 }, { "epoch": 1.5566703827573392, "grad_norm": 1.7373389892353803, "learning_rate": 9.874031516144455e-06, "loss": 0.56, "step": 20945 }, { "epoch": 1.5567447045707916, "grad_norm": 1.8694771745778012, "learning_rate": 9.873229211961537e-06, "loss": 0.4278, "step": 20946 }, { "epoch": 1.5568190263842436, "grad_norm": 2.029604741833733, "learning_rate": 9.872426908594765e-06, "loss": 0.5267, "step": 20947 }, { "epoch": 1.556893348197696, "grad_norm": 2.2535594129625496, "learning_rate": 9.8716246060493e-06, "loss": 0.7498, "step": 20948 }, { "epoch": 1.556967670011148, "grad_norm": 1.7684046564037559, "learning_rate": 9.87082230433031e-06, "loss": 0.4832, "step": 20949 }, { "epoch": 1.5570419918246006, "grad_norm": 1.606445059447915, "learning_rate": 9.87002000344296e-06, "loss": 0.4729, "step": 20950 }, { "epoch": 1.5571163136380528, "grad_norm": 1.5602304088843826, "learning_rate": 9.869217703392414e-06, "loss": 0.5197, "step": 20951 }, { "epoch": 1.557190635451505, "grad_norm": 1.9620263602006924, "learning_rate": 9.86841540418384e-06, "loss": 0.5298, "step": 20952 }, { "epoch": 1.5572649572649573, "grad_norm": 1.9506169950719494, "learning_rate": 9.867613105822398e-06, "loss": 0.6668, "step": 20953 }, { "epoch": 1.5573392790784095, "grad_norm": 1.793375602123423, "learning_rate": 9.866810808313256e-06, "loss": 0.5502, "step": 20954 }, { "epoch": 1.5574136008918618, "grad_norm": 1.6560785770835025, "learning_rate": 9.866008511661582e-06, "loss": 0.4671, "step": 20955 }, { "epoch": 1.557487922705314, "grad_norm": 2.279111514747275, "learning_rate": 9.865206215872535e-06, "loss": 0.7335, "step": 20956 }, { "epoch": 1.5575622445187662, "grad_norm": 1.5924998398864163, "learning_rate": 9.864403920951283e-06, "loss": 0.4556, "step": 20957 }, { "epoch": 1.5576365663322185, "grad_norm": 2.008344228482638, "learning_rate": 9.863601626902992e-06, "loss": 0.6868, "step": 20958 }, { "epoch": 1.5577108881456707, "grad_norm": 3.1198585776101213, "learning_rate": 9.862799333732828e-06, "loss": 0.4974, "step": 20959 }, { "epoch": 1.557785209959123, "grad_norm": 1.6603656947483538, "learning_rate": 9.861997041445956e-06, "loss": 0.5553, "step": 20960 }, { "epoch": 1.5578595317725754, "grad_norm": 7.070351107210566, "learning_rate": 9.861194750047536e-06, "loss": 0.6314, "step": 20961 }, { "epoch": 1.5579338535860274, "grad_norm": 2.225903333127036, "learning_rate": 9.860392459542738e-06, "loss": 0.5767, "step": 20962 }, { "epoch": 1.5580081753994799, "grad_norm": 1.6993913370492044, "learning_rate": 9.859590169936728e-06, "loss": 0.4545, "step": 20963 }, { "epoch": 1.558082497212932, "grad_norm": 1.8267235690845316, "learning_rate": 9.858787881234671e-06, "loss": 0.6322, "step": 20964 }, { "epoch": 1.5581568190263844, "grad_norm": 2.2560031343837097, "learning_rate": 9.857985593441727e-06, "loss": 0.7299, "step": 20965 }, { "epoch": 1.5582311408398364, "grad_norm": 2.060406067375808, "learning_rate": 9.857183306563066e-06, "loss": 0.537, "step": 20966 }, { "epoch": 1.5583054626532888, "grad_norm": 1.8820427556549304, "learning_rate": 9.85638102060385e-06, "loss": 0.6979, "step": 20967 }, { "epoch": 1.5583797844667409, "grad_norm": 1.694087583678344, "learning_rate": 9.855578735569245e-06, "loss": 0.4625, "step": 20968 }, { "epoch": 1.5584541062801933, "grad_norm": 2.005978622480146, "learning_rate": 9.854776451464417e-06, "loss": 0.7116, "step": 20969 }, { "epoch": 1.5585284280936453, "grad_norm": 1.918619813320218, "learning_rate": 9.853974168294531e-06, "loss": 0.6216, "step": 20970 }, { "epoch": 1.5586027499070978, "grad_norm": 1.9518697491920933, "learning_rate": 9.853171886064754e-06, "loss": 0.5952, "step": 20971 }, { "epoch": 1.55867707172055, "grad_norm": 2.0286504470806785, "learning_rate": 9.852369604780245e-06, "loss": 0.6995, "step": 20972 }, { "epoch": 1.5587513935340023, "grad_norm": 2.4078717446163047, "learning_rate": 9.851567324446176e-06, "loss": 0.8054, "step": 20973 }, { "epoch": 1.5588257153474545, "grad_norm": 1.6847074633450854, "learning_rate": 9.850765045067706e-06, "loss": 0.4837, "step": 20974 }, { "epoch": 1.5589000371609067, "grad_norm": 1.7238572437383388, "learning_rate": 9.849962766650007e-06, "loss": 0.5046, "step": 20975 }, { "epoch": 1.558974358974359, "grad_norm": 1.9063149177578058, "learning_rate": 9.849160489198237e-06, "loss": 0.4156, "step": 20976 }, { "epoch": 1.5590486807878112, "grad_norm": 2.5143310501215472, "learning_rate": 9.848358212717567e-06, "loss": 0.6211, "step": 20977 }, { "epoch": 1.5591230026012635, "grad_norm": 2.0946892782211632, "learning_rate": 9.847555937213154e-06, "loss": 0.5777, "step": 20978 }, { "epoch": 1.5591973244147157, "grad_norm": 1.9588408989789938, "learning_rate": 9.84675366269017e-06, "loss": 0.6076, "step": 20979 }, { "epoch": 1.559271646228168, "grad_norm": 2.308451059717658, "learning_rate": 9.845951389153778e-06, "loss": 0.5859, "step": 20980 }, { "epoch": 1.5593459680416202, "grad_norm": 3.2204879656070644, "learning_rate": 9.845149116609141e-06, "loss": 0.6857, "step": 20981 }, { "epoch": 1.5594202898550724, "grad_norm": 2.085259416829165, "learning_rate": 9.844346845061431e-06, "loss": 0.5951, "step": 20982 }, { "epoch": 1.5594946116685247, "grad_norm": 2.18959422586269, "learning_rate": 9.843544574515804e-06, "loss": 0.6597, "step": 20983 }, { "epoch": 1.5595689334819771, "grad_norm": 1.8993684624997993, "learning_rate": 9.84274230497743e-06, "loss": 0.6329, "step": 20984 }, { "epoch": 1.5596432552954291, "grad_norm": 1.3301841679682178, "learning_rate": 9.841940036451473e-06, "loss": 0.4574, "step": 20985 }, { "epoch": 1.5597175771088816, "grad_norm": 1.996953168653473, "learning_rate": 9.8411377689431e-06, "loss": 0.6018, "step": 20986 }, { "epoch": 1.5597918989223336, "grad_norm": 1.824499423922647, "learning_rate": 9.84033550245747e-06, "loss": 0.5753, "step": 20987 }, { "epoch": 1.559866220735786, "grad_norm": 1.9364072044782645, "learning_rate": 9.839533236999756e-06, "loss": 0.6694, "step": 20988 }, { "epoch": 1.559940542549238, "grad_norm": 1.9325990365858823, "learning_rate": 9.838730972575114e-06, "loss": 0.5601, "step": 20989 }, { "epoch": 1.5600148643626905, "grad_norm": 2.0461089834266755, "learning_rate": 9.837928709188716e-06, "loss": 0.6421, "step": 20990 }, { "epoch": 1.5600891861761426, "grad_norm": 1.878105889341242, "learning_rate": 9.837126446845723e-06, "loss": 0.5553, "step": 20991 }, { "epoch": 1.560163507989595, "grad_norm": 2.008012626784544, "learning_rate": 9.836324185551303e-06, "loss": 0.6025, "step": 20992 }, { "epoch": 1.560237829803047, "grad_norm": 1.805067860240447, "learning_rate": 9.835521925310618e-06, "loss": 0.6055, "step": 20993 }, { "epoch": 1.5603121516164995, "grad_norm": 2.013000050809642, "learning_rate": 9.834719666128835e-06, "loss": 0.6054, "step": 20994 }, { "epoch": 1.5603864734299517, "grad_norm": 2.0897999609660207, "learning_rate": 9.833917408011118e-06, "loss": 0.7277, "step": 20995 }, { "epoch": 1.560460795243404, "grad_norm": 1.8571485691208838, "learning_rate": 9.833115150962632e-06, "loss": 0.4947, "step": 20996 }, { "epoch": 1.5605351170568562, "grad_norm": 1.769372616577395, "learning_rate": 9.832312894988546e-06, "loss": 0.6225, "step": 20997 }, { "epoch": 1.5606094388703085, "grad_norm": 2.116553493599435, "learning_rate": 9.831510640094018e-06, "loss": 0.719, "step": 20998 }, { "epoch": 1.5606837606837607, "grad_norm": 3.97849334913977, "learning_rate": 9.830708386284216e-06, "loss": 0.6379, "step": 20999 }, { "epoch": 1.560758082497213, "grad_norm": 3.137962411641428, "learning_rate": 9.829906133564302e-06, "loss": 0.4511, "step": 21000 }, { "epoch": 1.5608324043106652, "grad_norm": 1.6725839016415232, "learning_rate": 9.829103881939446e-06, "loss": 0.4935, "step": 21001 }, { "epoch": 1.5609067261241174, "grad_norm": 2.247679013880803, "learning_rate": 9.828301631414808e-06, "loss": 0.698, "step": 21002 }, { "epoch": 1.5609810479375696, "grad_norm": 2.0199039917906703, "learning_rate": 9.827499381995558e-06, "loss": 0.5106, "step": 21003 }, { "epoch": 1.5610553697510219, "grad_norm": 2.706543248554096, "learning_rate": 9.826697133686858e-06, "loss": 0.6064, "step": 21004 }, { "epoch": 1.5611296915644741, "grad_norm": 1.8663619877998603, "learning_rate": 9.825894886493872e-06, "loss": 0.7111, "step": 21005 }, { "epoch": 1.5612040133779264, "grad_norm": 1.8499683863035965, "learning_rate": 9.825092640421762e-06, "loss": 0.6593, "step": 21006 }, { "epoch": 1.5612783351913788, "grad_norm": 2.0902848148761057, "learning_rate": 9.824290395475701e-06, "loss": 0.4791, "step": 21007 }, { "epoch": 1.5613526570048308, "grad_norm": 2.118359128138708, "learning_rate": 9.82348815166085e-06, "loss": 0.6237, "step": 21008 }, { "epoch": 1.5614269788182833, "grad_norm": 2.1815959239017078, "learning_rate": 9.822685908982373e-06, "loss": 0.632, "step": 21009 }, { "epoch": 1.5615013006317353, "grad_norm": 2.900450525788561, "learning_rate": 9.821883667445434e-06, "loss": 0.6507, "step": 21010 }, { "epoch": 1.5615756224451878, "grad_norm": 2.2898153368035854, "learning_rate": 9.821081427055197e-06, "loss": 0.6326, "step": 21011 }, { "epoch": 1.5616499442586398, "grad_norm": 2.2805026492238114, "learning_rate": 9.82027918781683e-06, "loss": 0.572, "step": 21012 }, { "epoch": 1.5617242660720922, "grad_norm": 1.7639976192947784, "learning_rate": 9.819476949735493e-06, "loss": 0.5525, "step": 21013 }, { "epoch": 1.5617985878855443, "grad_norm": 2.357415973345468, "learning_rate": 9.818674712816359e-06, "loss": 0.6815, "step": 21014 }, { "epoch": 1.5618729096989967, "grad_norm": 2.1196987265712157, "learning_rate": 9.817872477064585e-06, "loss": 0.5995, "step": 21015 }, { "epoch": 1.5619472315124487, "grad_norm": 1.8561241718846546, "learning_rate": 9.817070242485339e-06, "loss": 0.4994, "step": 21016 }, { "epoch": 1.5620215533259012, "grad_norm": 6.30504475729085, "learning_rate": 9.816268009083785e-06, "loss": 0.7979, "step": 21017 }, { "epoch": 1.5620958751393534, "grad_norm": 1.9345828145564836, "learning_rate": 9.815465776865087e-06, "loss": 0.6686, "step": 21018 }, { "epoch": 1.5621701969528057, "grad_norm": 1.976961444295197, "learning_rate": 9.814663545834414e-06, "loss": 0.669, "step": 21019 }, { "epoch": 1.562244518766258, "grad_norm": 1.9741325291013994, "learning_rate": 9.813861315996926e-06, "loss": 0.5802, "step": 21020 }, { "epoch": 1.5623188405797102, "grad_norm": 2.397636401066021, "learning_rate": 9.813059087357791e-06, "loss": 0.5915, "step": 21021 }, { "epoch": 1.5623931623931624, "grad_norm": 2.4567579555387065, "learning_rate": 9.812256859922169e-06, "loss": 0.64, "step": 21022 }, { "epoch": 1.5624674842066146, "grad_norm": 3.0572046431271906, "learning_rate": 9.811454633695228e-06, "loss": 0.6847, "step": 21023 }, { "epoch": 1.5625418060200669, "grad_norm": 2.10422064678502, "learning_rate": 9.810652408682133e-06, "loss": 0.5592, "step": 21024 }, { "epoch": 1.562616127833519, "grad_norm": 1.9115875646276868, "learning_rate": 9.809850184888048e-06, "loss": 0.5563, "step": 21025 }, { "epoch": 1.5626904496469713, "grad_norm": 1.8547372282335322, "learning_rate": 9.809047962318137e-06, "loss": 0.547, "step": 21026 }, { "epoch": 1.5627647714604236, "grad_norm": 2.1937904163623583, "learning_rate": 9.808245740977565e-06, "loss": 0.6414, "step": 21027 }, { "epoch": 1.562839093273876, "grad_norm": 2.3382281387619237, "learning_rate": 9.807443520871498e-06, "loss": 0.7362, "step": 21028 }, { "epoch": 1.562913415087328, "grad_norm": 1.6261187840689533, "learning_rate": 9.806641302005097e-06, "loss": 0.5058, "step": 21029 }, { "epoch": 1.5629877369007805, "grad_norm": 2.0220616892707417, "learning_rate": 9.805839084383533e-06, "loss": 0.5879, "step": 21030 }, { "epoch": 1.5630620587142325, "grad_norm": 2.8104704605942246, "learning_rate": 9.805036868011967e-06, "loss": 0.4639, "step": 21031 }, { "epoch": 1.563136380527685, "grad_norm": 1.953238425294926, "learning_rate": 9.804234652895564e-06, "loss": 0.4921, "step": 21032 }, { "epoch": 1.563210702341137, "grad_norm": 2.1306931477552453, "learning_rate": 9.803432439039485e-06, "loss": 0.6143, "step": 21033 }, { "epoch": 1.5632850241545895, "grad_norm": 2.1003835960236565, "learning_rate": 9.802630226448897e-06, "loss": 0.4115, "step": 21034 }, { "epoch": 1.5633593459680415, "grad_norm": 1.7860785356868083, "learning_rate": 9.801828015128967e-06, "loss": 0.5841, "step": 21035 }, { "epoch": 1.563433667781494, "grad_norm": 2.0082879552626616, "learning_rate": 9.80102580508486e-06, "loss": 0.6113, "step": 21036 }, { "epoch": 1.563507989594946, "grad_norm": 2.3704574249078694, "learning_rate": 9.800223596321735e-06, "loss": 0.7694, "step": 21037 }, { "epoch": 1.5635823114083984, "grad_norm": 2.345847551289554, "learning_rate": 9.799421388844761e-06, "loss": 0.7269, "step": 21038 }, { "epoch": 1.5636566332218507, "grad_norm": 2.513806510871496, "learning_rate": 9.798619182659103e-06, "loss": 0.667, "step": 21039 }, { "epoch": 1.563730955035303, "grad_norm": 1.784653671722439, "learning_rate": 9.797816977769921e-06, "loss": 0.5845, "step": 21040 }, { "epoch": 1.5638052768487551, "grad_norm": 1.6478060932580163, "learning_rate": 9.797014774182389e-06, "loss": 0.4326, "step": 21041 }, { "epoch": 1.5638795986622074, "grad_norm": 1.5777711801213026, "learning_rate": 9.79621257190166e-06, "loss": 0.492, "step": 21042 }, { "epoch": 1.5639539204756596, "grad_norm": 2.084546804539597, "learning_rate": 9.795410370932909e-06, "loss": 0.7581, "step": 21043 }, { "epoch": 1.5640282422891119, "grad_norm": 1.8860373352481008, "learning_rate": 9.794608171281293e-06, "loss": 0.5529, "step": 21044 }, { "epoch": 1.564102564102564, "grad_norm": 2.8404554864243146, "learning_rate": 9.793805972951977e-06, "loss": 0.6496, "step": 21045 }, { "epoch": 1.5641768859160163, "grad_norm": 2.3098597127181377, "learning_rate": 9.793003775950128e-06, "loss": 0.592, "step": 21046 }, { "epoch": 1.5642512077294686, "grad_norm": 2.24159692320876, "learning_rate": 9.792201580280912e-06, "loss": 0.7741, "step": 21047 }, { "epoch": 1.5643255295429208, "grad_norm": 2.4502980975368502, "learning_rate": 9.791399385949488e-06, "loss": 0.6882, "step": 21048 }, { "epoch": 1.564399851356373, "grad_norm": 2.2272339220123234, "learning_rate": 9.790597192961025e-06, "loss": 0.6443, "step": 21049 }, { "epoch": 1.5644741731698253, "grad_norm": 1.8414522325374414, "learning_rate": 9.789795001320687e-06, "loss": 0.5981, "step": 21050 }, { "epoch": 1.5645484949832777, "grad_norm": 1.8334776471048655, "learning_rate": 9.788992811033637e-06, "loss": 0.5897, "step": 21051 }, { "epoch": 1.5646228167967298, "grad_norm": 1.7095071660561834, "learning_rate": 9.788190622105044e-06, "loss": 0.5935, "step": 21052 }, { "epoch": 1.5646971386101822, "grad_norm": 1.993542602517029, "learning_rate": 9.787388434540065e-06, "loss": 0.5754, "step": 21053 }, { "epoch": 1.5647714604236342, "grad_norm": 2.360411164867705, "learning_rate": 9.786586248343874e-06, "loss": 0.524, "step": 21054 }, { "epoch": 1.5648457822370867, "grad_norm": 1.8315586898692024, "learning_rate": 9.785784063521623e-06, "loss": 0.5671, "step": 21055 }, { "epoch": 1.5649201040505387, "grad_norm": 2.297468200704789, "learning_rate": 9.784981880078486e-06, "loss": 0.8046, "step": 21056 }, { "epoch": 1.5649944258639912, "grad_norm": 3.3903058294351043, "learning_rate": 9.784179698019622e-06, "loss": 0.6508, "step": 21057 }, { "epoch": 1.5650687476774432, "grad_norm": 1.6668514022635856, "learning_rate": 9.783377517350201e-06, "loss": 0.6061, "step": 21058 }, { "epoch": 1.5651430694908957, "grad_norm": 1.8888498718368079, "learning_rate": 9.782575338075383e-06, "loss": 0.5977, "step": 21059 }, { "epoch": 1.5652173913043477, "grad_norm": 2.160029506883683, "learning_rate": 9.781773160200334e-06, "loss": 0.6076, "step": 21060 }, { "epoch": 1.5652917131178001, "grad_norm": 2.085269774228595, "learning_rate": 9.780970983730216e-06, "loss": 0.712, "step": 21061 }, { "epoch": 1.5653660349312524, "grad_norm": 2.2977993529802982, "learning_rate": 9.780168808670196e-06, "loss": 0.6691, "step": 21062 }, { "epoch": 1.5654403567447046, "grad_norm": 1.9733987714813859, "learning_rate": 9.77936663502544e-06, "loss": 0.6557, "step": 21063 }, { "epoch": 1.5655146785581568, "grad_norm": 1.9059997716064458, "learning_rate": 9.77856446280111e-06, "loss": 0.5605, "step": 21064 }, { "epoch": 1.565589000371609, "grad_norm": 2.0868862973295332, "learning_rate": 9.777762292002373e-06, "loss": 0.5196, "step": 21065 }, { "epoch": 1.5656633221850613, "grad_norm": 1.762842615021084, "learning_rate": 9.776960122634386e-06, "loss": 0.5387, "step": 21066 }, { "epoch": 1.5657376439985136, "grad_norm": 4.291513709240544, "learning_rate": 9.77615795470232e-06, "loss": 0.5491, "step": 21067 }, { "epoch": 1.5658119658119658, "grad_norm": 2.249680496842299, "learning_rate": 9.775355788211335e-06, "loss": 0.4388, "step": 21068 }, { "epoch": 1.565886287625418, "grad_norm": 1.7160755874958629, "learning_rate": 9.7745536231666e-06, "loss": 0.6476, "step": 21069 }, { "epoch": 1.5659606094388703, "grad_norm": 1.8916879653158243, "learning_rate": 9.773751459573278e-06, "loss": 0.634, "step": 21070 }, { "epoch": 1.5660349312523225, "grad_norm": 1.7078891306728905, "learning_rate": 9.77294929743653e-06, "loss": 0.6445, "step": 21071 }, { "epoch": 1.5661092530657748, "grad_norm": 2.2104604885331316, "learning_rate": 9.772147136761524e-06, "loss": 0.5591, "step": 21072 }, { "epoch": 1.566183574879227, "grad_norm": 1.5337714686733885, "learning_rate": 9.771344977553422e-06, "loss": 0.5947, "step": 21073 }, { "epoch": 1.5662578966926795, "grad_norm": 2.0312746834299773, "learning_rate": 9.770542819817391e-06, "loss": 0.6684, "step": 21074 }, { "epoch": 1.5663322185061315, "grad_norm": 2.0612425035791784, "learning_rate": 9.769740663558592e-06, "loss": 0.6871, "step": 21075 }, { "epoch": 1.566406540319584, "grad_norm": 2.6522053671070887, "learning_rate": 9.768938508782191e-06, "loss": 0.4224, "step": 21076 }, { "epoch": 1.566480862133036, "grad_norm": 2.1359975795239583, "learning_rate": 9.768136355493356e-06, "loss": 0.5796, "step": 21077 }, { "epoch": 1.5665551839464884, "grad_norm": 1.9073202922125994, "learning_rate": 9.767334203697241e-06, "loss": 0.5671, "step": 21078 }, { "epoch": 1.5666295057599404, "grad_norm": 2.005245037397682, "learning_rate": 9.766532053399018e-06, "loss": 0.6697, "step": 21079 }, { "epoch": 1.5667038275733929, "grad_norm": 2.1656257733220605, "learning_rate": 9.765729904603852e-06, "loss": 0.6716, "step": 21080 }, { "epoch": 1.566778149386845, "grad_norm": 2.6014199829473164, "learning_rate": 9.764927757316902e-06, "loss": 0.8446, "step": 21081 }, { "epoch": 1.5668524712002974, "grad_norm": 1.9793021968528797, "learning_rate": 9.764125611543334e-06, "loss": 0.6705, "step": 21082 }, { "epoch": 1.5669267930137494, "grad_norm": 1.928805357995104, "learning_rate": 9.763323467288315e-06, "loss": 0.522, "step": 21083 }, { "epoch": 1.5670011148272018, "grad_norm": 2.0499196839778637, "learning_rate": 9.762521324557005e-06, "loss": 0.6825, "step": 21084 }, { "epoch": 1.567075436640654, "grad_norm": 1.9942984579564673, "learning_rate": 9.761719183354575e-06, "loss": 0.713, "step": 21085 }, { "epoch": 1.5671497584541063, "grad_norm": 2.1924685329134164, "learning_rate": 9.760917043686181e-06, "loss": 0.6306, "step": 21086 }, { "epoch": 1.5672240802675586, "grad_norm": 1.8594413135905985, "learning_rate": 9.76011490555699e-06, "loss": 0.5497, "step": 21087 }, { "epoch": 1.5672984020810108, "grad_norm": 1.8195633504219206, "learning_rate": 9.759312768972172e-06, "loss": 0.4824, "step": 21088 }, { "epoch": 1.567372723894463, "grad_norm": 1.865783758612753, "learning_rate": 9.758510633936882e-06, "loss": 0.5104, "step": 21089 }, { "epoch": 1.5674470457079153, "grad_norm": 2.0227673411703937, "learning_rate": 9.757708500456288e-06, "loss": 0.5774, "step": 21090 }, { "epoch": 1.5675213675213675, "grad_norm": 2.070680829170463, "learning_rate": 9.756906368535554e-06, "loss": 0.6635, "step": 21091 }, { "epoch": 1.5675956893348197, "grad_norm": 1.8668022550082635, "learning_rate": 9.756104238179845e-06, "loss": 0.6801, "step": 21092 }, { "epoch": 1.567670011148272, "grad_norm": 1.4908844770226046, "learning_rate": 9.755302109394322e-06, "loss": 0.5442, "step": 21093 }, { "epoch": 1.5677443329617242, "grad_norm": 2.025057536104929, "learning_rate": 9.754499982184154e-06, "loss": 0.5332, "step": 21094 }, { "epoch": 1.5678186547751767, "grad_norm": 1.8188791331079894, "learning_rate": 9.7536978565545e-06, "loss": 0.4872, "step": 21095 }, { "epoch": 1.5678929765886287, "grad_norm": 2.2250710891085728, "learning_rate": 9.75289573251053e-06, "loss": 0.7066, "step": 21096 }, { "epoch": 1.5679672984020812, "grad_norm": 1.976287340524515, "learning_rate": 9.7520936100574e-06, "loss": 0.504, "step": 21097 }, { "epoch": 1.5680416202155332, "grad_norm": 1.9635298017692424, "learning_rate": 9.75129148920028e-06, "loss": 0.5898, "step": 21098 }, { "epoch": 1.5681159420289856, "grad_norm": 2.068216125722955, "learning_rate": 9.750489369944337e-06, "loss": 0.6965, "step": 21099 }, { "epoch": 1.5681902638424376, "grad_norm": 1.9084340041108312, "learning_rate": 9.749687252294727e-06, "loss": 0.568, "step": 21100 }, { "epoch": 1.56826458565589, "grad_norm": 1.8323566052002092, "learning_rate": 9.748885136256616e-06, "loss": 0.5245, "step": 21101 }, { "epoch": 1.5683389074693421, "grad_norm": 2.356990882500249, "learning_rate": 9.748083021835174e-06, "loss": 0.7056, "step": 21102 }, { "epoch": 1.5684132292827946, "grad_norm": 1.7620724248008435, "learning_rate": 9.747280909035555e-06, "loss": 0.5806, "step": 21103 }, { "epoch": 1.5684875510962466, "grad_norm": 1.943855166485895, "learning_rate": 9.746478797862929e-06, "loss": 0.5824, "step": 21104 }, { "epoch": 1.568561872909699, "grad_norm": 2.053636050547648, "learning_rate": 9.745676688322461e-06, "loss": 0.4839, "step": 21105 }, { "epoch": 1.5686361947231513, "grad_norm": 1.8180367036185818, "learning_rate": 9.744874580419313e-06, "loss": 0.5578, "step": 21106 }, { "epoch": 1.5687105165366035, "grad_norm": 2.1195065373630793, "learning_rate": 9.74407247415865e-06, "loss": 0.534, "step": 21107 }, { "epoch": 1.5687848383500558, "grad_norm": 1.339415927107303, "learning_rate": 9.743270369545635e-06, "loss": 0.317, "step": 21108 }, { "epoch": 1.568859160163508, "grad_norm": 2.130678436500793, "learning_rate": 9.74246826658543e-06, "loss": 0.6792, "step": 21109 }, { "epoch": 1.5689334819769603, "grad_norm": 2.106730245355256, "learning_rate": 9.741666165283205e-06, "loss": 0.6493, "step": 21110 }, { "epoch": 1.5690078037904125, "grad_norm": 2.209077614133381, "learning_rate": 9.740864065644118e-06, "loss": 0.5469, "step": 21111 }, { "epoch": 1.5690821256038647, "grad_norm": 2.4501713673855283, "learning_rate": 9.740061967673333e-06, "loss": 0.6671, "step": 21112 }, { "epoch": 1.569156447417317, "grad_norm": 1.9702362919851701, "learning_rate": 9.739259871376018e-06, "loss": 0.5952, "step": 21113 }, { "epoch": 1.5692307692307692, "grad_norm": 1.5818875609266203, "learning_rate": 9.738457776757333e-06, "loss": 0.5025, "step": 21114 }, { "epoch": 1.5693050910442214, "grad_norm": 1.9502673086395912, "learning_rate": 9.737655683822443e-06, "loss": 0.5337, "step": 21115 }, { "epoch": 1.5693794128576737, "grad_norm": 1.9542245013986839, "learning_rate": 9.736853592576511e-06, "loss": 0.6326, "step": 21116 }, { "epoch": 1.569453734671126, "grad_norm": 2.1552770360529965, "learning_rate": 9.736051503024704e-06, "loss": 0.6431, "step": 21117 }, { "epoch": 1.5695280564845784, "grad_norm": 2.6467900286659756, "learning_rate": 9.735249415172183e-06, "loss": 0.6605, "step": 21118 }, { "epoch": 1.5696023782980304, "grad_norm": 1.7629071713086764, "learning_rate": 9.734447329024112e-06, "loss": 0.4982, "step": 21119 }, { "epoch": 1.5696767001114829, "grad_norm": 1.9786592287967324, "learning_rate": 9.733645244585655e-06, "loss": 0.6039, "step": 21120 }, { "epoch": 1.5697510219249349, "grad_norm": 1.8594205597313884, "learning_rate": 9.732843161861976e-06, "loss": 0.6499, "step": 21121 }, { "epoch": 1.5698253437383873, "grad_norm": 1.4134210032426722, "learning_rate": 9.732041080858243e-06, "loss": 0.4211, "step": 21122 }, { "epoch": 1.5698996655518394, "grad_norm": 2.771472752834476, "learning_rate": 9.731239001579612e-06, "loss": 0.695, "step": 21123 }, { "epoch": 1.5699739873652918, "grad_norm": 1.9460472631825068, "learning_rate": 9.730436924031252e-06, "loss": 0.6437, "step": 21124 }, { "epoch": 1.5700483091787438, "grad_norm": 1.6115222264208517, "learning_rate": 9.729634848218323e-06, "loss": 0.4606, "step": 21125 }, { "epoch": 1.5701226309921963, "grad_norm": 2.0871874545687445, "learning_rate": 9.72883277414599e-06, "loss": 0.7267, "step": 21126 }, { "epoch": 1.5701969528056483, "grad_norm": 2.3218983350398297, "learning_rate": 9.728030701819419e-06, "loss": 0.6786, "step": 21127 }, { "epoch": 1.5702712746191008, "grad_norm": 2.2709487460413578, "learning_rate": 9.727228631243774e-06, "loss": 0.661, "step": 21128 }, { "epoch": 1.570345596432553, "grad_norm": 2.2921841762466753, "learning_rate": 9.726426562424215e-06, "loss": 0.6516, "step": 21129 }, { "epoch": 1.5704199182460052, "grad_norm": 2.04681731922193, "learning_rate": 9.725624495365907e-06, "loss": 0.607, "step": 21130 }, { "epoch": 1.5704942400594575, "grad_norm": 1.9687468224382225, "learning_rate": 9.724822430074015e-06, "loss": 0.5917, "step": 21131 }, { "epoch": 1.5705685618729097, "grad_norm": 1.9566769351701232, "learning_rate": 9.724020366553701e-06, "loss": 0.6028, "step": 21132 }, { "epoch": 1.570642883686362, "grad_norm": 2.161267713132251, "learning_rate": 9.723218304810135e-06, "loss": 0.6553, "step": 21133 }, { "epoch": 1.5707172054998142, "grad_norm": 1.7758539926867842, "learning_rate": 9.72241624484847e-06, "loss": 0.6063, "step": 21134 }, { "epoch": 1.5707915273132664, "grad_norm": 2.001748288118551, "learning_rate": 9.721614186673878e-06, "loss": 0.5757, "step": 21135 }, { "epoch": 1.5708658491267187, "grad_norm": 2.0907590385063313, "learning_rate": 9.720812130291516e-06, "loss": 0.6605, "step": 21136 }, { "epoch": 1.570940170940171, "grad_norm": 2.3698707577668476, "learning_rate": 9.720010075706552e-06, "loss": 0.5183, "step": 21137 }, { "epoch": 1.5710144927536231, "grad_norm": 2.0203625497308515, "learning_rate": 9.719208022924147e-06, "loss": 0.6419, "step": 21138 }, { "epoch": 1.5710888145670754, "grad_norm": 2.1674451809198008, "learning_rate": 9.718405971949469e-06, "loss": 0.4846, "step": 21139 }, { "epoch": 1.5711631363805276, "grad_norm": 1.7315524650066005, "learning_rate": 9.71760392278768e-06, "loss": 0.5312, "step": 21140 }, { "epoch": 1.57123745819398, "grad_norm": 5.3731479621482325, "learning_rate": 9.716801875443938e-06, "loss": 0.7176, "step": 21141 }, { "epoch": 1.571311780007432, "grad_norm": 2.399401872387291, "learning_rate": 9.71599982992341e-06, "loss": 0.7995, "step": 21142 }, { "epoch": 1.5713861018208846, "grad_norm": 1.8223603676609283, "learning_rate": 9.715197786231264e-06, "loss": 0.4939, "step": 21143 }, { "epoch": 1.5714604236343366, "grad_norm": 1.6702000415955094, "learning_rate": 9.714395744372662e-06, "loss": 0.3919, "step": 21144 }, { "epoch": 1.571534745447789, "grad_norm": 1.885236153347938, "learning_rate": 9.713593704352762e-06, "loss": 0.5728, "step": 21145 }, { "epoch": 1.571609067261241, "grad_norm": 1.9082061207054046, "learning_rate": 9.712791666176732e-06, "loss": 0.5232, "step": 21146 }, { "epoch": 1.5716833890746935, "grad_norm": 1.781161921574991, "learning_rate": 9.711989629849735e-06, "loss": 0.5054, "step": 21147 }, { "epoch": 1.5717577108881455, "grad_norm": 2.3490503089282124, "learning_rate": 9.71118759537693e-06, "loss": 0.5992, "step": 21148 }, { "epoch": 1.571832032701598, "grad_norm": 1.6988590471659613, "learning_rate": 9.710385562763488e-06, "loss": 0.542, "step": 21149 }, { "epoch": 1.57190635451505, "grad_norm": 1.7288609043693253, "learning_rate": 9.709583532014568e-06, "loss": 0.46, "step": 21150 }, { "epoch": 1.5719806763285025, "grad_norm": 2.0817561274701424, "learning_rate": 9.708781503135333e-06, "loss": 0.652, "step": 21151 }, { "epoch": 1.5720549981419547, "grad_norm": 1.7902212231708663, "learning_rate": 9.707979476130947e-06, "loss": 0.5492, "step": 21152 }, { "epoch": 1.572129319955407, "grad_norm": 2.4690665117007344, "learning_rate": 9.707177451006573e-06, "loss": 0.5861, "step": 21153 }, { "epoch": 1.5722036417688592, "grad_norm": 1.6201837385265152, "learning_rate": 9.706375427767379e-06, "loss": 0.4659, "step": 21154 }, { "epoch": 1.5722779635823114, "grad_norm": 1.8150124299145132, "learning_rate": 9.705573406418524e-06, "loss": 0.5392, "step": 21155 }, { "epoch": 1.5723522853957637, "grad_norm": 1.9566278238247352, "learning_rate": 9.704771386965172e-06, "loss": 0.623, "step": 21156 }, { "epoch": 1.572426607209216, "grad_norm": 2.6668647936262135, "learning_rate": 9.703969369412489e-06, "loss": 0.6628, "step": 21157 }, { "epoch": 1.5725009290226681, "grad_norm": 2.0395388429565613, "learning_rate": 9.703167353765632e-06, "loss": 0.6075, "step": 21158 }, { "epoch": 1.5725752508361204, "grad_norm": 1.6316765176409678, "learning_rate": 9.702365340029769e-06, "loss": 0.4553, "step": 21159 }, { "epoch": 1.5726495726495726, "grad_norm": 2.376765705816242, "learning_rate": 9.70156332821006e-06, "loss": 0.76, "step": 21160 }, { "epoch": 1.5727238944630249, "grad_norm": 2.0566381724050484, "learning_rate": 9.700761318311675e-06, "loss": 0.683, "step": 21161 }, { "epoch": 1.5727982162764773, "grad_norm": 1.933945550417371, "learning_rate": 9.69995931033977e-06, "loss": 0.5177, "step": 21162 }, { "epoch": 1.5728725380899293, "grad_norm": 1.616565138558796, "learning_rate": 9.699157304299513e-06, "loss": 0.3987, "step": 21163 }, { "epoch": 1.5729468599033818, "grad_norm": 2.2527260563785405, "learning_rate": 9.698355300196065e-06, "loss": 0.7799, "step": 21164 }, { "epoch": 1.5730211817168338, "grad_norm": 2.3124602826086544, "learning_rate": 9.697553298034588e-06, "loss": 0.6076, "step": 21165 }, { "epoch": 1.5730955035302863, "grad_norm": 1.8583106528795819, "learning_rate": 9.696751297820253e-06, "loss": 0.5554, "step": 21166 }, { "epoch": 1.5731698253437383, "grad_norm": 2.0469172622285297, "learning_rate": 9.695949299558215e-06, "loss": 0.5417, "step": 21167 }, { "epoch": 1.5732441471571907, "grad_norm": 2.3705014232716493, "learning_rate": 9.69514730325364e-06, "loss": 0.7616, "step": 21168 }, { "epoch": 1.5733184689706428, "grad_norm": 2.052091154449951, "learning_rate": 9.694345308911687e-06, "loss": 0.587, "step": 21169 }, { "epoch": 1.5733927907840952, "grad_norm": 2.2190403608320306, "learning_rate": 9.693543316537526e-06, "loss": 0.565, "step": 21170 }, { "epoch": 1.5734671125975472, "grad_norm": 1.9206071491733423, "learning_rate": 9.692741326136316e-06, "loss": 0.6128, "step": 21171 }, { "epoch": 1.5735414344109997, "grad_norm": 1.8374984127518077, "learning_rate": 9.691939337713223e-06, "loss": 0.5581, "step": 21172 }, { "epoch": 1.5736157562244517, "grad_norm": 2.1776449931230526, "learning_rate": 9.691137351273407e-06, "loss": 0.7207, "step": 21173 }, { "epoch": 1.5736900780379042, "grad_norm": 2.0740836417982065, "learning_rate": 9.690335366822033e-06, "loss": 0.6204, "step": 21174 }, { "epoch": 1.5737643998513564, "grad_norm": 2.0630912201372302, "learning_rate": 9.689533384364264e-06, "loss": 0.556, "step": 21175 }, { "epoch": 1.5738387216648086, "grad_norm": 1.7245774252648443, "learning_rate": 9.688731403905261e-06, "loss": 0.5606, "step": 21176 }, { "epoch": 1.5739130434782609, "grad_norm": 2.1502021043435655, "learning_rate": 9.687929425450192e-06, "loss": 0.6284, "step": 21177 }, { "epoch": 1.5739873652917131, "grad_norm": 2.159413501117445, "learning_rate": 9.68712744900422e-06, "loss": 0.615, "step": 21178 }, { "epoch": 1.5740616871051654, "grad_norm": 1.699737715284183, "learning_rate": 9.686325474572502e-06, "loss": 0.5249, "step": 21179 }, { "epoch": 1.5741360089186176, "grad_norm": 1.889597420866871, "learning_rate": 9.685523502160203e-06, "loss": 0.5097, "step": 21180 }, { "epoch": 1.5742103307320698, "grad_norm": 2.094162252684156, "learning_rate": 9.684721531772488e-06, "loss": 0.7401, "step": 21181 }, { "epoch": 1.574284652545522, "grad_norm": 3.3982320600270155, "learning_rate": 9.68391956341452e-06, "loss": 0.5452, "step": 21182 }, { "epoch": 1.5743589743589743, "grad_norm": 1.9829589157524168, "learning_rate": 9.683117597091463e-06, "loss": 0.6979, "step": 21183 }, { "epoch": 1.5744332961724266, "grad_norm": 1.4986389819996455, "learning_rate": 9.682315632808476e-06, "loss": 0.4996, "step": 21184 }, { "epoch": 1.574507617985879, "grad_norm": 2.507624518742547, "learning_rate": 9.681513670570723e-06, "loss": 0.6865, "step": 21185 }, { "epoch": 1.574581939799331, "grad_norm": 1.9130920054294225, "learning_rate": 9.68071171038337e-06, "loss": 0.4394, "step": 21186 }, { "epoch": 1.5746562616127835, "grad_norm": 2.116880379570416, "learning_rate": 9.679909752251578e-06, "loss": 0.5956, "step": 21187 }, { "epoch": 1.5747305834262355, "grad_norm": 1.9648646429348833, "learning_rate": 9.679107796180514e-06, "loss": 0.6113, "step": 21188 }, { "epoch": 1.574804905239688, "grad_norm": 2.0290776204815657, "learning_rate": 9.678305842175334e-06, "loss": 0.6527, "step": 21189 }, { "epoch": 1.57487922705314, "grad_norm": 2.453185837499853, "learning_rate": 9.67750389024121e-06, "loss": 0.7058, "step": 21190 }, { "epoch": 1.5749535488665924, "grad_norm": 1.9436005772240907, "learning_rate": 9.676701940383292e-06, "loss": 0.6384, "step": 21191 }, { "epoch": 1.5750278706800445, "grad_norm": 2.0809348348581085, "learning_rate": 9.675899992606753e-06, "loss": 0.5171, "step": 21192 }, { "epoch": 1.575102192493497, "grad_norm": 1.9199110086873907, "learning_rate": 9.675098046916751e-06, "loss": 0.4832, "step": 21193 }, { "epoch": 1.575176514306949, "grad_norm": 1.484469356210538, "learning_rate": 9.674296103318454e-06, "loss": 0.4936, "step": 21194 }, { "epoch": 1.5752508361204014, "grad_norm": 1.9434410724607964, "learning_rate": 9.673494161817018e-06, "loss": 0.5198, "step": 21195 }, { "epoch": 1.5753251579338536, "grad_norm": 6.162357825042396, "learning_rate": 9.672692222417612e-06, "loss": 0.656, "step": 21196 }, { "epoch": 1.5753994797473059, "grad_norm": 1.5710644115132502, "learning_rate": 9.671890285125396e-06, "loss": 0.5488, "step": 21197 }, { "epoch": 1.5754738015607581, "grad_norm": 1.902329648083317, "learning_rate": 9.671088349945531e-06, "loss": 0.6404, "step": 21198 }, { "epoch": 1.5755481233742104, "grad_norm": 2.2899781113751847, "learning_rate": 9.670286416883187e-06, "loss": 0.7113, "step": 21199 }, { "epoch": 1.5756224451876626, "grad_norm": 2.1328912015802786, "learning_rate": 9.66948448594352e-06, "loss": 0.706, "step": 21200 }, { "epoch": 1.5756967670011148, "grad_norm": 1.8657911952267323, "learning_rate": 9.668682557131698e-06, "loss": 0.7145, "step": 21201 }, { "epoch": 1.575771088814567, "grad_norm": 5.406002782263413, "learning_rate": 9.667880630452877e-06, "loss": 0.5875, "step": 21202 }, { "epoch": 1.5758454106280193, "grad_norm": 2.313422047400406, "learning_rate": 9.667078705912221e-06, "loss": 0.6062, "step": 21203 }, { "epoch": 1.5759197324414715, "grad_norm": 1.7905279721452192, "learning_rate": 9.666276783514897e-06, "loss": 0.4861, "step": 21204 }, { "epoch": 1.5759940542549238, "grad_norm": 1.9213130438906372, "learning_rate": 9.665474863266067e-06, "loss": 0.4657, "step": 21205 }, { "epoch": 1.576068376068376, "grad_norm": 2.0743620088145156, "learning_rate": 9.664672945170892e-06, "loss": 0.7046, "step": 21206 }, { "epoch": 1.5761426978818283, "grad_norm": 1.975452741226536, "learning_rate": 9.663871029234534e-06, "loss": 0.7025, "step": 21207 }, { "epoch": 1.5762170196952807, "grad_norm": 2.6956138324846513, "learning_rate": 9.663069115462155e-06, "loss": 0.7073, "step": 21208 }, { "epoch": 1.5762913415087327, "grad_norm": 2.345555780078068, "learning_rate": 9.662267203858923e-06, "loss": 0.8372, "step": 21209 }, { "epoch": 1.5763656633221852, "grad_norm": 2.0751992934291557, "learning_rate": 9.661465294429997e-06, "loss": 0.689, "step": 21210 }, { "epoch": 1.5764399851356372, "grad_norm": 1.862164485015475, "learning_rate": 9.660663387180538e-06, "loss": 0.5833, "step": 21211 }, { "epoch": 1.5765143069490897, "grad_norm": 2.12340348870268, "learning_rate": 9.659861482115716e-06, "loss": 0.5957, "step": 21212 }, { "epoch": 1.5765886287625417, "grad_norm": 1.8274878062848785, "learning_rate": 9.659059579240684e-06, "loss": 0.5987, "step": 21213 }, { "epoch": 1.5766629505759941, "grad_norm": 1.6178638742468838, "learning_rate": 9.658257678560607e-06, "loss": 0.5128, "step": 21214 }, { "epoch": 1.5767372723894462, "grad_norm": 1.7151400723143038, "learning_rate": 9.65745578008065e-06, "loss": 0.5733, "step": 21215 }, { "epoch": 1.5768115942028986, "grad_norm": 2.044318263647829, "learning_rate": 9.656653883805978e-06, "loss": 0.7744, "step": 21216 }, { "epoch": 1.5768859160163506, "grad_norm": 2.095974061344398, "learning_rate": 9.655851989741747e-06, "loss": 0.4802, "step": 21217 }, { "epoch": 1.576960237829803, "grad_norm": 2.505151825309491, "learning_rate": 9.655050097893124e-06, "loss": 0.8149, "step": 21218 }, { "epoch": 1.5770345596432553, "grad_norm": 2.3149621619458935, "learning_rate": 9.65424820826527e-06, "loss": 0.5815, "step": 21219 }, { "epoch": 1.5771088814567076, "grad_norm": 1.7632069984053256, "learning_rate": 9.653446320863349e-06, "loss": 0.5753, "step": 21220 }, { "epoch": 1.5771832032701598, "grad_norm": 1.7510284194742296, "learning_rate": 9.652644435692523e-06, "loss": 0.5618, "step": 21221 }, { "epoch": 1.577257525083612, "grad_norm": 5.663366914087171, "learning_rate": 9.651842552757953e-06, "loss": 0.5582, "step": 21222 }, { "epoch": 1.5773318468970643, "grad_norm": 2.390500963536393, "learning_rate": 9.651040672064808e-06, "loss": 0.6407, "step": 21223 }, { "epoch": 1.5774061687105165, "grad_norm": 2.0151859768993314, "learning_rate": 9.650238793618238e-06, "loss": 0.6338, "step": 21224 }, { "epoch": 1.5774804905239688, "grad_norm": 2.0066730065122407, "learning_rate": 9.649436917423414e-06, "loss": 0.5781, "step": 21225 }, { "epoch": 1.577554812337421, "grad_norm": 2.778295253642692, "learning_rate": 9.648635043485498e-06, "loss": 0.58, "step": 21226 }, { "epoch": 1.5776291341508732, "grad_norm": 2.6370714887557356, "learning_rate": 9.647833171809652e-06, "loss": 0.6955, "step": 21227 }, { "epoch": 1.5777034559643255, "grad_norm": 2.0338024207349803, "learning_rate": 9.647031302401036e-06, "loss": 0.5785, "step": 21228 }, { "epoch": 1.5777777777777777, "grad_norm": 2.6654154825652903, "learning_rate": 9.646229435264814e-06, "loss": 0.6683, "step": 21229 }, { "epoch": 1.57785209959123, "grad_norm": 1.962098057747397, "learning_rate": 9.645427570406149e-06, "loss": 0.6388, "step": 21230 }, { "epoch": 1.5779264214046824, "grad_norm": 2.1014048115302595, "learning_rate": 9.644625707830203e-06, "loss": 0.5815, "step": 21231 }, { "epoch": 1.5780007432181344, "grad_norm": 1.8276658386261766, "learning_rate": 9.643823847542141e-06, "loss": 0.5473, "step": 21232 }, { "epoch": 1.578075065031587, "grad_norm": 2.036721620771059, "learning_rate": 9.64302198954712e-06, "loss": 0.6576, "step": 21233 }, { "epoch": 1.578149386845039, "grad_norm": 1.6798282535126463, "learning_rate": 9.642220133850305e-06, "loss": 0.4843, "step": 21234 }, { "epoch": 1.5782237086584914, "grad_norm": 2.2948737004941875, "learning_rate": 9.64141828045686e-06, "loss": 0.5944, "step": 21235 }, { "epoch": 1.5782980304719434, "grad_norm": 1.545834638143666, "learning_rate": 9.640616429371944e-06, "loss": 0.3781, "step": 21236 }, { "epoch": 1.5783723522853959, "grad_norm": 1.9589351891447713, "learning_rate": 9.639814580600721e-06, "loss": 0.7118, "step": 21237 }, { "epoch": 1.5784466740988479, "grad_norm": 2.179036819645505, "learning_rate": 9.639012734148353e-06, "loss": 0.7239, "step": 21238 }, { "epoch": 1.5785209959123003, "grad_norm": 2.85472788786546, "learning_rate": 9.63821089002e-06, "loss": 0.6375, "step": 21239 }, { "epoch": 1.5785953177257523, "grad_norm": 1.676355843958884, "learning_rate": 9.63740904822083e-06, "loss": 0.546, "step": 21240 }, { "epoch": 1.5786696395392048, "grad_norm": 2.054276932601154, "learning_rate": 9.636607208755999e-06, "loss": 0.6164, "step": 21241 }, { "epoch": 1.578743961352657, "grad_norm": 2.1448025407310216, "learning_rate": 9.635805371630671e-06, "loss": 0.6983, "step": 21242 }, { "epoch": 1.5788182831661093, "grad_norm": 1.692324611184233, "learning_rate": 9.635003536850014e-06, "loss": 0.5963, "step": 21243 }, { "epoch": 1.5788926049795615, "grad_norm": 2.2638216230027113, "learning_rate": 9.634201704419183e-06, "loss": 0.6254, "step": 21244 }, { "epoch": 1.5789669267930138, "grad_norm": 2.277707607407632, "learning_rate": 9.633399874343342e-06, "loss": 0.6154, "step": 21245 }, { "epoch": 1.579041248606466, "grad_norm": 1.8975432249804531, "learning_rate": 9.632598046627655e-06, "loss": 0.6384, "step": 21246 }, { "epoch": 1.5791155704199182, "grad_norm": 1.9553683641131867, "learning_rate": 9.631796221277282e-06, "loss": 0.6036, "step": 21247 }, { "epoch": 1.5791898922333705, "grad_norm": 2.0870501375491854, "learning_rate": 9.630994398297384e-06, "loss": 0.6127, "step": 21248 }, { "epoch": 1.5792642140468227, "grad_norm": 1.8917821284662462, "learning_rate": 9.630192577693129e-06, "loss": 0.5549, "step": 21249 }, { "epoch": 1.579338535860275, "grad_norm": 1.5003732833087766, "learning_rate": 9.62939075946967e-06, "loss": 0.551, "step": 21250 }, { "epoch": 1.5794128576737272, "grad_norm": 2.459972601934754, "learning_rate": 9.628588943632176e-06, "loss": 0.8131, "step": 21251 }, { "epoch": 1.5794871794871796, "grad_norm": 2.232016616827743, "learning_rate": 9.627787130185807e-06, "loss": 0.6807, "step": 21252 }, { "epoch": 1.5795615013006317, "grad_norm": 1.825359955872749, "learning_rate": 9.626985319135726e-06, "loss": 0.6167, "step": 21253 }, { "epoch": 1.5796358231140841, "grad_norm": 1.9116674598146663, "learning_rate": 9.626183510487092e-06, "loss": 0.5436, "step": 21254 }, { "epoch": 1.5797101449275361, "grad_norm": 2.091208865748685, "learning_rate": 9.625381704245072e-06, "loss": 0.6667, "step": 21255 }, { "epoch": 1.5797844667409886, "grad_norm": 2.7864594946171137, "learning_rate": 9.624579900414822e-06, "loss": 0.7307, "step": 21256 }, { "epoch": 1.5798587885544406, "grad_norm": 1.820103097264233, "learning_rate": 9.623778099001513e-06, "loss": 0.6426, "step": 21257 }, { "epoch": 1.579933110367893, "grad_norm": 1.8801079357125883, "learning_rate": 9.622976300010297e-06, "loss": 0.6719, "step": 21258 }, { "epoch": 1.580007432181345, "grad_norm": 1.6244364855796054, "learning_rate": 9.62217450344634e-06, "loss": 0.3975, "step": 21259 }, { "epoch": 1.5800817539947976, "grad_norm": 1.9911704470946334, "learning_rate": 9.621372709314805e-06, "loss": 0.6928, "step": 21260 }, { "epoch": 1.5801560758082496, "grad_norm": 2.2233122712845694, "learning_rate": 9.62057091762085e-06, "loss": 0.5161, "step": 21261 }, { "epoch": 1.580230397621702, "grad_norm": 2.204830948376421, "learning_rate": 9.619769128369641e-06, "loss": 0.6919, "step": 21262 }, { "epoch": 1.5803047194351543, "grad_norm": 1.5844236352361487, "learning_rate": 9.61896734156634e-06, "loss": 0.5898, "step": 21263 }, { "epoch": 1.5803790412486065, "grad_norm": 1.9354834121601348, "learning_rate": 9.618165557216108e-06, "loss": 0.5424, "step": 21264 }, { "epoch": 1.5804533630620587, "grad_norm": 1.9109933812728754, "learning_rate": 9.617363775324103e-06, "loss": 0.641, "step": 21265 }, { "epoch": 1.580527684875511, "grad_norm": 2.2596147521570096, "learning_rate": 9.616561995895493e-06, "loss": 0.6942, "step": 21266 }, { "epoch": 1.5806020066889632, "grad_norm": 2.2651523046210595, "learning_rate": 9.615760218935434e-06, "loss": 0.5865, "step": 21267 }, { "epoch": 1.5806763285024155, "grad_norm": 1.7699828712838466, "learning_rate": 9.614958444449093e-06, "loss": 0.4887, "step": 21268 }, { "epoch": 1.5807506503158677, "grad_norm": 2.2542715831083497, "learning_rate": 9.614156672441633e-06, "loss": 0.7073, "step": 21269 }, { "epoch": 1.58082497212932, "grad_norm": 2.0000822781647147, "learning_rate": 9.613354902918209e-06, "loss": 0.4881, "step": 21270 }, { "epoch": 1.5808992939427722, "grad_norm": 2.3182213624477863, "learning_rate": 9.612553135883988e-06, "loss": 0.706, "step": 21271 }, { "epoch": 1.5809736157562244, "grad_norm": 1.9805771419972202, "learning_rate": 9.611751371344126e-06, "loss": 0.5912, "step": 21272 }, { "epoch": 1.5810479375696767, "grad_norm": 1.886735760850287, "learning_rate": 9.61094960930379e-06, "loss": 0.6364, "step": 21273 }, { "epoch": 1.581122259383129, "grad_norm": 1.9693119958935883, "learning_rate": 9.61014784976814e-06, "loss": 0.5722, "step": 21274 }, { "epoch": 1.5811965811965814, "grad_norm": 1.9693685807868864, "learning_rate": 9.60934609274234e-06, "loss": 0.4561, "step": 21275 }, { "epoch": 1.5812709030100334, "grad_norm": 1.723786602213048, "learning_rate": 9.608544338231547e-06, "loss": 0.5974, "step": 21276 }, { "epoch": 1.5813452248234858, "grad_norm": 1.6618274199124263, "learning_rate": 9.607742586240926e-06, "loss": 0.5565, "step": 21277 }, { "epoch": 1.5814195466369378, "grad_norm": 1.6842303951047801, "learning_rate": 9.606940836775636e-06, "loss": 0.4384, "step": 21278 }, { "epoch": 1.5814938684503903, "grad_norm": 1.8392271034227854, "learning_rate": 9.606139089840842e-06, "loss": 0.5917, "step": 21279 }, { "epoch": 1.5815681902638423, "grad_norm": 2.041520915633445, "learning_rate": 9.605337345441709e-06, "loss": 0.5473, "step": 21280 }, { "epoch": 1.5816425120772948, "grad_norm": 1.784055243983206, "learning_rate": 9.60453560358339e-06, "loss": 0.5934, "step": 21281 }, { "epoch": 1.5817168338907468, "grad_norm": 2.0124230493937882, "learning_rate": 9.60373386427105e-06, "loss": 0.5877, "step": 21282 }, { "epoch": 1.5817911557041993, "grad_norm": 2.3579197483557714, "learning_rate": 9.60293212750985e-06, "loss": 0.7249, "step": 21283 }, { "epoch": 1.5818654775176513, "grad_norm": 2.3865662603011346, "learning_rate": 9.602130393304953e-06, "loss": 0.7601, "step": 21284 }, { "epoch": 1.5819397993311037, "grad_norm": 1.9651569461289204, "learning_rate": 9.601328661661519e-06, "loss": 0.4952, "step": 21285 }, { "epoch": 1.582014121144556, "grad_norm": 1.760289409358506, "learning_rate": 9.600526932584712e-06, "loss": 0.6527, "step": 21286 }, { "epoch": 1.5820884429580082, "grad_norm": 2.315298284632007, "learning_rate": 9.59972520607969e-06, "loss": 0.6848, "step": 21287 }, { "epoch": 1.5821627647714605, "grad_norm": 2.0089432152401985, "learning_rate": 9.598923482151616e-06, "loss": 0.6611, "step": 21288 }, { "epoch": 1.5822370865849127, "grad_norm": 2.172887609556334, "learning_rate": 9.598121760805653e-06, "loss": 0.6577, "step": 21289 }, { "epoch": 1.582311408398365, "grad_norm": 1.851680505293252, "learning_rate": 9.597320042046961e-06, "loss": 0.501, "step": 21290 }, { "epoch": 1.5823857302118172, "grad_norm": 2.1788906191386808, "learning_rate": 9.596518325880707e-06, "loss": 0.6987, "step": 21291 }, { "epoch": 1.5824600520252694, "grad_norm": 2.1784706200000428, "learning_rate": 9.595716612312041e-06, "loss": 0.5721, "step": 21292 }, { "epoch": 1.5825343738387216, "grad_norm": 1.9280500257471496, "learning_rate": 9.594914901346133e-06, "loss": 0.6806, "step": 21293 }, { "epoch": 1.5826086956521739, "grad_norm": 2.057935655980198, "learning_rate": 9.59411319298814e-06, "loss": 0.5547, "step": 21294 }, { "epoch": 1.5826830174656261, "grad_norm": 2.022913575291585, "learning_rate": 9.593311487243225e-06, "loss": 0.5638, "step": 21295 }, { "epoch": 1.5827573392790784, "grad_norm": 2.001162961263785, "learning_rate": 9.59250978411655e-06, "loss": 0.5337, "step": 21296 }, { "epoch": 1.5828316610925306, "grad_norm": 2.5434396122202023, "learning_rate": 9.591708083613277e-06, "loss": 0.6069, "step": 21297 }, { "epoch": 1.582905982905983, "grad_norm": 2.815181870395773, "learning_rate": 9.590906385738565e-06, "loss": 0.7254, "step": 21298 }, { "epoch": 1.582980304719435, "grad_norm": 1.7732229166875053, "learning_rate": 9.590104690497577e-06, "loss": 0.6362, "step": 21299 }, { "epoch": 1.5830546265328875, "grad_norm": 2.3678746973710694, "learning_rate": 9.589302997895472e-06, "loss": 0.5929, "step": 21300 }, { "epoch": 1.5831289483463395, "grad_norm": 2.0229193425379277, "learning_rate": 9.588501307937413e-06, "loss": 0.5384, "step": 21301 }, { "epoch": 1.583203270159792, "grad_norm": 2.075105147918872, "learning_rate": 9.587699620628566e-06, "loss": 0.7173, "step": 21302 }, { "epoch": 1.583277591973244, "grad_norm": 2.212213304008245, "learning_rate": 9.586897935974085e-06, "loss": 0.5515, "step": 21303 }, { "epoch": 1.5833519137866965, "grad_norm": 1.9699028467562516, "learning_rate": 9.586096253979133e-06, "loss": 0.5579, "step": 21304 }, { "epoch": 1.5834262356001485, "grad_norm": 3.2920631749593277, "learning_rate": 9.58529457464887e-06, "loss": 0.8257, "step": 21305 }, { "epoch": 1.583500557413601, "grad_norm": 2.352794470863042, "learning_rate": 9.584492897988461e-06, "loss": 0.5219, "step": 21306 }, { "epoch": 1.583574879227053, "grad_norm": 2.174207471040841, "learning_rate": 9.583691224003065e-06, "loss": 0.6681, "step": 21307 }, { "epoch": 1.5836492010405054, "grad_norm": 2.0674540022948316, "learning_rate": 9.582889552697844e-06, "loss": 0.6756, "step": 21308 }, { "epoch": 1.5837235228539577, "grad_norm": 1.8110924361529879, "learning_rate": 9.582087884077957e-06, "loss": 0.5762, "step": 21309 }, { "epoch": 1.58379784466741, "grad_norm": 1.425856865190816, "learning_rate": 9.581286218148566e-06, "loss": 0.3448, "step": 21310 }, { "epoch": 1.5838721664808622, "grad_norm": 1.6187435811458444, "learning_rate": 9.580484554914833e-06, "loss": 0.5597, "step": 21311 }, { "epoch": 1.5839464882943144, "grad_norm": 2.6681041111563233, "learning_rate": 9.57968289438192e-06, "loss": 0.6151, "step": 21312 }, { "epoch": 1.5840208101077666, "grad_norm": 1.4262907302291377, "learning_rate": 9.578881236554986e-06, "loss": 0.4111, "step": 21313 }, { "epoch": 1.5840951319212189, "grad_norm": 1.811315201901083, "learning_rate": 9.578079581439195e-06, "loss": 0.4955, "step": 21314 }, { "epoch": 1.584169453734671, "grad_norm": 2.149318439928048, "learning_rate": 9.577277929039705e-06, "loss": 0.6472, "step": 21315 }, { "epoch": 1.5842437755481233, "grad_norm": 1.8828569550764962, "learning_rate": 9.576476279361674e-06, "loss": 0.6648, "step": 21316 }, { "epoch": 1.5843180973615756, "grad_norm": 2.6982475368435215, "learning_rate": 9.57567463241027e-06, "loss": 0.6393, "step": 21317 }, { "epoch": 1.5843924191750278, "grad_norm": 2.1886451282652746, "learning_rate": 9.57487298819065e-06, "loss": 0.6568, "step": 21318 }, { "epoch": 1.5844667409884803, "grad_norm": 2.3750126323462495, "learning_rate": 9.574071346707976e-06, "loss": 0.6706, "step": 21319 }, { "epoch": 1.5845410628019323, "grad_norm": 2.1726046063299367, "learning_rate": 9.573269707967407e-06, "loss": 0.6293, "step": 21320 }, { "epoch": 1.5846153846153848, "grad_norm": 2.405738159026102, "learning_rate": 9.572468071974107e-06, "loss": 0.5138, "step": 21321 }, { "epoch": 1.5846897064288368, "grad_norm": 2.014643763264982, "learning_rate": 9.571666438733233e-06, "loss": 0.5996, "step": 21322 }, { "epoch": 1.5847640282422892, "grad_norm": 1.8228603170056554, "learning_rate": 9.57086480824995e-06, "loss": 0.508, "step": 21323 }, { "epoch": 1.5848383500557413, "grad_norm": 1.986104047515038, "learning_rate": 9.570063180529418e-06, "loss": 0.5452, "step": 21324 }, { "epoch": 1.5849126718691937, "grad_norm": 2.1324035583564225, "learning_rate": 9.569261555576798e-06, "loss": 0.6364, "step": 21325 }, { "epoch": 1.5849869936826457, "grad_norm": 2.090004795704765, "learning_rate": 9.56845993339725e-06, "loss": 0.6108, "step": 21326 }, { "epoch": 1.5850613154960982, "grad_norm": 1.8993424548931845, "learning_rate": 9.567658313995932e-06, "loss": 0.682, "step": 21327 }, { "epoch": 1.5851356373095502, "grad_norm": 2.695951868878093, "learning_rate": 9.566856697378007e-06, "loss": 0.6257, "step": 21328 }, { "epoch": 1.5852099591230027, "grad_norm": 2.033805349053757, "learning_rate": 9.566055083548635e-06, "loss": 0.6321, "step": 21329 }, { "epoch": 1.585284280936455, "grad_norm": 1.8092886230080651, "learning_rate": 9.56525347251298e-06, "loss": 0.6732, "step": 21330 }, { "epoch": 1.5853586027499071, "grad_norm": 1.7996213815122444, "learning_rate": 9.564451864276201e-06, "loss": 0.5809, "step": 21331 }, { "epoch": 1.5854329245633594, "grad_norm": 2.1165420672166917, "learning_rate": 9.563650258843456e-06, "loss": 0.6224, "step": 21332 }, { "epoch": 1.5855072463768116, "grad_norm": 2.3932915615088466, "learning_rate": 9.56284865621991e-06, "loss": 0.6205, "step": 21333 }, { "epoch": 1.5855815681902639, "grad_norm": 1.9505054650121587, "learning_rate": 9.56204705641072e-06, "loss": 0.5393, "step": 21334 }, { "epoch": 1.585655890003716, "grad_norm": 1.782189744686954, "learning_rate": 9.56124545942105e-06, "loss": 0.5696, "step": 21335 }, { "epoch": 1.5857302118171683, "grad_norm": 2.024800769219429, "learning_rate": 9.560443865256062e-06, "loss": 0.5494, "step": 21336 }, { "epoch": 1.5858045336306206, "grad_norm": 1.891201831380291, "learning_rate": 9.559642273920911e-06, "loss": 0.5087, "step": 21337 }, { "epoch": 1.5858788554440728, "grad_norm": 2.2976411827085954, "learning_rate": 9.55884068542076e-06, "loss": 0.6988, "step": 21338 }, { "epoch": 1.585953177257525, "grad_norm": 1.7702480031769718, "learning_rate": 9.558039099760768e-06, "loss": 0.5668, "step": 21339 }, { "epoch": 1.5860274990709773, "grad_norm": 2.045724610490911, "learning_rate": 9.557237516946097e-06, "loss": 0.6547, "step": 21340 }, { "epoch": 1.5861018208844295, "grad_norm": 1.8251198824758286, "learning_rate": 9.556435936981912e-06, "loss": 0.585, "step": 21341 }, { "epoch": 1.586176142697882, "grad_norm": 2.1556414319736623, "learning_rate": 9.555634359873364e-06, "loss": 0.6626, "step": 21342 }, { "epoch": 1.586250464511334, "grad_norm": 2.2714796294648663, "learning_rate": 9.554832785625621e-06, "loss": 0.5483, "step": 21343 }, { "epoch": 1.5863247863247865, "grad_norm": 1.8852483029521472, "learning_rate": 9.554031214243843e-06, "loss": 0.5643, "step": 21344 }, { "epoch": 1.5863991081382385, "grad_norm": 1.6610878908167819, "learning_rate": 9.553229645733187e-06, "loss": 0.4012, "step": 21345 }, { "epoch": 1.586473429951691, "grad_norm": 2.1005130004523145, "learning_rate": 9.552428080098817e-06, "loss": 0.7643, "step": 21346 }, { "epoch": 1.586547751765143, "grad_norm": 1.615776013681058, "learning_rate": 9.55162651734589e-06, "loss": 0.5587, "step": 21347 }, { "epoch": 1.5866220735785954, "grad_norm": 1.6754744618315491, "learning_rate": 9.550824957479575e-06, "loss": 0.4547, "step": 21348 }, { "epoch": 1.5866963953920474, "grad_norm": 2.2132474066887227, "learning_rate": 9.550023400505019e-06, "loss": 0.623, "step": 21349 }, { "epoch": 1.5867707172055, "grad_norm": 1.9274580106634613, "learning_rate": 9.549221846427389e-06, "loss": 0.6014, "step": 21350 }, { "epoch": 1.586845039018952, "grad_norm": 1.7810432475073765, "learning_rate": 9.548420295251846e-06, "loss": 0.5437, "step": 21351 }, { "epoch": 1.5869193608324044, "grad_norm": 1.5936432857168463, "learning_rate": 9.54761874698355e-06, "loss": 0.6093, "step": 21352 }, { "epoch": 1.5869936826458566, "grad_norm": 1.9248662642446144, "learning_rate": 9.546817201627661e-06, "loss": 0.644, "step": 21353 }, { "epoch": 1.5870680044593088, "grad_norm": 1.7286713539542085, "learning_rate": 9.54601565918934e-06, "loss": 0.6334, "step": 21354 }, { "epoch": 1.587142326272761, "grad_norm": 2.591469138839343, "learning_rate": 9.545214119673744e-06, "loss": 0.4288, "step": 21355 }, { "epoch": 1.5872166480862133, "grad_norm": 1.7838465783558903, "learning_rate": 9.544412583086038e-06, "loss": 0.5337, "step": 21356 }, { "epoch": 1.5872909698996656, "grad_norm": 1.8583670248903303, "learning_rate": 9.543611049431382e-06, "loss": 0.7821, "step": 21357 }, { "epoch": 1.5873652917131178, "grad_norm": 2.3246016974393586, "learning_rate": 9.542809518714931e-06, "loss": 0.6671, "step": 21358 }, { "epoch": 1.58743961352657, "grad_norm": 2.289284602761549, "learning_rate": 9.542007990941855e-06, "loss": 0.5439, "step": 21359 }, { "epoch": 1.5875139353400223, "grad_norm": 2.501623623492865, "learning_rate": 9.541206466117302e-06, "loss": 0.68, "step": 21360 }, { "epoch": 1.5875882571534745, "grad_norm": 2.7164355715454893, "learning_rate": 9.540404944246438e-06, "loss": 0.6002, "step": 21361 }, { "epoch": 1.5876625789669268, "grad_norm": 2.1993622815298974, "learning_rate": 9.539603425334424e-06, "loss": 0.6163, "step": 21362 }, { "epoch": 1.587736900780379, "grad_norm": 2.0508165853459204, "learning_rate": 9.53880190938642e-06, "loss": 0.5894, "step": 21363 }, { "epoch": 1.5878112225938312, "grad_norm": 2.002311916908056, "learning_rate": 9.538000396407585e-06, "loss": 0.693, "step": 21364 }, { "epoch": 1.5878855444072837, "grad_norm": 1.931644910841213, "learning_rate": 9.537198886403078e-06, "loss": 0.3615, "step": 21365 }, { "epoch": 1.5879598662207357, "grad_norm": 1.749905394223205, "learning_rate": 9.53639737937806e-06, "loss": 0.6628, "step": 21366 }, { "epoch": 1.5880341880341882, "grad_norm": 2.0340938623810123, "learning_rate": 9.535595875337694e-06, "loss": 0.5693, "step": 21367 }, { "epoch": 1.5881085098476402, "grad_norm": 1.695118936479576, "learning_rate": 9.534794374287138e-06, "loss": 0.5552, "step": 21368 }, { "epoch": 1.5881828316610926, "grad_norm": 2.1461736918213123, "learning_rate": 9.53399287623155e-06, "loss": 0.7405, "step": 21369 }, { "epoch": 1.5882571534745447, "grad_norm": 2.5505872115855133, "learning_rate": 9.533191381176096e-06, "loss": 0.7069, "step": 21370 }, { "epoch": 1.5883314752879971, "grad_norm": 1.6565861416067966, "learning_rate": 9.532389889125926e-06, "loss": 0.6179, "step": 21371 }, { "epoch": 1.5884057971014491, "grad_norm": 1.9620249641139478, "learning_rate": 9.531588400086208e-06, "loss": 0.6536, "step": 21372 }, { "epoch": 1.5884801189149016, "grad_norm": 4.766319326021098, "learning_rate": 9.530786914062099e-06, "loss": 0.768, "step": 21373 }, { "epoch": 1.5885544407283536, "grad_norm": 2.015590954848987, "learning_rate": 9.52998543105876e-06, "loss": 0.4609, "step": 21374 }, { "epoch": 1.588628762541806, "grad_norm": 2.2503486040620957, "learning_rate": 9.529183951081352e-06, "loss": 0.6519, "step": 21375 }, { "epoch": 1.5887030843552583, "grad_norm": 3.0096106574935484, "learning_rate": 9.52838247413503e-06, "loss": 0.7109, "step": 21376 }, { "epoch": 1.5887774061687105, "grad_norm": 2.2122779357263815, "learning_rate": 9.527581000224956e-06, "loss": 0.7738, "step": 21377 }, { "epoch": 1.5888517279821628, "grad_norm": 1.8458765305422384, "learning_rate": 9.526779529356296e-06, "loss": 0.6356, "step": 21378 }, { "epoch": 1.588926049795615, "grad_norm": 2.187316979365524, "learning_rate": 9.525978061534202e-06, "loss": 0.5804, "step": 21379 }, { "epoch": 1.5890003716090673, "grad_norm": 2.1614198269438893, "learning_rate": 9.525176596763837e-06, "loss": 0.5131, "step": 21380 }, { "epoch": 1.5890746934225195, "grad_norm": 1.8355810639365178, "learning_rate": 9.524375135050359e-06, "loss": 0.4928, "step": 21381 }, { "epoch": 1.5891490152359717, "grad_norm": 3.992492771364595, "learning_rate": 9.523573676398935e-06, "loss": 0.5598, "step": 21382 }, { "epoch": 1.589223337049424, "grad_norm": 1.602870735355998, "learning_rate": 9.522772220814713e-06, "loss": 0.5344, "step": 21383 }, { "epoch": 1.5892976588628762, "grad_norm": 2.0528360637209833, "learning_rate": 9.52197076830286e-06, "loss": 0.776, "step": 21384 }, { "epoch": 1.5893719806763285, "grad_norm": 1.8019195023627663, "learning_rate": 9.521169318868536e-06, "loss": 0.5446, "step": 21385 }, { "epoch": 1.589446302489781, "grad_norm": 2.032833943815987, "learning_rate": 9.520367872516898e-06, "loss": 0.5456, "step": 21386 }, { "epoch": 1.589520624303233, "grad_norm": 1.8596752387490207, "learning_rate": 9.519566429253106e-06, "loss": 0.6361, "step": 21387 }, { "epoch": 1.5895949461166854, "grad_norm": 1.8576011133077601, "learning_rate": 9.51876498908232e-06, "loss": 0.6628, "step": 21388 }, { "epoch": 1.5896692679301374, "grad_norm": 1.7586142683968888, "learning_rate": 9.517963552009703e-06, "loss": 0.5752, "step": 21389 }, { "epoch": 1.5897435897435899, "grad_norm": 1.8065938107912576, "learning_rate": 9.517162118040408e-06, "loss": 0.5374, "step": 21390 }, { "epoch": 1.5898179115570419, "grad_norm": 3.761495700472818, "learning_rate": 9.516360687179598e-06, "loss": 0.5745, "step": 21391 }, { "epoch": 1.5898922333704943, "grad_norm": 2.9457928422405337, "learning_rate": 9.515559259432435e-06, "loss": 0.6609, "step": 21392 }, { "epoch": 1.5899665551839464, "grad_norm": 2.9151012875127478, "learning_rate": 9.51475783480408e-06, "loss": 0.7007, "step": 21393 }, { "epoch": 1.5900408769973988, "grad_norm": 1.745095089678213, "learning_rate": 9.513956413299685e-06, "loss": 0.3911, "step": 21394 }, { "epoch": 1.5901151988108508, "grad_norm": 2.1836604333067613, "learning_rate": 9.513154994924412e-06, "loss": 0.6449, "step": 21395 }, { "epoch": 1.5901895206243033, "grad_norm": 3.3751676285421577, "learning_rate": 9.512353579683423e-06, "loss": 0.4715, "step": 21396 }, { "epoch": 1.5902638424377553, "grad_norm": 1.8356610166938534, "learning_rate": 9.511552167581878e-06, "loss": 0.5829, "step": 21397 }, { "epoch": 1.5903381642512078, "grad_norm": 2.0151936609456693, "learning_rate": 9.51075075862493e-06, "loss": 0.5644, "step": 21398 }, { "epoch": 1.59041248606466, "grad_norm": 1.8124745484845515, "learning_rate": 9.509949352817748e-06, "loss": 0.5436, "step": 21399 }, { "epoch": 1.5904868078781123, "grad_norm": 2.0136547976085923, "learning_rate": 9.509147950165487e-06, "loss": 0.6628, "step": 21400 }, { "epoch": 1.5905611296915645, "grad_norm": 1.572884106244291, "learning_rate": 9.508346550673303e-06, "loss": 0.5327, "step": 21401 }, { "epoch": 1.5906354515050167, "grad_norm": 1.9611823742870413, "learning_rate": 9.507545154346359e-06, "loss": 0.6934, "step": 21402 }, { "epoch": 1.590709773318469, "grad_norm": 1.8181901075379165, "learning_rate": 9.506743761189813e-06, "loss": 0.4986, "step": 21403 }, { "epoch": 1.5907840951319212, "grad_norm": 1.5054757390507716, "learning_rate": 9.505942371208831e-06, "loss": 0.4598, "step": 21404 }, { "epoch": 1.5908584169453734, "grad_norm": 1.6451396002862686, "learning_rate": 9.505140984408562e-06, "loss": 0.3728, "step": 21405 }, { "epoch": 1.5909327387588257, "grad_norm": 2.055644033904747, "learning_rate": 9.504339600794167e-06, "loss": 0.6147, "step": 21406 }, { "epoch": 1.591007060572278, "grad_norm": 2.1239732536709957, "learning_rate": 9.503538220370812e-06, "loss": 0.6528, "step": 21407 }, { "epoch": 1.5910813823857302, "grad_norm": 1.959894709628388, "learning_rate": 9.502736843143651e-06, "loss": 0.5758, "step": 21408 }, { "epoch": 1.5911557041991826, "grad_norm": 2.2868647228529597, "learning_rate": 9.501935469117843e-06, "loss": 0.6948, "step": 21409 }, { "epoch": 1.5912300260126346, "grad_norm": 1.8365213278070782, "learning_rate": 9.501134098298549e-06, "loss": 0.6396, "step": 21410 }, { "epoch": 1.591304347826087, "grad_norm": 2.8983381481978476, "learning_rate": 9.50033273069093e-06, "loss": 0.484, "step": 21411 }, { "epoch": 1.5913786696395391, "grad_norm": 2.371430494459624, "learning_rate": 9.49953136630014e-06, "loss": 0.6083, "step": 21412 }, { "epoch": 1.5914529914529916, "grad_norm": 1.6466253921323264, "learning_rate": 9.498730005131342e-06, "loss": 0.5086, "step": 21413 }, { "epoch": 1.5915273132664436, "grad_norm": 2.00465118085396, "learning_rate": 9.497928647189693e-06, "loss": 0.6291, "step": 21414 }, { "epoch": 1.591601635079896, "grad_norm": 2.014291260333262, "learning_rate": 9.49712729248036e-06, "loss": 0.5817, "step": 21415 }, { "epoch": 1.591675956893348, "grad_norm": 1.878412041426769, "learning_rate": 9.496325941008489e-06, "loss": 0.542, "step": 21416 }, { "epoch": 1.5917502787068005, "grad_norm": 1.8326180953674667, "learning_rate": 9.495524592779247e-06, "loss": 0.6555, "step": 21417 }, { "epoch": 1.5918246005202525, "grad_norm": 2.129454634634652, "learning_rate": 9.494723247797792e-06, "loss": 0.6193, "step": 21418 }, { "epoch": 1.591898922333705, "grad_norm": 2.033671892753791, "learning_rate": 9.493921906069282e-06, "loss": 0.6165, "step": 21419 }, { "epoch": 1.5919732441471572, "grad_norm": 1.9883118379588582, "learning_rate": 9.493120567598874e-06, "loss": 0.6349, "step": 21420 }, { "epoch": 1.5920475659606095, "grad_norm": 2.95741831743063, "learning_rate": 9.492319232391732e-06, "loss": 0.6917, "step": 21421 }, { "epoch": 1.5921218877740617, "grad_norm": 2.017476374584383, "learning_rate": 9.491517900453015e-06, "loss": 0.6429, "step": 21422 }, { "epoch": 1.592196209587514, "grad_norm": 1.8847785021957317, "learning_rate": 9.490716571787874e-06, "loss": 0.6896, "step": 21423 }, { "epoch": 1.5922705314009662, "grad_norm": 2.443836225834708, "learning_rate": 9.489915246401476e-06, "loss": 0.5797, "step": 21424 }, { "epoch": 1.5923448532144184, "grad_norm": 2.2068009499674335, "learning_rate": 9.489113924298977e-06, "loss": 0.761, "step": 21425 }, { "epoch": 1.5924191750278707, "grad_norm": 1.8822726866394497, "learning_rate": 9.488312605485534e-06, "loss": 0.5401, "step": 21426 }, { "epoch": 1.592493496841323, "grad_norm": 2.485605061944655, "learning_rate": 9.487511289966315e-06, "loss": 0.6967, "step": 21427 }, { "epoch": 1.5925678186547751, "grad_norm": 1.7529516464713184, "learning_rate": 9.486709977746465e-06, "loss": 0.5666, "step": 21428 }, { "epoch": 1.5926421404682274, "grad_norm": 1.7550768162275672, "learning_rate": 9.485908668831154e-06, "loss": 0.4494, "step": 21429 }, { "epoch": 1.5927164622816796, "grad_norm": 1.7885959116349948, "learning_rate": 9.485107363225533e-06, "loss": 0.404, "step": 21430 }, { "epoch": 1.5927907840951319, "grad_norm": 2.1182576398975788, "learning_rate": 9.484306060934764e-06, "loss": 0.7126, "step": 21431 }, { "epoch": 1.5928651059085843, "grad_norm": 2.02314546400913, "learning_rate": 9.483504761964005e-06, "loss": 0.5184, "step": 21432 }, { "epoch": 1.5929394277220363, "grad_norm": 2.0955184560086204, "learning_rate": 9.482703466318419e-06, "loss": 0.5404, "step": 21433 }, { "epoch": 1.5930137495354888, "grad_norm": 2.198647579165474, "learning_rate": 9.48190217400316e-06, "loss": 0.656, "step": 21434 }, { "epoch": 1.5930880713489408, "grad_norm": 2.4763729317422998, "learning_rate": 9.481100885023386e-06, "loss": 0.5783, "step": 21435 }, { "epoch": 1.5931623931623933, "grad_norm": 2.019753092299173, "learning_rate": 9.480299599384258e-06, "loss": 0.6175, "step": 21436 }, { "epoch": 1.5932367149758453, "grad_norm": 1.921209253754041, "learning_rate": 9.479498317090933e-06, "loss": 0.688, "step": 21437 }, { "epoch": 1.5933110367892978, "grad_norm": 1.6001710419748265, "learning_rate": 9.478697038148578e-06, "loss": 0.5204, "step": 21438 }, { "epoch": 1.5933853586027498, "grad_norm": 1.776327894981511, "learning_rate": 9.477895762562338e-06, "loss": 0.5111, "step": 21439 }, { "epoch": 1.5934596804162022, "grad_norm": 2.0813851329789235, "learning_rate": 9.477094490337381e-06, "loss": 0.6584, "step": 21440 }, { "epoch": 1.5935340022296542, "grad_norm": 1.6667031120716398, "learning_rate": 9.47629322147886e-06, "loss": 0.4142, "step": 21441 }, { "epoch": 1.5936083240431067, "grad_norm": 2.2294901127646964, "learning_rate": 9.475491955991936e-06, "loss": 0.5436, "step": 21442 }, { "epoch": 1.593682645856559, "grad_norm": 2.0214882032466677, "learning_rate": 9.474690693881767e-06, "loss": 0.7125, "step": 21443 }, { "epoch": 1.5937569676700112, "grad_norm": 1.7888743272972143, "learning_rate": 9.473889435153516e-06, "loss": 0.491, "step": 21444 }, { "epoch": 1.5938312894834634, "grad_norm": 1.7585797200060234, "learning_rate": 9.473088179812335e-06, "loss": 0.5147, "step": 21445 }, { "epoch": 1.5939056112969157, "grad_norm": 2.6064964133033657, "learning_rate": 9.472286927863383e-06, "loss": 0.5556, "step": 21446 }, { "epoch": 1.593979933110368, "grad_norm": 2.3570239377706383, "learning_rate": 9.471485679311823e-06, "loss": 0.4506, "step": 21447 }, { "epoch": 1.5940542549238201, "grad_norm": 1.744393726355745, "learning_rate": 9.470684434162809e-06, "loss": 0.4183, "step": 21448 }, { "epoch": 1.5941285767372724, "grad_norm": 1.8179669761590065, "learning_rate": 9.469883192421505e-06, "loss": 0.6063, "step": 21449 }, { "epoch": 1.5942028985507246, "grad_norm": 1.7761377930040507, "learning_rate": 9.469081954093062e-06, "loss": 0.6177, "step": 21450 }, { "epoch": 1.5942772203641769, "grad_norm": 1.8242237912280295, "learning_rate": 9.468280719182644e-06, "loss": 0.5709, "step": 21451 }, { "epoch": 1.594351542177629, "grad_norm": 1.8082799535048582, "learning_rate": 9.467479487695406e-06, "loss": 0.6182, "step": 21452 }, { "epoch": 1.5944258639910813, "grad_norm": 2.0352628594321223, "learning_rate": 9.466678259636507e-06, "loss": 0.5872, "step": 21453 }, { "epoch": 1.5945001858045336, "grad_norm": 1.5559974606743678, "learning_rate": 9.465877035011105e-06, "loss": 0.5471, "step": 21454 }, { "epoch": 1.594574507617986, "grad_norm": 2.4156418561593416, "learning_rate": 9.46507581382436e-06, "loss": 0.7597, "step": 21455 }, { "epoch": 1.594648829431438, "grad_norm": 2.2905533325381775, "learning_rate": 9.46427459608143e-06, "loss": 0.595, "step": 21456 }, { "epoch": 1.5947231512448905, "grad_norm": 1.3169643450964148, "learning_rate": 9.46347338178747e-06, "loss": 0.4514, "step": 21457 }, { "epoch": 1.5947974730583425, "grad_norm": 1.618036417803046, "learning_rate": 9.46267217094764e-06, "loss": 0.4352, "step": 21458 }, { "epoch": 1.594871794871795, "grad_norm": 1.9874467516788012, "learning_rate": 9.461870963567101e-06, "loss": 0.6105, "step": 21459 }, { "epoch": 1.594946116685247, "grad_norm": 1.8313934973767407, "learning_rate": 9.46106975965101e-06, "loss": 0.5269, "step": 21460 }, { "epoch": 1.5950204384986995, "grad_norm": 1.9037742382369682, "learning_rate": 9.460268559204524e-06, "loss": 0.4421, "step": 21461 }, { "epoch": 1.5950947603121515, "grad_norm": 4.068261694114764, "learning_rate": 9.459467362232801e-06, "loss": 0.4749, "step": 21462 }, { "epoch": 1.595169082125604, "grad_norm": 1.8763919701554093, "learning_rate": 9.458666168740996e-06, "loss": 0.5248, "step": 21463 }, { "epoch": 1.595243403939056, "grad_norm": 2.1072875095084393, "learning_rate": 9.45786497873427e-06, "loss": 0.4559, "step": 21464 }, { "epoch": 1.5953177257525084, "grad_norm": 2.157940579776245, "learning_rate": 9.457063792217783e-06, "loss": 0.5697, "step": 21465 }, { "epoch": 1.5953920475659606, "grad_norm": 2.2694332174491936, "learning_rate": 9.456262609196691e-06, "loss": 0.6619, "step": 21466 }, { "epoch": 1.5954663693794129, "grad_norm": 1.9680554255733944, "learning_rate": 9.455461429676151e-06, "loss": 0.6579, "step": 21467 }, { "epoch": 1.5955406911928651, "grad_norm": 1.8201916031771799, "learning_rate": 9.454660253661323e-06, "loss": 0.7416, "step": 21468 }, { "epoch": 1.5956150130063174, "grad_norm": 2.048511827953776, "learning_rate": 9.453859081157365e-06, "loss": 0.5619, "step": 21469 }, { "epoch": 1.5956893348197696, "grad_norm": 2.1507232203216784, "learning_rate": 9.45305791216943e-06, "loss": 0.5824, "step": 21470 }, { "epoch": 1.5957636566332218, "grad_norm": 1.7728346185635153, "learning_rate": 9.452256746702685e-06, "loss": 0.572, "step": 21471 }, { "epoch": 1.595837978446674, "grad_norm": 2.125285990657991, "learning_rate": 9.451455584762281e-06, "loss": 0.7033, "step": 21472 }, { "epoch": 1.5959123002601263, "grad_norm": 2.6304989026130454, "learning_rate": 9.450654426353378e-06, "loss": 0.6829, "step": 21473 }, { "epoch": 1.5959866220735786, "grad_norm": 1.9305097932131368, "learning_rate": 9.449853271481131e-06, "loss": 0.717, "step": 21474 }, { "epoch": 1.5960609438870308, "grad_norm": 1.8903369671166734, "learning_rate": 9.449052120150701e-06, "loss": 0.5799, "step": 21475 }, { "epoch": 1.5961352657004833, "grad_norm": 1.888520368201277, "learning_rate": 9.448250972367244e-06, "loss": 0.513, "step": 21476 }, { "epoch": 1.5962095875139353, "grad_norm": 1.7913001211331236, "learning_rate": 9.447449828135921e-06, "loss": 0.506, "step": 21477 }, { "epoch": 1.5962839093273877, "grad_norm": 1.5188278104472692, "learning_rate": 9.446648687461886e-06, "loss": 0.4979, "step": 21478 }, { "epoch": 1.5963582311408397, "grad_norm": 2.130218041325738, "learning_rate": 9.445847550350297e-06, "loss": 0.5258, "step": 21479 }, { "epoch": 1.5964325529542922, "grad_norm": 1.9410333810733094, "learning_rate": 9.445046416806314e-06, "loss": 0.6814, "step": 21480 }, { "epoch": 1.5965068747677442, "grad_norm": 3.0974916707676816, "learning_rate": 9.444245286835092e-06, "loss": 0.6692, "step": 21481 }, { "epoch": 1.5965811965811967, "grad_norm": 1.89026134487082, "learning_rate": 9.443444160441793e-06, "loss": 0.6452, "step": 21482 }, { "epoch": 1.5966555183946487, "grad_norm": 2.0824010388907186, "learning_rate": 9.44264303763157e-06, "loss": 0.6565, "step": 21483 }, { "epoch": 1.5967298402081012, "grad_norm": 1.6590075383779188, "learning_rate": 9.441841918409583e-06, "loss": 0.5216, "step": 21484 }, { "epoch": 1.5968041620215532, "grad_norm": 1.8747670612649137, "learning_rate": 9.441040802780988e-06, "loss": 0.5119, "step": 21485 }, { "epoch": 1.5968784838350056, "grad_norm": 2.254945637874951, "learning_rate": 9.440239690750942e-06, "loss": 0.5497, "step": 21486 }, { "epoch": 1.5969528056484579, "grad_norm": 2.307162980547182, "learning_rate": 9.439438582324604e-06, "loss": 0.5708, "step": 21487 }, { "epoch": 1.5970271274619101, "grad_norm": 1.7296832798727084, "learning_rate": 9.438637477507134e-06, "loss": 0.5635, "step": 21488 }, { "epoch": 1.5971014492753624, "grad_norm": 2.178038406713135, "learning_rate": 9.437836376303684e-06, "loss": 0.7362, "step": 21489 }, { "epoch": 1.5971757710888146, "grad_norm": 1.631526938340186, "learning_rate": 9.437035278719414e-06, "loss": 0.542, "step": 21490 }, { "epoch": 1.5972500929022668, "grad_norm": 1.759816222944805, "learning_rate": 9.436234184759483e-06, "loss": 0.3897, "step": 21491 }, { "epoch": 1.597324414715719, "grad_norm": 2.112749071692548, "learning_rate": 9.435433094429045e-06, "loss": 0.6487, "step": 21492 }, { "epoch": 1.5973987365291713, "grad_norm": 2.1705023541760053, "learning_rate": 9.434632007733264e-06, "loss": 0.5663, "step": 21493 }, { "epoch": 1.5974730583426235, "grad_norm": 2.320572100528468, "learning_rate": 9.43383092467729e-06, "loss": 0.6688, "step": 21494 }, { "epoch": 1.5975473801560758, "grad_norm": 2.159671730107544, "learning_rate": 9.433029845266286e-06, "loss": 0.5452, "step": 21495 }, { "epoch": 1.597621701969528, "grad_norm": 1.8343867945356296, "learning_rate": 9.432228769505403e-06, "loss": 0.5797, "step": 21496 }, { "epoch": 1.5976960237829803, "grad_norm": 1.9207264917909872, "learning_rate": 9.431427697399801e-06, "loss": 0.5473, "step": 21497 }, { "epoch": 1.5977703455964325, "grad_norm": 1.9421580123688944, "learning_rate": 9.430626628954639e-06, "loss": 0.7096, "step": 21498 }, { "epoch": 1.597844667409885, "grad_norm": 1.8428204633615983, "learning_rate": 9.429825564175074e-06, "loss": 0.6751, "step": 21499 }, { "epoch": 1.597918989223337, "grad_norm": 2.101570413833937, "learning_rate": 9.429024503066264e-06, "loss": 0.6591, "step": 21500 }, { "epoch": 1.5979933110367894, "grad_norm": 2.3312853877211452, "learning_rate": 9.428223445633361e-06, "loss": 0.7287, "step": 21501 }, { "epoch": 1.5980676328502414, "grad_norm": 1.4929950239952225, "learning_rate": 9.427422391881527e-06, "loss": 0.4684, "step": 21502 }, { "epoch": 1.598141954663694, "grad_norm": 2.1193658392529278, "learning_rate": 9.426621341815917e-06, "loss": 0.5478, "step": 21503 }, { "epoch": 1.598216276477146, "grad_norm": 1.9797249458873527, "learning_rate": 9.425820295441692e-06, "loss": 0.524, "step": 21504 }, { "epoch": 1.5982905982905984, "grad_norm": 1.7254109888697835, "learning_rate": 9.425019252764004e-06, "loss": 0.4114, "step": 21505 }, { "epoch": 1.5983649201040504, "grad_norm": 1.886516792153629, "learning_rate": 9.424218213788017e-06, "loss": 0.5541, "step": 21506 }, { "epoch": 1.5984392419175029, "grad_norm": 2.0551264131553895, "learning_rate": 9.423417178518879e-06, "loss": 0.6713, "step": 21507 }, { "epoch": 1.5985135637309549, "grad_norm": 3.4051039988403224, "learning_rate": 9.42261614696175e-06, "loss": 0.6527, "step": 21508 }, { "epoch": 1.5985878855444073, "grad_norm": 2.086060933619974, "learning_rate": 9.421815119121789e-06, "loss": 0.6635, "step": 21509 }, { "epoch": 1.5986622073578596, "grad_norm": 1.9538708055292484, "learning_rate": 9.421014095004154e-06, "loss": 0.5637, "step": 21510 }, { "epoch": 1.5987365291713118, "grad_norm": 1.615536140326382, "learning_rate": 9.420213074613998e-06, "loss": 0.4498, "step": 21511 }, { "epoch": 1.598810850984764, "grad_norm": 2.0854090725105765, "learning_rate": 9.41941205795648e-06, "loss": 0.6364, "step": 21512 }, { "epoch": 1.5988851727982163, "grad_norm": 1.925094083741796, "learning_rate": 9.418611045036759e-06, "loss": 0.6487, "step": 21513 }, { "epoch": 1.5989594946116685, "grad_norm": 2.2529558959941896, "learning_rate": 9.41781003585999e-06, "loss": 0.6574, "step": 21514 }, { "epoch": 1.5990338164251208, "grad_norm": 1.522071230505846, "learning_rate": 9.417009030431328e-06, "loss": 0.453, "step": 21515 }, { "epoch": 1.599108138238573, "grad_norm": 2.0293432111157528, "learning_rate": 9.416208028755932e-06, "loss": 0.7327, "step": 21516 }, { "epoch": 1.5991824600520252, "grad_norm": 1.6454612407195304, "learning_rate": 9.415407030838964e-06, "loss": 0.5359, "step": 21517 }, { "epoch": 1.5992567818654775, "grad_norm": 1.7480969845580152, "learning_rate": 9.414606036685569e-06, "loss": 0.6067, "step": 21518 }, { "epoch": 1.5993311036789297, "grad_norm": 1.6218052169088737, "learning_rate": 9.41380504630091e-06, "loss": 0.5095, "step": 21519 }, { "epoch": 1.599405425492382, "grad_norm": 1.8244739144446989, "learning_rate": 9.413004059690145e-06, "loss": 0.6014, "step": 21520 }, { "epoch": 1.5994797473058342, "grad_norm": 3.1950124705400498, "learning_rate": 9.41220307685843e-06, "loss": 0.5643, "step": 21521 }, { "epoch": 1.5995540691192867, "grad_norm": 2.139609215735661, "learning_rate": 9.41140209781092e-06, "loss": 0.8124, "step": 21522 }, { "epoch": 1.5996283909327387, "grad_norm": 1.9287218839986713, "learning_rate": 9.410601122552773e-06, "loss": 0.4413, "step": 21523 }, { "epoch": 1.5997027127461911, "grad_norm": 2.6681461020898425, "learning_rate": 9.409800151089144e-06, "loss": 0.655, "step": 21524 }, { "epoch": 1.5997770345596432, "grad_norm": 1.813594507958337, "learning_rate": 9.408999183425195e-06, "loss": 0.6112, "step": 21525 }, { "epoch": 1.5998513563730956, "grad_norm": 1.9784012381748972, "learning_rate": 9.408198219566075e-06, "loss": 0.5862, "step": 21526 }, { "epoch": 1.5999256781865476, "grad_norm": 2.0679391334311235, "learning_rate": 9.407397259516944e-06, "loss": 0.5624, "step": 21527 }, { "epoch": 1.6, "grad_norm": 1.935940452965172, "learning_rate": 9.406596303282964e-06, "loss": 0.643, "step": 21528 }, { "epoch": 1.600074321813452, "grad_norm": 1.9476639714785933, "learning_rate": 9.40579535086928e-06, "loss": 0.6324, "step": 21529 }, { "epoch": 1.6001486436269046, "grad_norm": 2.1653354065886856, "learning_rate": 9.404994402281059e-06, "loss": 0.6361, "step": 21530 }, { "epoch": 1.6002229654403566, "grad_norm": 2.0400500612144326, "learning_rate": 9.40419345752345e-06, "loss": 0.5829, "step": 21531 }, { "epoch": 1.600297287253809, "grad_norm": 1.806480031921933, "learning_rate": 9.403392516601615e-06, "loss": 0.6796, "step": 21532 }, { "epoch": 1.6003716090672613, "grad_norm": 1.8062090667299036, "learning_rate": 9.402591579520705e-06, "loss": 0.5696, "step": 21533 }, { "epoch": 1.6004459308807135, "grad_norm": 1.8340280680062935, "learning_rate": 9.401790646285881e-06, "loss": 0.7011, "step": 21534 }, { "epoch": 1.6005202526941658, "grad_norm": 1.9174058590896506, "learning_rate": 9.400989716902297e-06, "loss": 0.7025, "step": 21535 }, { "epoch": 1.600594574507618, "grad_norm": 1.9846634640370642, "learning_rate": 9.400188791375113e-06, "loss": 0.6841, "step": 21536 }, { "epoch": 1.6006688963210702, "grad_norm": 2.2620816813160793, "learning_rate": 9.399387869709479e-06, "loss": 0.545, "step": 21537 }, { "epoch": 1.6007432181345225, "grad_norm": 1.8588677861440648, "learning_rate": 9.398586951910556e-06, "loss": 0.5513, "step": 21538 }, { "epoch": 1.6008175399479747, "grad_norm": 2.8214050520742395, "learning_rate": 9.397786037983499e-06, "loss": 0.6186, "step": 21539 }, { "epoch": 1.600891861761427, "grad_norm": 2.034844871135322, "learning_rate": 9.396985127933468e-06, "loss": 0.6278, "step": 21540 }, { "epoch": 1.6009661835748792, "grad_norm": 1.9658465409293966, "learning_rate": 9.39618422176561e-06, "loss": 0.4783, "step": 21541 }, { "epoch": 1.6010405053883314, "grad_norm": 1.8865574931109894, "learning_rate": 9.395383319485088e-06, "loss": 0.5836, "step": 21542 }, { "epoch": 1.6011148272017839, "grad_norm": 2.0014648044637813, "learning_rate": 9.39458242109706e-06, "loss": 0.4613, "step": 21543 }, { "epoch": 1.601189149015236, "grad_norm": 2.0598852742504645, "learning_rate": 9.393781526606675e-06, "loss": 0.5842, "step": 21544 }, { "epoch": 1.6012634708286884, "grad_norm": 1.5592657641240426, "learning_rate": 9.392980636019093e-06, "loss": 0.5015, "step": 21545 }, { "epoch": 1.6013377926421404, "grad_norm": 1.74845016330075, "learning_rate": 9.392179749339471e-06, "loss": 0.546, "step": 21546 }, { "epoch": 1.6014121144555928, "grad_norm": 2.0249581013400944, "learning_rate": 9.391378866572967e-06, "loss": 0.6101, "step": 21547 }, { "epoch": 1.6014864362690449, "grad_norm": 1.6497364871537632, "learning_rate": 9.390577987724732e-06, "loss": 0.4733, "step": 21548 }, { "epoch": 1.6015607580824973, "grad_norm": 2.0500368763298074, "learning_rate": 9.389777112799923e-06, "loss": 0.4945, "step": 21549 }, { "epoch": 1.6016350798959493, "grad_norm": 2.0923909243745427, "learning_rate": 9.388976241803699e-06, "loss": 0.6929, "step": 21550 }, { "epoch": 1.6017094017094018, "grad_norm": 2.4387922157066226, "learning_rate": 9.388175374741216e-06, "loss": 0.566, "step": 21551 }, { "epoch": 1.6017837235228538, "grad_norm": 1.9727214236650834, "learning_rate": 9.387374511617626e-06, "loss": 0.5946, "step": 21552 }, { "epoch": 1.6018580453363063, "grad_norm": 1.8737859118690112, "learning_rate": 9.386573652438087e-06, "loss": 0.5693, "step": 21553 }, { "epoch": 1.6019323671497585, "grad_norm": 2.5101055960706398, "learning_rate": 9.385772797207756e-06, "loss": 0.6615, "step": 21554 }, { "epoch": 1.6020066889632107, "grad_norm": 2.1749144316638733, "learning_rate": 9.384971945931787e-06, "loss": 0.7044, "step": 21555 }, { "epoch": 1.602081010776663, "grad_norm": 1.8477321844709067, "learning_rate": 9.384171098615336e-06, "loss": 0.5894, "step": 21556 }, { "epoch": 1.6021553325901152, "grad_norm": 2.1638648935137264, "learning_rate": 9.38337025526356e-06, "loss": 0.7167, "step": 21557 }, { "epoch": 1.6022296544035675, "grad_norm": 2.079908568367955, "learning_rate": 9.382569415881614e-06, "loss": 0.5097, "step": 21558 }, { "epoch": 1.6023039762170197, "grad_norm": 2.0261027653329493, "learning_rate": 9.381768580474655e-06, "loss": 0.5875, "step": 21559 }, { "epoch": 1.602378298030472, "grad_norm": 1.7897334666892706, "learning_rate": 9.380967749047837e-06, "loss": 0.7644, "step": 21560 }, { "epoch": 1.6024526198439242, "grad_norm": 2.1620402527976017, "learning_rate": 9.380166921606317e-06, "loss": 0.683, "step": 21561 }, { "epoch": 1.6025269416573764, "grad_norm": 2.1048697104159766, "learning_rate": 9.379366098155254e-06, "loss": 0.6414, "step": 21562 }, { "epoch": 1.6026012634708287, "grad_norm": 2.075144476717584, "learning_rate": 9.378565278699796e-06, "loss": 0.5837, "step": 21563 }, { "epoch": 1.602675585284281, "grad_norm": 2.236420486765141, "learning_rate": 9.3777644632451e-06, "loss": 0.6773, "step": 21564 }, { "epoch": 1.6027499070977331, "grad_norm": 1.781711271460761, "learning_rate": 9.376963651796328e-06, "loss": 0.483, "step": 21565 }, { "epoch": 1.6028242289111856, "grad_norm": 2.146354176908817, "learning_rate": 9.37616284435863e-06, "loss": 0.6566, "step": 21566 }, { "epoch": 1.6028985507246376, "grad_norm": 1.8920941763328312, "learning_rate": 9.375362040937163e-06, "loss": 0.6653, "step": 21567 }, { "epoch": 1.60297287253809, "grad_norm": 1.9270358170834379, "learning_rate": 9.374561241537083e-06, "loss": 0.6298, "step": 21568 }, { "epoch": 1.603047194351542, "grad_norm": 1.9167318937017581, "learning_rate": 9.373760446163547e-06, "loss": 0.6628, "step": 21569 }, { "epoch": 1.6031215161649945, "grad_norm": 1.592041952065384, "learning_rate": 9.372959654821706e-06, "loss": 0.5315, "step": 21570 }, { "epoch": 1.6031958379784466, "grad_norm": 1.749088459245064, "learning_rate": 9.372158867516718e-06, "loss": 0.5729, "step": 21571 }, { "epoch": 1.603270159791899, "grad_norm": 1.8780407666512127, "learning_rate": 9.37135808425374e-06, "loss": 0.7225, "step": 21572 }, { "epoch": 1.603344481605351, "grad_norm": 1.981674231268559, "learning_rate": 9.370557305037927e-06, "loss": 0.4547, "step": 21573 }, { "epoch": 1.6034188034188035, "grad_norm": 1.755847244269873, "learning_rate": 9.369756529874436e-06, "loss": 0.5536, "step": 21574 }, { "epoch": 1.6034931252322555, "grad_norm": 2.1419544443651213, "learning_rate": 9.368955758768416e-06, "loss": 0.5668, "step": 21575 }, { "epoch": 1.603567447045708, "grad_norm": 1.8547586267344303, "learning_rate": 9.368154991725027e-06, "loss": 0.5311, "step": 21576 }, { "epoch": 1.6036417688591602, "grad_norm": 1.7630611393181663, "learning_rate": 9.367354228749422e-06, "loss": 0.5126, "step": 21577 }, { "epoch": 1.6037160906726124, "grad_norm": 1.9380371461535275, "learning_rate": 9.366553469846758e-06, "loss": 0.6003, "step": 21578 }, { "epoch": 1.6037904124860647, "grad_norm": 1.9506934803974527, "learning_rate": 9.36575271502219e-06, "loss": 0.6939, "step": 21579 }, { "epoch": 1.603864734299517, "grad_norm": 2.313903272287823, "learning_rate": 9.364951964280874e-06, "loss": 0.6225, "step": 21580 }, { "epoch": 1.6039390561129692, "grad_norm": 1.9662417437961068, "learning_rate": 9.364151217627964e-06, "loss": 0.4923, "step": 21581 }, { "epoch": 1.6040133779264214, "grad_norm": 1.7083300988758214, "learning_rate": 9.363350475068614e-06, "loss": 0.5466, "step": 21582 }, { "epoch": 1.6040876997398736, "grad_norm": 1.7744898401153797, "learning_rate": 9.36254973660798e-06, "loss": 0.5473, "step": 21583 }, { "epoch": 1.6041620215533259, "grad_norm": 2.3465185054095254, "learning_rate": 9.36174900225122e-06, "loss": 0.5355, "step": 21584 }, { "epoch": 1.6042363433667781, "grad_norm": 1.8859106950069482, "learning_rate": 9.36094827200349e-06, "loss": 0.6498, "step": 21585 }, { "epoch": 1.6043106651802304, "grad_norm": 2.0725611694097603, "learning_rate": 9.360147545869938e-06, "loss": 0.5368, "step": 21586 }, { "epoch": 1.6043849869936826, "grad_norm": 2.5033593646512227, "learning_rate": 9.359346823855725e-06, "loss": 0.7291, "step": 21587 }, { "epoch": 1.6044593088071348, "grad_norm": 2.177144284247294, "learning_rate": 9.358546105966001e-06, "loss": 0.7463, "step": 21588 }, { "epoch": 1.6045336306205873, "grad_norm": 2.186220571389082, "learning_rate": 9.357745392205924e-06, "loss": 0.686, "step": 21589 }, { "epoch": 1.6046079524340393, "grad_norm": 2.2091973032471968, "learning_rate": 9.35694468258065e-06, "loss": 0.5722, "step": 21590 }, { "epoch": 1.6046822742474918, "grad_norm": 2.054556306254836, "learning_rate": 9.356143977095333e-06, "loss": 0.6474, "step": 21591 }, { "epoch": 1.6047565960609438, "grad_norm": 1.8109375019895242, "learning_rate": 9.355343275755126e-06, "loss": 0.5001, "step": 21592 }, { "epoch": 1.6048309178743962, "grad_norm": 1.883244266715248, "learning_rate": 9.354542578565187e-06, "loss": 0.501, "step": 21593 }, { "epoch": 1.6049052396878483, "grad_norm": 2.210247524049972, "learning_rate": 9.353741885530668e-06, "loss": 0.6882, "step": 21594 }, { "epoch": 1.6049795615013007, "grad_norm": 1.905589100843864, "learning_rate": 9.352941196656726e-06, "loss": 0.5914, "step": 21595 }, { "epoch": 1.6050538833147527, "grad_norm": 2.18216653837676, "learning_rate": 9.35214051194852e-06, "loss": 0.6358, "step": 21596 }, { "epoch": 1.6051282051282052, "grad_norm": 2.1709722861681793, "learning_rate": 9.351339831411193e-06, "loss": 0.6495, "step": 21597 }, { "epoch": 1.6052025269416572, "grad_norm": 2.0700433439989347, "learning_rate": 9.350539155049911e-06, "loss": 0.6564, "step": 21598 }, { "epoch": 1.6052768487551097, "grad_norm": 1.8354701702922351, "learning_rate": 9.34973848286982e-06, "loss": 0.4609, "step": 21599 }, { "epoch": 1.605351170568562, "grad_norm": 1.662409542286898, "learning_rate": 9.34893781487608e-06, "loss": 0.4376, "step": 21600 }, { "epoch": 1.6054254923820142, "grad_norm": 2.215214606956283, "learning_rate": 9.348137151073846e-06, "loss": 0.6303, "step": 21601 }, { "epoch": 1.6054998141954664, "grad_norm": 2.4911953186253615, "learning_rate": 9.347336491468272e-06, "loss": 0.7008, "step": 21602 }, { "epoch": 1.6055741360089186, "grad_norm": 1.9121234432473238, "learning_rate": 9.34653583606451e-06, "loss": 0.6451, "step": 21603 }, { "epoch": 1.6056484578223709, "grad_norm": 1.78721850213781, "learning_rate": 9.345735184867716e-06, "loss": 0.4825, "step": 21604 }, { "epoch": 1.605722779635823, "grad_norm": 1.8669767079663824, "learning_rate": 9.344934537883044e-06, "loss": 0.6645, "step": 21605 }, { "epoch": 1.6057971014492753, "grad_norm": 1.9429969424943108, "learning_rate": 9.344133895115651e-06, "loss": 0.5939, "step": 21606 }, { "epoch": 1.6058714232627276, "grad_norm": 1.833319119228905, "learning_rate": 9.343333256570692e-06, "loss": 0.5725, "step": 21607 }, { "epoch": 1.6059457450761798, "grad_norm": 2.2268445117470885, "learning_rate": 9.342532622253319e-06, "loss": 0.6688, "step": 21608 }, { "epoch": 1.606020066889632, "grad_norm": 1.8072346846830956, "learning_rate": 9.341731992168687e-06, "loss": 0.5545, "step": 21609 }, { "epoch": 1.6060943887030845, "grad_norm": 2.2128272018916144, "learning_rate": 9.340931366321948e-06, "loss": 0.716, "step": 21610 }, { "epoch": 1.6061687105165365, "grad_norm": 2.2511035859389623, "learning_rate": 9.340130744718258e-06, "loss": 0.5549, "step": 21611 }, { "epoch": 1.606243032329989, "grad_norm": 1.69168442541342, "learning_rate": 9.339330127362772e-06, "loss": 0.6082, "step": 21612 }, { "epoch": 1.606317354143441, "grad_norm": 2.2011559881574394, "learning_rate": 9.338529514260648e-06, "loss": 0.7514, "step": 21613 }, { "epoch": 1.6063916759568935, "grad_norm": 2.4033868384497463, "learning_rate": 9.337728905417035e-06, "loss": 0.5199, "step": 21614 }, { "epoch": 1.6064659977703455, "grad_norm": 2.1044908033911756, "learning_rate": 9.336928300837085e-06, "loss": 0.6938, "step": 21615 }, { "epoch": 1.606540319583798, "grad_norm": 2.791407178301519, "learning_rate": 9.33612770052596e-06, "loss": 0.6433, "step": 21616 }, { "epoch": 1.60661464139725, "grad_norm": 1.7615771040561476, "learning_rate": 9.33532710448881e-06, "loss": 0.5573, "step": 21617 }, { "epoch": 1.6066889632107024, "grad_norm": 1.710237300093158, "learning_rate": 9.334526512730791e-06, "loss": 0.6201, "step": 21618 }, { "epoch": 1.6067632850241544, "grad_norm": 2.2469259650061018, "learning_rate": 9.333725925257055e-06, "loss": 0.5801, "step": 21619 }, { "epoch": 1.606837606837607, "grad_norm": 2.186955069022213, "learning_rate": 9.332925342072758e-06, "loss": 0.5727, "step": 21620 }, { "epoch": 1.6069119286510591, "grad_norm": 1.7072490649565586, "learning_rate": 9.33212476318305e-06, "loss": 0.5428, "step": 21621 }, { "epoch": 1.6069862504645114, "grad_norm": 2.081347965945369, "learning_rate": 9.331324188593089e-06, "loss": 0.56, "step": 21622 }, { "epoch": 1.6070605722779636, "grad_norm": 3.609926431291017, "learning_rate": 9.330523618308028e-06, "loss": 0.5884, "step": 21623 }, { "epoch": 1.6071348940914159, "grad_norm": 1.7199900431401367, "learning_rate": 9.329723052333024e-06, "loss": 0.5584, "step": 21624 }, { "epoch": 1.607209215904868, "grad_norm": 1.991826278834438, "learning_rate": 9.328922490673225e-06, "loss": 0.5822, "step": 21625 }, { "epoch": 1.6072835377183203, "grad_norm": 1.6992403023862483, "learning_rate": 9.328121933333787e-06, "loss": 0.5448, "step": 21626 }, { "epoch": 1.6073578595317726, "grad_norm": 1.6702349840686048, "learning_rate": 9.327321380319869e-06, "loss": 0.5068, "step": 21627 }, { "epoch": 1.6074321813452248, "grad_norm": 1.7577655637402034, "learning_rate": 9.326520831636617e-06, "loss": 0.5061, "step": 21628 }, { "epoch": 1.607506503158677, "grad_norm": 2.188144443002253, "learning_rate": 9.325720287289192e-06, "loss": 0.5938, "step": 21629 }, { "epoch": 1.6075808249721293, "grad_norm": 2.284843705659232, "learning_rate": 9.324919747282749e-06, "loss": 0.7096, "step": 21630 }, { "epoch": 1.6076551467855815, "grad_norm": 2.0192274581586487, "learning_rate": 9.324119211622432e-06, "loss": 0.6593, "step": 21631 }, { "epoch": 1.6077294685990338, "grad_norm": 2.109373523490824, "learning_rate": 9.3233186803134e-06, "loss": 0.6675, "step": 21632 }, { "epoch": 1.6078037904124862, "grad_norm": 2.9102064125594307, "learning_rate": 9.322518153360809e-06, "loss": 0.6196, "step": 21633 }, { "epoch": 1.6078781122259382, "grad_norm": 2.8141255954548754, "learning_rate": 9.321717630769809e-06, "loss": 0.6595, "step": 21634 }, { "epoch": 1.6079524340393907, "grad_norm": 3.025040090621236, "learning_rate": 9.320917112545558e-06, "loss": 0.7103, "step": 21635 }, { "epoch": 1.6080267558528427, "grad_norm": 2.0062108086582766, "learning_rate": 9.320116598693207e-06, "loss": 0.6436, "step": 21636 }, { "epoch": 1.6081010776662952, "grad_norm": 2.162785427325, "learning_rate": 9.319316089217909e-06, "loss": 0.5248, "step": 21637 }, { "epoch": 1.6081753994797472, "grad_norm": 2.494507602145203, "learning_rate": 9.318515584124818e-06, "loss": 0.6815, "step": 21638 }, { "epoch": 1.6082497212931997, "grad_norm": 1.7793003159287362, "learning_rate": 9.317715083419091e-06, "loss": 0.4168, "step": 21639 }, { "epoch": 1.6083240431066517, "grad_norm": 1.8944395974249577, "learning_rate": 9.316914587105877e-06, "loss": 0.5503, "step": 21640 }, { "epoch": 1.6083983649201041, "grad_norm": 1.919853967602496, "learning_rate": 9.316114095190336e-06, "loss": 0.6431, "step": 21641 }, { "epoch": 1.6084726867335561, "grad_norm": 2.03012581636155, "learning_rate": 9.315313607677612e-06, "loss": 0.6733, "step": 21642 }, { "epoch": 1.6085470085470086, "grad_norm": 1.832722456012215, "learning_rate": 9.314513124572862e-06, "loss": 0.5949, "step": 21643 }, { "epoch": 1.6086213303604608, "grad_norm": 1.91787711641185, "learning_rate": 9.313712645881243e-06, "loss": 0.547, "step": 21644 }, { "epoch": 1.608695652173913, "grad_norm": 2.125388527170106, "learning_rate": 9.312912171607906e-06, "loss": 0.6632, "step": 21645 }, { "epoch": 1.6087699739873653, "grad_norm": 2.0311105260848707, "learning_rate": 9.312111701758008e-06, "loss": 0.6515, "step": 21646 }, { "epoch": 1.6088442958008176, "grad_norm": 2.3108450174170456, "learning_rate": 9.311311236336695e-06, "loss": 0.6893, "step": 21647 }, { "epoch": 1.6089186176142698, "grad_norm": 1.906086792316484, "learning_rate": 9.310510775349126e-06, "loss": 0.6461, "step": 21648 }, { "epoch": 1.608992939427722, "grad_norm": 2.0725777382754553, "learning_rate": 9.309710318800452e-06, "loss": 0.6807, "step": 21649 }, { "epoch": 1.6090672612411743, "grad_norm": 2.3136358367847105, "learning_rate": 9.30890986669583e-06, "loss": 0.716, "step": 21650 }, { "epoch": 1.6091415830546265, "grad_norm": 2.1377760293550723, "learning_rate": 9.308109419040407e-06, "loss": 0.648, "step": 21651 }, { "epoch": 1.6092159048680788, "grad_norm": 2.2456206239398115, "learning_rate": 9.30730897583934e-06, "loss": 0.6599, "step": 21652 }, { "epoch": 1.609290226681531, "grad_norm": 1.8084969385891854, "learning_rate": 9.306508537097788e-06, "loss": 0.6215, "step": 21653 }, { "epoch": 1.6093645484949832, "grad_norm": 1.7370228956245628, "learning_rate": 9.305708102820892e-06, "loss": 0.5286, "step": 21654 }, { "epoch": 1.6094388703084355, "grad_norm": 2.599872439600113, "learning_rate": 9.304907673013812e-06, "loss": 0.5956, "step": 21655 }, { "epoch": 1.609513192121888, "grad_norm": 2.327816667594297, "learning_rate": 9.304107247681699e-06, "loss": 0.7601, "step": 21656 }, { "epoch": 1.60958751393534, "grad_norm": 2.3453897763047293, "learning_rate": 9.303306826829711e-06, "loss": 0.7749, "step": 21657 }, { "epoch": 1.6096618357487924, "grad_norm": 4.15542970204103, "learning_rate": 9.302506410462995e-06, "loss": 0.506, "step": 21658 }, { "epoch": 1.6097361575622444, "grad_norm": 1.8918071739622768, "learning_rate": 9.301705998586708e-06, "loss": 0.6077, "step": 21659 }, { "epoch": 1.6098104793756969, "grad_norm": 2.6667177857149107, "learning_rate": 9.300905591205998e-06, "loss": 0.4331, "step": 21660 }, { "epoch": 1.609884801189149, "grad_norm": 1.8867548490730304, "learning_rate": 9.300105188326027e-06, "loss": 0.4707, "step": 21661 }, { "epoch": 1.6099591230026014, "grad_norm": 2.5740894282839197, "learning_rate": 9.29930478995194e-06, "loss": 0.5368, "step": 21662 }, { "epoch": 1.6100334448160534, "grad_norm": 2.3381207984675214, "learning_rate": 9.29850439608889e-06, "loss": 0.7785, "step": 21663 }, { "epoch": 1.6101077666295058, "grad_norm": 2.1357165160526947, "learning_rate": 9.297704006742038e-06, "loss": 0.5901, "step": 21664 }, { "epoch": 1.6101820884429578, "grad_norm": 2.3036171455444974, "learning_rate": 9.296903621916527e-06, "loss": 0.6478, "step": 21665 }, { "epoch": 1.6102564102564103, "grad_norm": 3.05372027919191, "learning_rate": 9.296103241617514e-06, "loss": 0.4683, "step": 21666 }, { "epoch": 1.6103307320698625, "grad_norm": 1.6809431605299976, "learning_rate": 9.295302865850153e-06, "loss": 0.6187, "step": 21667 }, { "epoch": 1.6104050538833148, "grad_norm": 1.6875156245546044, "learning_rate": 9.294502494619595e-06, "loss": 0.4773, "step": 21668 }, { "epoch": 1.610479375696767, "grad_norm": 1.6344990382685796, "learning_rate": 9.293702127930994e-06, "loss": 0.5834, "step": 21669 }, { "epoch": 1.6105536975102193, "grad_norm": 1.7794667963518185, "learning_rate": 9.292901765789499e-06, "loss": 0.6117, "step": 21670 }, { "epoch": 1.6106280193236715, "grad_norm": 2.0187906430278746, "learning_rate": 9.292101408200268e-06, "loss": 0.5948, "step": 21671 }, { "epoch": 1.6107023411371237, "grad_norm": 1.9305962316173153, "learning_rate": 9.291301055168453e-06, "loss": 0.7522, "step": 21672 }, { "epoch": 1.610776662950576, "grad_norm": 2.3019788553558986, "learning_rate": 9.290500706699204e-06, "loss": 0.5511, "step": 21673 }, { "epoch": 1.6108509847640282, "grad_norm": 2.4265248641778596, "learning_rate": 9.289700362797673e-06, "loss": 0.5826, "step": 21674 }, { "epoch": 1.6109253065774805, "grad_norm": 1.9294384082614418, "learning_rate": 9.288900023469018e-06, "loss": 0.6838, "step": 21675 }, { "epoch": 1.6109996283909327, "grad_norm": 2.580544842483757, "learning_rate": 9.288099688718385e-06, "loss": 0.6101, "step": 21676 }, { "epoch": 1.6110739502043852, "grad_norm": 1.9081320428611126, "learning_rate": 9.28729935855093e-06, "loss": 0.4605, "step": 21677 }, { "epoch": 1.6111482720178372, "grad_norm": 1.6620074531957776, "learning_rate": 9.286499032971803e-06, "loss": 0.492, "step": 21678 }, { "epoch": 1.6112225938312896, "grad_norm": 2.0837474450353164, "learning_rate": 9.285698711986159e-06, "loss": 0.5823, "step": 21679 }, { "epoch": 1.6112969156447416, "grad_norm": 1.730352668122782, "learning_rate": 9.284898395599151e-06, "loss": 0.4715, "step": 21680 }, { "epoch": 1.611371237458194, "grad_norm": 1.7946754791811055, "learning_rate": 9.284098083815927e-06, "loss": 0.6362, "step": 21681 }, { "epoch": 1.6114455592716461, "grad_norm": 1.6866280536391298, "learning_rate": 9.283297776641645e-06, "loss": 0.5251, "step": 21682 }, { "epoch": 1.6115198810850986, "grad_norm": 1.94509758231426, "learning_rate": 9.282497474081455e-06, "loss": 0.59, "step": 21683 }, { "epoch": 1.6115942028985506, "grad_norm": 1.8413840631155596, "learning_rate": 9.281697176140508e-06, "loss": 0.6003, "step": 21684 }, { "epoch": 1.611668524712003, "grad_norm": 2.1652252774380383, "learning_rate": 9.280896882823957e-06, "loss": 0.6792, "step": 21685 }, { "epoch": 1.611742846525455, "grad_norm": 1.9059441061370421, "learning_rate": 9.280096594136953e-06, "loss": 0.6087, "step": 21686 }, { "epoch": 1.6118171683389075, "grad_norm": 2.054737903238559, "learning_rate": 9.279296310084657e-06, "loss": 0.6875, "step": 21687 }, { "epoch": 1.6118914901523596, "grad_norm": 1.7270000351561479, "learning_rate": 9.278496030672207e-06, "loss": 0.519, "step": 21688 }, { "epoch": 1.611965811965812, "grad_norm": 2.7114746837471375, "learning_rate": 9.277695755904764e-06, "loss": 0.7326, "step": 21689 }, { "epoch": 1.6120401337792643, "grad_norm": 2.0029370671322715, "learning_rate": 9.276895485787479e-06, "loss": 0.6133, "step": 21690 }, { "epoch": 1.6121144555927165, "grad_norm": 1.9829624258911376, "learning_rate": 9.276095220325503e-06, "loss": 0.5983, "step": 21691 }, { "epoch": 1.6121887774061687, "grad_norm": 2.1405376545607, "learning_rate": 9.275294959523986e-06, "loss": 0.7518, "step": 21692 }, { "epoch": 1.612263099219621, "grad_norm": 2.34798500517419, "learning_rate": 9.274494703388083e-06, "loss": 0.6008, "step": 21693 }, { "epoch": 1.6123374210330732, "grad_norm": 3.1458141733981155, "learning_rate": 9.27369445192295e-06, "loss": 0.806, "step": 21694 }, { "epoch": 1.6124117428465254, "grad_norm": 1.7024741418104556, "learning_rate": 9.27289420513373e-06, "loss": 0.5656, "step": 21695 }, { "epoch": 1.6124860646599777, "grad_norm": 2.4986178396539196, "learning_rate": 9.27209396302558e-06, "loss": 0.7294, "step": 21696 }, { "epoch": 1.61256038647343, "grad_norm": 1.726061529907935, "learning_rate": 9.271293725603652e-06, "loss": 0.5388, "step": 21697 }, { "epoch": 1.6126347082868822, "grad_norm": 1.6872034001757608, "learning_rate": 9.270493492873101e-06, "loss": 0.3881, "step": 21698 }, { "epoch": 1.6127090301003344, "grad_norm": 1.8951019066356556, "learning_rate": 9.269693264839069e-06, "loss": 0.598, "step": 21699 }, { "epoch": 1.6127833519137869, "grad_norm": 2.117626461343181, "learning_rate": 9.268893041506716e-06, "loss": 0.4942, "step": 21700 }, { "epoch": 1.6128576737272389, "grad_norm": 2.4140177011048847, "learning_rate": 9.268092822881194e-06, "loss": 0.7298, "step": 21701 }, { "epoch": 1.6129319955406913, "grad_norm": 2.2853197257634377, "learning_rate": 9.26729260896765e-06, "loss": 0.6339, "step": 21702 }, { "epoch": 1.6130063173541433, "grad_norm": 1.9042086118149832, "learning_rate": 9.266492399771236e-06, "loss": 0.5786, "step": 21703 }, { "epoch": 1.6130806391675958, "grad_norm": 1.9261987045092268, "learning_rate": 9.265692195297107e-06, "loss": 0.5963, "step": 21704 }, { "epoch": 1.6131549609810478, "grad_norm": 2.018905183535449, "learning_rate": 9.264891995550417e-06, "loss": 0.6579, "step": 21705 }, { "epoch": 1.6132292827945003, "grad_norm": 2.545856818808685, "learning_rate": 9.26409180053631e-06, "loss": 0.6744, "step": 21706 }, { "epoch": 1.6133036046079523, "grad_norm": 1.6796475137700262, "learning_rate": 9.263291610259944e-06, "loss": 0.4806, "step": 21707 }, { "epoch": 1.6133779264214048, "grad_norm": 2.3459250210095184, "learning_rate": 9.262491424726467e-06, "loss": 0.6829, "step": 21708 }, { "epoch": 1.6134522482348568, "grad_norm": 2.1226317784126327, "learning_rate": 9.261691243941037e-06, "loss": 0.6905, "step": 21709 }, { "epoch": 1.6135265700483092, "grad_norm": 1.5340154398962471, "learning_rate": 9.260891067908796e-06, "loss": 0.462, "step": 21710 }, { "epoch": 1.6136008918617615, "grad_norm": 1.9093521391803683, "learning_rate": 9.260090896634899e-06, "loss": 0.5935, "step": 21711 }, { "epoch": 1.6136752136752137, "grad_norm": 2.1807520659487105, "learning_rate": 9.259290730124502e-06, "loss": 0.5001, "step": 21712 }, { "epoch": 1.613749535488666, "grad_norm": 2.15165003951562, "learning_rate": 9.25849056838275e-06, "loss": 0.6167, "step": 21713 }, { "epoch": 1.6138238573021182, "grad_norm": 2.3472018554596734, "learning_rate": 9.257690411414797e-06, "loss": 0.6663, "step": 21714 }, { "epoch": 1.6138981791155704, "grad_norm": 1.9787426252923925, "learning_rate": 9.256890259225794e-06, "loss": 0.5506, "step": 21715 }, { "epoch": 1.6139725009290227, "grad_norm": 1.88588425609201, "learning_rate": 9.256090111820897e-06, "loss": 0.4274, "step": 21716 }, { "epoch": 1.614046822742475, "grad_norm": 2.059686199053306, "learning_rate": 9.25528996920525e-06, "loss": 0.6021, "step": 21717 }, { "epoch": 1.6141211445559271, "grad_norm": 10.243715448902568, "learning_rate": 9.254489831384008e-06, "loss": 0.5695, "step": 21718 }, { "epoch": 1.6141954663693794, "grad_norm": 2.066310899688753, "learning_rate": 9.25368969836232e-06, "loss": 0.5513, "step": 21719 }, { "epoch": 1.6142697881828316, "grad_norm": 2.195778063193396, "learning_rate": 9.252889570145347e-06, "loss": 0.556, "step": 21720 }, { "epoch": 1.6143441099962839, "grad_norm": 1.800566059908756, "learning_rate": 9.252089446738223e-06, "loss": 0.5831, "step": 21721 }, { "epoch": 1.614418431809736, "grad_norm": 2.197926862174178, "learning_rate": 9.251289328146112e-06, "loss": 0.6315, "step": 21722 }, { "epoch": 1.6144927536231886, "grad_norm": 2.2292093362810057, "learning_rate": 9.250489214374163e-06, "loss": 0.5251, "step": 21723 }, { "epoch": 1.6145670754366406, "grad_norm": 2.165813625934286, "learning_rate": 9.249689105427523e-06, "loss": 0.6516, "step": 21724 }, { "epoch": 1.614641397250093, "grad_norm": 1.8045793486692385, "learning_rate": 9.248889001311345e-06, "loss": 0.3926, "step": 21725 }, { "epoch": 1.614715719063545, "grad_norm": 2.486157056812614, "learning_rate": 9.24808890203078e-06, "loss": 0.6962, "step": 21726 }, { "epoch": 1.6147900408769975, "grad_norm": 2.0610676715050253, "learning_rate": 9.247288807590984e-06, "loss": 0.5886, "step": 21727 }, { "epoch": 1.6148643626904495, "grad_norm": 2.017653547987681, "learning_rate": 9.246488717997101e-06, "loss": 0.5001, "step": 21728 }, { "epoch": 1.614938684503902, "grad_norm": 2.1332530746623597, "learning_rate": 9.245688633254284e-06, "loss": 0.6487, "step": 21729 }, { "epoch": 1.615013006317354, "grad_norm": 1.9508923473542352, "learning_rate": 9.244888553367684e-06, "loss": 0.6155, "step": 21730 }, { "epoch": 1.6150873281308065, "grad_norm": 2.3567911791389675, "learning_rate": 9.244088478342452e-06, "loss": 0.6869, "step": 21731 }, { "epoch": 1.6151616499442585, "grad_norm": 1.8152659665247382, "learning_rate": 9.243288408183745e-06, "loss": 0.4459, "step": 21732 }, { "epoch": 1.615235971757711, "grad_norm": 2.2872689297875377, "learning_rate": 9.242488342896703e-06, "loss": 0.6207, "step": 21733 }, { "epoch": 1.6153102935711632, "grad_norm": 2.0219343129828755, "learning_rate": 9.241688282486484e-06, "loss": 0.7282, "step": 21734 }, { "epoch": 1.6153846153846154, "grad_norm": 1.9935779686088233, "learning_rate": 9.240888226958233e-06, "loss": 0.4937, "step": 21735 }, { "epoch": 1.6154589371980677, "grad_norm": 1.7574201344574847, "learning_rate": 9.240088176317106e-06, "loss": 0.5015, "step": 21736 }, { "epoch": 1.61553325901152, "grad_norm": 2.0991830329322396, "learning_rate": 9.239288130568252e-06, "loss": 0.6311, "step": 21737 }, { "epoch": 1.6156075808249721, "grad_norm": 2.1282348241834956, "learning_rate": 9.238488089716822e-06, "loss": 0.7828, "step": 21738 }, { "epoch": 1.6156819026384244, "grad_norm": 2.1883125076006698, "learning_rate": 9.237688053767964e-06, "loss": 0.6078, "step": 21739 }, { "epoch": 1.6157562244518766, "grad_norm": 1.6074265887113188, "learning_rate": 9.236888022726833e-06, "loss": 0.4739, "step": 21740 }, { "epoch": 1.6158305462653288, "grad_norm": 2.182317481287599, "learning_rate": 9.236087996598575e-06, "loss": 0.512, "step": 21741 }, { "epoch": 1.615904868078781, "grad_norm": 1.8865696552316245, "learning_rate": 9.235287975388345e-06, "loss": 0.6655, "step": 21742 }, { "epoch": 1.6159791898922333, "grad_norm": 2.1857882257012897, "learning_rate": 9.234487959101294e-06, "loss": 0.5841, "step": 21743 }, { "epoch": 1.6160535117056856, "grad_norm": 2.2773289490840267, "learning_rate": 9.233687947742565e-06, "loss": 0.7015, "step": 21744 }, { "epoch": 1.6161278335191378, "grad_norm": 1.9391794642453946, "learning_rate": 9.232887941317317e-06, "loss": 0.4761, "step": 21745 }, { "epoch": 1.6162021553325903, "grad_norm": 1.6843582298358364, "learning_rate": 9.232087939830694e-06, "loss": 0.4164, "step": 21746 }, { "epoch": 1.6162764771460423, "grad_norm": 2.0008717416355952, "learning_rate": 9.231287943287847e-06, "loss": 0.5201, "step": 21747 }, { "epoch": 1.6163507989594947, "grad_norm": 2.207465475903031, "learning_rate": 9.230487951693931e-06, "loss": 0.6121, "step": 21748 }, { "epoch": 1.6164251207729468, "grad_norm": 5.9655033953595655, "learning_rate": 9.229687965054095e-06, "loss": 0.7313, "step": 21749 }, { "epoch": 1.6164994425863992, "grad_norm": 2.241491112745684, "learning_rate": 9.228887983373486e-06, "loss": 0.6136, "step": 21750 }, { "epoch": 1.6165737643998512, "grad_norm": 2.086260688032938, "learning_rate": 9.228088006657255e-06, "loss": 0.5807, "step": 21751 }, { "epoch": 1.6166480862133037, "grad_norm": 1.6911770195018954, "learning_rate": 9.227288034910555e-06, "loss": 0.4831, "step": 21752 }, { "epoch": 1.6167224080267557, "grad_norm": 2.312267181664727, "learning_rate": 9.226488068138533e-06, "loss": 0.5921, "step": 21753 }, { "epoch": 1.6167967298402082, "grad_norm": 1.8969407793020552, "learning_rate": 9.225688106346346e-06, "loss": 0.6411, "step": 21754 }, { "epoch": 1.6168710516536602, "grad_norm": 1.86363527877569, "learning_rate": 9.224888149539138e-06, "loss": 0.7494, "step": 21755 }, { "epoch": 1.6169453734671126, "grad_norm": 1.8352737511292387, "learning_rate": 9.224088197722055e-06, "loss": 0.5703, "step": 21756 }, { "epoch": 1.6170196952805649, "grad_norm": 2.3053100371643227, "learning_rate": 9.223288250900254e-06, "loss": 0.7225, "step": 21757 }, { "epoch": 1.6170940170940171, "grad_norm": 1.6103667128759434, "learning_rate": 9.22248830907888e-06, "loss": 0.3974, "step": 21758 }, { "epoch": 1.6171683389074694, "grad_norm": 1.8503076472295987, "learning_rate": 9.221688372263088e-06, "loss": 0.5753, "step": 21759 }, { "epoch": 1.6172426607209216, "grad_norm": 1.8541688100663447, "learning_rate": 9.22088844045803e-06, "loss": 0.7134, "step": 21760 }, { "epoch": 1.6173169825343738, "grad_norm": 2.2568059905002618, "learning_rate": 9.220088513668845e-06, "loss": 0.6936, "step": 21761 }, { "epoch": 1.617391304347826, "grad_norm": 2.3006659183505063, "learning_rate": 9.219288591900693e-06, "loss": 0.377, "step": 21762 }, { "epoch": 1.6174656261612783, "grad_norm": 2.061682874234803, "learning_rate": 9.21848867515872e-06, "loss": 0.6523, "step": 21763 }, { "epoch": 1.6175399479747306, "grad_norm": 2.1549588099730723, "learning_rate": 9.217688763448077e-06, "loss": 0.6462, "step": 21764 }, { "epoch": 1.6176142697881828, "grad_norm": 2.231360715153309, "learning_rate": 9.216888856773913e-06, "loss": 0.6408, "step": 21765 }, { "epoch": 1.617688591601635, "grad_norm": 1.9190646798698596, "learning_rate": 9.216088955141382e-06, "loss": 0.6082, "step": 21766 }, { "epoch": 1.6177629134150875, "grad_norm": 1.8131657528080258, "learning_rate": 9.215289058555626e-06, "loss": 0.5979, "step": 21767 }, { "epoch": 1.6178372352285395, "grad_norm": 2.0832580203701854, "learning_rate": 9.214489167021797e-06, "loss": 0.5854, "step": 21768 }, { "epoch": 1.617911557041992, "grad_norm": 2.276899351683813, "learning_rate": 9.213689280545045e-06, "loss": 0.5624, "step": 21769 }, { "epoch": 1.617985878855444, "grad_norm": 1.7969019358513922, "learning_rate": 9.212889399130522e-06, "loss": 0.5214, "step": 21770 }, { "epoch": 1.6180602006688964, "grad_norm": 1.796860789225872, "learning_rate": 9.212089522783378e-06, "loss": 0.5678, "step": 21771 }, { "epoch": 1.6181345224823485, "grad_norm": 2.055671568179605, "learning_rate": 9.211289651508759e-06, "loss": 0.5733, "step": 21772 }, { "epoch": 1.618208844295801, "grad_norm": 1.8122797158226738, "learning_rate": 9.210489785311816e-06, "loss": 0.6956, "step": 21773 }, { "epoch": 1.618283166109253, "grad_norm": 3.691784802220529, "learning_rate": 9.209689924197697e-06, "loss": 0.5439, "step": 21774 }, { "epoch": 1.6183574879227054, "grad_norm": 2.034390865907527, "learning_rate": 9.208890068171558e-06, "loss": 0.6656, "step": 21775 }, { "epoch": 1.6184318097361574, "grad_norm": 1.9580840635723855, "learning_rate": 9.208090217238542e-06, "loss": 0.6769, "step": 21776 }, { "epoch": 1.6185061315496099, "grad_norm": 1.6929883157221786, "learning_rate": 9.207290371403803e-06, "loss": 0.5554, "step": 21777 }, { "epoch": 1.618580453363062, "grad_norm": 1.7554330181988118, "learning_rate": 9.206490530672482e-06, "loss": 0.4763, "step": 21778 }, { "epoch": 1.6186547751765143, "grad_norm": 1.905561624342792, "learning_rate": 9.205690695049736e-06, "loss": 0.5351, "step": 21779 }, { "epoch": 1.6187290969899666, "grad_norm": 1.529677163014125, "learning_rate": 9.204890864540712e-06, "loss": 0.4397, "step": 21780 }, { "epoch": 1.6188034188034188, "grad_norm": 1.9486480045569687, "learning_rate": 9.204091039150558e-06, "loss": 0.5747, "step": 21781 }, { "epoch": 1.618877740616871, "grad_norm": 1.9635779101948236, "learning_rate": 9.203291218884427e-06, "loss": 0.5381, "step": 21782 }, { "epoch": 1.6189520624303233, "grad_norm": 2.245658556815989, "learning_rate": 9.202491403747464e-06, "loss": 0.6195, "step": 21783 }, { "epoch": 1.6190263842437755, "grad_norm": 1.804357914467001, "learning_rate": 9.20169159374482e-06, "loss": 0.5689, "step": 21784 }, { "epoch": 1.6191007060572278, "grad_norm": 2.1263081190153152, "learning_rate": 9.200891788881644e-06, "loss": 0.615, "step": 21785 }, { "epoch": 1.61917502787068, "grad_norm": 2.143963877508911, "learning_rate": 9.200091989163088e-06, "loss": 0.6381, "step": 21786 }, { "epoch": 1.6192493496841323, "grad_norm": 2.104243366156267, "learning_rate": 9.199292194594295e-06, "loss": 0.5944, "step": 21787 }, { "epoch": 1.6193236714975845, "grad_norm": 2.287580847294623, "learning_rate": 9.198492405180421e-06, "loss": 0.6249, "step": 21788 }, { "epoch": 1.6193979933110367, "grad_norm": 2.118579436830936, "learning_rate": 9.197692620926607e-06, "loss": 0.6967, "step": 21789 }, { "epoch": 1.6194723151244892, "grad_norm": 1.659801109477814, "learning_rate": 9.19689284183801e-06, "loss": 0.553, "step": 21790 }, { "epoch": 1.6195466369379412, "grad_norm": 3.977506289103835, "learning_rate": 9.196093067919771e-06, "loss": 0.649, "step": 21791 }, { "epoch": 1.6196209587513937, "grad_norm": 2.282541591788544, "learning_rate": 9.195293299177045e-06, "loss": 0.6309, "step": 21792 }, { "epoch": 1.6196952805648457, "grad_norm": 2.105932559715958, "learning_rate": 9.194493535614981e-06, "loss": 0.622, "step": 21793 }, { "epoch": 1.6197696023782981, "grad_norm": 2.149477681424054, "learning_rate": 9.193693777238724e-06, "loss": 0.6651, "step": 21794 }, { "epoch": 1.6198439241917502, "grad_norm": 2.357146012288674, "learning_rate": 9.192894024053423e-06, "loss": 0.7024, "step": 21795 }, { "epoch": 1.6199182460052026, "grad_norm": 2.006767255777867, "learning_rate": 9.19209427606423e-06, "loss": 0.59, "step": 21796 }, { "epoch": 1.6199925678186546, "grad_norm": 1.917057530586638, "learning_rate": 9.191294533276294e-06, "loss": 0.5977, "step": 21797 }, { "epoch": 1.620066889632107, "grad_norm": 1.8593574217836433, "learning_rate": 9.19049479569476e-06, "loss": 0.5978, "step": 21798 }, { "epoch": 1.6201412114455591, "grad_norm": 3.128783906276076, "learning_rate": 9.189695063324777e-06, "loss": 0.4999, "step": 21799 }, { "epoch": 1.6202155332590116, "grad_norm": 1.8314339090171556, "learning_rate": 9.1888953361715e-06, "loss": 0.5102, "step": 21800 }, { "epoch": 1.6202898550724638, "grad_norm": 1.5223849488968064, "learning_rate": 9.188095614240071e-06, "loss": 0.4354, "step": 21801 }, { "epoch": 1.620364176885916, "grad_norm": 1.8643954557434148, "learning_rate": 9.187295897535637e-06, "loss": 0.5988, "step": 21802 }, { "epoch": 1.6204384986993683, "grad_norm": 1.616196670455006, "learning_rate": 9.18649618606335e-06, "loss": 0.4799, "step": 21803 }, { "epoch": 1.6205128205128205, "grad_norm": 1.7342645401087904, "learning_rate": 9.185696479828362e-06, "loss": 0.7082, "step": 21804 }, { "epoch": 1.6205871423262728, "grad_norm": 2.1333319645082773, "learning_rate": 9.184896778835816e-06, "loss": 0.7431, "step": 21805 }, { "epoch": 1.620661464139725, "grad_norm": 1.9667537330454035, "learning_rate": 9.184097083090862e-06, "loss": 0.6079, "step": 21806 }, { "epoch": 1.6207357859531772, "grad_norm": 2.005081694337734, "learning_rate": 9.183297392598647e-06, "loss": 0.6896, "step": 21807 }, { "epoch": 1.6208101077666295, "grad_norm": 1.7242753247484401, "learning_rate": 9.182497707364323e-06, "loss": 0.4985, "step": 21808 }, { "epoch": 1.6208844295800817, "grad_norm": 1.8042047544999622, "learning_rate": 9.181698027393037e-06, "loss": 0.4794, "step": 21809 }, { "epoch": 1.620958751393534, "grad_norm": 2.1652608262511706, "learning_rate": 9.180898352689935e-06, "loss": 0.5526, "step": 21810 }, { "epoch": 1.6210330732069862, "grad_norm": 2.3062762454305408, "learning_rate": 9.180098683260172e-06, "loss": 0.6276, "step": 21811 }, { "epoch": 1.6211073950204384, "grad_norm": 2.0611895641210127, "learning_rate": 9.179299019108885e-06, "loss": 0.5635, "step": 21812 }, { "epoch": 1.621181716833891, "grad_norm": 1.812995418059001, "learning_rate": 9.17849936024123e-06, "loss": 0.5537, "step": 21813 }, { "epoch": 1.621256038647343, "grad_norm": 1.504566740893668, "learning_rate": 9.177699706662353e-06, "loss": 0.4299, "step": 21814 }, { "epoch": 1.6213303604607954, "grad_norm": 2.005019240566249, "learning_rate": 9.176900058377404e-06, "loss": 0.5771, "step": 21815 }, { "epoch": 1.6214046822742474, "grad_norm": 1.8538006990960132, "learning_rate": 9.176100415391529e-06, "loss": 0.5524, "step": 21816 }, { "epoch": 1.6214790040876998, "grad_norm": 2.199439030213539, "learning_rate": 9.175300777709875e-06, "loss": 0.6185, "step": 21817 }, { "epoch": 1.6215533259011519, "grad_norm": 1.9044925886449877, "learning_rate": 9.174501145337594e-06, "loss": 0.5854, "step": 21818 }, { "epoch": 1.6216276477146043, "grad_norm": 2.3742492725173205, "learning_rate": 9.173701518279832e-06, "loss": 0.5326, "step": 21819 }, { "epoch": 1.6217019695280563, "grad_norm": 1.8081319103980753, "learning_rate": 9.172901896541734e-06, "loss": 0.4651, "step": 21820 }, { "epoch": 1.6217762913415088, "grad_norm": 1.909931380274757, "learning_rate": 9.172102280128453e-06, "loss": 0.5966, "step": 21821 }, { "epoch": 1.6218506131549608, "grad_norm": 2.2198276371942702, "learning_rate": 9.171302669045138e-06, "loss": 0.7484, "step": 21822 }, { "epoch": 1.6219249349684133, "grad_norm": 2.1203012807803616, "learning_rate": 9.170503063296929e-06, "loss": 0.6787, "step": 21823 }, { "epoch": 1.6219992567818655, "grad_norm": 2.014807350638656, "learning_rate": 9.169703462888979e-06, "loss": 0.5819, "step": 21824 }, { "epoch": 1.6220735785953178, "grad_norm": 2.084783730599787, "learning_rate": 9.168903867826435e-06, "loss": 0.5617, "step": 21825 }, { "epoch": 1.62214790040877, "grad_norm": 1.9273857872406477, "learning_rate": 9.168104278114446e-06, "loss": 0.5833, "step": 21826 }, { "epoch": 1.6222222222222222, "grad_norm": 1.9841990624986374, "learning_rate": 9.167304693758155e-06, "loss": 0.6037, "step": 21827 }, { "epoch": 1.6222965440356745, "grad_norm": 2.2727749644949164, "learning_rate": 9.166505114762717e-06, "loss": 0.7342, "step": 21828 }, { "epoch": 1.6223708658491267, "grad_norm": 2.1342237594427407, "learning_rate": 9.165705541133271e-06, "loss": 0.6681, "step": 21829 }, { "epoch": 1.622445187662579, "grad_norm": 2.0821992364692194, "learning_rate": 9.164905972874975e-06, "loss": 0.7599, "step": 21830 }, { "epoch": 1.6225195094760312, "grad_norm": 1.7944377263492108, "learning_rate": 9.164106409992968e-06, "loss": 0.6093, "step": 21831 }, { "epoch": 1.6225938312894834, "grad_norm": 2.8811020392796176, "learning_rate": 9.1633068524924e-06, "loss": 0.7134, "step": 21832 }, { "epoch": 1.6226681531029357, "grad_norm": 1.703842547643846, "learning_rate": 9.162507300378425e-06, "loss": 0.5608, "step": 21833 }, { "epoch": 1.6227424749163881, "grad_norm": 1.9229950745443527, "learning_rate": 9.161707753656179e-06, "loss": 0.6261, "step": 21834 }, { "epoch": 1.6228167967298401, "grad_norm": 2.2158348753021144, "learning_rate": 9.160908212330815e-06, "loss": 0.752, "step": 21835 }, { "epoch": 1.6228911185432926, "grad_norm": 9.781851198478744, "learning_rate": 9.160108676407482e-06, "loss": 0.6086, "step": 21836 }, { "epoch": 1.6229654403567446, "grad_norm": 1.7788485208425773, "learning_rate": 9.159309145891325e-06, "loss": 0.5621, "step": 21837 }, { "epoch": 1.623039762170197, "grad_norm": 1.4932158909655293, "learning_rate": 9.158509620787493e-06, "loss": 0.3886, "step": 21838 }, { "epoch": 1.623114083983649, "grad_norm": 2.053219344550444, "learning_rate": 9.157710101101129e-06, "loss": 0.6158, "step": 21839 }, { "epoch": 1.6231884057971016, "grad_norm": 1.8325242410466092, "learning_rate": 9.156910586837387e-06, "loss": 0.5803, "step": 21840 }, { "epoch": 1.6232627276105536, "grad_norm": 2.6271161676841452, "learning_rate": 9.156111078001411e-06, "loss": 0.4586, "step": 21841 }, { "epoch": 1.623337049424006, "grad_norm": 5.776750226529044, "learning_rate": 9.155311574598348e-06, "loss": 0.5539, "step": 21842 }, { "epoch": 1.623411371237458, "grad_norm": 1.6210463954241732, "learning_rate": 9.154512076633342e-06, "loss": 0.4766, "step": 21843 }, { "epoch": 1.6234856930509105, "grad_norm": 2.6298781245924077, "learning_rate": 9.153712584111545e-06, "loss": 0.6238, "step": 21844 }, { "epoch": 1.6235600148643627, "grad_norm": 2.135822258593347, "learning_rate": 9.152913097038108e-06, "loss": 0.6922, "step": 21845 }, { "epoch": 1.623634336677815, "grad_norm": 2.479008931037319, "learning_rate": 9.152113615418167e-06, "loss": 0.7656, "step": 21846 }, { "epoch": 1.6237086584912672, "grad_norm": 2.5546319466075276, "learning_rate": 9.151314139256873e-06, "loss": 0.7016, "step": 21847 }, { "epoch": 1.6237829803047195, "grad_norm": 1.807246594440497, "learning_rate": 9.15051466855938e-06, "loss": 0.5758, "step": 21848 }, { "epoch": 1.6238573021181717, "grad_norm": 1.9551673523344466, "learning_rate": 9.149715203330824e-06, "loss": 0.5546, "step": 21849 }, { "epoch": 1.623931623931624, "grad_norm": 4.958125213858051, "learning_rate": 9.148915743576359e-06, "loss": 0.6446, "step": 21850 }, { "epoch": 1.6240059457450762, "grad_norm": 1.9655303724209736, "learning_rate": 9.148116289301132e-06, "loss": 0.5635, "step": 21851 }, { "epoch": 1.6240802675585284, "grad_norm": 2.0457379978976773, "learning_rate": 9.147316840510289e-06, "loss": 0.6507, "step": 21852 }, { "epoch": 1.6241545893719807, "grad_norm": 2.0167970969707616, "learning_rate": 9.146517397208973e-06, "loss": 0.7733, "step": 21853 }, { "epoch": 1.6242289111854329, "grad_norm": 2.006993146622142, "learning_rate": 9.145717959402333e-06, "loss": 0.6556, "step": 21854 }, { "epoch": 1.6243032329988851, "grad_norm": 2.114519759399175, "learning_rate": 9.144918527095518e-06, "loss": 0.6757, "step": 21855 }, { "epoch": 1.6243775548123374, "grad_norm": 2.0001978783927674, "learning_rate": 9.144119100293678e-06, "loss": 0.553, "step": 21856 }, { "epoch": 1.6244518766257898, "grad_norm": 2.4679112050911893, "learning_rate": 9.143319679001949e-06, "loss": 0.5315, "step": 21857 }, { "epoch": 1.6245261984392418, "grad_norm": 1.8091555340815169, "learning_rate": 9.142520263225484e-06, "loss": 0.7185, "step": 21858 }, { "epoch": 1.6246005202526943, "grad_norm": 1.708713872109638, "learning_rate": 9.14172085296943e-06, "loss": 0.5826, "step": 21859 }, { "epoch": 1.6246748420661463, "grad_norm": 1.6292945138557493, "learning_rate": 9.140921448238933e-06, "loss": 0.516, "step": 21860 }, { "epoch": 1.6247491638795988, "grad_norm": 2.0619266879978886, "learning_rate": 9.140122049039138e-06, "loss": 0.689, "step": 21861 }, { "epoch": 1.6248234856930508, "grad_norm": 1.8669095407749738, "learning_rate": 9.139322655375193e-06, "loss": 0.4707, "step": 21862 }, { "epoch": 1.6248978075065033, "grad_norm": 1.6974565830249444, "learning_rate": 9.138523267252244e-06, "loss": 0.4881, "step": 21863 }, { "epoch": 1.6249721293199553, "grad_norm": 1.8374372233556266, "learning_rate": 9.137723884675437e-06, "loss": 0.5831, "step": 21864 }, { "epoch": 1.6250464511334077, "grad_norm": 2.053360311635764, "learning_rate": 9.13692450764992e-06, "loss": 0.6464, "step": 21865 }, { "epoch": 1.6251207729468597, "grad_norm": 2.2713965662797215, "learning_rate": 9.136125136180838e-06, "loss": 0.6475, "step": 21866 }, { "epoch": 1.6251950947603122, "grad_norm": 2.3459542315581468, "learning_rate": 9.135325770273341e-06, "loss": 0.5928, "step": 21867 }, { "epoch": 1.6252694165737644, "grad_norm": 1.8952573712294702, "learning_rate": 9.134526409932567e-06, "loss": 0.6673, "step": 21868 }, { "epoch": 1.6253437383872167, "grad_norm": 2.0729394299892805, "learning_rate": 9.133727055163669e-06, "loss": 0.6815, "step": 21869 }, { "epoch": 1.625418060200669, "grad_norm": 1.634337678124718, "learning_rate": 9.132927705971791e-06, "loss": 0.5071, "step": 21870 }, { "epoch": 1.6254923820141212, "grad_norm": 2.0481263547966537, "learning_rate": 9.132128362362079e-06, "loss": 0.7085, "step": 21871 }, { "epoch": 1.6255667038275734, "grad_norm": 1.7804169860943162, "learning_rate": 9.131329024339678e-06, "loss": 0.4712, "step": 21872 }, { "epoch": 1.6256410256410256, "grad_norm": 1.542736709219075, "learning_rate": 9.130529691909738e-06, "loss": 0.4984, "step": 21873 }, { "epoch": 1.6257153474544779, "grad_norm": 1.8124936706889028, "learning_rate": 9.129730365077403e-06, "loss": 0.628, "step": 21874 }, { "epoch": 1.6257896692679301, "grad_norm": 1.8736212488809092, "learning_rate": 9.128931043847816e-06, "loss": 0.5647, "step": 21875 }, { "epoch": 1.6258639910813824, "grad_norm": 1.6646569923880958, "learning_rate": 9.128131728226127e-06, "loss": 0.4922, "step": 21876 }, { "epoch": 1.6259383128948346, "grad_norm": 1.6798480048932, "learning_rate": 9.12733241821748e-06, "loss": 0.5033, "step": 21877 }, { "epoch": 1.6260126347082868, "grad_norm": 2.4208174658102615, "learning_rate": 9.126533113827023e-06, "loss": 0.8394, "step": 21878 }, { "epoch": 1.626086956521739, "grad_norm": 2.4621771515026762, "learning_rate": 9.125733815059905e-06, "loss": 0.4805, "step": 21879 }, { "epoch": 1.6261612783351915, "grad_norm": 1.7718264054485975, "learning_rate": 9.12493452192126e-06, "loss": 0.6187, "step": 21880 }, { "epoch": 1.6262356001486435, "grad_norm": 1.760892417472527, "learning_rate": 9.124135234416247e-06, "loss": 0.6263, "step": 21881 }, { "epoch": 1.626309921962096, "grad_norm": 1.944247984814407, "learning_rate": 9.123335952550002e-06, "loss": 0.6547, "step": 21882 }, { "epoch": 1.626384243775548, "grad_norm": 1.5948190798431254, "learning_rate": 9.122536676327673e-06, "loss": 0.4708, "step": 21883 }, { "epoch": 1.6264585655890005, "grad_norm": 2.0426232968931246, "learning_rate": 9.12173740575441e-06, "loss": 0.6112, "step": 21884 }, { "epoch": 1.6265328874024525, "grad_norm": 1.734362490375545, "learning_rate": 9.120938140835357e-06, "loss": 0.5588, "step": 21885 }, { "epoch": 1.626607209215905, "grad_norm": 1.964135331778132, "learning_rate": 9.120138881575657e-06, "loss": 0.6123, "step": 21886 }, { "epoch": 1.626681531029357, "grad_norm": 2.505652038561404, "learning_rate": 9.119339627980457e-06, "loss": 0.5736, "step": 21887 }, { "epoch": 1.6267558528428094, "grad_norm": 1.7876617955036127, "learning_rate": 9.118540380054902e-06, "loss": 0.606, "step": 21888 }, { "epoch": 1.6268301746562615, "grad_norm": 2.1937428426189234, "learning_rate": 9.117741137804139e-06, "loss": 0.7038, "step": 21889 }, { "epoch": 1.626904496469714, "grad_norm": 2.0556364380055334, "learning_rate": 9.116941901233318e-06, "loss": 0.5695, "step": 21890 }, { "epoch": 1.6269788182831662, "grad_norm": 1.9963431911451253, "learning_rate": 9.116142670347576e-06, "loss": 0.5314, "step": 21891 }, { "epoch": 1.6270531400966184, "grad_norm": 1.789698163357111, "learning_rate": 9.11534344515206e-06, "loss": 0.5015, "step": 21892 }, { "epoch": 1.6271274619100706, "grad_norm": 2.0283777090683364, "learning_rate": 9.114544225651916e-06, "loss": 0.5018, "step": 21893 }, { "epoch": 1.6272017837235229, "grad_norm": 2.583966767221809, "learning_rate": 9.11374501185229e-06, "loss": 0.6005, "step": 21894 }, { "epoch": 1.627276105536975, "grad_norm": 1.7705287589055043, "learning_rate": 9.112945803758329e-06, "loss": 0.5177, "step": 21895 }, { "epoch": 1.6273504273504273, "grad_norm": 1.8415873812524663, "learning_rate": 9.11214660137518e-06, "loss": 0.6922, "step": 21896 }, { "epoch": 1.6274247491638796, "grad_norm": 1.9248603000211326, "learning_rate": 9.11134740470798e-06, "loss": 0.6557, "step": 21897 }, { "epoch": 1.6274990709773318, "grad_norm": 2.414129837176951, "learning_rate": 9.110548213761881e-06, "loss": 0.7019, "step": 21898 }, { "epoch": 1.627573392790784, "grad_norm": 1.662230135435046, "learning_rate": 9.109749028542028e-06, "loss": 0.5324, "step": 21899 }, { "epoch": 1.6276477146042363, "grad_norm": 2.0891957926573426, "learning_rate": 9.108949849053564e-06, "loss": 0.7448, "step": 21900 }, { "epoch": 1.6277220364176888, "grad_norm": 1.7097783589592237, "learning_rate": 9.108150675301635e-06, "loss": 0.5357, "step": 21901 }, { "epoch": 1.6277963582311408, "grad_norm": 2.077276852423234, "learning_rate": 9.107351507291388e-06, "loss": 0.6644, "step": 21902 }, { "epoch": 1.6278706800445932, "grad_norm": 2.1071226283982463, "learning_rate": 9.106552345027962e-06, "loss": 0.6795, "step": 21903 }, { "epoch": 1.6279450018580452, "grad_norm": 2.277125524223461, "learning_rate": 9.105753188516505e-06, "loss": 0.6416, "step": 21904 }, { "epoch": 1.6280193236714977, "grad_norm": 2.1150768413563035, "learning_rate": 9.104954037762163e-06, "loss": 0.753, "step": 21905 }, { "epoch": 1.6280936454849497, "grad_norm": 1.8472835090035962, "learning_rate": 9.104154892770081e-06, "loss": 0.5441, "step": 21906 }, { "epoch": 1.6281679672984022, "grad_norm": 1.907969254796562, "learning_rate": 9.103355753545404e-06, "loss": 0.5317, "step": 21907 }, { "epoch": 1.6282422891118542, "grad_norm": 1.8205365064911898, "learning_rate": 9.102556620093276e-06, "loss": 0.5696, "step": 21908 }, { "epoch": 1.6283166109253067, "grad_norm": 1.7837749270231191, "learning_rate": 9.10175749241884e-06, "loss": 0.6343, "step": 21909 }, { "epoch": 1.6283909327387587, "grad_norm": 2.0946951372747566, "learning_rate": 9.100958370527244e-06, "loss": 0.7344, "step": 21910 }, { "epoch": 1.6284652545522111, "grad_norm": 1.8280662574331188, "learning_rate": 9.100159254423633e-06, "loss": 0.6505, "step": 21911 }, { "epoch": 1.6285395763656634, "grad_norm": 2.431050752857129, "learning_rate": 9.099360144113147e-06, "loss": 0.6659, "step": 21912 }, { "epoch": 1.6286138981791156, "grad_norm": 2.379260167145118, "learning_rate": 9.09856103960094e-06, "loss": 0.7322, "step": 21913 }, { "epoch": 1.6286882199925679, "grad_norm": 1.888200710173005, "learning_rate": 9.097761940892144e-06, "loss": 0.5686, "step": 21914 }, { "epoch": 1.62876254180602, "grad_norm": 2.139025507220965, "learning_rate": 9.096962847991911e-06, "loss": 0.3724, "step": 21915 }, { "epoch": 1.6288368636194723, "grad_norm": 2.315195690938942, "learning_rate": 9.096163760905385e-06, "loss": 0.6561, "step": 21916 }, { "epoch": 1.6289111854329246, "grad_norm": 2.2224433369753522, "learning_rate": 9.095364679637708e-06, "loss": 0.7006, "step": 21917 }, { "epoch": 1.6289855072463768, "grad_norm": 2.0654400869878264, "learning_rate": 9.09456560419403e-06, "loss": 0.6407, "step": 21918 }, { "epoch": 1.629059829059829, "grad_norm": 2.058962189586284, "learning_rate": 9.093766534579489e-06, "loss": 0.6274, "step": 21919 }, { "epoch": 1.6291341508732813, "grad_norm": 2.110215683398511, "learning_rate": 9.09296747079923e-06, "loss": 0.5252, "step": 21920 }, { "epoch": 1.6292084726867335, "grad_norm": 2.332140332705029, "learning_rate": 9.092168412858402e-06, "loss": 0.6608, "step": 21921 }, { "epoch": 1.6292827945001858, "grad_norm": 1.745666068361374, "learning_rate": 9.091369360762148e-06, "loss": 0.5318, "step": 21922 }, { "epoch": 1.629357116313638, "grad_norm": 2.037114477618693, "learning_rate": 9.090570314515609e-06, "loss": 0.4944, "step": 21923 }, { "epoch": 1.6294314381270905, "grad_norm": 1.9413801654463714, "learning_rate": 9.089771274123936e-06, "loss": 0.5533, "step": 21924 }, { "epoch": 1.6295057599405425, "grad_norm": 3.4610708894030298, "learning_rate": 9.088972239592262e-06, "loss": 0.7787, "step": 21925 }, { "epoch": 1.629580081753995, "grad_norm": 1.6068159632247803, "learning_rate": 9.088173210925739e-06, "loss": 0.5086, "step": 21926 }, { "epoch": 1.629654403567447, "grad_norm": 1.8169154756464512, "learning_rate": 9.08737418812951e-06, "loss": 0.4736, "step": 21927 }, { "epoch": 1.6297287253808994, "grad_norm": 2.106463636231188, "learning_rate": 9.086575171208717e-06, "loss": 0.5847, "step": 21928 }, { "epoch": 1.6298030471943514, "grad_norm": 1.6351090396811683, "learning_rate": 9.085776160168509e-06, "loss": 0.4314, "step": 21929 }, { "epoch": 1.6298773690078039, "grad_norm": 1.8630080918741745, "learning_rate": 9.084977155014025e-06, "loss": 0.5467, "step": 21930 }, { "epoch": 1.629951690821256, "grad_norm": 1.919390452188654, "learning_rate": 9.084178155750408e-06, "loss": 0.6192, "step": 21931 }, { "epoch": 1.6300260126347084, "grad_norm": 1.5864752434561318, "learning_rate": 9.083379162382805e-06, "loss": 0.6466, "step": 21932 }, { "epoch": 1.6301003344481604, "grad_norm": 1.8016432560656608, "learning_rate": 9.082580174916365e-06, "loss": 0.5873, "step": 21933 }, { "epoch": 1.6301746562616128, "grad_norm": 2.2653370148763248, "learning_rate": 9.081781193356222e-06, "loss": 0.6185, "step": 21934 }, { "epoch": 1.630248978075065, "grad_norm": 2.0538455903341, "learning_rate": 9.080982217707528e-06, "loss": 0.5796, "step": 21935 }, { "epoch": 1.6303232998885173, "grad_norm": 2.2570681666650425, "learning_rate": 9.080183247975418e-06, "loss": 0.6729, "step": 21936 }, { "epoch": 1.6303976217019696, "grad_norm": 2.234537171155497, "learning_rate": 9.079384284165042e-06, "loss": 0.661, "step": 21937 }, { "epoch": 1.6304719435154218, "grad_norm": 1.9312311837843217, "learning_rate": 9.07858532628154e-06, "loss": 0.6944, "step": 21938 }, { "epoch": 1.630546265328874, "grad_norm": 1.8952216901234293, "learning_rate": 9.07778637433006e-06, "loss": 0.5491, "step": 21939 }, { "epoch": 1.6306205871423263, "grad_norm": 2.1142933537279096, "learning_rate": 9.076987428315744e-06, "loss": 0.6238, "step": 21940 }, { "epoch": 1.6306949089557785, "grad_norm": 1.846130997516394, "learning_rate": 9.076188488243735e-06, "loss": 0.5452, "step": 21941 }, { "epoch": 1.6307692307692307, "grad_norm": 2.8635027347238022, "learning_rate": 9.075389554119175e-06, "loss": 0.5422, "step": 21942 }, { "epoch": 1.630843552582683, "grad_norm": 2.2369401310972212, "learning_rate": 9.07459062594721e-06, "loss": 0.5799, "step": 21943 }, { "epoch": 1.6309178743961352, "grad_norm": 2.4495378843607107, "learning_rate": 9.073791703732984e-06, "loss": 0.6305, "step": 21944 }, { "epoch": 1.6309921962095875, "grad_norm": 1.608129260629348, "learning_rate": 9.072992787481636e-06, "loss": 0.5065, "step": 21945 }, { "epoch": 1.6310665180230397, "grad_norm": 2.2033403646878886, "learning_rate": 9.072193877198318e-06, "loss": 0.7762, "step": 21946 }, { "epoch": 1.6311408398364922, "grad_norm": 2.1399465178261963, "learning_rate": 9.071394972888163e-06, "loss": 0.6281, "step": 21947 }, { "epoch": 1.6312151616499442, "grad_norm": 1.8205828913932744, "learning_rate": 9.07059607455632e-06, "loss": 0.5963, "step": 21948 }, { "epoch": 1.6312894834633966, "grad_norm": 1.986017679538451, "learning_rate": 9.06979718220793e-06, "loss": 0.6674, "step": 21949 }, { "epoch": 1.6313638052768487, "grad_norm": 1.8434553372603573, "learning_rate": 9.068998295848138e-06, "loss": 0.5408, "step": 21950 }, { "epoch": 1.6314381270903011, "grad_norm": 3.254480091262558, "learning_rate": 9.068199415482088e-06, "loss": 0.5872, "step": 21951 }, { "epoch": 1.6315124489037531, "grad_norm": 1.8657412466199597, "learning_rate": 9.067400541114919e-06, "loss": 0.5883, "step": 21952 }, { "epoch": 1.6315867707172056, "grad_norm": 1.896028955165337, "learning_rate": 9.066601672751778e-06, "loss": 0.6452, "step": 21953 }, { "epoch": 1.6316610925306576, "grad_norm": 1.911736196951683, "learning_rate": 9.065802810397808e-06, "loss": 0.515, "step": 21954 }, { "epoch": 1.63173541434411, "grad_norm": 3.669650583423708, "learning_rate": 9.06500395405815e-06, "loss": 0.7009, "step": 21955 }, { "epoch": 1.631809736157562, "grad_norm": 1.8500227811980818, "learning_rate": 9.06420510373795e-06, "loss": 0.4354, "step": 21956 }, { "epoch": 1.6318840579710145, "grad_norm": 2.0157357648028813, "learning_rate": 9.063406259442346e-06, "loss": 0.4577, "step": 21957 }, { "epoch": 1.6319583797844668, "grad_norm": 2.3011073672984916, "learning_rate": 9.06260742117649e-06, "loss": 0.646, "step": 21958 }, { "epoch": 1.632032701597919, "grad_norm": 1.9928137462657622, "learning_rate": 9.061808588945514e-06, "loss": 0.6121, "step": 21959 }, { "epoch": 1.6321070234113713, "grad_norm": 2.15704782413463, "learning_rate": 9.061009762754565e-06, "loss": 0.7496, "step": 21960 }, { "epoch": 1.6321813452248235, "grad_norm": 2.486683596334265, "learning_rate": 9.060210942608786e-06, "loss": 0.6569, "step": 21961 }, { "epoch": 1.6322556670382757, "grad_norm": 1.63509550479732, "learning_rate": 9.059412128513324e-06, "loss": 0.5307, "step": 21962 }, { "epoch": 1.632329988851728, "grad_norm": 2.0250184145738257, "learning_rate": 9.058613320473315e-06, "loss": 0.6437, "step": 21963 }, { "epoch": 1.6324043106651802, "grad_norm": 1.9830560268748778, "learning_rate": 9.057814518493905e-06, "loss": 0.5656, "step": 21964 }, { "epoch": 1.6324786324786325, "grad_norm": 2.026763306755762, "learning_rate": 9.057015722580235e-06, "loss": 0.5781, "step": 21965 }, { "epoch": 1.6325529542920847, "grad_norm": 1.5282242964317156, "learning_rate": 9.056216932737452e-06, "loss": 0.5192, "step": 21966 }, { "epoch": 1.632627276105537, "grad_norm": 2.028258892162416, "learning_rate": 9.055418148970692e-06, "loss": 0.6439, "step": 21967 }, { "epoch": 1.6327015979189894, "grad_norm": 2.0911058369883477, "learning_rate": 9.054619371285103e-06, "loss": 0.5463, "step": 21968 }, { "epoch": 1.6327759197324414, "grad_norm": 1.9912551832772525, "learning_rate": 9.05382059968583e-06, "loss": 0.6057, "step": 21969 }, { "epoch": 1.6328502415458939, "grad_norm": 1.8955807995556975, "learning_rate": 9.053021834178005e-06, "loss": 0.6387, "step": 21970 }, { "epoch": 1.6329245633593459, "grad_norm": 2.1331060023688186, "learning_rate": 9.052223074766776e-06, "loss": 0.7482, "step": 21971 }, { "epoch": 1.6329988851727983, "grad_norm": 2.089790642448774, "learning_rate": 9.051424321457286e-06, "loss": 0.7082, "step": 21972 }, { "epoch": 1.6330732069862504, "grad_norm": 2.256506508813554, "learning_rate": 9.05062557425468e-06, "loss": 0.5918, "step": 21973 }, { "epoch": 1.6331475287997028, "grad_norm": 2.464661859902439, "learning_rate": 9.049826833164094e-06, "loss": 0.6593, "step": 21974 }, { "epoch": 1.6332218506131548, "grad_norm": 1.943782170349391, "learning_rate": 9.049028098190674e-06, "loss": 0.5601, "step": 21975 }, { "epoch": 1.6332961724266073, "grad_norm": 1.7040681985457145, "learning_rate": 9.048229369339562e-06, "loss": 0.5658, "step": 21976 }, { "epoch": 1.6333704942400593, "grad_norm": 1.8980277344530847, "learning_rate": 9.047430646615902e-06, "loss": 0.6143, "step": 21977 }, { "epoch": 1.6334448160535118, "grad_norm": 1.9207181743784232, "learning_rate": 9.046631930024831e-06, "loss": 0.6125, "step": 21978 }, { "epoch": 1.6335191378669638, "grad_norm": 2.1983352451869087, "learning_rate": 9.045833219571497e-06, "loss": 0.6942, "step": 21979 }, { "epoch": 1.6335934596804162, "grad_norm": 1.6827245435116454, "learning_rate": 9.04503451526104e-06, "loss": 0.5328, "step": 21980 }, { "epoch": 1.6336677814938685, "grad_norm": 2.8137140243880947, "learning_rate": 9.0442358170986e-06, "loss": 0.6148, "step": 21981 }, { "epoch": 1.6337421033073207, "grad_norm": 1.6916685000521443, "learning_rate": 9.043437125089317e-06, "loss": 0.5033, "step": 21982 }, { "epoch": 1.633816425120773, "grad_norm": 1.8905626328756777, "learning_rate": 9.042638439238338e-06, "loss": 0.5289, "step": 21983 }, { "epoch": 1.6338907469342252, "grad_norm": 1.9949416182904647, "learning_rate": 9.041839759550806e-06, "loss": 0.6626, "step": 21984 }, { "epoch": 1.6339650687476774, "grad_norm": 1.823849444431444, "learning_rate": 9.041041086031857e-06, "loss": 0.4896, "step": 21985 }, { "epoch": 1.6340393905611297, "grad_norm": 2.0684854996426085, "learning_rate": 9.040242418686636e-06, "loss": 0.677, "step": 21986 }, { "epoch": 1.634113712374582, "grad_norm": 1.7934378052759141, "learning_rate": 9.039443757520284e-06, "loss": 0.6637, "step": 21987 }, { "epoch": 1.6341880341880342, "grad_norm": 1.9104426308962086, "learning_rate": 9.038645102537944e-06, "loss": 0.3095, "step": 21988 }, { "epoch": 1.6342623560014864, "grad_norm": 1.9803911427404381, "learning_rate": 9.037846453744758e-06, "loss": 0.5343, "step": 21989 }, { "epoch": 1.6343366778149386, "grad_norm": 2.0549613191105847, "learning_rate": 9.037047811145864e-06, "loss": 0.6173, "step": 21990 }, { "epoch": 1.634410999628391, "grad_norm": 1.7588894946916571, "learning_rate": 9.036249174746408e-06, "loss": 0.5539, "step": 21991 }, { "epoch": 1.634485321441843, "grad_norm": 1.9494467400743174, "learning_rate": 9.035450544551532e-06, "loss": 0.6709, "step": 21992 }, { "epoch": 1.6345596432552956, "grad_norm": 2.0407576613805563, "learning_rate": 9.034651920566372e-06, "loss": 0.5729, "step": 21993 }, { "epoch": 1.6346339650687476, "grad_norm": 1.951331405369853, "learning_rate": 9.033853302796074e-06, "loss": 0.6915, "step": 21994 }, { "epoch": 1.6347082868822, "grad_norm": 2.0319619209413498, "learning_rate": 9.03305469124578e-06, "loss": 0.5154, "step": 21995 }, { "epoch": 1.634782608695652, "grad_norm": 1.8707659384137558, "learning_rate": 9.032256085920626e-06, "loss": 0.5834, "step": 21996 }, { "epoch": 1.6348569305091045, "grad_norm": 2.438630242504973, "learning_rate": 9.031457486825758e-06, "loss": 0.6874, "step": 21997 }, { "epoch": 1.6349312523225565, "grad_norm": 2.274192286397939, "learning_rate": 9.030658893966318e-06, "loss": 0.8442, "step": 21998 }, { "epoch": 1.635005574136009, "grad_norm": 1.6073142463909567, "learning_rate": 9.029860307347447e-06, "loss": 0.4501, "step": 21999 }, { "epoch": 1.635079895949461, "grad_norm": 2.234959865214916, "learning_rate": 9.029061726974282e-06, "loss": 0.6612, "step": 22000 }, { "epoch": 1.6351542177629135, "grad_norm": 2.160380193989983, "learning_rate": 9.028263152851968e-06, "loss": 0.5148, "step": 22001 }, { "epoch": 1.6352285395763657, "grad_norm": 2.3456262659689084, "learning_rate": 9.027464584985644e-06, "loss": 0.7824, "step": 22002 }, { "epoch": 1.635302861389818, "grad_norm": 1.8079357061492696, "learning_rate": 9.026666023380459e-06, "loss": 0.4835, "step": 22003 }, { "epoch": 1.6353771832032702, "grad_norm": 1.8608271253095736, "learning_rate": 9.025867468041542e-06, "loss": 0.5003, "step": 22004 }, { "epoch": 1.6354515050167224, "grad_norm": 2.0541723784782544, "learning_rate": 9.025068918974039e-06, "loss": 0.4156, "step": 22005 }, { "epoch": 1.6355258268301747, "grad_norm": 1.9313309850545428, "learning_rate": 9.024270376183096e-06, "loss": 0.6532, "step": 22006 }, { "epoch": 1.635600148643627, "grad_norm": 1.9504582246031896, "learning_rate": 9.023471839673847e-06, "loss": 0.6008, "step": 22007 }, { "epoch": 1.6356744704570791, "grad_norm": 1.9177841222913905, "learning_rate": 9.022673309451434e-06, "loss": 0.6571, "step": 22008 }, { "epoch": 1.6357487922705314, "grad_norm": 2.272794492873506, "learning_rate": 9.021874785521001e-06, "loss": 0.7122, "step": 22009 }, { "epoch": 1.6358231140839836, "grad_norm": 2.088887285691493, "learning_rate": 9.021076267887688e-06, "loss": 0.6818, "step": 22010 }, { "epoch": 1.6358974358974359, "grad_norm": 1.7793158342365893, "learning_rate": 9.020277756556635e-06, "loss": 0.5384, "step": 22011 }, { "epoch": 1.635971757710888, "grad_norm": 1.757779672952377, "learning_rate": 9.019479251532983e-06, "loss": 0.5647, "step": 22012 }, { "epoch": 1.6360460795243403, "grad_norm": 1.9626063915830156, "learning_rate": 9.01868075282187e-06, "loss": 0.5163, "step": 22013 }, { "epoch": 1.6361204013377928, "grad_norm": 2.0011084603252396, "learning_rate": 9.017882260428446e-06, "loss": 0.4584, "step": 22014 }, { "epoch": 1.6361947231512448, "grad_norm": 2.2597689182330885, "learning_rate": 9.01708377435784e-06, "loss": 0.6639, "step": 22015 }, { "epoch": 1.6362690449646973, "grad_norm": 1.6392466842386462, "learning_rate": 9.0162852946152e-06, "loss": 0.4665, "step": 22016 }, { "epoch": 1.6363433667781493, "grad_norm": 2.2687316334899643, "learning_rate": 9.015486821205663e-06, "loss": 0.5749, "step": 22017 }, { "epoch": 1.6364176885916017, "grad_norm": 2.277743629068587, "learning_rate": 9.01468835413437e-06, "loss": 0.4102, "step": 22018 }, { "epoch": 1.6364920104050538, "grad_norm": 2.152247117457908, "learning_rate": 9.013889893406461e-06, "loss": 0.5389, "step": 22019 }, { "epoch": 1.6365663322185062, "grad_norm": 2.0565282882021254, "learning_rate": 9.013091439027079e-06, "loss": 0.6749, "step": 22020 }, { "epoch": 1.6366406540319582, "grad_norm": 2.080484059421171, "learning_rate": 9.012292991001365e-06, "loss": 0.5553, "step": 22021 }, { "epoch": 1.6367149758454107, "grad_norm": 2.0635191333242093, "learning_rate": 9.011494549334455e-06, "loss": 0.527, "step": 22022 }, { "epoch": 1.6367892976588627, "grad_norm": 2.943182496829801, "learning_rate": 9.010696114031493e-06, "loss": 0.6249, "step": 22023 }, { "epoch": 1.6368636194723152, "grad_norm": 1.961531351632572, "learning_rate": 9.009897685097617e-06, "loss": 0.6119, "step": 22024 }, { "epoch": 1.6369379412857674, "grad_norm": 2.859578108722582, "learning_rate": 9.009099262537973e-06, "loss": 0.5953, "step": 22025 }, { "epoch": 1.6370122630992197, "grad_norm": 3.264873862354932, "learning_rate": 9.008300846357692e-06, "loss": 0.6155, "step": 22026 }, { "epoch": 1.637086584912672, "grad_norm": 1.7716161476666328, "learning_rate": 9.007502436561921e-06, "loss": 0.5787, "step": 22027 }, { "epoch": 1.6371609067261241, "grad_norm": 2.3266504895712043, "learning_rate": 9.006704033155796e-06, "loss": 0.7357, "step": 22028 }, { "epoch": 1.6372352285395764, "grad_norm": 2.214751959014105, "learning_rate": 9.005905636144459e-06, "loss": 0.7189, "step": 22029 }, { "epoch": 1.6373095503530286, "grad_norm": 2.4112891314641582, "learning_rate": 9.00510724553305e-06, "loss": 0.6283, "step": 22030 }, { "epoch": 1.6373838721664808, "grad_norm": 2.744803036621407, "learning_rate": 9.004308861326708e-06, "loss": 0.4915, "step": 22031 }, { "epoch": 1.637458193979933, "grad_norm": 2.505598284223675, "learning_rate": 9.003510483530576e-06, "loss": 0.7364, "step": 22032 }, { "epoch": 1.6375325157933853, "grad_norm": 2.0995884403272904, "learning_rate": 9.00271211214979e-06, "loss": 0.5895, "step": 22033 }, { "epoch": 1.6376068376068376, "grad_norm": 2.1484648437806073, "learning_rate": 9.00191374718949e-06, "loss": 0.6116, "step": 22034 }, { "epoch": 1.6376811594202898, "grad_norm": 1.9060207959642388, "learning_rate": 9.00111538865482e-06, "loss": 0.5527, "step": 22035 }, { "epoch": 1.637755481233742, "grad_norm": 2.050883587695377, "learning_rate": 9.000317036550919e-06, "loss": 0.6013, "step": 22036 }, { "epoch": 1.6378298030471945, "grad_norm": 1.7888659971623686, "learning_rate": 8.999518690882925e-06, "loss": 0.5331, "step": 22037 }, { "epoch": 1.6379041248606465, "grad_norm": 2.3217490758311845, "learning_rate": 8.998720351655978e-06, "loss": 0.6368, "step": 22038 }, { "epoch": 1.637978446674099, "grad_norm": 2.9225391987309384, "learning_rate": 8.997922018875215e-06, "loss": 0.5919, "step": 22039 }, { "epoch": 1.638052768487551, "grad_norm": 2.0996079445629445, "learning_rate": 8.997123692545778e-06, "loss": 0.5801, "step": 22040 }, { "epoch": 1.6381270903010035, "grad_norm": 2.0162582521887176, "learning_rate": 8.996325372672807e-06, "loss": 0.6557, "step": 22041 }, { "epoch": 1.6382014121144555, "grad_norm": 1.661183598956837, "learning_rate": 8.99552705926144e-06, "loss": 0.5522, "step": 22042 }, { "epoch": 1.638275733927908, "grad_norm": 2.4383601978794305, "learning_rate": 8.99472875231682e-06, "loss": 0.8162, "step": 22043 }, { "epoch": 1.63835005574136, "grad_norm": 1.7449659400600295, "learning_rate": 8.993930451844084e-06, "loss": 0.4447, "step": 22044 }, { "epoch": 1.6384243775548124, "grad_norm": 2.2438632737259128, "learning_rate": 8.993132157848369e-06, "loss": 0.4486, "step": 22045 }, { "epoch": 1.6384986993682644, "grad_norm": 2.055457915117176, "learning_rate": 8.992333870334819e-06, "loss": 0.5786, "step": 22046 }, { "epoch": 1.6385730211817169, "grad_norm": 2.3457567469985725, "learning_rate": 8.991535589308571e-06, "loss": 0.65, "step": 22047 }, { "epoch": 1.6386473429951691, "grad_norm": 1.9255279760299147, "learning_rate": 8.990737314774766e-06, "loss": 0.431, "step": 22048 }, { "epoch": 1.6387216648086214, "grad_norm": 1.7960493349335553, "learning_rate": 8.989939046738543e-06, "loss": 0.4477, "step": 22049 }, { "epoch": 1.6387959866220736, "grad_norm": 1.9819309872415143, "learning_rate": 8.989140785205035e-06, "loss": 0.6554, "step": 22050 }, { "epoch": 1.6388703084355258, "grad_norm": 2.5486448327408633, "learning_rate": 8.988342530179389e-06, "loss": 0.6874, "step": 22051 }, { "epoch": 1.638944630248978, "grad_norm": 2.2803466323866837, "learning_rate": 8.98754428166674e-06, "loss": 0.6883, "step": 22052 }, { "epoch": 1.6390189520624303, "grad_norm": 1.9657917872995043, "learning_rate": 8.986746039672229e-06, "loss": 0.6264, "step": 22053 }, { "epoch": 1.6390932738758826, "grad_norm": 2.286250020367367, "learning_rate": 8.985947804200995e-06, "loss": 0.7349, "step": 22054 }, { "epoch": 1.6391675956893348, "grad_norm": 1.917249206339932, "learning_rate": 8.985149575258174e-06, "loss": 0.7571, "step": 22055 }, { "epoch": 1.639241917502787, "grad_norm": 1.9974938017560289, "learning_rate": 8.98435135284891e-06, "loss": 0.5578, "step": 22056 }, { "epoch": 1.6393162393162393, "grad_norm": 2.068145515183989, "learning_rate": 8.983553136978338e-06, "loss": 0.671, "step": 22057 }, { "epoch": 1.6393905611296917, "grad_norm": 1.6429351720186127, "learning_rate": 8.9827549276516e-06, "loss": 0.4788, "step": 22058 }, { "epoch": 1.6394648829431437, "grad_norm": 2.347733617504051, "learning_rate": 8.981956724873833e-06, "loss": 0.6927, "step": 22059 }, { "epoch": 1.6395392047565962, "grad_norm": 2.049873387831554, "learning_rate": 8.981158528650177e-06, "loss": 0.5225, "step": 22060 }, { "epoch": 1.6396135265700482, "grad_norm": 2.033389272205782, "learning_rate": 8.980360338985767e-06, "loss": 0.6103, "step": 22061 }, { "epoch": 1.6396878483835007, "grad_norm": 1.9171270205151403, "learning_rate": 8.979562155885742e-06, "loss": 0.5887, "step": 22062 }, { "epoch": 1.6397621701969527, "grad_norm": 1.9170414229542843, "learning_rate": 8.978763979355245e-06, "loss": 0.5805, "step": 22063 }, { "epoch": 1.6398364920104052, "grad_norm": 2.283832804483527, "learning_rate": 8.977965809399413e-06, "loss": 0.5678, "step": 22064 }, { "epoch": 1.6399108138238572, "grad_norm": 2.1771667793289757, "learning_rate": 8.977167646023385e-06, "loss": 0.7397, "step": 22065 }, { "epoch": 1.6399851356373096, "grad_norm": 1.9009818424885903, "learning_rate": 8.976369489232296e-06, "loss": 0.6035, "step": 22066 }, { "epoch": 1.6400594574507616, "grad_norm": 1.612517540193451, "learning_rate": 8.975571339031288e-06, "loss": 0.4537, "step": 22067 }, { "epoch": 1.640133779264214, "grad_norm": 1.7783181674512902, "learning_rate": 8.974773195425499e-06, "loss": 0.5033, "step": 22068 }, { "epoch": 1.6402081010776663, "grad_norm": 2.35335897533037, "learning_rate": 8.973975058420068e-06, "loss": 0.743, "step": 22069 }, { "epoch": 1.6402824228911186, "grad_norm": 4.11181292423458, "learning_rate": 8.97317692802013e-06, "loss": 0.4192, "step": 22070 }, { "epoch": 1.6403567447045708, "grad_norm": 2.711158894893025, "learning_rate": 8.972378804230833e-06, "loss": 0.6064, "step": 22071 }, { "epoch": 1.640431066518023, "grad_norm": 1.9322258271551773, "learning_rate": 8.9715806870573e-06, "loss": 0.7103, "step": 22072 }, { "epoch": 1.6405053883314753, "grad_norm": 1.9818032062064992, "learning_rate": 8.97078257650468e-06, "loss": 0.6121, "step": 22073 }, { "epoch": 1.6405797101449275, "grad_norm": 1.981355698387128, "learning_rate": 8.969984472578106e-06, "loss": 0.5793, "step": 22074 }, { "epoch": 1.6406540319583798, "grad_norm": 1.9623859846572997, "learning_rate": 8.969186375282718e-06, "loss": 0.624, "step": 22075 }, { "epoch": 1.640728353771832, "grad_norm": 2.6671772687967175, "learning_rate": 8.96838828462366e-06, "loss": 0.7483, "step": 22076 }, { "epoch": 1.6408026755852843, "grad_norm": 1.921190297479459, "learning_rate": 8.96759020060606e-06, "loss": 0.5487, "step": 22077 }, { "epoch": 1.6408769973987365, "grad_norm": 1.7516679577417844, "learning_rate": 8.966792123235062e-06, "loss": 0.5458, "step": 22078 }, { "epoch": 1.6409513192121887, "grad_norm": 2.0485917603433834, "learning_rate": 8.965994052515801e-06, "loss": 0.6218, "step": 22079 }, { "epoch": 1.641025641025641, "grad_norm": 1.8249355458982246, "learning_rate": 8.96519598845342e-06, "loss": 0.6391, "step": 22080 }, { "epoch": 1.6410999628390934, "grad_norm": 2.2954959571847415, "learning_rate": 8.96439793105305e-06, "loss": 0.5948, "step": 22081 }, { "epoch": 1.6411742846525454, "grad_norm": 2.081311570172817, "learning_rate": 8.96359988031984e-06, "loss": 0.4996, "step": 22082 }, { "epoch": 1.641248606465998, "grad_norm": 2.101710724648549, "learning_rate": 8.962801836258915e-06, "loss": 0.5271, "step": 22083 }, { "epoch": 1.64132292827945, "grad_norm": 1.912760250352419, "learning_rate": 8.962003798875416e-06, "loss": 0.5348, "step": 22084 }, { "epoch": 1.6413972500929024, "grad_norm": 1.803287920387238, "learning_rate": 8.961205768174483e-06, "loss": 0.4666, "step": 22085 }, { "epoch": 1.6414715719063544, "grad_norm": 1.8453269828616956, "learning_rate": 8.960407744161256e-06, "loss": 0.6108, "step": 22086 }, { "epoch": 1.6415458937198069, "grad_norm": 1.9821714125939784, "learning_rate": 8.959609726840869e-06, "loss": 0.7157, "step": 22087 }, { "epoch": 1.6416202155332589, "grad_norm": 2.4717303097397276, "learning_rate": 8.958811716218458e-06, "loss": 0.6228, "step": 22088 }, { "epoch": 1.6416945373467113, "grad_norm": 2.006157287736793, "learning_rate": 8.958013712299165e-06, "loss": 0.5154, "step": 22089 }, { "epoch": 1.6417688591601634, "grad_norm": 1.772962004480981, "learning_rate": 8.957215715088125e-06, "loss": 0.6005, "step": 22090 }, { "epoch": 1.6418431809736158, "grad_norm": 2.3021630708882763, "learning_rate": 8.95641772459048e-06, "loss": 0.7198, "step": 22091 }, { "epoch": 1.641917502787068, "grad_norm": 1.8381223485469964, "learning_rate": 8.95561974081136e-06, "loss": 0.5451, "step": 22092 }, { "epoch": 1.6419918246005203, "grad_norm": 1.7322243701032638, "learning_rate": 8.95482176375591e-06, "loss": 0.5725, "step": 22093 }, { "epoch": 1.6420661464139725, "grad_norm": 2.524280059908569, "learning_rate": 8.954023793429259e-06, "loss": 0.6778, "step": 22094 }, { "epoch": 1.6421404682274248, "grad_norm": 2.111330335318342, "learning_rate": 8.95322582983655e-06, "loss": 0.5516, "step": 22095 }, { "epoch": 1.642214790040877, "grad_norm": 1.9385345364106448, "learning_rate": 8.952427872982916e-06, "loss": 0.5751, "step": 22096 }, { "epoch": 1.6422891118543292, "grad_norm": 1.441052061085825, "learning_rate": 8.9516299228735e-06, "loss": 0.3939, "step": 22097 }, { "epoch": 1.6423634336677815, "grad_norm": 2.1960883078345925, "learning_rate": 8.950831979513436e-06, "loss": 0.6157, "step": 22098 }, { "epoch": 1.6424377554812337, "grad_norm": 1.8794335398443753, "learning_rate": 8.95003404290786e-06, "loss": 0.5375, "step": 22099 }, { "epoch": 1.642512077294686, "grad_norm": 4.305806611870632, "learning_rate": 8.94923611306191e-06, "loss": 0.5711, "step": 22100 }, { "epoch": 1.6425863991081382, "grad_norm": 1.7447473643307765, "learning_rate": 8.948438189980724e-06, "loss": 0.416, "step": 22101 }, { "epoch": 1.6426607209215904, "grad_norm": 2.0324104990774243, "learning_rate": 8.94764027366944e-06, "loss": 0.6144, "step": 22102 }, { "epoch": 1.6427350427350427, "grad_norm": 2.616178941123267, "learning_rate": 8.946842364133191e-06, "loss": 0.6802, "step": 22103 }, { "epoch": 1.6428093645484951, "grad_norm": 2.1649008018107088, "learning_rate": 8.946044461377118e-06, "loss": 0.6435, "step": 22104 }, { "epoch": 1.6428836863619471, "grad_norm": 1.8452708539821012, "learning_rate": 8.945246565406359e-06, "loss": 0.6169, "step": 22105 }, { "epoch": 1.6429580081753996, "grad_norm": 1.8323631249187886, "learning_rate": 8.944448676226043e-06, "loss": 0.3787, "step": 22106 }, { "epoch": 1.6430323299888516, "grad_norm": 2.849668116378355, "learning_rate": 8.943650793841311e-06, "loss": 0.6396, "step": 22107 }, { "epoch": 1.643106651802304, "grad_norm": 3.2197369861951506, "learning_rate": 8.942852918257302e-06, "loss": 0.6643, "step": 22108 }, { "epoch": 1.643180973615756, "grad_norm": 3.2937689424087306, "learning_rate": 8.942055049479154e-06, "loss": 0.5243, "step": 22109 }, { "epoch": 1.6432552954292086, "grad_norm": 1.9366257524529087, "learning_rate": 8.941257187511998e-06, "loss": 0.6045, "step": 22110 }, { "epoch": 1.6433296172426606, "grad_norm": 2.103278496502353, "learning_rate": 8.940459332360971e-06, "loss": 0.6221, "step": 22111 }, { "epoch": 1.643403939056113, "grad_norm": 1.5469956908807134, "learning_rate": 8.939661484031213e-06, "loss": 0.5929, "step": 22112 }, { "epoch": 1.643478260869565, "grad_norm": 1.8745901498414237, "learning_rate": 8.938863642527863e-06, "loss": 0.5036, "step": 22113 }, { "epoch": 1.6435525826830175, "grad_norm": 1.4910761274881421, "learning_rate": 8.93806580785605e-06, "loss": 0.3377, "step": 22114 }, { "epoch": 1.6436269044964698, "grad_norm": 2.322163872342445, "learning_rate": 8.937267980020914e-06, "loss": 0.6691, "step": 22115 }, { "epoch": 1.643701226309922, "grad_norm": 1.885914774184359, "learning_rate": 8.936470159027598e-06, "loss": 0.5903, "step": 22116 }, { "epoch": 1.6437755481233742, "grad_norm": 2.0911492893660797, "learning_rate": 8.935672344881226e-06, "loss": 0.6346, "step": 22117 }, { "epoch": 1.6438498699368265, "grad_norm": 1.7185844155257795, "learning_rate": 8.934874537586941e-06, "loss": 0.4864, "step": 22118 }, { "epoch": 1.6439241917502787, "grad_norm": 1.9304902588046708, "learning_rate": 8.934076737149878e-06, "loss": 0.4665, "step": 22119 }, { "epoch": 1.643998513563731, "grad_norm": 1.9872676145302157, "learning_rate": 8.933278943575175e-06, "loss": 0.6009, "step": 22120 }, { "epoch": 1.6440728353771832, "grad_norm": 1.5400852600561712, "learning_rate": 8.932481156867967e-06, "loss": 0.4457, "step": 22121 }, { "epoch": 1.6441471571906354, "grad_norm": 2.017231495047683, "learning_rate": 8.931683377033387e-06, "loss": 0.5313, "step": 22122 }, { "epoch": 1.6442214790040877, "grad_norm": 1.8781518137137905, "learning_rate": 8.930885604076576e-06, "loss": 0.6182, "step": 22123 }, { "epoch": 1.64429580081754, "grad_norm": 2.1860565775134173, "learning_rate": 8.93008783800267e-06, "loss": 0.6674, "step": 22124 }, { "epoch": 1.6443701226309924, "grad_norm": 1.53111442431114, "learning_rate": 8.9292900788168e-06, "loss": 0.5113, "step": 22125 }, { "epoch": 1.6444444444444444, "grad_norm": 1.6870064575240553, "learning_rate": 8.928492326524106e-06, "loss": 0.5712, "step": 22126 }, { "epoch": 1.6445187662578968, "grad_norm": 1.9355534288009029, "learning_rate": 8.927694581129726e-06, "loss": 0.5437, "step": 22127 }, { "epoch": 1.6445930880713489, "grad_norm": 1.684089852444192, "learning_rate": 8.92689684263879e-06, "loss": 0.5059, "step": 22128 }, { "epoch": 1.6446674098848013, "grad_norm": 2.028267871872332, "learning_rate": 8.926099111056434e-06, "loss": 0.6503, "step": 22129 }, { "epoch": 1.6447417316982533, "grad_norm": 2.1953726342187836, "learning_rate": 8.925301386387798e-06, "loss": 0.5569, "step": 22130 }, { "epoch": 1.6448160535117058, "grad_norm": 1.9501487962534882, "learning_rate": 8.924503668638017e-06, "loss": 0.5451, "step": 22131 }, { "epoch": 1.6448903753251578, "grad_norm": 1.8793647872599533, "learning_rate": 8.923705957812225e-06, "loss": 0.5978, "step": 22132 }, { "epoch": 1.6449646971386103, "grad_norm": 1.8838751347289275, "learning_rate": 8.922908253915557e-06, "loss": 0.5365, "step": 22133 }, { "epoch": 1.6450390189520623, "grad_norm": 1.921189582671303, "learning_rate": 8.92211055695315e-06, "loss": 0.6519, "step": 22134 }, { "epoch": 1.6451133407655147, "grad_norm": 1.9117689867373522, "learning_rate": 8.92131286693014e-06, "loss": 0.6831, "step": 22135 }, { "epoch": 1.645187662578967, "grad_norm": 1.897193895709881, "learning_rate": 8.920515183851662e-06, "loss": 0.5289, "step": 22136 }, { "epoch": 1.6452619843924192, "grad_norm": 1.4246515103640716, "learning_rate": 8.91971750772285e-06, "loss": 0.4153, "step": 22137 }, { "epoch": 1.6453363062058715, "grad_norm": 1.8820884241669844, "learning_rate": 8.918919838548846e-06, "loss": 0.6553, "step": 22138 }, { "epoch": 1.6454106280193237, "grad_norm": 2.1972130639811165, "learning_rate": 8.918122176334776e-06, "loss": 0.6236, "step": 22139 }, { "epoch": 1.645484949832776, "grad_norm": 1.9714326395613697, "learning_rate": 8.917324521085776e-06, "loss": 0.7106, "step": 22140 }, { "epoch": 1.6455592716462282, "grad_norm": 1.574774405473219, "learning_rate": 8.916526872806987e-06, "loss": 0.4296, "step": 22141 }, { "epoch": 1.6456335934596804, "grad_norm": 1.7776245859769035, "learning_rate": 8.915729231503543e-06, "loss": 0.5246, "step": 22142 }, { "epoch": 1.6457079152731326, "grad_norm": 1.9680253585153293, "learning_rate": 8.914931597180577e-06, "loss": 0.6323, "step": 22143 }, { "epoch": 1.6457822370865849, "grad_norm": 1.8086571479622437, "learning_rate": 8.914133969843224e-06, "loss": 0.5786, "step": 22144 }, { "epoch": 1.6458565589000371, "grad_norm": 1.836013244097908, "learning_rate": 8.913336349496623e-06, "loss": 0.5402, "step": 22145 }, { "epoch": 1.6459308807134894, "grad_norm": 1.5288269967310504, "learning_rate": 8.912538736145905e-06, "loss": 0.4763, "step": 22146 }, { "epoch": 1.6460052025269416, "grad_norm": 1.96882293109247, "learning_rate": 8.911741129796205e-06, "loss": 0.6412, "step": 22147 }, { "epoch": 1.646079524340394, "grad_norm": 1.8924488345100603, "learning_rate": 8.91094353045266e-06, "loss": 0.3848, "step": 22148 }, { "epoch": 1.646153846153846, "grad_norm": 1.856838387713153, "learning_rate": 8.910145938120403e-06, "loss": 0.6167, "step": 22149 }, { "epoch": 1.6462281679672985, "grad_norm": 1.7182610680276311, "learning_rate": 8.909348352804575e-06, "loss": 0.5081, "step": 22150 }, { "epoch": 1.6463024897807506, "grad_norm": 1.7952478540714922, "learning_rate": 8.9085507745103e-06, "loss": 0.4369, "step": 22151 }, { "epoch": 1.646376811594203, "grad_norm": 1.4764413072379226, "learning_rate": 8.907753203242721e-06, "loss": 0.4616, "step": 22152 }, { "epoch": 1.646451133407655, "grad_norm": 2.1066012426205902, "learning_rate": 8.90695563900697e-06, "loss": 0.6395, "step": 22153 }, { "epoch": 1.6465254552211075, "grad_norm": 2.007727466089376, "learning_rate": 8.90615808180818e-06, "loss": 0.6671, "step": 22154 }, { "epoch": 1.6465997770345595, "grad_norm": 1.733003105681508, "learning_rate": 8.905360531651488e-06, "loss": 0.449, "step": 22155 }, { "epoch": 1.646674098848012, "grad_norm": 2.1557207687455544, "learning_rate": 8.904562988542028e-06, "loss": 0.5002, "step": 22156 }, { "epoch": 1.646748420661464, "grad_norm": 1.7916123085845055, "learning_rate": 8.903765452484936e-06, "loss": 0.6146, "step": 22157 }, { "epoch": 1.6468227424749164, "grad_norm": 1.924693375986984, "learning_rate": 8.902967923485344e-06, "loss": 0.6678, "step": 22158 }, { "epoch": 1.6468970642883687, "grad_norm": 2.443360485830508, "learning_rate": 8.902170401548388e-06, "loss": 0.6684, "step": 22159 }, { "epoch": 1.646971386101821, "grad_norm": 1.7813753350162678, "learning_rate": 8.901372886679201e-06, "loss": 0.4973, "step": 22160 }, { "epoch": 1.6470457079152732, "grad_norm": 2.963078983720813, "learning_rate": 8.900575378882924e-06, "loss": 0.5311, "step": 22161 }, { "epoch": 1.6471200297287254, "grad_norm": 1.7217086343080408, "learning_rate": 8.899777878164679e-06, "loss": 0.6555, "step": 22162 }, { "epoch": 1.6471943515421776, "grad_norm": 2.220799995963989, "learning_rate": 8.898980384529611e-06, "loss": 0.6688, "step": 22163 }, { "epoch": 1.6472686733556299, "grad_norm": 2.208087426284298, "learning_rate": 8.898182897982848e-06, "loss": 0.635, "step": 22164 }, { "epoch": 1.6473429951690821, "grad_norm": 2.0952387432697446, "learning_rate": 8.897385418529526e-06, "loss": 0.6101, "step": 22165 }, { "epoch": 1.6474173169825344, "grad_norm": 2.441232614002037, "learning_rate": 8.896587946174779e-06, "loss": 0.6547, "step": 22166 }, { "epoch": 1.6474916387959866, "grad_norm": 2.07009985932962, "learning_rate": 8.895790480923742e-06, "loss": 0.687, "step": 22167 }, { "epoch": 1.6475659606094388, "grad_norm": 1.61159368365853, "learning_rate": 8.89499302278155e-06, "loss": 0.4758, "step": 22168 }, { "epoch": 1.647640282422891, "grad_norm": 2.092391906783246, "learning_rate": 8.894195571753335e-06, "loss": 0.4579, "step": 22169 }, { "epoch": 1.6477146042363433, "grad_norm": 2.0131777882085786, "learning_rate": 8.893398127844231e-06, "loss": 0.5896, "step": 22170 }, { "epoch": 1.6477889260497958, "grad_norm": 2.1333352906074823, "learning_rate": 8.89260069105937e-06, "loss": 0.599, "step": 22171 }, { "epoch": 1.6478632478632478, "grad_norm": 2.6112813147013605, "learning_rate": 8.891803261403897e-06, "loss": 0.6498, "step": 22172 }, { "epoch": 1.6479375696767002, "grad_norm": 1.8613645422407838, "learning_rate": 8.891005838882929e-06, "loss": 0.5124, "step": 22173 }, { "epoch": 1.6480118914901523, "grad_norm": 2.2796847007907766, "learning_rate": 8.890208423501612e-06, "loss": 0.6541, "step": 22174 }, { "epoch": 1.6480862133036047, "grad_norm": 1.9972902395629513, "learning_rate": 8.889411015265072e-06, "loss": 0.5999, "step": 22175 }, { "epoch": 1.6481605351170567, "grad_norm": 1.8703593067497564, "learning_rate": 8.888613614178449e-06, "loss": 0.594, "step": 22176 }, { "epoch": 1.6482348569305092, "grad_norm": 1.7510747736046681, "learning_rate": 8.887816220246871e-06, "loss": 0.4319, "step": 22177 }, { "epoch": 1.6483091787439612, "grad_norm": 2.1351355280971025, "learning_rate": 8.887018833475476e-06, "loss": 0.6672, "step": 22178 }, { "epoch": 1.6483835005574137, "grad_norm": 2.0392770837838357, "learning_rate": 8.886221453869397e-06, "loss": 0.6075, "step": 22179 }, { "epoch": 1.6484578223708657, "grad_norm": 1.845346141790222, "learning_rate": 8.885424081433765e-06, "loss": 0.5979, "step": 22180 }, { "epoch": 1.6485321441843181, "grad_norm": 2.0423663414408884, "learning_rate": 8.884626716173715e-06, "loss": 0.6908, "step": 22181 }, { "epoch": 1.6486064659977704, "grad_norm": 2.3959878409363187, "learning_rate": 8.88382935809438e-06, "loss": 0.5852, "step": 22182 }, { "epoch": 1.6486807878112226, "grad_norm": 1.8500486834779433, "learning_rate": 8.883032007200897e-06, "loss": 0.5569, "step": 22183 }, { "epoch": 1.6487551096246749, "grad_norm": 2.1243134341716647, "learning_rate": 8.882234663498395e-06, "loss": 0.6905, "step": 22184 }, { "epoch": 1.648829431438127, "grad_norm": 1.6432622945599675, "learning_rate": 8.881437326992008e-06, "loss": 0.4588, "step": 22185 }, { "epoch": 1.6489037532515793, "grad_norm": 1.9364080310709815, "learning_rate": 8.880639997686867e-06, "loss": 0.6587, "step": 22186 }, { "epoch": 1.6489780750650316, "grad_norm": 2.2536658095707263, "learning_rate": 8.879842675588108e-06, "loss": 0.6407, "step": 22187 }, { "epoch": 1.6490523968784838, "grad_norm": 1.9849156401024215, "learning_rate": 8.879045360700865e-06, "loss": 0.6901, "step": 22188 }, { "epoch": 1.649126718691936, "grad_norm": 2.3165369542078795, "learning_rate": 8.878248053030268e-06, "loss": 0.7696, "step": 22189 }, { "epoch": 1.6492010405053883, "grad_norm": 2.0580481180519445, "learning_rate": 8.877450752581456e-06, "loss": 0.5789, "step": 22190 }, { "epoch": 1.6492753623188405, "grad_norm": 2.2893114790624187, "learning_rate": 8.876653459359553e-06, "loss": 0.5421, "step": 22191 }, { "epoch": 1.649349684132293, "grad_norm": 1.9656366862545533, "learning_rate": 8.8758561733697e-06, "loss": 0.7227, "step": 22192 }, { "epoch": 1.649424005945745, "grad_norm": 1.610882466291375, "learning_rate": 8.875058894617023e-06, "loss": 0.5655, "step": 22193 }, { "epoch": 1.6494983277591975, "grad_norm": 2.0360437149425965, "learning_rate": 8.874261623106663e-06, "loss": 0.5276, "step": 22194 }, { "epoch": 1.6495726495726495, "grad_norm": 2.7246408158907225, "learning_rate": 8.873464358843747e-06, "loss": 0.5211, "step": 22195 }, { "epoch": 1.649646971386102, "grad_norm": 2.0234125728611736, "learning_rate": 8.87266710183341e-06, "loss": 0.7137, "step": 22196 }, { "epoch": 1.649721293199554, "grad_norm": 2.1009483585786066, "learning_rate": 8.871869852080782e-06, "loss": 0.597, "step": 22197 }, { "epoch": 1.6497956150130064, "grad_norm": 2.2880628736956177, "learning_rate": 8.871072609590997e-06, "loss": 0.6202, "step": 22198 }, { "epoch": 1.6498699368264584, "grad_norm": 1.9644302392608481, "learning_rate": 8.870275374369186e-06, "loss": 0.537, "step": 22199 }, { "epoch": 1.649944258639911, "grad_norm": 5.088773198096917, "learning_rate": 8.869478146420485e-06, "loss": 0.613, "step": 22200 }, { "epoch": 1.650018580453363, "grad_norm": 1.6274853988718405, "learning_rate": 8.868680925750029e-06, "loss": 0.5263, "step": 22201 }, { "epoch": 1.6500929022668154, "grad_norm": 2.112096762943554, "learning_rate": 8.86788371236294e-06, "loss": 0.5956, "step": 22202 }, { "epoch": 1.6501672240802676, "grad_norm": 1.835214275101659, "learning_rate": 8.86708650626436e-06, "loss": 0.6241, "step": 22203 }, { "epoch": 1.6502415458937199, "grad_norm": 1.8919278160827095, "learning_rate": 8.86628930745942e-06, "loss": 0.51, "step": 22204 }, { "epoch": 1.650315867707172, "grad_norm": 1.493012889754964, "learning_rate": 8.865492115953249e-06, "loss": 0.4951, "step": 22205 }, { "epoch": 1.6503901895206243, "grad_norm": 2.003438840469183, "learning_rate": 8.864694931750982e-06, "loss": 0.797, "step": 22206 }, { "epoch": 1.6504645113340766, "grad_norm": 1.6986972002857217, "learning_rate": 8.863897754857751e-06, "loss": 0.6401, "step": 22207 }, { "epoch": 1.6505388331475288, "grad_norm": 2.305111778381013, "learning_rate": 8.863100585278685e-06, "loss": 0.6582, "step": 22208 }, { "epoch": 1.650613154960981, "grad_norm": 1.9043620159373251, "learning_rate": 8.86230342301892e-06, "loss": 0.5999, "step": 22209 }, { "epoch": 1.6506874767744333, "grad_norm": 2.4307675166694698, "learning_rate": 8.861506268083583e-06, "loss": 0.6613, "step": 22210 }, { "epoch": 1.6507617985878855, "grad_norm": 2.2035513020295303, "learning_rate": 8.86070912047781e-06, "loss": 0.6469, "step": 22211 }, { "epoch": 1.6508361204013378, "grad_norm": 2.295585712928007, "learning_rate": 8.859911980206737e-06, "loss": 0.559, "step": 22212 }, { "epoch": 1.65091044221479, "grad_norm": 2.7030292635304334, "learning_rate": 8.859114847275488e-06, "loss": 0.5714, "step": 22213 }, { "epoch": 1.6509847640282422, "grad_norm": 1.788327368708016, "learning_rate": 8.858317721689199e-06, "loss": 0.4891, "step": 22214 }, { "epoch": 1.6510590858416947, "grad_norm": 2.153997155931857, "learning_rate": 8.857520603453001e-06, "loss": 0.6517, "step": 22215 }, { "epoch": 1.6511334076551467, "grad_norm": 1.9111310301084539, "learning_rate": 8.85672349257203e-06, "loss": 0.5535, "step": 22216 }, { "epoch": 1.6512077294685992, "grad_norm": 2.1854663584906637, "learning_rate": 8.855926389051408e-06, "loss": 0.6763, "step": 22217 }, { "epoch": 1.6512820512820512, "grad_norm": 2.207444385234586, "learning_rate": 8.85512929289628e-06, "loss": 0.6385, "step": 22218 }, { "epoch": 1.6513563730955036, "grad_norm": 1.75325350303248, "learning_rate": 8.854332204111764e-06, "loss": 0.5867, "step": 22219 }, { "epoch": 1.6514306949089557, "grad_norm": 1.630729774425938, "learning_rate": 8.853535122702999e-06, "loss": 0.593, "step": 22220 }, { "epoch": 1.6515050167224081, "grad_norm": 1.9940318338344456, "learning_rate": 8.852738048675114e-06, "loss": 0.5989, "step": 22221 }, { "epoch": 1.6515793385358601, "grad_norm": 2.0178460608230377, "learning_rate": 8.851940982033243e-06, "loss": 0.5999, "step": 22222 }, { "epoch": 1.6516536603493126, "grad_norm": 1.8824887002594544, "learning_rate": 8.85114392278252e-06, "loss": 0.5611, "step": 22223 }, { "epoch": 1.6517279821627646, "grad_norm": 2.0198779685272683, "learning_rate": 8.850346870928068e-06, "loss": 0.5989, "step": 22224 }, { "epoch": 1.651802303976217, "grad_norm": 1.7174013886750206, "learning_rate": 8.849549826475025e-06, "loss": 0.5092, "step": 22225 }, { "epoch": 1.6518766257896693, "grad_norm": 2.1357455039384643, "learning_rate": 8.848752789428518e-06, "loss": 0.7366, "step": 22226 }, { "epoch": 1.6519509476031216, "grad_norm": 1.9480755498937399, "learning_rate": 8.847955759793686e-06, "loss": 0.6108, "step": 22227 }, { "epoch": 1.6520252694165738, "grad_norm": 2.0703921274740074, "learning_rate": 8.847158737575652e-06, "loss": 0.5805, "step": 22228 }, { "epoch": 1.652099591230026, "grad_norm": 2.1246818321770027, "learning_rate": 8.846361722779554e-06, "loss": 0.5874, "step": 22229 }, { "epoch": 1.6521739130434783, "grad_norm": 1.6111769286143012, "learning_rate": 8.845564715410516e-06, "loss": 0.4621, "step": 22230 }, { "epoch": 1.6522482348569305, "grad_norm": 1.8461805840187946, "learning_rate": 8.84476771547367e-06, "loss": 0.6346, "step": 22231 }, { "epoch": 1.6523225566703827, "grad_norm": 1.4960968684403495, "learning_rate": 8.84397072297415e-06, "loss": 0.4149, "step": 22232 }, { "epoch": 1.652396878483835, "grad_norm": 2.089508168914007, "learning_rate": 8.843173737917089e-06, "loss": 0.7515, "step": 22233 }, { "epoch": 1.6524712002972872, "grad_norm": 2.3533015280102103, "learning_rate": 8.842376760307617e-06, "loss": 0.7403, "step": 22234 }, { "epoch": 1.6525455221107395, "grad_norm": 1.8877498971423474, "learning_rate": 8.84157979015086e-06, "loss": 0.6464, "step": 22235 }, { "epoch": 1.6526198439241917, "grad_norm": 1.8913039306981456, "learning_rate": 8.840782827451952e-06, "loss": 0.7303, "step": 22236 }, { "epoch": 1.652694165737644, "grad_norm": 2.0232034419659617, "learning_rate": 8.839985872216025e-06, "loss": 0.6139, "step": 22237 }, { "epoch": 1.6527684875510964, "grad_norm": 2.175165366364261, "learning_rate": 8.839188924448212e-06, "loss": 0.8154, "step": 22238 }, { "epoch": 1.6528428093645484, "grad_norm": 2.203056499312787, "learning_rate": 8.838391984153636e-06, "loss": 0.8147, "step": 22239 }, { "epoch": 1.6529171311780009, "grad_norm": 2.2482664532966776, "learning_rate": 8.837595051337438e-06, "loss": 0.6097, "step": 22240 }, { "epoch": 1.652991452991453, "grad_norm": 2.3945127648255373, "learning_rate": 8.836798126004737e-06, "loss": 0.6262, "step": 22241 }, { "epoch": 1.6530657748049054, "grad_norm": 2.3911856764287407, "learning_rate": 8.83600120816067e-06, "loss": 0.6224, "step": 22242 }, { "epoch": 1.6531400966183574, "grad_norm": 2.106083389882248, "learning_rate": 8.835204297810366e-06, "loss": 0.5213, "step": 22243 }, { "epoch": 1.6532144184318098, "grad_norm": 1.6549529052321514, "learning_rate": 8.834407394958958e-06, "loss": 0.4651, "step": 22244 }, { "epoch": 1.6532887402452618, "grad_norm": 1.829343543888045, "learning_rate": 8.833610499611576e-06, "loss": 0.5571, "step": 22245 }, { "epoch": 1.6533630620587143, "grad_norm": 2.2691599982959163, "learning_rate": 8.832813611773348e-06, "loss": 0.574, "step": 22246 }, { "epoch": 1.6534373838721663, "grad_norm": 2.3067353469834098, "learning_rate": 8.832016731449404e-06, "loss": 0.7316, "step": 22247 }, { "epoch": 1.6535117056856188, "grad_norm": 1.7533997105639598, "learning_rate": 8.831219858644876e-06, "loss": 0.6041, "step": 22248 }, { "epoch": 1.653586027499071, "grad_norm": 1.5088570183107863, "learning_rate": 8.830422993364897e-06, "loss": 0.4514, "step": 22249 }, { "epoch": 1.6536603493125233, "grad_norm": 2.076630505694157, "learning_rate": 8.82962613561459e-06, "loss": 0.5896, "step": 22250 }, { "epoch": 1.6537346711259755, "grad_norm": 1.6464487660131635, "learning_rate": 8.828829285399095e-06, "loss": 0.5518, "step": 22251 }, { "epoch": 1.6538089929394277, "grad_norm": 7.94593791917024, "learning_rate": 8.828032442723532e-06, "loss": 0.6257, "step": 22252 }, { "epoch": 1.65388331475288, "grad_norm": 1.7367045631648175, "learning_rate": 8.827235607593035e-06, "loss": 0.4344, "step": 22253 }, { "epoch": 1.6539576365663322, "grad_norm": 1.8918359219861487, "learning_rate": 8.826438780012734e-06, "loss": 0.4967, "step": 22254 }, { "epoch": 1.6540319583797845, "grad_norm": 2.091879460848899, "learning_rate": 8.82564195998776e-06, "loss": 0.5887, "step": 22255 }, { "epoch": 1.6541062801932367, "grad_norm": 2.044676959207695, "learning_rate": 8.824845147523243e-06, "loss": 0.6879, "step": 22256 }, { "epoch": 1.654180602006689, "grad_norm": 1.989364767283063, "learning_rate": 8.824048342624312e-06, "loss": 0.5575, "step": 22257 }, { "epoch": 1.6542549238201412, "grad_norm": 3.3090207962236247, "learning_rate": 8.823251545296095e-06, "loss": 0.7258, "step": 22258 }, { "epoch": 1.6543292456335936, "grad_norm": 1.8357438356863636, "learning_rate": 8.822454755543724e-06, "loss": 0.5624, "step": 22259 }, { "epoch": 1.6544035674470456, "grad_norm": 2.2753767440319863, "learning_rate": 8.821657973372329e-06, "loss": 0.6302, "step": 22260 }, { "epoch": 1.654477889260498, "grad_norm": 1.6324763397083157, "learning_rate": 8.820861198787039e-06, "loss": 0.4787, "step": 22261 }, { "epoch": 1.6545522110739501, "grad_norm": 2.117465267406635, "learning_rate": 8.820064431792982e-06, "loss": 0.6128, "step": 22262 }, { "epoch": 1.6546265328874026, "grad_norm": 1.620636986081753, "learning_rate": 8.819267672395293e-06, "loss": 0.4704, "step": 22263 }, { "epoch": 1.6547008547008546, "grad_norm": 2.4892442062190474, "learning_rate": 8.818470920599094e-06, "loss": 0.5854, "step": 22264 }, { "epoch": 1.654775176514307, "grad_norm": 2.2210353912830567, "learning_rate": 8.817674176409518e-06, "loss": 0.6881, "step": 22265 }, { "epoch": 1.654849498327759, "grad_norm": 1.6843020055499964, "learning_rate": 8.816877439831694e-06, "loss": 0.5725, "step": 22266 }, { "epoch": 1.6549238201412115, "grad_norm": 1.9472802053368305, "learning_rate": 8.816080710870753e-06, "loss": 0.4889, "step": 22267 }, { "epoch": 1.6549981419546635, "grad_norm": 3.218359093502668, "learning_rate": 8.815283989531823e-06, "loss": 0.5523, "step": 22268 }, { "epoch": 1.655072463768116, "grad_norm": 2.4747763510463456, "learning_rate": 8.814487275820031e-06, "loss": 0.7823, "step": 22269 }, { "epoch": 1.655146785581568, "grad_norm": 2.157109476139293, "learning_rate": 8.81369056974051e-06, "loss": 0.5224, "step": 22270 }, { "epoch": 1.6552211073950205, "grad_norm": 2.2200213585950457, "learning_rate": 8.812893871298389e-06, "loss": 0.7625, "step": 22271 }, { "epoch": 1.6552954292084727, "grad_norm": 1.9409319232981486, "learning_rate": 8.812097180498793e-06, "loss": 0.5331, "step": 22272 }, { "epoch": 1.655369751021925, "grad_norm": 1.909505924975598, "learning_rate": 8.811300497346853e-06, "loss": 0.5404, "step": 22273 }, { "epoch": 1.6554440728353772, "grad_norm": 1.5737405694713953, "learning_rate": 8.810503821847705e-06, "loss": 0.4246, "step": 22274 }, { "epoch": 1.6555183946488294, "grad_norm": 1.7326652594313323, "learning_rate": 8.809707154006465e-06, "loss": 0.4895, "step": 22275 }, { "epoch": 1.6555927164622817, "grad_norm": 1.9528765692844912, "learning_rate": 8.80891049382827e-06, "loss": 0.5639, "step": 22276 }, { "epoch": 1.655667038275734, "grad_norm": 1.946789822789892, "learning_rate": 8.808113841318252e-06, "loss": 0.6127, "step": 22277 }, { "epoch": 1.6557413600891862, "grad_norm": 1.8554029752444707, "learning_rate": 8.80731719648153e-06, "loss": 0.7204, "step": 22278 }, { "epoch": 1.6558156819026384, "grad_norm": 2.157989622502995, "learning_rate": 8.806520559323239e-06, "loss": 0.6717, "step": 22279 }, { "epoch": 1.6558900037160906, "grad_norm": 2.203629108574108, "learning_rate": 8.805723929848506e-06, "loss": 0.6321, "step": 22280 }, { "epoch": 1.6559643255295429, "grad_norm": 1.8250174786625328, "learning_rate": 8.804927308062459e-06, "loss": 0.5245, "step": 22281 }, { "epoch": 1.6560386473429953, "grad_norm": 2.0340356788333462, "learning_rate": 8.804130693970233e-06, "loss": 0.7074, "step": 22282 }, { "epoch": 1.6561129691564473, "grad_norm": 1.90661296399479, "learning_rate": 8.80333408757695e-06, "loss": 0.4157, "step": 22283 }, { "epoch": 1.6561872909698998, "grad_norm": 2.4333238695023875, "learning_rate": 8.802537488887738e-06, "loss": 0.7655, "step": 22284 }, { "epoch": 1.6562616127833518, "grad_norm": 2.303691161262814, "learning_rate": 8.801740897907732e-06, "loss": 0.6976, "step": 22285 }, { "epoch": 1.6563359345968043, "grad_norm": 2.07113431370241, "learning_rate": 8.800944314642051e-06, "loss": 0.619, "step": 22286 }, { "epoch": 1.6564102564102563, "grad_norm": 2.0893647258838848, "learning_rate": 8.80014773909583e-06, "loss": 0.6484, "step": 22287 }, { "epoch": 1.6564845782237088, "grad_norm": 2.8717272465865316, "learning_rate": 8.799351171274196e-06, "loss": 0.6317, "step": 22288 }, { "epoch": 1.6565589000371608, "grad_norm": 1.8882870176432578, "learning_rate": 8.798554611182276e-06, "loss": 0.5595, "step": 22289 }, { "epoch": 1.6566332218506132, "grad_norm": 17.328524520149696, "learning_rate": 8.797758058825199e-06, "loss": 0.8303, "step": 22290 }, { "epoch": 1.6567075436640653, "grad_norm": 2.0177074128016232, "learning_rate": 8.796961514208092e-06, "loss": 0.5711, "step": 22291 }, { "epoch": 1.6567818654775177, "grad_norm": 1.771447172028924, "learning_rate": 8.796164977336085e-06, "loss": 0.5559, "step": 22292 }, { "epoch": 1.65685618729097, "grad_norm": 2.0883586234350466, "learning_rate": 8.795368448214309e-06, "loss": 0.6797, "step": 22293 }, { "epoch": 1.6569305091044222, "grad_norm": 1.6420222047152486, "learning_rate": 8.794571926847884e-06, "loss": 0.5635, "step": 22294 }, { "epoch": 1.6570048309178744, "grad_norm": 2.1481181823646636, "learning_rate": 8.793775413241944e-06, "loss": 0.7252, "step": 22295 }, { "epoch": 1.6570791527313267, "grad_norm": 1.6645696174964224, "learning_rate": 8.792978907401614e-06, "loss": 0.5203, "step": 22296 }, { "epoch": 1.657153474544779, "grad_norm": 1.4530136460784782, "learning_rate": 8.792182409332029e-06, "loss": 0.348, "step": 22297 }, { "epoch": 1.6572277963582311, "grad_norm": 1.9258985214288682, "learning_rate": 8.791385919038306e-06, "loss": 0.546, "step": 22298 }, { "epoch": 1.6573021181716834, "grad_norm": 1.8499514479917734, "learning_rate": 8.79058943652558e-06, "loss": 0.5846, "step": 22299 }, { "epoch": 1.6573764399851356, "grad_norm": 1.6882470643461487, "learning_rate": 8.789792961798974e-06, "loss": 0.5759, "step": 22300 }, { "epoch": 1.6574507617985879, "grad_norm": 2.2908801219546957, "learning_rate": 8.788996494863618e-06, "loss": 0.6911, "step": 22301 }, { "epoch": 1.65752508361204, "grad_norm": 1.662435277211218, "learning_rate": 8.78820003572464e-06, "loss": 0.4324, "step": 22302 }, { "epoch": 1.6575994054254923, "grad_norm": 2.4018394899310356, "learning_rate": 8.787403584387168e-06, "loss": 0.6474, "step": 22303 }, { "epoch": 1.6576737272389446, "grad_norm": 1.8890025558134822, "learning_rate": 8.78660714085633e-06, "loss": 0.6751, "step": 22304 }, { "epoch": 1.657748049052397, "grad_norm": 1.9931434849432945, "learning_rate": 8.78581070513725e-06, "loss": 0.5901, "step": 22305 }, { "epoch": 1.657822370865849, "grad_norm": 1.4860535688101588, "learning_rate": 8.785014277235059e-06, "loss": 0.4561, "step": 22306 }, { "epoch": 1.6578966926793015, "grad_norm": 1.894554870586918, "learning_rate": 8.784217857154882e-06, "loss": 0.5979, "step": 22307 }, { "epoch": 1.6579710144927535, "grad_norm": 2.0245842564420413, "learning_rate": 8.783421444901852e-06, "loss": 0.6123, "step": 22308 }, { "epoch": 1.658045336306206, "grad_norm": 1.9714958031696592, "learning_rate": 8.782625040481087e-06, "loss": 0.5403, "step": 22309 }, { "epoch": 1.658119658119658, "grad_norm": 1.9472517304718626, "learning_rate": 8.78182864389772e-06, "loss": 0.5798, "step": 22310 }, { "epoch": 1.6581939799331105, "grad_norm": 2.8775961166542063, "learning_rate": 8.781032255156877e-06, "loss": 0.6108, "step": 22311 }, { "epoch": 1.6582683017465625, "grad_norm": 1.7296107416929607, "learning_rate": 8.780235874263683e-06, "loss": 0.5055, "step": 22312 }, { "epoch": 1.658342623560015, "grad_norm": 2.1733290239792566, "learning_rate": 8.779439501223269e-06, "loss": 0.6278, "step": 22313 }, { "epoch": 1.658416945373467, "grad_norm": 1.9005500414155259, "learning_rate": 8.778643136040759e-06, "loss": 0.556, "step": 22314 }, { "epoch": 1.6584912671869194, "grad_norm": 1.8166663791611404, "learning_rate": 8.777846778721283e-06, "loss": 0.4999, "step": 22315 }, { "epoch": 1.6585655890003717, "grad_norm": 1.97140571100599, "learning_rate": 8.777050429269965e-06, "loss": 0.4533, "step": 22316 }, { "epoch": 1.658639910813824, "grad_norm": 2.063336890932538, "learning_rate": 8.776254087691932e-06, "loss": 0.5673, "step": 22317 }, { "epoch": 1.6587142326272761, "grad_norm": 1.6200890005038557, "learning_rate": 8.77545775399231e-06, "loss": 0.4447, "step": 22318 }, { "epoch": 1.6587885544407284, "grad_norm": 2.286029909914121, "learning_rate": 8.774661428176235e-06, "loss": 0.5563, "step": 22319 }, { "epoch": 1.6588628762541806, "grad_norm": 5.568590179379969, "learning_rate": 8.77386511024882e-06, "loss": 0.6729, "step": 22320 }, { "epoch": 1.6589371980676328, "grad_norm": 2.2598970223902954, "learning_rate": 8.773068800215202e-06, "loss": 0.6722, "step": 22321 }, { "epoch": 1.659011519881085, "grad_norm": 2.066868232216703, "learning_rate": 8.7722724980805e-06, "loss": 0.6646, "step": 22322 }, { "epoch": 1.6590858416945373, "grad_norm": 1.8702788113060174, "learning_rate": 8.771476203849844e-06, "loss": 0.499, "step": 22323 }, { "epoch": 1.6591601635079896, "grad_norm": 1.812806875132897, "learning_rate": 8.77067991752836e-06, "loss": 0.576, "step": 22324 }, { "epoch": 1.6592344853214418, "grad_norm": 1.7751425533734324, "learning_rate": 8.769883639121174e-06, "loss": 0.5559, "step": 22325 }, { "epoch": 1.659308807134894, "grad_norm": 2.0845522090732223, "learning_rate": 8.769087368633416e-06, "loss": 0.4825, "step": 22326 }, { "epoch": 1.6593831289483463, "grad_norm": 2.371723120774961, "learning_rate": 8.76829110607021e-06, "loss": 0.5419, "step": 22327 }, { "epoch": 1.6594574507617987, "grad_norm": 2.0138519907453007, "learning_rate": 8.767494851436679e-06, "loss": 0.6312, "step": 22328 }, { "epoch": 1.6595317725752508, "grad_norm": 2.214763512577195, "learning_rate": 8.766698604737953e-06, "loss": 0.623, "step": 22329 }, { "epoch": 1.6596060943887032, "grad_norm": 2.4449072286410556, "learning_rate": 8.765902365979163e-06, "loss": 0.7082, "step": 22330 }, { "epoch": 1.6596804162021552, "grad_norm": 1.7501718461796163, "learning_rate": 8.765106135165423e-06, "loss": 0.4252, "step": 22331 }, { "epoch": 1.6597547380156077, "grad_norm": 1.971104818762862, "learning_rate": 8.764309912301869e-06, "loss": 0.5497, "step": 22332 }, { "epoch": 1.6598290598290597, "grad_norm": 2.0593797776989655, "learning_rate": 8.763513697393622e-06, "loss": 0.6205, "step": 22333 }, { "epoch": 1.6599033816425122, "grad_norm": 1.97835198566432, "learning_rate": 8.762717490445808e-06, "loss": 0.6086, "step": 22334 }, { "epoch": 1.6599777034559642, "grad_norm": 2.185261246988588, "learning_rate": 8.761921291463557e-06, "loss": 0.7477, "step": 22335 }, { "epoch": 1.6600520252694166, "grad_norm": 1.9424977181778447, "learning_rate": 8.76112510045199e-06, "loss": 0.5471, "step": 22336 }, { "epoch": 1.6601263470828687, "grad_norm": 1.7138680986105765, "learning_rate": 8.760328917416238e-06, "loss": 0.647, "step": 22337 }, { "epoch": 1.6602006688963211, "grad_norm": 1.803997515079573, "learning_rate": 8.759532742361424e-06, "loss": 0.6024, "step": 22338 }, { "epoch": 1.6602749907097734, "grad_norm": 1.9683792320518763, "learning_rate": 8.758736575292672e-06, "loss": 0.4455, "step": 22339 }, { "epoch": 1.6603493125232256, "grad_norm": 2.02392519961981, "learning_rate": 8.757940416215111e-06, "loss": 0.6651, "step": 22340 }, { "epoch": 1.6604236343366778, "grad_norm": 1.6843564568057159, "learning_rate": 8.757144265133865e-06, "loss": 0.5919, "step": 22341 }, { "epoch": 1.66049795615013, "grad_norm": 2.2519706388909295, "learning_rate": 8.756348122054062e-06, "loss": 0.6954, "step": 22342 }, { "epoch": 1.6605722779635823, "grad_norm": 2.2003941100152407, "learning_rate": 8.755551986980824e-06, "loss": 0.5818, "step": 22343 }, { "epoch": 1.6606465997770345, "grad_norm": 1.7379466096981615, "learning_rate": 8.754755859919274e-06, "loss": 0.5763, "step": 22344 }, { "epoch": 1.6607209215904868, "grad_norm": 1.9504642004457018, "learning_rate": 8.753959740874544e-06, "loss": 0.5757, "step": 22345 }, { "epoch": 1.660795243403939, "grad_norm": 1.8903675733615586, "learning_rate": 8.753163629851756e-06, "loss": 0.535, "step": 22346 }, { "epoch": 1.6608695652173913, "grad_norm": 1.811692481767344, "learning_rate": 8.752367526856036e-06, "loss": 0.6112, "step": 22347 }, { "epoch": 1.6609438870308435, "grad_norm": 1.984461058241683, "learning_rate": 8.75157143189251e-06, "loss": 0.4124, "step": 22348 }, { "epoch": 1.661018208844296, "grad_norm": 3.1596008633187775, "learning_rate": 8.750775344966302e-06, "loss": 0.5959, "step": 22349 }, { "epoch": 1.661092530657748, "grad_norm": 1.9463471239238543, "learning_rate": 8.749979266082536e-06, "loss": 0.5044, "step": 22350 }, { "epoch": 1.6611668524712004, "grad_norm": 1.9477386659734548, "learning_rate": 8.749183195246339e-06, "loss": 0.4517, "step": 22351 }, { "epoch": 1.6612411742846525, "grad_norm": 1.8924410435745043, "learning_rate": 8.748387132462836e-06, "loss": 0.434, "step": 22352 }, { "epoch": 1.661315496098105, "grad_norm": 1.4192667390936429, "learning_rate": 8.747591077737155e-06, "loss": 0.5065, "step": 22353 }, { "epoch": 1.661389817911557, "grad_norm": 1.8593781346500293, "learning_rate": 8.746795031074415e-06, "loss": 0.5646, "step": 22354 }, { "epoch": 1.6614641397250094, "grad_norm": 2.1193585923469924, "learning_rate": 8.745998992479745e-06, "loss": 0.6325, "step": 22355 }, { "epoch": 1.6615384615384614, "grad_norm": 1.9312614293085681, "learning_rate": 8.745202961958265e-06, "loss": 0.5587, "step": 22356 }, { "epoch": 1.6616127833519139, "grad_norm": 2.107902233508471, "learning_rate": 8.744406939515105e-06, "loss": 0.5837, "step": 22357 }, { "epoch": 1.6616871051653659, "grad_norm": 5.393469571250099, "learning_rate": 8.743610925155385e-06, "loss": 0.528, "step": 22358 }, { "epoch": 1.6617614269788183, "grad_norm": 1.9281738954510619, "learning_rate": 8.742814918884239e-06, "loss": 0.5428, "step": 22359 }, { "epoch": 1.6618357487922706, "grad_norm": 2.5062103829885216, "learning_rate": 8.74201892070678e-06, "loss": 0.6146, "step": 22360 }, { "epoch": 1.6619100706057228, "grad_norm": 1.7372180003692586, "learning_rate": 8.741222930628139e-06, "loss": 0.5672, "step": 22361 }, { "epoch": 1.661984392419175, "grad_norm": 1.5468772205978747, "learning_rate": 8.74042694865344e-06, "loss": 0.4554, "step": 22362 }, { "epoch": 1.6620587142326273, "grad_norm": 1.70309214091046, "learning_rate": 8.73963097478781e-06, "loss": 0.5433, "step": 22363 }, { "epoch": 1.6621330360460795, "grad_norm": 1.9089062095795166, "learning_rate": 8.738835009036369e-06, "loss": 0.5297, "step": 22364 }, { "epoch": 1.6622073578595318, "grad_norm": 1.898032559409361, "learning_rate": 8.738039051404241e-06, "loss": 0.4995, "step": 22365 }, { "epoch": 1.662281679672984, "grad_norm": 1.7532437655771913, "learning_rate": 8.737243101896553e-06, "loss": 0.5836, "step": 22366 }, { "epoch": 1.6623560014864363, "grad_norm": 2.0113273829221527, "learning_rate": 8.736447160518427e-06, "loss": 0.6839, "step": 22367 }, { "epoch": 1.6624303232998885, "grad_norm": 1.8405478755535316, "learning_rate": 8.735651227274989e-06, "loss": 0.5334, "step": 22368 }, { "epoch": 1.6625046451133407, "grad_norm": 1.8032433524325904, "learning_rate": 8.734855302171362e-06, "loss": 0.6655, "step": 22369 }, { "epoch": 1.662578966926793, "grad_norm": 3.978118667189304, "learning_rate": 8.734059385212674e-06, "loss": 0.5995, "step": 22370 }, { "epoch": 1.6626532887402452, "grad_norm": 1.6599838224257295, "learning_rate": 8.733263476404044e-06, "loss": 0.5382, "step": 22371 }, { "epoch": 1.6627276105536977, "grad_norm": 2.949439137124536, "learning_rate": 8.732467575750598e-06, "loss": 0.7732, "step": 22372 }, { "epoch": 1.6628019323671497, "grad_norm": 2.254270406426123, "learning_rate": 8.731671683257459e-06, "loss": 0.5908, "step": 22373 }, { "epoch": 1.6628762541806021, "grad_norm": 1.7588942628660373, "learning_rate": 8.730875798929754e-06, "loss": 0.5605, "step": 22374 }, { "epoch": 1.6629505759940542, "grad_norm": 1.9440143986067508, "learning_rate": 8.730079922772603e-06, "loss": 0.6002, "step": 22375 }, { "epoch": 1.6630248978075066, "grad_norm": 1.6610914503080745, "learning_rate": 8.729284054791137e-06, "loss": 0.4898, "step": 22376 }, { "epoch": 1.6630992196209586, "grad_norm": 1.9187296009780987, "learning_rate": 8.72848819499047e-06, "loss": 0.6581, "step": 22377 }, { "epoch": 1.663173541434411, "grad_norm": 2.0708666516484535, "learning_rate": 8.727692343375728e-06, "loss": 0.6537, "step": 22378 }, { "epoch": 1.6632478632478631, "grad_norm": 3.7357895708686804, "learning_rate": 8.726896499952037e-06, "loss": 0.6446, "step": 22379 }, { "epoch": 1.6633221850613156, "grad_norm": 1.6946236379545163, "learning_rate": 8.726100664724522e-06, "loss": 0.5306, "step": 22380 }, { "epoch": 1.6633965068747676, "grad_norm": 2.2683245333913677, "learning_rate": 8.725304837698306e-06, "loss": 0.639, "step": 22381 }, { "epoch": 1.66347082868822, "grad_norm": 2.344980923672539, "learning_rate": 8.724509018878508e-06, "loss": 0.5714, "step": 22382 }, { "epoch": 1.6635451505016723, "grad_norm": 1.923876189134687, "learning_rate": 8.723713208270257e-06, "loss": 0.628, "step": 22383 }, { "epoch": 1.6636194723151245, "grad_norm": 2.3363910397600915, "learning_rate": 8.722917405878673e-06, "loss": 0.6968, "step": 22384 }, { "epoch": 1.6636937941285768, "grad_norm": 2.9095180082316183, "learning_rate": 8.722121611708883e-06, "loss": 0.5756, "step": 22385 }, { "epoch": 1.663768115942029, "grad_norm": 1.8198080228246363, "learning_rate": 8.721325825766006e-06, "loss": 0.6418, "step": 22386 }, { "epoch": 1.6638424377554812, "grad_norm": 2.1017309906130723, "learning_rate": 8.720530048055168e-06, "loss": 0.7662, "step": 22387 }, { "epoch": 1.6639167595689335, "grad_norm": 2.0130254109300134, "learning_rate": 8.719734278581491e-06, "loss": 0.6678, "step": 22388 }, { "epoch": 1.6639910813823857, "grad_norm": 1.7868552292009416, "learning_rate": 8.718938517350097e-06, "loss": 0.5898, "step": 22389 }, { "epoch": 1.664065403195838, "grad_norm": 1.8605037603971666, "learning_rate": 8.71814276436611e-06, "loss": 0.5177, "step": 22390 }, { "epoch": 1.6641397250092902, "grad_norm": 1.888535842782158, "learning_rate": 8.717347019634653e-06, "loss": 0.6194, "step": 22391 }, { "epoch": 1.6642140468227424, "grad_norm": 2.189127696693224, "learning_rate": 8.716551283160853e-06, "loss": 0.6702, "step": 22392 }, { "epoch": 1.6642883686361947, "grad_norm": 2.1875550118107037, "learning_rate": 8.715755554949825e-06, "loss": 0.6971, "step": 22393 }, { "epoch": 1.664362690449647, "grad_norm": 1.967553704238313, "learning_rate": 8.714959835006697e-06, "loss": 0.5242, "step": 22394 }, { "epoch": 1.6644370122630994, "grad_norm": 2.1010827099088565, "learning_rate": 8.714164123336591e-06, "loss": 0.5135, "step": 22395 }, { "epoch": 1.6645113340765514, "grad_norm": 2.5663505263630393, "learning_rate": 8.713368419944632e-06, "loss": 0.7201, "step": 22396 }, { "epoch": 1.6645856558900038, "grad_norm": 2.1927223737642, "learning_rate": 8.71257272483594e-06, "loss": 0.5625, "step": 22397 }, { "epoch": 1.6646599777034559, "grad_norm": 1.7630959722960728, "learning_rate": 8.711777038015639e-06, "loss": 0.6389, "step": 22398 }, { "epoch": 1.6647342995169083, "grad_norm": 1.6948085801429476, "learning_rate": 8.710981359488848e-06, "loss": 0.509, "step": 22399 }, { "epoch": 1.6648086213303603, "grad_norm": 2.2044956847332253, "learning_rate": 8.710185689260691e-06, "loss": 0.6666, "step": 22400 }, { "epoch": 1.6648829431438128, "grad_norm": 1.8129168609982336, "learning_rate": 8.709390027336294e-06, "loss": 0.4675, "step": 22401 }, { "epoch": 1.6649572649572648, "grad_norm": 1.6946991673868015, "learning_rate": 8.708594373720774e-06, "loss": 0.5742, "step": 22402 }, { "epoch": 1.6650315867707173, "grad_norm": 2.4086954791301003, "learning_rate": 8.70779872841926e-06, "loss": 0.6222, "step": 22403 }, { "epoch": 1.6651059085841693, "grad_norm": 1.7750528670256296, "learning_rate": 8.70700309143687e-06, "loss": 0.5074, "step": 22404 }, { "epoch": 1.6651802303976218, "grad_norm": 2.1779862727268413, "learning_rate": 8.706207462778725e-06, "loss": 0.6459, "step": 22405 }, { "epoch": 1.665254552211074, "grad_norm": 2.8549376530623305, "learning_rate": 8.70541184244995e-06, "loss": 0.538, "step": 22406 }, { "epoch": 1.6653288740245262, "grad_norm": 4.107777450906293, "learning_rate": 8.704616230455668e-06, "loss": 0.6919, "step": 22407 }, { "epoch": 1.6654031958379785, "grad_norm": 2.3154246630911874, "learning_rate": 8.703820626800999e-06, "loss": 0.6154, "step": 22408 }, { "epoch": 1.6654775176514307, "grad_norm": 2.2116447975432862, "learning_rate": 8.703025031491063e-06, "loss": 0.6216, "step": 22409 }, { "epoch": 1.665551839464883, "grad_norm": 1.8871636652947827, "learning_rate": 8.70222944453099e-06, "loss": 0.6473, "step": 22410 }, { "epoch": 1.6656261612783352, "grad_norm": 1.884260157478582, "learning_rate": 8.701433865925893e-06, "loss": 0.5202, "step": 22411 }, { "epoch": 1.6657004830917874, "grad_norm": 1.8240460690553264, "learning_rate": 8.700638295680896e-06, "loss": 0.501, "step": 22412 }, { "epoch": 1.6657748049052397, "grad_norm": 2.948554117097954, "learning_rate": 8.699842733801125e-06, "loss": 0.4906, "step": 22413 }, { "epoch": 1.665849126718692, "grad_norm": 2.126589680121788, "learning_rate": 8.699047180291697e-06, "loss": 0.5287, "step": 22414 }, { "epoch": 1.6659234485321441, "grad_norm": 1.50255286240744, "learning_rate": 8.698251635157736e-06, "loss": 0.4118, "step": 22415 }, { "epoch": 1.6659977703455966, "grad_norm": 6.285096340689553, "learning_rate": 8.697456098404363e-06, "loss": 0.6158, "step": 22416 }, { "epoch": 1.6660720921590486, "grad_norm": 2.7856461402677772, "learning_rate": 8.6966605700367e-06, "loss": 0.4422, "step": 22417 }, { "epoch": 1.666146413972501, "grad_norm": 2.00243871734802, "learning_rate": 8.695865050059873e-06, "loss": 0.6034, "step": 22418 }, { "epoch": 1.666220735785953, "grad_norm": 2.4495071548775016, "learning_rate": 8.695069538478993e-06, "loss": 0.6073, "step": 22419 }, { "epoch": 1.6662950575994055, "grad_norm": 1.6634231657984015, "learning_rate": 8.694274035299191e-06, "loss": 0.4985, "step": 22420 }, { "epoch": 1.6663693794128576, "grad_norm": 1.7875340793655368, "learning_rate": 8.693478540525587e-06, "loss": 0.5172, "step": 22421 }, { "epoch": 1.66644370122631, "grad_norm": 1.8959520607386189, "learning_rate": 8.692683054163297e-06, "loss": 0.4445, "step": 22422 }, { "epoch": 1.666518023039762, "grad_norm": 2.0266178255470133, "learning_rate": 8.691887576217446e-06, "loss": 0.772, "step": 22423 }, { "epoch": 1.6665923448532145, "grad_norm": 2.30190383723966, "learning_rate": 8.691092106693156e-06, "loss": 0.6962, "step": 22424 }, { "epoch": 1.6666666666666665, "grad_norm": 2.061341178597846, "learning_rate": 8.690296645595546e-06, "loss": 0.6668, "step": 22425 }, { "epoch": 1.666740988480119, "grad_norm": 2.0082768074815838, "learning_rate": 8.689501192929738e-06, "loss": 0.7443, "step": 22426 }, { "epoch": 1.6668153102935712, "grad_norm": 2.3220054108107404, "learning_rate": 8.688705748700852e-06, "loss": 0.7322, "step": 22427 }, { "epoch": 1.6668896321070235, "grad_norm": 1.9129819656559952, "learning_rate": 8.687910312914013e-06, "loss": 0.5481, "step": 22428 }, { "epoch": 1.6669639539204757, "grad_norm": 2.0462080118045085, "learning_rate": 8.687114885574339e-06, "loss": 0.4564, "step": 22429 }, { "epoch": 1.667038275733928, "grad_norm": 2.1179851778341847, "learning_rate": 8.68631946668695e-06, "loss": 0.6424, "step": 22430 }, { "epoch": 1.6671125975473802, "grad_norm": 2.202528145703826, "learning_rate": 8.685524056256968e-06, "loss": 0.6876, "step": 22431 }, { "epoch": 1.6671869193608324, "grad_norm": 2.0264672649010285, "learning_rate": 8.68472865428952e-06, "loss": 0.5828, "step": 22432 }, { "epoch": 1.6672612411742846, "grad_norm": 1.9561450445278656, "learning_rate": 8.683933260789715e-06, "loss": 0.5684, "step": 22433 }, { "epoch": 1.6673355629877369, "grad_norm": 2.2058437288657804, "learning_rate": 8.683137875762678e-06, "loss": 0.7182, "step": 22434 }, { "epoch": 1.6674098848011891, "grad_norm": 1.6787253469896057, "learning_rate": 8.682342499213534e-06, "loss": 0.5282, "step": 22435 }, { "epoch": 1.6674842066146414, "grad_norm": 2.5247830509637317, "learning_rate": 8.6815471311474e-06, "loss": 0.7151, "step": 22436 }, { "epoch": 1.6675585284280936, "grad_norm": 4.810937159975071, "learning_rate": 8.680751771569395e-06, "loss": 0.5736, "step": 22437 }, { "epoch": 1.6676328502415458, "grad_norm": 1.795538509446245, "learning_rate": 8.679956420484642e-06, "loss": 0.5246, "step": 22438 }, { "epoch": 1.6677071720549983, "grad_norm": 1.6980481734961468, "learning_rate": 8.679161077898262e-06, "loss": 0.5971, "step": 22439 }, { "epoch": 1.6677814938684503, "grad_norm": 5.515857589355108, "learning_rate": 8.678365743815378e-06, "loss": 0.5274, "step": 22440 }, { "epoch": 1.6678558156819028, "grad_norm": 1.6930311936716727, "learning_rate": 8.677570418241103e-06, "loss": 0.5521, "step": 22441 }, { "epoch": 1.6679301374953548, "grad_norm": 1.8793319176531644, "learning_rate": 8.67677510118056e-06, "loss": 0.6747, "step": 22442 }, { "epoch": 1.6680044593088073, "grad_norm": 1.8100871131150342, "learning_rate": 8.675979792638875e-06, "loss": 0.4618, "step": 22443 }, { "epoch": 1.6680787811222593, "grad_norm": 1.7202589198107383, "learning_rate": 8.675184492621161e-06, "loss": 0.5017, "step": 22444 }, { "epoch": 1.6681531029357117, "grad_norm": 1.6848275872863832, "learning_rate": 8.674389201132538e-06, "loss": 0.5207, "step": 22445 }, { "epoch": 1.6682274247491637, "grad_norm": 1.6106831807582904, "learning_rate": 8.673593918178133e-06, "loss": 0.5865, "step": 22446 }, { "epoch": 1.6683017465626162, "grad_norm": 2.2028343640835013, "learning_rate": 8.672798643763057e-06, "loss": 0.6921, "step": 22447 }, { "epoch": 1.6683760683760682, "grad_norm": 1.7308737812636867, "learning_rate": 8.672003377892436e-06, "loss": 0.6698, "step": 22448 }, { "epoch": 1.6684503901895207, "grad_norm": 2.1748467969002037, "learning_rate": 8.671208120571389e-06, "loss": 0.6672, "step": 22449 }, { "epoch": 1.668524712002973, "grad_norm": 1.9413268863563533, "learning_rate": 8.670412871805034e-06, "loss": 0.5163, "step": 22450 }, { "epoch": 1.6685990338164252, "grad_norm": 1.8099691852708115, "learning_rate": 8.669617631598493e-06, "loss": 0.4911, "step": 22451 }, { "epoch": 1.6686733556298774, "grad_norm": 1.9413400964412908, "learning_rate": 8.668822399956885e-06, "loss": 0.5575, "step": 22452 }, { "epoch": 1.6687476774433296, "grad_norm": 1.9385111792947274, "learning_rate": 8.668027176885327e-06, "loss": 0.523, "step": 22453 }, { "epoch": 1.6688219992567819, "grad_norm": 2.4975176801105112, "learning_rate": 8.667231962388942e-06, "loss": 0.644, "step": 22454 }, { "epoch": 1.6688963210702341, "grad_norm": 1.9147420194250073, "learning_rate": 8.666436756472854e-06, "loss": 0.5593, "step": 22455 }, { "epoch": 1.6689706428836864, "grad_norm": 2.1497314891067325, "learning_rate": 8.66564155914217e-06, "loss": 0.5962, "step": 22456 }, { "epoch": 1.6690449646971386, "grad_norm": 1.663879715221624, "learning_rate": 8.66484637040202e-06, "loss": 0.5348, "step": 22457 }, { "epoch": 1.6691192865105908, "grad_norm": 2.026086376011707, "learning_rate": 8.664051190257518e-06, "loss": 0.5364, "step": 22458 }, { "epoch": 1.669193608324043, "grad_norm": 2.1787173357969127, "learning_rate": 8.663256018713785e-06, "loss": 0.6961, "step": 22459 }, { "epoch": 1.6692679301374953, "grad_norm": 2.0112065345239114, "learning_rate": 8.662460855775941e-06, "loss": 0.6535, "step": 22460 }, { "epoch": 1.6693422519509475, "grad_norm": 2.015829906823559, "learning_rate": 8.661665701449103e-06, "loss": 0.6327, "step": 22461 }, { "epoch": 1.6694165737644, "grad_norm": 2.223829191814885, "learning_rate": 8.660870555738395e-06, "loss": 0.5655, "step": 22462 }, { "epoch": 1.669490895577852, "grad_norm": 1.95505879456271, "learning_rate": 8.66007541864893e-06, "loss": 0.67, "step": 22463 }, { "epoch": 1.6695652173913045, "grad_norm": 2.2448097067692743, "learning_rate": 8.659280290185831e-06, "loss": 0.7269, "step": 22464 }, { "epoch": 1.6696395392047565, "grad_norm": 1.6677991007862059, "learning_rate": 8.658485170354215e-06, "loss": 0.5876, "step": 22465 }, { "epoch": 1.669713861018209, "grad_norm": 1.8967857179062046, "learning_rate": 8.657690059159206e-06, "loss": 0.5445, "step": 22466 }, { "epoch": 1.669788182831661, "grad_norm": 1.9578495234544586, "learning_rate": 8.656894956605916e-06, "loss": 0.5277, "step": 22467 }, { "epoch": 1.6698625046451134, "grad_norm": 7.690921792606289, "learning_rate": 8.656099862699468e-06, "loss": 0.5928, "step": 22468 }, { "epoch": 1.6699368264585654, "grad_norm": 1.7336355876597607, "learning_rate": 8.655304777444977e-06, "loss": 0.4238, "step": 22469 }, { "epoch": 1.670011148272018, "grad_norm": 1.7245998875827577, "learning_rate": 8.654509700847564e-06, "loss": 0.5074, "step": 22470 }, { "epoch": 1.67008547008547, "grad_norm": 1.994470726279309, "learning_rate": 8.653714632912347e-06, "loss": 0.5546, "step": 22471 }, { "epoch": 1.6701597918989224, "grad_norm": 1.9333204220888989, "learning_rate": 8.652919573644444e-06, "loss": 0.5602, "step": 22472 }, { "epoch": 1.6702341137123746, "grad_norm": 1.8590937813425217, "learning_rate": 8.65212452304898e-06, "loss": 0.5731, "step": 22473 }, { "epoch": 1.6703084355258269, "grad_norm": 2.136158259684743, "learning_rate": 8.651329481131066e-06, "loss": 0.6359, "step": 22474 }, { "epoch": 1.670382757339279, "grad_norm": 1.8153111738855028, "learning_rate": 8.650534447895823e-06, "loss": 0.5047, "step": 22475 }, { "epoch": 1.6704570791527313, "grad_norm": 2.001606337804093, "learning_rate": 8.649739423348366e-06, "loss": 0.5576, "step": 22476 }, { "epoch": 1.6705314009661836, "grad_norm": 2.260272968231803, "learning_rate": 8.648944407493824e-06, "loss": 0.5772, "step": 22477 }, { "epoch": 1.6706057227796358, "grad_norm": 2.094042256274147, "learning_rate": 8.648149400337302e-06, "loss": 0.6899, "step": 22478 }, { "epoch": 1.670680044593088, "grad_norm": 1.9790806606980127, "learning_rate": 8.647354401883927e-06, "loss": 0.5617, "step": 22479 }, { "epoch": 1.6707543664065403, "grad_norm": 2.039381143867395, "learning_rate": 8.646559412138813e-06, "loss": 0.6855, "step": 22480 }, { "epoch": 1.6708286882199925, "grad_norm": 1.6532377242233862, "learning_rate": 8.645764431107077e-06, "loss": 0.5443, "step": 22481 }, { "epoch": 1.6709030100334448, "grad_norm": 2.1911335985225744, "learning_rate": 8.644969458793842e-06, "loss": 0.593, "step": 22482 }, { "epoch": 1.6709773318468972, "grad_norm": 1.8344700814793373, "learning_rate": 8.644174495204221e-06, "loss": 0.6164, "step": 22483 }, { "epoch": 1.6710516536603492, "grad_norm": 1.9159047302880106, "learning_rate": 8.643379540343337e-06, "loss": 0.6152, "step": 22484 }, { "epoch": 1.6711259754738017, "grad_norm": 3.9256084866151673, "learning_rate": 8.642584594216303e-06, "loss": 0.6513, "step": 22485 }, { "epoch": 1.6712002972872537, "grad_norm": 2.1115042260121717, "learning_rate": 8.641789656828239e-06, "loss": 0.6285, "step": 22486 }, { "epoch": 1.6712746191007062, "grad_norm": 2.207327336536394, "learning_rate": 8.640994728184263e-06, "loss": 0.6828, "step": 22487 }, { "epoch": 1.6713489409141582, "grad_norm": 2.0818870318075517, "learning_rate": 8.640199808289494e-06, "loss": 0.6757, "step": 22488 }, { "epoch": 1.6714232627276107, "grad_norm": 2.090983285480558, "learning_rate": 8.639404897149049e-06, "loss": 0.674, "step": 22489 }, { "epoch": 1.6714975845410627, "grad_norm": 1.912803097235545, "learning_rate": 8.638609994768043e-06, "loss": 0.5882, "step": 22490 }, { "epoch": 1.6715719063545151, "grad_norm": 1.8253301334220509, "learning_rate": 8.637815101151595e-06, "loss": 0.5356, "step": 22491 }, { "epoch": 1.6716462281679672, "grad_norm": 3.432792243570056, "learning_rate": 8.637020216304822e-06, "loss": 0.5729, "step": 22492 }, { "epoch": 1.6717205499814196, "grad_norm": 2.243725436484039, "learning_rate": 8.636225340232842e-06, "loss": 0.5842, "step": 22493 }, { "epoch": 1.6717948717948716, "grad_norm": 2.0067994771407474, "learning_rate": 8.635430472940772e-06, "loss": 0.5174, "step": 22494 }, { "epoch": 1.671869193608324, "grad_norm": 1.9242334537762233, "learning_rate": 8.634635614433733e-06, "loss": 0.6077, "step": 22495 }, { "epoch": 1.6719435154217763, "grad_norm": 1.8350837357330745, "learning_rate": 8.633840764716837e-06, "loss": 0.5174, "step": 22496 }, { "epoch": 1.6720178372352286, "grad_norm": 2.0406882111289595, "learning_rate": 8.633045923795203e-06, "loss": 0.6054, "step": 22497 }, { "epoch": 1.6720921590486808, "grad_norm": 1.9229115472102145, "learning_rate": 8.632251091673947e-06, "loss": 0.5861, "step": 22498 }, { "epoch": 1.672166480862133, "grad_norm": 1.9982641487367647, "learning_rate": 8.631456268358191e-06, "loss": 0.4753, "step": 22499 }, { "epoch": 1.6722408026755853, "grad_norm": 2.1914916607922863, "learning_rate": 8.630661453853049e-06, "loss": 0.6661, "step": 22500 }, { "epoch": 1.6723151244890375, "grad_norm": 1.9769923699344505, "learning_rate": 8.629866648163637e-06, "loss": 0.6202, "step": 22501 }, { "epoch": 1.6723894463024898, "grad_norm": 2.155574327817896, "learning_rate": 8.629071851295069e-06, "loss": 0.6863, "step": 22502 }, { "epoch": 1.672463768115942, "grad_norm": 1.8466260652272872, "learning_rate": 8.628277063252465e-06, "loss": 0.6359, "step": 22503 }, { "epoch": 1.6725380899293942, "grad_norm": 2.3491416330951154, "learning_rate": 8.627482284040944e-06, "loss": 0.4892, "step": 22504 }, { "epoch": 1.6726124117428465, "grad_norm": 3.008747291335517, "learning_rate": 8.626687513665622e-06, "loss": 0.5113, "step": 22505 }, { "epoch": 1.672686733556299, "grad_norm": 1.8305437267744487, "learning_rate": 8.625892752131614e-06, "loss": 0.5635, "step": 22506 }, { "epoch": 1.672761055369751, "grad_norm": 2.4270548279567605, "learning_rate": 8.625097999444036e-06, "loss": 0.7592, "step": 22507 }, { "epoch": 1.6728353771832034, "grad_norm": 1.8462719091660884, "learning_rate": 8.624303255608007e-06, "loss": 0.562, "step": 22508 }, { "epoch": 1.6729096989966554, "grad_norm": 2.1898198851798036, "learning_rate": 8.62350852062864e-06, "loss": 0.5944, "step": 22509 }, { "epoch": 1.6729840208101079, "grad_norm": 1.7150042970861956, "learning_rate": 8.622713794511057e-06, "loss": 0.5725, "step": 22510 }, { "epoch": 1.67305834262356, "grad_norm": 2.120000986150744, "learning_rate": 8.62191907726037e-06, "loss": 0.4248, "step": 22511 }, { "epoch": 1.6731326644370124, "grad_norm": 2.345433417380955, "learning_rate": 8.621124368881698e-06, "loss": 0.5878, "step": 22512 }, { "epoch": 1.6732069862504644, "grad_norm": 1.9660025739434768, "learning_rate": 8.620329669380154e-06, "loss": 0.6906, "step": 22513 }, { "epoch": 1.6732813080639168, "grad_norm": 1.7292005038517144, "learning_rate": 8.619534978760854e-06, "loss": 0.4903, "step": 22514 }, { "epoch": 1.6733556298773689, "grad_norm": 2.2207289490736195, "learning_rate": 8.618740297028919e-06, "loss": 0.5975, "step": 22515 }, { "epoch": 1.6734299516908213, "grad_norm": 2.1801247319545367, "learning_rate": 8.617945624189459e-06, "loss": 0.6996, "step": 22516 }, { "epoch": 1.6735042735042736, "grad_norm": 2.3876803468116172, "learning_rate": 8.617150960247597e-06, "loss": 0.7555, "step": 22517 }, { "epoch": 1.6735785953177258, "grad_norm": 3.448652617876268, "learning_rate": 8.616356305208443e-06, "loss": 0.4014, "step": 22518 }, { "epoch": 1.673652917131178, "grad_norm": 2.046317935874763, "learning_rate": 8.615561659077117e-06, "loss": 0.6436, "step": 22519 }, { "epoch": 1.6737272389446303, "grad_norm": 1.9844659548513266, "learning_rate": 8.614767021858731e-06, "loss": 0.5864, "step": 22520 }, { "epoch": 1.6738015607580825, "grad_norm": 1.98024989256939, "learning_rate": 8.613972393558405e-06, "loss": 0.636, "step": 22521 }, { "epoch": 1.6738758825715347, "grad_norm": 1.723860921880001, "learning_rate": 8.613177774181252e-06, "loss": 0.5811, "step": 22522 }, { "epoch": 1.673950204384987, "grad_norm": 10.624236143190117, "learning_rate": 8.612383163732393e-06, "loss": 0.5658, "step": 22523 }, { "epoch": 1.6740245261984392, "grad_norm": 1.6710463711934136, "learning_rate": 8.611588562216934e-06, "loss": 0.6195, "step": 22524 }, { "epoch": 1.6740988480118915, "grad_norm": 2.804798185688508, "learning_rate": 8.610793969639997e-06, "loss": 0.519, "step": 22525 }, { "epoch": 1.6741731698253437, "grad_norm": 2.1899643037016103, "learning_rate": 8.609999386006695e-06, "loss": 0.6944, "step": 22526 }, { "epoch": 1.674247491638796, "grad_norm": 2.1873409995871507, "learning_rate": 8.609204811322145e-06, "loss": 0.6174, "step": 22527 }, { "epoch": 1.6743218134522482, "grad_norm": 2.095063864979317, "learning_rate": 8.608410245591465e-06, "loss": 0.7267, "step": 22528 }, { "epoch": 1.6743961352657006, "grad_norm": 2.2980246115419813, "learning_rate": 8.607615688819766e-06, "loss": 0.6702, "step": 22529 }, { "epoch": 1.6744704570791527, "grad_norm": 2.0050358953474916, "learning_rate": 8.606821141012164e-06, "loss": 0.571, "step": 22530 }, { "epoch": 1.6745447788926051, "grad_norm": 1.7527608038419435, "learning_rate": 8.606026602173776e-06, "loss": 0.5337, "step": 22531 }, { "epoch": 1.6746191007060571, "grad_norm": 2.1609194011020754, "learning_rate": 8.605232072309716e-06, "loss": 0.7518, "step": 22532 }, { "epoch": 1.6746934225195096, "grad_norm": 2.8531845448327915, "learning_rate": 8.6044375514251e-06, "loss": 0.6714, "step": 22533 }, { "epoch": 1.6747677443329616, "grad_norm": 1.9075229937900755, "learning_rate": 8.603643039525047e-06, "loss": 0.6456, "step": 22534 }, { "epoch": 1.674842066146414, "grad_norm": 6.430433425179047, "learning_rate": 8.602848536614662e-06, "loss": 0.5538, "step": 22535 }, { "epoch": 1.674916387959866, "grad_norm": 1.7710593369294532, "learning_rate": 8.602054042699066e-06, "loss": 0.5176, "step": 22536 }, { "epoch": 1.6749907097733185, "grad_norm": 1.9598696331130678, "learning_rate": 8.601259557783374e-06, "loss": 0.5759, "step": 22537 }, { "epoch": 1.6750650315867706, "grad_norm": 1.8792249474284708, "learning_rate": 8.600465081872703e-06, "loss": 0.4754, "step": 22538 }, { "epoch": 1.675139353400223, "grad_norm": 2.5225234037865145, "learning_rate": 8.599670614972162e-06, "loss": 0.6439, "step": 22539 }, { "epoch": 1.6752136752136753, "grad_norm": 2.0530740736281423, "learning_rate": 8.598876157086869e-06, "loss": 0.7086, "step": 22540 }, { "epoch": 1.6752879970271275, "grad_norm": 1.9166824542198033, "learning_rate": 8.59808170822194e-06, "loss": 0.661, "step": 22541 }, { "epoch": 1.6753623188405797, "grad_norm": 1.675165984600572, "learning_rate": 8.597287268382485e-06, "loss": 0.5348, "step": 22542 }, { "epoch": 1.675436640654032, "grad_norm": 1.6381960418579333, "learning_rate": 8.596492837573628e-06, "loss": 0.4656, "step": 22543 }, { "epoch": 1.6755109624674842, "grad_norm": 1.9101971135165337, "learning_rate": 8.595698415800474e-06, "loss": 0.5403, "step": 22544 }, { "epoch": 1.6755852842809364, "grad_norm": 2.0835953893237473, "learning_rate": 8.594904003068144e-06, "loss": 0.5726, "step": 22545 }, { "epoch": 1.6756596060943887, "grad_norm": 2.015377287004762, "learning_rate": 8.594109599381744e-06, "loss": 0.6036, "step": 22546 }, { "epoch": 1.675733927907841, "grad_norm": 2.1127110806089835, "learning_rate": 8.593315204746395e-06, "loss": 0.5268, "step": 22547 }, { "epoch": 1.6758082497212932, "grad_norm": 5.389731900369224, "learning_rate": 8.592520819167209e-06, "loss": 0.433, "step": 22548 }, { "epoch": 1.6758825715347454, "grad_norm": 2.0439815985231005, "learning_rate": 8.591726442649304e-06, "loss": 0.7127, "step": 22549 }, { "epoch": 1.6759568933481979, "grad_norm": 1.7941930549771299, "learning_rate": 8.590932075197788e-06, "loss": 0.4848, "step": 22550 }, { "epoch": 1.6760312151616499, "grad_norm": 1.8742941122092132, "learning_rate": 8.590137716817778e-06, "loss": 0.5422, "step": 22551 }, { "epoch": 1.6761055369751023, "grad_norm": 1.7084455199892035, "learning_rate": 8.589343367514389e-06, "loss": 0.5904, "step": 22552 }, { "epoch": 1.6761798587885544, "grad_norm": 2.009353310237378, "learning_rate": 8.588549027292735e-06, "loss": 0.5397, "step": 22553 }, { "epoch": 1.6762541806020068, "grad_norm": 2.2410413167616476, "learning_rate": 8.587754696157928e-06, "loss": 0.7207, "step": 22554 }, { "epoch": 1.6763285024154588, "grad_norm": 1.7641716414903645, "learning_rate": 8.586960374115083e-06, "loss": 0.4921, "step": 22555 }, { "epoch": 1.6764028242289113, "grad_norm": 2.84436788441697, "learning_rate": 8.586166061169319e-06, "loss": 0.7941, "step": 22556 }, { "epoch": 1.6764771460423633, "grad_norm": 2.5266704915136224, "learning_rate": 8.58537175732574e-06, "loss": 0.6606, "step": 22557 }, { "epoch": 1.6765514678558158, "grad_norm": 1.8755846972507058, "learning_rate": 8.584577462589463e-06, "loss": 0.4091, "step": 22558 }, { "epoch": 1.6766257896692678, "grad_norm": 1.8475153810848688, "learning_rate": 8.583783176965604e-06, "loss": 0.5592, "step": 22559 }, { "epoch": 1.6767001114827202, "grad_norm": 2.0005204941462056, "learning_rate": 8.582988900459275e-06, "loss": 0.472, "step": 22560 }, { "epoch": 1.6767744332961723, "grad_norm": 1.6570754183809215, "learning_rate": 8.582194633075592e-06, "loss": 0.5877, "step": 22561 }, { "epoch": 1.6768487551096247, "grad_norm": 1.7326452135881174, "learning_rate": 8.581400374819662e-06, "loss": 0.5464, "step": 22562 }, { "epoch": 1.676923076923077, "grad_norm": 1.696010033217394, "learning_rate": 8.580606125696606e-06, "loss": 0.491, "step": 22563 }, { "epoch": 1.6769973987365292, "grad_norm": 2.3244416802623937, "learning_rate": 8.579811885711533e-06, "loss": 0.6675, "step": 22564 }, { "epoch": 1.6770717205499814, "grad_norm": 1.8991580912622823, "learning_rate": 8.57901765486956e-06, "loss": 0.5516, "step": 22565 }, { "epoch": 1.6771460423634337, "grad_norm": 2.2966219575870337, "learning_rate": 8.578223433175795e-06, "loss": 0.6476, "step": 22566 }, { "epoch": 1.677220364176886, "grad_norm": 2.9202029977084045, "learning_rate": 8.577429220635353e-06, "loss": 0.6503, "step": 22567 }, { "epoch": 1.6772946859903382, "grad_norm": 2.0031166278201424, "learning_rate": 8.576635017253354e-06, "loss": 0.6756, "step": 22568 }, { "epoch": 1.6773690078037904, "grad_norm": 1.6873224580803554, "learning_rate": 8.575840823034899e-06, "loss": 0.5158, "step": 22569 }, { "epoch": 1.6774433296172426, "grad_norm": 1.6969607636554298, "learning_rate": 8.575046637985108e-06, "loss": 0.4267, "step": 22570 }, { "epoch": 1.6775176514306949, "grad_norm": 1.5982938894473642, "learning_rate": 8.574252462109094e-06, "loss": 0.5202, "step": 22571 }, { "epoch": 1.677591973244147, "grad_norm": 1.7078947110744613, "learning_rate": 8.573458295411967e-06, "loss": 0.5114, "step": 22572 }, { "epoch": 1.6776662950575996, "grad_norm": 1.6809165213317743, "learning_rate": 8.572664137898842e-06, "loss": 0.5418, "step": 22573 }, { "epoch": 1.6777406168710516, "grad_norm": 1.8956489080972654, "learning_rate": 8.57186998957483e-06, "loss": 0.5381, "step": 22574 }, { "epoch": 1.677814938684504, "grad_norm": 1.866777884442164, "learning_rate": 8.571075850445047e-06, "loss": 0.6808, "step": 22575 }, { "epoch": 1.677889260497956, "grad_norm": 1.6663284119801887, "learning_rate": 8.570281720514604e-06, "loss": 0.4376, "step": 22576 }, { "epoch": 1.6779635823114085, "grad_norm": 2.055490756929226, "learning_rate": 8.56948759978861e-06, "loss": 0.581, "step": 22577 }, { "epoch": 1.6780379041248605, "grad_norm": 2.2301829809048126, "learning_rate": 8.568693488272182e-06, "loss": 0.5772, "step": 22578 }, { "epoch": 1.678112225938313, "grad_norm": 2.2718729385142273, "learning_rate": 8.567899385970436e-06, "loss": 0.6739, "step": 22579 }, { "epoch": 1.678186547751765, "grad_norm": 1.8887087681556984, "learning_rate": 8.567105292888474e-06, "loss": 0.6468, "step": 22580 }, { "epoch": 1.6782608695652175, "grad_norm": 2.0468184964821816, "learning_rate": 8.566311209031414e-06, "loss": 0.4674, "step": 22581 }, { "epoch": 1.6783351913786695, "grad_norm": 1.5505289962018263, "learning_rate": 8.56551713440437e-06, "loss": 0.4134, "step": 22582 }, { "epoch": 1.678409513192122, "grad_norm": 2.267012481904148, "learning_rate": 8.56472306901245e-06, "loss": 0.542, "step": 22583 }, { "epoch": 1.6784838350055742, "grad_norm": 1.8190391762285123, "learning_rate": 8.56392901286077e-06, "loss": 0.5608, "step": 22584 }, { "epoch": 1.6785581568190264, "grad_norm": 1.7844190501645498, "learning_rate": 8.563134965954437e-06, "loss": 0.5136, "step": 22585 }, { "epoch": 1.6786324786324787, "grad_norm": 1.8428702036899391, "learning_rate": 8.562340928298571e-06, "loss": 0.5511, "step": 22586 }, { "epoch": 1.678706800445931, "grad_norm": 1.8725428501498194, "learning_rate": 8.56154689989828e-06, "loss": 0.646, "step": 22587 }, { "epoch": 1.6787811222593831, "grad_norm": 1.6213568962596547, "learning_rate": 8.560752880758672e-06, "loss": 0.54, "step": 22588 }, { "epoch": 1.6788554440728354, "grad_norm": 2.1752529948276407, "learning_rate": 8.559958870884864e-06, "loss": 0.6902, "step": 22589 }, { "epoch": 1.6789297658862876, "grad_norm": 2.3061901506403464, "learning_rate": 8.559164870281968e-06, "loss": 0.7464, "step": 22590 }, { "epoch": 1.6790040876997399, "grad_norm": 2.018461394911032, "learning_rate": 8.558370878955092e-06, "loss": 0.6559, "step": 22591 }, { "epoch": 1.679078409513192, "grad_norm": 2.3516003398531837, "learning_rate": 8.557576896909348e-06, "loss": 0.5993, "step": 22592 }, { "epoch": 1.6791527313266443, "grad_norm": 2.4520784369356163, "learning_rate": 8.55678292414985e-06, "loss": 0.7148, "step": 22593 }, { "epoch": 1.6792270531400966, "grad_norm": 1.832459931298762, "learning_rate": 8.555988960681709e-06, "loss": 0.4498, "step": 22594 }, { "epoch": 1.6793013749535488, "grad_norm": 1.8212574960253298, "learning_rate": 8.555195006510035e-06, "loss": 0.4879, "step": 22595 }, { "epoch": 1.6793756967670013, "grad_norm": 1.9490773631439242, "learning_rate": 8.554401061639941e-06, "loss": 0.5565, "step": 22596 }, { "epoch": 1.6794500185804533, "grad_norm": 1.894099509997753, "learning_rate": 8.553607126076539e-06, "loss": 0.5169, "step": 22597 }, { "epoch": 1.6795243403939057, "grad_norm": 2.0453703883588856, "learning_rate": 8.55281319982494e-06, "loss": 0.6337, "step": 22598 }, { "epoch": 1.6795986622073578, "grad_norm": 1.9447789075117354, "learning_rate": 8.552019282890252e-06, "loss": 0.6134, "step": 22599 }, { "epoch": 1.6796729840208102, "grad_norm": 2.5353578909568637, "learning_rate": 8.55122537527759e-06, "loss": 0.6154, "step": 22600 }, { "epoch": 1.6797473058342622, "grad_norm": 2.2268156384401774, "learning_rate": 8.550431476992063e-06, "loss": 0.5456, "step": 22601 }, { "epoch": 1.6798216276477147, "grad_norm": 1.8962334444131161, "learning_rate": 8.549637588038788e-06, "loss": 0.6245, "step": 22602 }, { "epoch": 1.6798959494611667, "grad_norm": 2.154219404763502, "learning_rate": 8.548843708422866e-06, "loss": 0.662, "step": 22603 }, { "epoch": 1.6799702712746192, "grad_norm": 1.8368054987389377, "learning_rate": 8.548049838149416e-06, "loss": 0.6832, "step": 22604 }, { "epoch": 1.6800445930880712, "grad_norm": 2.0501837458852314, "learning_rate": 8.547255977223541e-06, "loss": 0.5024, "step": 22605 }, { "epoch": 1.6801189149015237, "grad_norm": 1.8116743829903137, "learning_rate": 8.54646212565036e-06, "loss": 0.5535, "step": 22606 }, { "epoch": 1.680193236714976, "grad_norm": 1.9134401949668272, "learning_rate": 8.545668283434977e-06, "loss": 0.6459, "step": 22607 }, { "epoch": 1.6802675585284281, "grad_norm": 2.1168428457839266, "learning_rate": 8.54487445058251e-06, "loss": 0.58, "step": 22608 }, { "epoch": 1.6803418803418804, "grad_norm": 1.813391871752107, "learning_rate": 8.544080627098066e-06, "loss": 0.5442, "step": 22609 }, { "epoch": 1.6804162021553326, "grad_norm": 1.9676940242012593, "learning_rate": 8.543286812986753e-06, "loss": 0.6609, "step": 22610 }, { "epoch": 1.6804905239687848, "grad_norm": 2.3686871646780014, "learning_rate": 8.542493008253685e-06, "loss": 0.6153, "step": 22611 }, { "epoch": 1.680564845782237, "grad_norm": 1.867340481419667, "learning_rate": 8.54169921290397e-06, "loss": 0.5351, "step": 22612 }, { "epoch": 1.6806391675956893, "grad_norm": 1.7406608420374157, "learning_rate": 8.540905426942724e-06, "loss": 0.5458, "step": 22613 }, { "epoch": 1.6807134894091416, "grad_norm": 1.907110768545228, "learning_rate": 8.54011165037505e-06, "loss": 0.8054, "step": 22614 }, { "epoch": 1.6807878112225938, "grad_norm": 1.44541295651787, "learning_rate": 8.539317883206061e-06, "loss": 0.4044, "step": 22615 }, { "epoch": 1.680862133036046, "grad_norm": 1.9184380488738764, "learning_rate": 8.538524125440869e-06, "loss": 0.5655, "step": 22616 }, { "epoch": 1.6809364548494983, "grad_norm": 2.1430844882356803, "learning_rate": 8.53773037708458e-06, "loss": 0.5853, "step": 22617 }, { "epoch": 1.6810107766629505, "grad_norm": 2.5255761874873675, "learning_rate": 8.53693663814231e-06, "loss": 0.5068, "step": 22618 }, { "epoch": 1.681085098476403, "grad_norm": 1.9559859596792404, "learning_rate": 8.536142908619164e-06, "loss": 0.7049, "step": 22619 }, { "epoch": 1.681159420289855, "grad_norm": 1.9789630478591402, "learning_rate": 8.535349188520257e-06, "loss": 0.5804, "step": 22620 }, { "epoch": 1.6812337421033074, "grad_norm": 2.1110874661260652, "learning_rate": 8.534555477850693e-06, "loss": 0.6451, "step": 22621 }, { "epoch": 1.6813080639167595, "grad_norm": 2.25480499282602, "learning_rate": 8.533761776615586e-06, "loss": 0.5395, "step": 22622 }, { "epoch": 1.681382385730212, "grad_norm": 1.636639419615753, "learning_rate": 8.532968084820043e-06, "loss": 0.4408, "step": 22623 }, { "epoch": 1.681456707543664, "grad_norm": 1.9599180576974216, "learning_rate": 8.53217440246918e-06, "loss": 0.5898, "step": 22624 }, { "epoch": 1.6815310293571164, "grad_norm": 2.572841434836381, "learning_rate": 8.531380729568098e-06, "loss": 0.5443, "step": 22625 }, { "epoch": 1.6816053511705684, "grad_norm": 2.0161669355437004, "learning_rate": 8.530587066121913e-06, "loss": 0.5421, "step": 22626 }, { "epoch": 1.6816796729840209, "grad_norm": 1.7932741966515606, "learning_rate": 8.52979341213573e-06, "loss": 0.5389, "step": 22627 }, { "epoch": 1.681753994797473, "grad_norm": 2.2291935403312975, "learning_rate": 8.52899976761466e-06, "loss": 0.5635, "step": 22628 }, { "epoch": 1.6818283166109254, "grad_norm": 1.861730885374191, "learning_rate": 8.528206132563816e-06, "loss": 0.4848, "step": 22629 }, { "epoch": 1.6819026384243776, "grad_norm": 2.161688979272072, "learning_rate": 8.527412506988301e-06, "loss": 0.5595, "step": 22630 }, { "epoch": 1.6819769602378298, "grad_norm": 1.9494851294963986, "learning_rate": 8.526618890893233e-06, "loss": 0.6616, "step": 22631 }, { "epoch": 1.682051282051282, "grad_norm": 6.084569122030606, "learning_rate": 8.525825284283712e-06, "loss": 0.5344, "step": 22632 }, { "epoch": 1.6821256038647343, "grad_norm": 2.2515646454946303, "learning_rate": 8.52503168716485e-06, "loss": 0.6204, "step": 22633 }, { "epoch": 1.6821999256781865, "grad_norm": 1.8480054806125032, "learning_rate": 8.524238099541761e-06, "loss": 0.5651, "step": 22634 }, { "epoch": 1.6822742474916388, "grad_norm": 1.8576833220942253, "learning_rate": 8.523444521419553e-06, "loss": 0.6379, "step": 22635 }, { "epoch": 1.682348569305091, "grad_norm": 1.5934566354681852, "learning_rate": 8.522650952803329e-06, "loss": 0.4616, "step": 22636 }, { "epoch": 1.6824228911185433, "grad_norm": 2.3052100471688983, "learning_rate": 8.521857393698201e-06, "loss": 0.5561, "step": 22637 }, { "epoch": 1.6824972129319955, "grad_norm": 2.3389517169342575, "learning_rate": 8.52106384410928e-06, "loss": 0.5876, "step": 22638 }, { "epoch": 1.6825715347454477, "grad_norm": 1.668968269796026, "learning_rate": 8.520270304041672e-06, "loss": 0.4701, "step": 22639 }, { "epoch": 1.6826458565589002, "grad_norm": 2.023975701123568, "learning_rate": 8.519476773500485e-06, "loss": 0.6078, "step": 22640 }, { "epoch": 1.6827201783723522, "grad_norm": 2.3272475726973982, "learning_rate": 8.518683252490832e-06, "loss": 0.7545, "step": 22641 }, { "epoch": 1.6827945001858047, "grad_norm": 2.79172918610793, "learning_rate": 8.517889741017821e-06, "loss": 0.7439, "step": 22642 }, { "epoch": 1.6828688219992567, "grad_norm": 1.9276130741894004, "learning_rate": 8.517096239086556e-06, "loss": 0.6078, "step": 22643 }, { "epoch": 1.6829431438127092, "grad_norm": 1.9565570324217116, "learning_rate": 8.51630274670215e-06, "loss": 0.6929, "step": 22644 }, { "epoch": 1.6830174656261612, "grad_norm": 2.161942130385028, "learning_rate": 8.515509263869707e-06, "loss": 0.745, "step": 22645 }, { "epoch": 1.6830917874396136, "grad_norm": 1.7361246851896295, "learning_rate": 8.514715790594343e-06, "loss": 0.4545, "step": 22646 }, { "epoch": 1.6831661092530656, "grad_norm": 2.1136747833610987, "learning_rate": 8.513922326881161e-06, "loss": 0.4622, "step": 22647 }, { "epoch": 1.683240431066518, "grad_norm": 1.8450580511387014, "learning_rate": 8.51312887273527e-06, "loss": 0.6565, "step": 22648 }, { "epoch": 1.6833147528799701, "grad_norm": 2.156641996753981, "learning_rate": 8.512335428161776e-06, "loss": 0.4988, "step": 22649 }, { "epoch": 1.6833890746934226, "grad_norm": 1.6950562383137604, "learning_rate": 8.51154199316579e-06, "loss": 0.5268, "step": 22650 }, { "epoch": 1.6834633965068748, "grad_norm": 1.9074198319662883, "learning_rate": 8.510748567752418e-06, "loss": 0.5877, "step": 22651 }, { "epoch": 1.683537718320327, "grad_norm": 2.189354435008443, "learning_rate": 8.50995515192677e-06, "loss": 0.5439, "step": 22652 }, { "epoch": 1.6836120401337793, "grad_norm": 2.066831094005073, "learning_rate": 8.509161745693957e-06, "loss": 0.6669, "step": 22653 }, { "epoch": 1.6836863619472315, "grad_norm": 1.8150812921064674, "learning_rate": 8.50836834905908e-06, "loss": 0.5309, "step": 22654 }, { "epoch": 1.6837606837606838, "grad_norm": 1.5978129267037764, "learning_rate": 8.50757496202725e-06, "loss": 0.5021, "step": 22655 }, { "epoch": 1.683835005574136, "grad_norm": 1.9024420525553798, "learning_rate": 8.506781584603576e-06, "loss": 0.5582, "step": 22656 }, { "epoch": 1.6839093273875883, "grad_norm": 2.096986696867362, "learning_rate": 8.505988216793165e-06, "loss": 0.6719, "step": 22657 }, { "epoch": 1.6839836492010405, "grad_norm": 2.017247940416495, "learning_rate": 8.505194858601125e-06, "loss": 0.6198, "step": 22658 }, { "epoch": 1.6840579710144927, "grad_norm": 2.11265014462658, "learning_rate": 8.504401510032563e-06, "loss": 0.6329, "step": 22659 }, { "epoch": 1.684132292827945, "grad_norm": 2.278756618656838, "learning_rate": 8.503608171092586e-06, "loss": 0.6833, "step": 22660 }, { "epoch": 1.6842066146413972, "grad_norm": 1.5759190812572277, "learning_rate": 8.502814841786299e-06, "loss": 0.4764, "step": 22661 }, { "epoch": 1.6842809364548494, "grad_norm": 2.1431943647752165, "learning_rate": 8.502021522118815e-06, "loss": 0.6613, "step": 22662 }, { "epoch": 1.684355258268302, "grad_norm": 1.707079709324488, "learning_rate": 8.501228212095236e-06, "loss": 0.4882, "step": 22663 }, { "epoch": 1.684429580081754, "grad_norm": 2.192311319159855, "learning_rate": 8.500434911720678e-06, "loss": 0.6368, "step": 22664 }, { "epoch": 1.6845039018952064, "grad_norm": 1.6825149812383384, "learning_rate": 8.499641621000236e-06, "loss": 0.5547, "step": 22665 }, { "epoch": 1.6845782237086584, "grad_norm": 2.009599401781462, "learning_rate": 8.498848339939027e-06, "loss": 0.4924, "step": 22666 }, { "epoch": 1.6846525455221109, "grad_norm": 2.088605250673949, "learning_rate": 8.498055068542153e-06, "loss": 0.581, "step": 22667 }, { "epoch": 1.6847268673355629, "grad_norm": 2.531838672576127, "learning_rate": 8.497261806814723e-06, "loss": 0.6463, "step": 22668 }, { "epoch": 1.6848011891490153, "grad_norm": 1.8021538066322205, "learning_rate": 8.496468554761845e-06, "loss": 0.6305, "step": 22669 }, { "epoch": 1.6848755109624673, "grad_norm": 3.759846134264583, "learning_rate": 8.495675312388625e-06, "loss": 0.6855, "step": 22670 }, { "epoch": 1.6849498327759198, "grad_norm": 2.2669164992141937, "learning_rate": 8.494882079700166e-06, "loss": 0.6981, "step": 22671 }, { "epoch": 1.6850241545893718, "grad_norm": 2.194366206820709, "learning_rate": 8.494088856701579e-06, "loss": 0.6395, "step": 22672 }, { "epoch": 1.6850984764028243, "grad_norm": 2.153539292946607, "learning_rate": 8.49329564339797e-06, "loss": 0.4697, "step": 22673 }, { "epoch": 1.6851727982162765, "grad_norm": 1.8424092722027803, "learning_rate": 8.492502439794447e-06, "loss": 0.5009, "step": 22674 }, { "epoch": 1.6852471200297288, "grad_norm": 2.4548853948811677, "learning_rate": 8.491709245896112e-06, "loss": 0.683, "step": 22675 }, { "epoch": 1.685321441843181, "grad_norm": 1.938792056719383, "learning_rate": 8.490916061708076e-06, "loss": 0.6594, "step": 22676 }, { "epoch": 1.6853957636566332, "grad_norm": 1.948689542205081, "learning_rate": 8.490122887235444e-06, "loss": 0.4212, "step": 22677 }, { "epoch": 1.6854700854700855, "grad_norm": 2.243677251223545, "learning_rate": 8.489329722483321e-06, "loss": 0.7169, "step": 22678 }, { "epoch": 1.6855444072835377, "grad_norm": 1.9091925495123288, "learning_rate": 8.48853656745682e-06, "loss": 0.6282, "step": 22679 }, { "epoch": 1.68561872909699, "grad_norm": 1.7301667014442177, "learning_rate": 8.487743422161037e-06, "loss": 0.6212, "step": 22680 }, { "epoch": 1.6856930509104422, "grad_norm": 2.4504299944860453, "learning_rate": 8.486950286601089e-06, "loss": 0.6411, "step": 22681 }, { "epoch": 1.6857673727238944, "grad_norm": 1.805132758890199, "learning_rate": 8.486157160782072e-06, "loss": 0.6266, "step": 22682 }, { "epoch": 1.6858416945373467, "grad_norm": 1.8408602313370905, "learning_rate": 8.485364044709098e-06, "loss": 0.5581, "step": 22683 }, { "epoch": 1.685916016350799, "grad_norm": 2.0937685087114026, "learning_rate": 8.484570938387268e-06, "loss": 0.3868, "step": 22684 }, { "epoch": 1.6859903381642511, "grad_norm": 2.3605217304613046, "learning_rate": 8.483777841821698e-06, "loss": 0.5888, "step": 22685 }, { "epoch": 1.6860646599777036, "grad_norm": 2.3326011247609704, "learning_rate": 8.482984755017483e-06, "loss": 0.6171, "step": 22686 }, { "epoch": 1.6861389817911556, "grad_norm": 1.8557764916526893, "learning_rate": 8.482191677979733e-06, "loss": 0.634, "step": 22687 }, { "epoch": 1.686213303604608, "grad_norm": 2.4012860454859295, "learning_rate": 8.481398610713556e-06, "loss": 0.6604, "step": 22688 }, { "epoch": 1.68628762541806, "grad_norm": 2.0041111386426214, "learning_rate": 8.480605553224055e-06, "loss": 0.6256, "step": 22689 }, { "epoch": 1.6863619472315126, "grad_norm": 1.8429982617308214, "learning_rate": 8.47981250551634e-06, "loss": 0.6198, "step": 22690 }, { "epoch": 1.6864362690449646, "grad_norm": 1.9000547932579526, "learning_rate": 8.47901946759551e-06, "loss": 0.564, "step": 22691 }, { "epoch": 1.686510590858417, "grad_norm": 1.9683847758595754, "learning_rate": 8.478226439466677e-06, "loss": 0.6329, "step": 22692 }, { "epoch": 1.686584912671869, "grad_norm": 1.9018192786147616, "learning_rate": 8.477433421134942e-06, "loss": 0.5973, "step": 22693 }, { "epoch": 1.6866592344853215, "grad_norm": 2.6942111503399775, "learning_rate": 8.476640412605409e-06, "loss": 0.6029, "step": 22694 }, { "epoch": 1.6867335562987735, "grad_norm": 2.4511937468987925, "learning_rate": 8.475847413883186e-06, "loss": 0.5258, "step": 22695 }, { "epoch": 1.686807878112226, "grad_norm": 1.5554493606241497, "learning_rate": 8.475054424973381e-06, "loss": 0.5491, "step": 22696 }, { "epoch": 1.6868821999256782, "grad_norm": 2.1291216030264244, "learning_rate": 8.474261445881096e-06, "loss": 0.6821, "step": 22697 }, { "epoch": 1.6869565217391305, "grad_norm": 2.0526056632170455, "learning_rate": 8.473468476611435e-06, "loss": 0.5399, "step": 22698 }, { "epoch": 1.6870308435525827, "grad_norm": 1.8772896590234105, "learning_rate": 8.472675517169504e-06, "loss": 0.535, "step": 22699 }, { "epoch": 1.687105165366035, "grad_norm": 2.151769440752007, "learning_rate": 8.47188256756041e-06, "loss": 0.6513, "step": 22700 }, { "epoch": 1.6871794871794872, "grad_norm": 2.2949167887944726, "learning_rate": 8.47108962778926e-06, "loss": 0.6131, "step": 22701 }, { "epoch": 1.6872538089929394, "grad_norm": 2.0813229149400163, "learning_rate": 8.470296697861152e-06, "loss": 0.5274, "step": 22702 }, { "epoch": 1.6873281308063917, "grad_norm": 2.16902579616396, "learning_rate": 8.469503777781198e-06, "loss": 0.6161, "step": 22703 }, { "epoch": 1.687402452619844, "grad_norm": 2.0137354102117286, "learning_rate": 8.468710867554495e-06, "loss": 0.5645, "step": 22704 }, { "epoch": 1.6874767744332961, "grad_norm": 1.965596867736792, "learning_rate": 8.467917967186152e-06, "loss": 0.566, "step": 22705 }, { "epoch": 1.6875510962467484, "grad_norm": 1.7921479967987322, "learning_rate": 8.467125076681275e-06, "loss": 0.6509, "step": 22706 }, { "epoch": 1.6876254180602008, "grad_norm": 1.8301769495099756, "learning_rate": 8.466332196044969e-06, "loss": 0.5207, "step": 22707 }, { "epoch": 1.6876997398736528, "grad_norm": 1.7885793157685719, "learning_rate": 8.465539325282335e-06, "loss": 0.5858, "step": 22708 }, { "epoch": 1.6877740616871053, "grad_norm": 2.419218775458134, "learning_rate": 8.464746464398478e-06, "loss": 0.5763, "step": 22709 }, { "epoch": 1.6878483835005573, "grad_norm": 2.4749536778425014, "learning_rate": 8.463953613398503e-06, "loss": 0.608, "step": 22710 }, { "epoch": 1.6879227053140098, "grad_norm": 2.0544457769843767, "learning_rate": 8.463160772287517e-06, "loss": 0.6579, "step": 22711 }, { "epoch": 1.6879970271274618, "grad_norm": 1.7340549503825222, "learning_rate": 8.462367941070623e-06, "loss": 0.5258, "step": 22712 }, { "epoch": 1.6880713489409143, "grad_norm": 2.1425795006361743, "learning_rate": 8.461575119752922e-06, "loss": 0.6016, "step": 22713 }, { "epoch": 1.6881456707543663, "grad_norm": 1.9523728240725042, "learning_rate": 8.460782308339523e-06, "loss": 0.6495, "step": 22714 }, { "epoch": 1.6882199925678187, "grad_norm": 1.8711476456727898, "learning_rate": 8.45998950683553e-06, "loss": 0.5143, "step": 22715 }, { "epoch": 1.6882943143812708, "grad_norm": 1.7821462238037646, "learning_rate": 8.459196715246041e-06, "loss": 0.5981, "step": 22716 }, { "epoch": 1.6883686361947232, "grad_norm": 2.180760001852001, "learning_rate": 8.458403933576162e-06, "loss": 0.6266, "step": 22717 }, { "epoch": 1.6884429580081755, "grad_norm": 2.1831092792480695, "learning_rate": 8.457611161831003e-06, "loss": 0.5904, "step": 22718 }, { "epoch": 1.6885172798216277, "grad_norm": 2.405476883926834, "learning_rate": 8.456818400015659e-06, "loss": 0.6261, "step": 22719 }, { "epoch": 1.68859160163508, "grad_norm": 4.4115799919908385, "learning_rate": 8.45602564813524e-06, "loss": 0.5301, "step": 22720 }, { "epoch": 1.6886659234485322, "grad_norm": 7.997382763415079, "learning_rate": 8.455232906194845e-06, "loss": 0.5873, "step": 22721 }, { "epoch": 1.6887402452619844, "grad_norm": 2.144827414168333, "learning_rate": 8.454440174199582e-06, "loss": 0.5718, "step": 22722 }, { "epoch": 1.6888145670754366, "grad_norm": 2.256683846408836, "learning_rate": 8.453647452154556e-06, "loss": 0.6166, "step": 22723 }, { "epoch": 1.6888888888888889, "grad_norm": 1.8591026696748343, "learning_rate": 8.452854740064865e-06, "loss": 0.5326, "step": 22724 }, { "epoch": 1.6889632107023411, "grad_norm": 1.9581561490380137, "learning_rate": 8.452062037935615e-06, "loss": 0.6115, "step": 22725 }, { "epoch": 1.6890375325157934, "grad_norm": 1.7580023110091603, "learning_rate": 8.451269345771914e-06, "loss": 0.6109, "step": 22726 }, { "epoch": 1.6891118543292456, "grad_norm": 1.9219863732451117, "learning_rate": 8.450476663578854e-06, "loss": 0.53, "step": 22727 }, { "epoch": 1.6891861761426978, "grad_norm": 1.8483338588416731, "learning_rate": 8.449683991361547e-06, "loss": 0.4565, "step": 22728 }, { "epoch": 1.68926049795615, "grad_norm": 2.5010494193497173, "learning_rate": 8.448891329125096e-06, "loss": 0.4451, "step": 22729 }, { "epoch": 1.6893348197696025, "grad_norm": 1.8741688381838029, "learning_rate": 8.448098676874598e-06, "loss": 0.4244, "step": 22730 }, { "epoch": 1.6894091415830546, "grad_norm": 1.9963900323290469, "learning_rate": 8.447306034615163e-06, "loss": 0.6171, "step": 22731 }, { "epoch": 1.689483463396507, "grad_norm": 1.6227292601229408, "learning_rate": 8.446513402351888e-06, "loss": 0.6317, "step": 22732 }, { "epoch": 1.689557785209959, "grad_norm": 2.0739227219534624, "learning_rate": 8.445720780089883e-06, "loss": 0.7045, "step": 22733 }, { "epoch": 1.6896321070234115, "grad_norm": 1.9091682216456862, "learning_rate": 8.444928167834245e-06, "loss": 0.5739, "step": 22734 }, { "epoch": 1.6897064288368635, "grad_norm": 2.772830783094996, "learning_rate": 8.444135565590079e-06, "loss": 0.7444, "step": 22735 }, { "epoch": 1.689780750650316, "grad_norm": 1.9201947327845834, "learning_rate": 8.443342973362488e-06, "loss": 0.5307, "step": 22736 }, { "epoch": 1.689855072463768, "grad_norm": 1.9900692687114665, "learning_rate": 8.442550391156577e-06, "loss": 0.6651, "step": 22737 }, { "epoch": 1.6899293942772204, "grad_norm": 1.7557865749794432, "learning_rate": 8.441757818977442e-06, "loss": 0.5268, "step": 22738 }, { "epoch": 1.6900037160906725, "grad_norm": 1.9611441987306082, "learning_rate": 8.44096525683019e-06, "loss": 0.4625, "step": 22739 }, { "epoch": 1.690078037904125, "grad_norm": 1.4412241990604207, "learning_rate": 8.440172704719924e-06, "loss": 0.4525, "step": 22740 }, { "epoch": 1.6901523597175772, "grad_norm": 2.0577837439837805, "learning_rate": 8.439380162651743e-06, "loss": 0.3557, "step": 22741 }, { "epoch": 1.6902266815310294, "grad_norm": 2.212220848182308, "learning_rate": 8.438587630630753e-06, "loss": 0.6897, "step": 22742 }, { "epoch": 1.6903010033444816, "grad_norm": 1.9046157669586783, "learning_rate": 8.437795108662052e-06, "loss": 0.5592, "step": 22743 }, { "epoch": 1.6903753251579339, "grad_norm": 2.4066974395929885, "learning_rate": 8.437002596750747e-06, "loss": 0.594, "step": 22744 }, { "epoch": 1.690449646971386, "grad_norm": 1.8824016499841394, "learning_rate": 8.43621009490194e-06, "loss": 0.6035, "step": 22745 }, { "epoch": 1.6905239687848383, "grad_norm": 1.9378925850723718, "learning_rate": 8.43541760312073e-06, "loss": 0.5796, "step": 22746 }, { "epoch": 1.6905982905982906, "grad_norm": 2.0898063689036137, "learning_rate": 8.43462512141222e-06, "loss": 0.5345, "step": 22747 }, { "epoch": 1.6906726124117428, "grad_norm": 2.014569632465703, "learning_rate": 8.433832649781516e-06, "loss": 0.3495, "step": 22748 }, { "epoch": 1.690746934225195, "grad_norm": 1.6585306151980252, "learning_rate": 8.433040188233713e-06, "loss": 0.5436, "step": 22749 }, { "epoch": 1.6908212560386473, "grad_norm": 2.645533577358059, "learning_rate": 8.432247736773914e-06, "loss": 0.6889, "step": 22750 }, { "epoch": 1.6908955778520995, "grad_norm": 1.7710747570840368, "learning_rate": 8.431455295407225e-06, "loss": 0.4904, "step": 22751 }, { "epoch": 1.6909698996655518, "grad_norm": 2.106885075402417, "learning_rate": 8.430662864138745e-06, "loss": 0.5764, "step": 22752 }, { "epoch": 1.6910442214790042, "grad_norm": 2.426938016517479, "learning_rate": 8.429870442973574e-06, "loss": 0.6706, "step": 22753 }, { "epoch": 1.6911185432924563, "grad_norm": 1.8612173206680123, "learning_rate": 8.429078031916818e-06, "loss": 0.5339, "step": 22754 }, { "epoch": 1.6911928651059087, "grad_norm": 2.306623477295381, "learning_rate": 8.428285630973575e-06, "loss": 0.562, "step": 22755 }, { "epoch": 1.6912671869193607, "grad_norm": 1.7944044824345402, "learning_rate": 8.427493240148948e-06, "loss": 0.6018, "step": 22756 }, { "epoch": 1.6913415087328132, "grad_norm": 1.8558043566230453, "learning_rate": 8.426700859448038e-06, "loss": 0.5509, "step": 22757 }, { "epoch": 1.6914158305462652, "grad_norm": 2.111615196003369, "learning_rate": 8.425908488875945e-06, "loss": 0.6845, "step": 22758 }, { "epoch": 1.6914901523597177, "grad_norm": 1.6226883302222537, "learning_rate": 8.425116128437771e-06, "loss": 0.4625, "step": 22759 }, { "epoch": 1.6915644741731697, "grad_norm": 2.2413943940297907, "learning_rate": 8.424323778138624e-06, "loss": 0.6498, "step": 22760 }, { "epoch": 1.6916387959866221, "grad_norm": 2.251085392372609, "learning_rate": 8.423531437983593e-06, "loss": 0.6017, "step": 22761 }, { "epoch": 1.6917131178000742, "grad_norm": 1.8212502663892722, "learning_rate": 8.422739107977788e-06, "loss": 0.6404, "step": 22762 }, { "epoch": 1.6917874396135266, "grad_norm": 2.19872792359819, "learning_rate": 8.421946788126304e-06, "loss": 0.7042, "step": 22763 }, { "epoch": 1.6918617614269789, "grad_norm": 2.400336715179043, "learning_rate": 8.421154478434245e-06, "loss": 0.5446, "step": 22764 }, { "epoch": 1.691936083240431, "grad_norm": 1.709705798446305, "learning_rate": 8.42036217890671e-06, "loss": 0.5485, "step": 22765 }, { "epoch": 1.6920104050538833, "grad_norm": 2.388255553144023, "learning_rate": 8.419569889548804e-06, "loss": 0.6009, "step": 22766 }, { "epoch": 1.6920847268673356, "grad_norm": 2.053072419010756, "learning_rate": 8.418777610365625e-06, "loss": 0.6181, "step": 22767 }, { "epoch": 1.6921590486807878, "grad_norm": 1.9312686693912797, "learning_rate": 8.417985341362272e-06, "loss": 0.6784, "step": 22768 }, { "epoch": 1.69223337049424, "grad_norm": 2.2711476346250987, "learning_rate": 8.417193082543847e-06, "loss": 0.636, "step": 22769 }, { "epoch": 1.6923076923076923, "grad_norm": 2.284537382112046, "learning_rate": 8.416400833915452e-06, "loss": 0.5123, "step": 22770 }, { "epoch": 1.6923820141211445, "grad_norm": 2.4780237682911372, "learning_rate": 8.41560859548219e-06, "loss": 0.7198, "step": 22771 }, { "epoch": 1.6924563359345968, "grad_norm": 1.9273609257006754, "learning_rate": 8.414816367249154e-06, "loss": 0.6506, "step": 22772 }, { "epoch": 1.692530657748049, "grad_norm": 2.4207279710960035, "learning_rate": 8.414024149221449e-06, "loss": 0.6675, "step": 22773 }, { "epoch": 1.6926049795615015, "grad_norm": 1.9352148574680852, "learning_rate": 8.413231941404174e-06, "loss": 0.623, "step": 22774 }, { "epoch": 1.6926793013749535, "grad_norm": 1.7636708797674596, "learning_rate": 8.412439743802426e-06, "loss": 0.5424, "step": 22775 }, { "epoch": 1.692753623188406, "grad_norm": 2.3534555098032963, "learning_rate": 8.411647556421311e-06, "loss": 0.7101, "step": 22776 }, { "epoch": 1.692827945001858, "grad_norm": 1.85670180572394, "learning_rate": 8.410855379265927e-06, "loss": 0.6122, "step": 22777 }, { "epoch": 1.6929022668153104, "grad_norm": 1.828404378989131, "learning_rate": 8.410063212341375e-06, "loss": 0.6078, "step": 22778 }, { "epoch": 1.6929765886287624, "grad_norm": 1.8333968484681047, "learning_rate": 8.40927105565275e-06, "loss": 0.6868, "step": 22779 }, { "epoch": 1.693050910442215, "grad_norm": 2.1624804586778534, "learning_rate": 8.408478909205158e-06, "loss": 0.6883, "step": 22780 }, { "epoch": 1.693125232255667, "grad_norm": 1.9829866618044307, "learning_rate": 8.407686773003693e-06, "loss": 0.5826, "step": 22781 }, { "epoch": 1.6931995540691194, "grad_norm": 1.8042529196048416, "learning_rate": 8.406894647053465e-06, "loss": 0.4864, "step": 22782 }, { "epoch": 1.6932738758825714, "grad_norm": 1.8556949153568376, "learning_rate": 8.406102531359561e-06, "loss": 0.6411, "step": 22783 }, { "epoch": 1.6933481976960238, "grad_norm": 2.5849057147904837, "learning_rate": 8.405310425927088e-06, "loss": 0.7221, "step": 22784 }, { "epoch": 1.6934225195094759, "grad_norm": 2.2612029549186508, "learning_rate": 8.404518330761145e-06, "loss": 0.7311, "step": 22785 }, { "epoch": 1.6934968413229283, "grad_norm": 1.939447873596873, "learning_rate": 8.403726245866827e-06, "loss": 0.5673, "step": 22786 }, { "epoch": 1.6935711631363806, "grad_norm": 1.9837993705867527, "learning_rate": 8.402934171249237e-06, "loss": 0.5217, "step": 22787 }, { "epoch": 1.6936454849498328, "grad_norm": 2.229480229314981, "learning_rate": 8.402142106913473e-06, "loss": 0.729, "step": 22788 }, { "epoch": 1.693719806763285, "grad_norm": 1.8041713648286324, "learning_rate": 8.40135005286464e-06, "loss": 0.4834, "step": 22789 }, { "epoch": 1.6937941285767373, "grad_norm": 1.6550475095643673, "learning_rate": 8.400558009107829e-06, "loss": 0.551, "step": 22790 }, { "epoch": 1.6938684503901895, "grad_norm": 1.992352475906633, "learning_rate": 8.39976597564814e-06, "loss": 0.6804, "step": 22791 }, { "epoch": 1.6939427722036418, "grad_norm": 2.3576905267516755, "learning_rate": 8.398973952490678e-06, "loss": 0.7159, "step": 22792 }, { "epoch": 1.694017094017094, "grad_norm": 1.8944698556773925, "learning_rate": 8.39818193964054e-06, "loss": 0.5648, "step": 22793 }, { "epoch": 1.6940914158305462, "grad_norm": 2.0547229962121367, "learning_rate": 8.397389937102823e-06, "loss": 0.6127, "step": 22794 }, { "epoch": 1.6941657376439985, "grad_norm": 1.7758769919268547, "learning_rate": 8.396597944882627e-06, "loss": 0.5219, "step": 22795 }, { "epoch": 1.6942400594574507, "grad_norm": 1.877898548322514, "learning_rate": 8.395805962985048e-06, "loss": 0.6015, "step": 22796 }, { "epoch": 1.6943143812709032, "grad_norm": 1.9537631480968438, "learning_rate": 8.395013991415186e-06, "loss": 0.6632, "step": 22797 }, { "epoch": 1.6943887030843552, "grad_norm": 2.0262975134469383, "learning_rate": 8.394222030178143e-06, "loss": 0.5844, "step": 22798 }, { "epoch": 1.6944630248978076, "grad_norm": 2.097652149770924, "learning_rate": 8.393430079279015e-06, "loss": 0.4917, "step": 22799 }, { "epoch": 1.6945373467112597, "grad_norm": 2.141733923192288, "learning_rate": 8.3926381387229e-06, "loss": 0.5698, "step": 22800 }, { "epoch": 1.6946116685247121, "grad_norm": 1.5077744204368957, "learning_rate": 8.391846208514896e-06, "loss": 0.5606, "step": 22801 }, { "epoch": 1.6946859903381641, "grad_norm": 1.8404383415349002, "learning_rate": 8.391054288660103e-06, "loss": 0.4667, "step": 22802 }, { "epoch": 1.6947603121516166, "grad_norm": 1.450103135653846, "learning_rate": 8.390262379163619e-06, "loss": 0.312, "step": 22803 }, { "epoch": 1.6948346339650686, "grad_norm": 1.9943842547104447, "learning_rate": 8.389470480030545e-06, "loss": 0.5653, "step": 22804 }, { "epoch": 1.694908955778521, "grad_norm": 1.3556494506157986, "learning_rate": 8.388678591265977e-06, "loss": 0.4042, "step": 22805 }, { "epoch": 1.694983277591973, "grad_norm": 1.605989419200983, "learning_rate": 8.38788671287501e-06, "loss": 0.481, "step": 22806 }, { "epoch": 1.6950575994054256, "grad_norm": 2.1040425046581532, "learning_rate": 8.387094844862747e-06, "loss": 0.6242, "step": 22807 }, { "epoch": 1.6951319212188778, "grad_norm": 1.9427466425222577, "learning_rate": 8.386302987234281e-06, "loss": 0.6541, "step": 22808 }, { "epoch": 1.69520624303233, "grad_norm": 1.458753739931189, "learning_rate": 8.385511139994712e-06, "loss": 0.3884, "step": 22809 }, { "epoch": 1.6952805648457823, "grad_norm": 1.9354080088125438, "learning_rate": 8.384719303149141e-06, "loss": 0.5367, "step": 22810 }, { "epoch": 1.6953548866592345, "grad_norm": 1.8524069422123342, "learning_rate": 8.383927476702662e-06, "loss": 0.6287, "step": 22811 }, { "epoch": 1.6954292084726867, "grad_norm": 1.3635869266180505, "learning_rate": 8.383135660660373e-06, "loss": 0.384, "step": 22812 }, { "epoch": 1.695503530286139, "grad_norm": 1.6562163289190905, "learning_rate": 8.382343855027373e-06, "loss": 0.5622, "step": 22813 }, { "epoch": 1.6955778520995912, "grad_norm": 1.8557271413517331, "learning_rate": 8.38155205980876e-06, "loss": 0.565, "step": 22814 }, { "epoch": 1.6956521739130435, "grad_norm": 2.1203018059246364, "learning_rate": 8.380760275009634e-06, "loss": 0.6217, "step": 22815 }, { "epoch": 1.6957264957264957, "grad_norm": 2.3025841066203006, "learning_rate": 8.379968500635087e-06, "loss": 0.6256, "step": 22816 }, { "epoch": 1.695800817539948, "grad_norm": 2.2390180173504883, "learning_rate": 8.37917673669022e-06, "loss": 0.7153, "step": 22817 }, { "epoch": 1.6958751393534002, "grad_norm": 1.9491942998117344, "learning_rate": 8.378384983180126e-06, "loss": 0.544, "step": 22818 }, { "epoch": 1.6959494611668524, "grad_norm": 1.8693866548588873, "learning_rate": 8.377593240109907e-06, "loss": 0.591, "step": 22819 }, { "epoch": 1.6960237829803049, "grad_norm": 1.6701780124739618, "learning_rate": 8.376801507484657e-06, "loss": 0.6877, "step": 22820 }, { "epoch": 1.6960981047937569, "grad_norm": 1.859845910852718, "learning_rate": 8.376009785309477e-06, "loss": 0.4891, "step": 22821 }, { "epoch": 1.6961724266072093, "grad_norm": 1.7285407989321302, "learning_rate": 8.375218073589462e-06, "loss": 0.5854, "step": 22822 }, { "epoch": 1.6962467484206614, "grad_norm": 2.754933205368172, "learning_rate": 8.374426372329705e-06, "loss": 0.5875, "step": 22823 }, { "epoch": 1.6963210702341138, "grad_norm": 1.923876965435613, "learning_rate": 8.37363468153531e-06, "loss": 0.497, "step": 22824 }, { "epoch": 1.6963953920475658, "grad_norm": 2.3157354536957198, "learning_rate": 8.372843001211369e-06, "loss": 0.7741, "step": 22825 }, { "epoch": 1.6964697138610183, "grad_norm": 2.7054769686465887, "learning_rate": 8.372051331362982e-06, "loss": 0.4944, "step": 22826 }, { "epoch": 1.6965440356744703, "grad_norm": 2.113928303179609, "learning_rate": 8.371259671995242e-06, "loss": 0.6144, "step": 22827 }, { "epoch": 1.6966183574879228, "grad_norm": 2.1370230323833055, "learning_rate": 8.370468023113252e-06, "loss": 0.5998, "step": 22828 }, { "epoch": 1.6966926793013748, "grad_norm": 2.2711619890392223, "learning_rate": 8.3696763847221e-06, "loss": 0.5157, "step": 22829 }, { "epoch": 1.6967670011148273, "grad_norm": 1.979138072204948, "learning_rate": 8.368884756826888e-06, "loss": 0.5823, "step": 22830 }, { "epoch": 1.6968413229282795, "grad_norm": 9.073849198611665, "learning_rate": 8.36809313943271e-06, "loss": 0.6148, "step": 22831 }, { "epoch": 1.6969156447417317, "grad_norm": 1.9163116161898734, "learning_rate": 8.367301532544667e-06, "loss": 0.578, "step": 22832 }, { "epoch": 1.696989966555184, "grad_norm": 1.931813781929072, "learning_rate": 8.366509936167849e-06, "loss": 0.5436, "step": 22833 }, { "epoch": 1.6970642883686362, "grad_norm": 3.2362948691084874, "learning_rate": 8.365718350307356e-06, "loss": 0.5344, "step": 22834 }, { "epoch": 1.6971386101820884, "grad_norm": 2.117279523650908, "learning_rate": 8.364926774968283e-06, "loss": 0.6335, "step": 22835 }, { "epoch": 1.6972129319955407, "grad_norm": 1.5840282672682082, "learning_rate": 8.364135210155726e-06, "loss": 0.5432, "step": 22836 }, { "epoch": 1.697287253808993, "grad_norm": 2.279051767088537, "learning_rate": 8.363343655874785e-06, "loss": 0.5841, "step": 22837 }, { "epoch": 1.6973615756224452, "grad_norm": 2.1557950884040635, "learning_rate": 8.362552112130549e-06, "loss": 0.5124, "step": 22838 }, { "epoch": 1.6974358974358974, "grad_norm": 2.257579745131691, "learning_rate": 8.361760578928123e-06, "loss": 0.5523, "step": 22839 }, { "epoch": 1.6975102192493496, "grad_norm": 1.6437780910970532, "learning_rate": 8.360969056272593e-06, "loss": 0.5336, "step": 22840 }, { "epoch": 1.6975845410628019, "grad_norm": 2.2656746519319015, "learning_rate": 8.360177544169058e-06, "loss": 0.6626, "step": 22841 }, { "epoch": 1.6976588628762541, "grad_norm": 2.122829676029078, "learning_rate": 8.359386042622617e-06, "loss": 0.4908, "step": 22842 }, { "epoch": 1.6977331846897066, "grad_norm": 1.437007444757679, "learning_rate": 8.358594551638364e-06, "loss": 0.4389, "step": 22843 }, { "epoch": 1.6978075065031586, "grad_norm": 2.6278131725387697, "learning_rate": 8.357803071221393e-06, "loss": 0.7621, "step": 22844 }, { "epoch": 1.697881828316611, "grad_norm": 2.1821289601508154, "learning_rate": 8.3570116013768e-06, "loss": 0.634, "step": 22845 }, { "epoch": 1.697956150130063, "grad_norm": 2.1727511208537953, "learning_rate": 8.356220142109682e-06, "loss": 0.6138, "step": 22846 }, { "epoch": 1.6980304719435155, "grad_norm": 2.186013863489014, "learning_rate": 8.355428693425131e-06, "loss": 0.6609, "step": 22847 }, { "epoch": 1.6981047937569675, "grad_norm": 1.8647662573655257, "learning_rate": 8.354637255328249e-06, "loss": 0.6121, "step": 22848 }, { "epoch": 1.69817911557042, "grad_norm": 2.521795097175574, "learning_rate": 8.353845827824125e-06, "loss": 0.6517, "step": 22849 }, { "epoch": 1.698253437383872, "grad_norm": 2.9758507278389152, "learning_rate": 8.353054410917859e-06, "loss": 0.712, "step": 22850 }, { "epoch": 1.6983277591973245, "grad_norm": 1.9131343040779467, "learning_rate": 8.352263004614539e-06, "loss": 0.5252, "step": 22851 }, { "epoch": 1.6984020810107765, "grad_norm": 2.01323558493342, "learning_rate": 8.351471608919265e-06, "loss": 0.6439, "step": 22852 }, { "epoch": 1.698476402824229, "grad_norm": 2.3141527180304546, "learning_rate": 8.350680223837131e-06, "loss": 0.6048, "step": 22853 }, { "epoch": 1.6985507246376812, "grad_norm": 2.303040476980401, "learning_rate": 8.349888849373235e-06, "loss": 0.5841, "step": 22854 }, { "epoch": 1.6986250464511334, "grad_norm": 2.290236334204631, "learning_rate": 8.349097485532666e-06, "loss": 0.5896, "step": 22855 }, { "epoch": 1.6986993682645857, "grad_norm": 1.921294680760966, "learning_rate": 8.348306132320523e-06, "loss": 0.5081, "step": 22856 }, { "epoch": 1.698773690078038, "grad_norm": 2.2361144943956157, "learning_rate": 8.347514789741898e-06, "loss": 0.6054, "step": 22857 }, { "epoch": 1.6988480118914902, "grad_norm": 2.290831134327131, "learning_rate": 8.346723457801888e-06, "loss": 0.7807, "step": 22858 }, { "epoch": 1.6989223337049424, "grad_norm": 1.8574971521521122, "learning_rate": 8.345932136505589e-06, "loss": 0.6198, "step": 22859 }, { "epoch": 1.6989966555183946, "grad_norm": 2.103761632545474, "learning_rate": 8.34514082585809e-06, "loss": 0.6414, "step": 22860 }, { "epoch": 1.6990709773318469, "grad_norm": 1.957656402872275, "learning_rate": 8.344349525864493e-06, "loss": 0.4969, "step": 22861 }, { "epoch": 1.699145299145299, "grad_norm": 1.8823723325896118, "learning_rate": 8.343558236529882e-06, "loss": 0.6137, "step": 22862 }, { "epoch": 1.6992196209587513, "grad_norm": 1.8623374739498484, "learning_rate": 8.34276695785936e-06, "loss": 0.5981, "step": 22863 }, { "epoch": 1.6992939427722038, "grad_norm": 1.5696425945375414, "learning_rate": 8.341975689858016e-06, "loss": 0.4895, "step": 22864 }, { "epoch": 1.6993682645856558, "grad_norm": 1.9595060842064203, "learning_rate": 8.34118443253095e-06, "loss": 0.6388, "step": 22865 }, { "epoch": 1.6994425863991083, "grad_norm": 1.898401884552249, "learning_rate": 8.34039318588325e-06, "loss": 0.5627, "step": 22866 }, { "epoch": 1.6995169082125603, "grad_norm": 2.0933865525288136, "learning_rate": 8.339601949920012e-06, "loss": 0.7109, "step": 22867 }, { "epoch": 1.6995912300260128, "grad_norm": 2.1899319024828756, "learning_rate": 8.338810724646332e-06, "loss": 0.5675, "step": 22868 }, { "epoch": 1.6996655518394648, "grad_norm": 1.8125669952742716, "learning_rate": 8.3380195100673e-06, "loss": 0.5396, "step": 22869 }, { "epoch": 1.6997398736529172, "grad_norm": 2.083127884289747, "learning_rate": 8.337228306188016e-06, "loss": 0.6777, "step": 22870 }, { "epoch": 1.6998141954663692, "grad_norm": 1.931334195471013, "learning_rate": 8.336437113013568e-06, "loss": 0.6525, "step": 22871 }, { "epoch": 1.6998885172798217, "grad_norm": 1.673242394920653, "learning_rate": 8.33564593054905e-06, "loss": 0.5248, "step": 22872 }, { "epoch": 1.6999628390932737, "grad_norm": 1.7415266633001485, "learning_rate": 8.334854758799564e-06, "loss": 0.5042, "step": 22873 }, { "epoch": 1.7000371609067262, "grad_norm": 1.6471437396734405, "learning_rate": 8.33406359777019e-06, "loss": 0.4439, "step": 22874 }, { "epoch": 1.7001114827201784, "grad_norm": 1.917497040254874, "learning_rate": 8.333272447466029e-06, "loss": 0.6181, "step": 22875 }, { "epoch": 1.7001858045336307, "grad_norm": 2.304781805214098, "learning_rate": 8.332481307892177e-06, "loss": 0.6545, "step": 22876 }, { "epoch": 1.700260126347083, "grad_norm": 1.9539157735071881, "learning_rate": 8.33169017905372e-06, "loss": 0.5206, "step": 22877 }, { "epoch": 1.7003344481605351, "grad_norm": 2.281072405289273, "learning_rate": 8.330899060955754e-06, "loss": 0.7135, "step": 22878 }, { "epoch": 1.7004087699739874, "grad_norm": 1.6388322169244294, "learning_rate": 8.330107953603376e-06, "loss": 0.486, "step": 22879 }, { "epoch": 1.7004830917874396, "grad_norm": 1.7321151950725984, "learning_rate": 8.329316857001675e-06, "loss": 0.5142, "step": 22880 }, { "epoch": 1.7005574136008919, "grad_norm": 1.9573231179607247, "learning_rate": 8.328525771155746e-06, "loss": 0.6432, "step": 22881 }, { "epoch": 1.700631735414344, "grad_norm": 2.1082317421039938, "learning_rate": 8.327734696070681e-06, "loss": 0.5122, "step": 22882 }, { "epoch": 1.7007060572277963, "grad_norm": 2.205724638758135, "learning_rate": 8.326943631751574e-06, "loss": 0.6505, "step": 22883 }, { "epoch": 1.7007803790412486, "grad_norm": 2.276425458153726, "learning_rate": 8.32615257820352e-06, "loss": 0.6098, "step": 22884 }, { "epoch": 1.7008547008547008, "grad_norm": 2.0376332925694824, "learning_rate": 8.325361535431605e-06, "loss": 0.6166, "step": 22885 }, { "epoch": 1.700929022668153, "grad_norm": 1.902985275087821, "learning_rate": 8.324570503440925e-06, "loss": 0.6717, "step": 22886 }, { "epoch": 1.7010033444816055, "grad_norm": 2.0477345215446556, "learning_rate": 8.323779482236576e-06, "loss": 0.5941, "step": 22887 }, { "epoch": 1.7010776662950575, "grad_norm": 2.1753529629292983, "learning_rate": 8.322988471823645e-06, "loss": 0.5111, "step": 22888 }, { "epoch": 1.70115198810851, "grad_norm": 2.6872742175799744, "learning_rate": 8.322197472207227e-06, "loss": 0.6014, "step": 22889 }, { "epoch": 1.701226309921962, "grad_norm": 1.8290275488001284, "learning_rate": 8.321406483392416e-06, "loss": 0.5728, "step": 22890 }, { "epoch": 1.7013006317354145, "grad_norm": 2.320135877712562, "learning_rate": 8.320615505384302e-06, "loss": 0.7018, "step": 22891 }, { "epoch": 1.7013749535488665, "grad_norm": 1.8608231933547672, "learning_rate": 8.31982453818798e-06, "loss": 0.653, "step": 22892 }, { "epoch": 1.701449275362319, "grad_norm": 2.134227129475508, "learning_rate": 8.31903358180854e-06, "loss": 0.6442, "step": 22893 }, { "epoch": 1.701523597175771, "grad_norm": 1.716634394154509, "learning_rate": 8.318242636251073e-06, "loss": 0.5182, "step": 22894 }, { "epoch": 1.7015979189892234, "grad_norm": 5.659520429805862, "learning_rate": 8.317451701520677e-06, "loss": 0.665, "step": 22895 }, { "epoch": 1.7016722408026754, "grad_norm": 2.2171795796401614, "learning_rate": 8.316660777622436e-06, "loss": 0.4819, "step": 22896 }, { "epoch": 1.701746562616128, "grad_norm": 1.864477518854521, "learning_rate": 8.315869864561445e-06, "loss": 0.517, "step": 22897 }, { "epoch": 1.7018208844295801, "grad_norm": 1.8304160393622506, "learning_rate": 8.3150789623428e-06, "loss": 0.6452, "step": 22898 }, { "epoch": 1.7018952062430324, "grad_norm": 1.6683717316260476, "learning_rate": 8.314288070971584e-06, "loss": 0.5779, "step": 22899 }, { "epoch": 1.7019695280564846, "grad_norm": 1.753638231666612, "learning_rate": 8.313497190452896e-06, "loss": 0.6802, "step": 22900 }, { "epoch": 1.7020438498699368, "grad_norm": 1.8592561477862335, "learning_rate": 8.312706320791825e-06, "loss": 0.515, "step": 22901 }, { "epoch": 1.702118171683389, "grad_norm": 1.8331613396458235, "learning_rate": 8.311915461993466e-06, "loss": 0.6285, "step": 22902 }, { "epoch": 1.7021924934968413, "grad_norm": 2.105322211230368, "learning_rate": 8.311124614062905e-06, "loss": 0.7279, "step": 22903 }, { "epoch": 1.7022668153102936, "grad_norm": 2.2440910730590917, "learning_rate": 8.310333777005238e-06, "loss": 0.5435, "step": 22904 }, { "epoch": 1.7023411371237458, "grad_norm": 1.7335012940502952, "learning_rate": 8.309542950825551e-06, "loss": 0.5188, "step": 22905 }, { "epoch": 1.702415458937198, "grad_norm": 1.68764886638614, "learning_rate": 8.308752135528942e-06, "loss": 0.5612, "step": 22906 }, { "epoch": 1.7024897807506503, "grad_norm": 2.5800338965814587, "learning_rate": 8.307961331120503e-06, "loss": 0.6206, "step": 22907 }, { "epoch": 1.7025641025641025, "grad_norm": 1.691858546272397, "learning_rate": 8.307170537605313e-06, "loss": 0.4585, "step": 22908 }, { "epoch": 1.7026384243775547, "grad_norm": 1.742519180925127, "learning_rate": 8.306379754988477e-06, "loss": 0.4874, "step": 22909 }, { "epoch": 1.7027127461910072, "grad_norm": 2.011857873566468, "learning_rate": 8.305588983275077e-06, "loss": 0.54, "step": 22910 }, { "epoch": 1.7027870680044592, "grad_norm": 1.8239576155907677, "learning_rate": 8.304798222470207e-06, "loss": 0.5294, "step": 22911 }, { "epoch": 1.7028613898179117, "grad_norm": 1.9657832306447534, "learning_rate": 8.30400747257896e-06, "loss": 0.5466, "step": 22912 }, { "epoch": 1.7029357116313637, "grad_norm": 2.060789906934762, "learning_rate": 8.303216733606422e-06, "loss": 0.662, "step": 22913 }, { "epoch": 1.7030100334448162, "grad_norm": 2.1778907535694674, "learning_rate": 8.302426005557691e-06, "loss": 0.5608, "step": 22914 }, { "epoch": 1.7030843552582682, "grad_norm": 2.164677509456026, "learning_rate": 8.30163528843785e-06, "loss": 0.7248, "step": 22915 }, { "epoch": 1.7031586770717206, "grad_norm": 2.2669733502114195, "learning_rate": 8.300844582251994e-06, "loss": 0.6325, "step": 22916 }, { "epoch": 1.7032329988851727, "grad_norm": 2.1416647564408455, "learning_rate": 8.300053887005211e-06, "loss": 0.7085, "step": 22917 }, { "epoch": 1.7033073206986251, "grad_norm": 1.9399025001277153, "learning_rate": 8.299263202702598e-06, "loss": 0.6774, "step": 22918 }, { "epoch": 1.7033816425120771, "grad_norm": 2.1154852155720887, "learning_rate": 8.298472529349236e-06, "loss": 0.5658, "step": 22919 }, { "epoch": 1.7034559643255296, "grad_norm": 2.085538907458583, "learning_rate": 8.29768186695022e-06, "loss": 0.5807, "step": 22920 }, { "epoch": 1.7035302861389818, "grad_norm": 1.9400802517800526, "learning_rate": 8.29689121551064e-06, "loss": 0.603, "step": 22921 }, { "epoch": 1.703604607952434, "grad_norm": 2.2805851499115954, "learning_rate": 8.296100575035586e-06, "loss": 0.4773, "step": 22922 }, { "epoch": 1.7036789297658863, "grad_norm": 2.217731062296856, "learning_rate": 8.295309945530146e-06, "loss": 0.6974, "step": 22923 }, { "epoch": 1.7037532515793385, "grad_norm": 2.0489136942497024, "learning_rate": 8.294519326999414e-06, "loss": 0.6327, "step": 22924 }, { "epoch": 1.7038275733927908, "grad_norm": 2.1296986097844317, "learning_rate": 8.293728719448479e-06, "loss": 0.7275, "step": 22925 }, { "epoch": 1.703901895206243, "grad_norm": 1.9862745890796047, "learning_rate": 8.29293812288243e-06, "loss": 0.6441, "step": 22926 }, { "epoch": 1.7039762170196953, "grad_norm": 2.151719789549187, "learning_rate": 8.292147537306355e-06, "loss": 0.7516, "step": 22927 }, { "epoch": 1.7040505388331475, "grad_norm": 1.979114522822484, "learning_rate": 8.291356962725345e-06, "loss": 0.624, "step": 22928 }, { "epoch": 1.7041248606465997, "grad_norm": 1.4076473095329176, "learning_rate": 8.290566399144495e-06, "loss": 0.4401, "step": 22929 }, { "epoch": 1.704199182460052, "grad_norm": 1.7934454716978565, "learning_rate": 8.289775846568886e-06, "loss": 0.4657, "step": 22930 }, { "epoch": 1.7042735042735044, "grad_norm": 1.8917890066349317, "learning_rate": 8.288985305003614e-06, "loss": 0.5333, "step": 22931 }, { "epoch": 1.7043478260869565, "grad_norm": 2.224840284728274, "learning_rate": 8.288194774453762e-06, "loss": 0.7195, "step": 22932 }, { "epoch": 1.704422147900409, "grad_norm": 2.2173412434048445, "learning_rate": 8.287404254924426e-06, "loss": 0.6577, "step": 22933 }, { "epoch": 1.704496469713861, "grad_norm": 1.9861349838912408, "learning_rate": 8.28661374642069e-06, "loss": 0.6824, "step": 22934 }, { "epoch": 1.7045707915273134, "grad_norm": 2.1770404047112097, "learning_rate": 8.28582324894765e-06, "loss": 0.6643, "step": 22935 }, { "epoch": 1.7046451133407654, "grad_norm": 2.2059516410105067, "learning_rate": 8.285032762510386e-06, "loss": 0.5962, "step": 22936 }, { "epoch": 1.7047194351542179, "grad_norm": 1.8257391029483951, "learning_rate": 8.284242287113992e-06, "loss": 0.5734, "step": 22937 }, { "epoch": 1.7047937569676699, "grad_norm": 2.457019138230929, "learning_rate": 8.28345182276356e-06, "loss": 0.596, "step": 22938 }, { "epoch": 1.7048680787811223, "grad_norm": 2.2297291620079758, "learning_rate": 8.282661369464175e-06, "loss": 0.5771, "step": 22939 }, { "epoch": 1.7049424005945744, "grad_norm": 1.9360874657825662, "learning_rate": 8.281870927220929e-06, "loss": 0.4949, "step": 22940 }, { "epoch": 1.7050167224080268, "grad_norm": 1.679687707956909, "learning_rate": 8.281080496038909e-06, "loss": 0.5553, "step": 22941 }, { "epoch": 1.705091044221479, "grad_norm": 1.7964784407232255, "learning_rate": 8.280290075923202e-06, "loss": 0.6142, "step": 22942 }, { "epoch": 1.7051653660349313, "grad_norm": 2.32985708253622, "learning_rate": 8.279499666878899e-06, "loss": 0.7215, "step": 22943 }, { "epoch": 1.7052396878483835, "grad_norm": 2.1422471293580223, "learning_rate": 8.278709268911086e-06, "loss": 0.6261, "step": 22944 }, { "epoch": 1.7053140096618358, "grad_norm": 1.8600377723466874, "learning_rate": 8.277918882024854e-06, "loss": 0.6358, "step": 22945 }, { "epoch": 1.705388331475288, "grad_norm": 2.1903831140461727, "learning_rate": 8.277128506225293e-06, "loss": 0.59, "step": 22946 }, { "epoch": 1.7054626532887402, "grad_norm": 1.76512193368028, "learning_rate": 8.276338141517486e-06, "loss": 0.6292, "step": 22947 }, { "epoch": 1.7055369751021925, "grad_norm": 2.1218802104958825, "learning_rate": 8.275547787906526e-06, "loss": 0.5824, "step": 22948 }, { "epoch": 1.7056112969156447, "grad_norm": 2.200189340955449, "learning_rate": 8.2747574453975e-06, "loss": 0.5843, "step": 22949 }, { "epoch": 1.705685618729097, "grad_norm": 1.7785692540625448, "learning_rate": 8.273967113995495e-06, "loss": 0.5981, "step": 22950 }, { "epoch": 1.7057599405425492, "grad_norm": 2.9390436501609316, "learning_rate": 8.273176793705603e-06, "loss": 0.6159, "step": 22951 }, { "epoch": 1.7058342623560014, "grad_norm": 1.930662037023598, "learning_rate": 8.272386484532908e-06, "loss": 0.4441, "step": 22952 }, { "epoch": 1.7059085841694537, "grad_norm": 2.075575826537619, "learning_rate": 8.2715961864825e-06, "loss": 0.6731, "step": 22953 }, { "epoch": 1.7059829059829061, "grad_norm": 1.4626301047071295, "learning_rate": 8.270805899559465e-06, "loss": 0.3907, "step": 22954 }, { "epoch": 1.7060572277963582, "grad_norm": 2.2421952603035105, "learning_rate": 8.27001562376889e-06, "loss": 0.6993, "step": 22955 }, { "epoch": 1.7061315496098106, "grad_norm": 1.5405920031039115, "learning_rate": 8.269225359115866e-06, "loss": 0.5628, "step": 22956 }, { "epoch": 1.7062058714232626, "grad_norm": 1.6259723320258481, "learning_rate": 8.268435105605482e-06, "loss": 0.628, "step": 22957 }, { "epoch": 1.706280193236715, "grad_norm": 2.284868390870541, "learning_rate": 8.267644863242819e-06, "loss": 0.6089, "step": 22958 }, { "epoch": 1.706354515050167, "grad_norm": 3.3366250470909313, "learning_rate": 8.266854632032971e-06, "loss": 0.823, "step": 22959 }, { "epoch": 1.7064288368636196, "grad_norm": 1.7734189233465762, "learning_rate": 8.26606441198102e-06, "loss": 0.5826, "step": 22960 }, { "epoch": 1.7065031586770716, "grad_norm": 1.6523927540698629, "learning_rate": 8.265274203092059e-06, "loss": 0.5252, "step": 22961 }, { "epoch": 1.706577480490524, "grad_norm": 1.2705023398541322, "learning_rate": 8.264484005371173e-06, "loss": 0.3917, "step": 22962 }, { "epoch": 1.706651802303976, "grad_norm": 1.7476264902746368, "learning_rate": 8.26369381882345e-06, "loss": 0.5794, "step": 22963 }, { "epoch": 1.7067261241174285, "grad_norm": 2.0331519772386195, "learning_rate": 8.262903643453974e-06, "loss": 0.6652, "step": 22964 }, { "epoch": 1.7068004459308808, "grad_norm": 2.268708885563939, "learning_rate": 8.262113479267836e-06, "loss": 0.5465, "step": 22965 }, { "epoch": 1.706874767744333, "grad_norm": 2.260379384822149, "learning_rate": 8.261323326270118e-06, "loss": 0.6268, "step": 22966 }, { "epoch": 1.7069490895577852, "grad_norm": 1.6967301972888524, "learning_rate": 8.26053318446591e-06, "loss": 0.6189, "step": 22967 }, { "epoch": 1.7070234113712375, "grad_norm": 2.3746971675969095, "learning_rate": 8.259743053860304e-06, "loss": 0.5012, "step": 22968 }, { "epoch": 1.7070977331846897, "grad_norm": 1.4986488584548487, "learning_rate": 8.258952934458378e-06, "loss": 0.4044, "step": 22969 }, { "epoch": 1.707172054998142, "grad_norm": 2.030088225986043, "learning_rate": 8.258162826265222e-06, "loss": 0.559, "step": 22970 }, { "epoch": 1.7072463768115942, "grad_norm": 6.815699443082809, "learning_rate": 8.257372729285925e-06, "loss": 0.6738, "step": 22971 }, { "epoch": 1.7073206986250464, "grad_norm": 8.150381702669197, "learning_rate": 8.256582643525571e-06, "loss": 0.6527, "step": 22972 }, { "epoch": 1.7073950204384987, "grad_norm": 1.7289139103976612, "learning_rate": 8.255792568989249e-06, "loss": 0.4528, "step": 22973 }, { "epoch": 1.707469342251951, "grad_norm": 1.9411410992335953, "learning_rate": 8.255002505682044e-06, "loss": 0.5032, "step": 22974 }, { "epoch": 1.7075436640654031, "grad_norm": 1.7048021775230306, "learning_rate": 8.254212453609043e-06, "loss": 0.4571, "step": 22975 }, { "epoch": 1.7076179858788554, "grad_norm": 1.8478747230269301, "learning_rate": 8.25342241277533e-06, "loss": 0.5879, "step": 22976 }, { "epoch": 1.7076923076923078, "grad_norm": 2.7734773423754873, "learning_rate": 8.252632383185993e-06, "loss": 0.5126, "step": 22977 }, { "epoch": 1.7077666295057599, "grad_norm": 2.1873486339840245, "learning_rate": 8.251842364846116e-06, "loss": 0.566, "step": 22978 }, { "epoch": 1.7078409513192123, "grad_norm": 2.174048218834085, "learning_rate": 8.251052357760791e-06, "loss": 0.5471, "step": 22979 }, { "epoch": 1.7079152731326643, "grad_norm": 1.815748028116497, "learning_rate": 8.250262361935097e-06, "loss": 0.5813, "step": 22980 }, { "epoch": 1.7079895949461168, "grad_norm": 2.1543577149900024, "learning_rate": 8.249472377374125e-06, "loss": 0.7146, "step": 22981 }, { "epoch": 1.7080639167595688, "grad_norm": 2.165751316316748, "learning_rate": 8.248682404082958e-06, "loss": 0.5839, "step": 22982 }, { "epoch": 1.7081382385730213, "grad_norm": 2.977709631268195, "learning_rate": 8.247892442066682e-06, "loss": 0.5826, "step": 22983 }, { "epoch": 1.7082125603864733, "grad_norm": 1.9965482939815473, "learning_rate": 8.247102491330387e-06, "loss": 0.5928, "step": 22984 }, { "epoch": 1.7082868821999257, "grad_norm": 2.1453797832469212, "learning_rate": 8.246312551879152e-06, "loss": 0.6585, "step": 22985 }, { "epoch": 1.7083612040133778, "grad_norm": 2.2887693390496175, "learning_rate": 8.24552262371807e-06, "loss": 0.7008, "step": 22986 }, { "epoch": 1.7084355258268302, "grad_norm": 1.6987928361314277, "learning_rate": 8.244732706852218e-06, "loss": 0.5581, "step": 22987 }, { "epoch": 1.7085098476402825, "grad_norm": 1.9816837715560154, "learning_rate": 8.243942801286687e-06, "loss": 0.5394, "step": 22988 }, { "epoch": 1.7085841694537347, "grad_norm": 2.2595007609591833, "learning_rate": 8.243152907026561e-06, "loss": 0.4873, "step": 22989 }, { "epoch": 1.708658491267187, "grad_norm": 2.144278683205682, "learning_rate": 8.242363024076926e-06, "loss": 0.6616, "step": 22990 }, { "epoch": 1.7087328130806392, "grad_norm": 1.9245442354682338, "learning_rate": 8.241573152442866e-06, "loss": 0.6633, "step": 22991 }, { "epoch": 1.7088071348940914, "grad_norm": 2.2540717933604424, "learning_rate": 8.240783292129465e-06, "loss": 0.6677, "step": 22992 }, { "epoch": 1.7088814567075437, "grad_norm": 1.7384344102072293, "learning_rate": 8.239993443141812e-06, "loss": 0.4993, "step": 22993 }, { "epoch": 1.708955778520996, "grad_norm": 2.1493601186900606, "learning_rate": 8.23920360548499e-06, "loss": 0.7231, "step": 22994 }, { "epoch": 1.7090301003344481, "grad_norm": 1.8729911966192652, "learning_rate": 8.238413779164085e-06, "loss": 0.6286, "step": 22995 }, { "epoch": 1.7091044221479004, "grad_norm": 1.9841612626872265, "learning_rate": 8.237623964184179e-06, "loss": 0.6125, "step": 22996 }, { "epoch": 1.7091787439613526, "grad_norm": 2.1687011852539295, "learning_rate": 8.236834160550362e-06, "loss": 0.7825, "step": 22997 }, { "epoch": 1.709253065774805, "grad_norm": 1.9057145075548743, "learning_rate": 8.236044368267712e-06, "loss": 0.5889, "step": 22998 }, { "epoch": 1.709327387588257, "grad_norm": 2.5914675226447232, "learning_rate": 8.235254587341316e-06, "loss": 0.734, "step": 22999 }, { "epoch": 1.7094017094017095, "grad_norm": 1.7746307546411404, "learning_rate": 8.234464817776259e-06, "loss": 0.4143, "step": 23000 }, { "epoch": 1.7094760312151616, "grad_norm": 1.865932411788155, "learning_rate": 8.233675059577628e-06, "loss": 0.5773, "step": 23001 }, { "epoch": 1.709550353028614, "grad_norm": 1.6514682778011804, "learning_rate": 8.232885312750503e-06, "loss": 0.5433, "step": 23002 }, { "epoch": 1.709624674842066, "grad_norm": 1.9755095400389246, "learning_rate": 8.232095577299972e-06, "loss": 0.642, "step": 23003 }, { "epoch": 1.7096989966555185, "grad_norm": 3.431423641173621, "learning_rate": 8.231305853231118e-06, "loss": 0.6969, "step": 23004 }, { "epoch": 1.7097733184689705, "grad_norm": 2.536738729424588, "learning_rate": 8.230516140549025e-06, "loss": 0.7408, "step": 23005 }, { "epoch": 1.709847640282423, "grad_norm": 2.307438031074054, "learning_rate": 8.229726439258779e-06, "loss": 0.625, "step": 23006 }, { "epoch": 1.709921962095875, "grad_norm": 2.19650806863277, "learning_rate": 8.22893674936546e-06, "loss": 0.6499, "step": 23007 }, { "epoch": 1.7099962839093275, "grad_norm": 1.6311147908927315, "learning_rate": 8.228147070874157e-06, "loss": 0.4651, "step": 23008 }, { "epoch": 1.7100706057227797, "grad_norm": 2.163777229994844, "learning_rate": 8.22735740378995e-06, "loss": 0.6045, "step": 23009 }, { "epoch": 1.710144927536232, "grad_norm": 1.5313540605761038, "learning_rate": 8.226567748117922e-06, "loss": 0.4342, "step": 23010 }, { "epoch": 1.7102192493496842, "grad_norm": 2.212528835563911, "learning_rate": 8.22577810386316e-06, "loss": 0.6035, "step": 23011 }, { "epoch": 1.7102935711631364, "grad_norm": 1.9866695085262835, "learning_rate": 8.224988471030748e-06, "loss": 0.7108, "step": 23012 }, { "epoch": 1.7103678929765886, "grad_norm": 2.3335554369657503, "learning_rate": 8.224198849625767e-06, "loss": 0.5833, "step": 23013 }, { "epoch": 1.7104422147900409, "grad_norm": 2.301467479788116, "learning_rate": 8.2234092396533e-06, "loss": 0.6822, "step": 23014 }, { "epoch": 1.7105165366034931, "grad_norm": 2.3896894874548864, "learning_rate": 8.222619641118432e-06, "loss": 0.5842, "step": 23015 }, { "epoch": 1.7105908584169454, "grad_norm": 1.748628005887626, "learning_rate": 8.221830054026247e-06, "loss": 0.482, "step": 23016 }, { "epoch": 1.7106651802303976, "grad_norm": 1.9176828179344707, "learning_rate": 8.221040478381833e-06, "loss": 0.6086, "step": 23017 }, { "epoch": 1.7107395020438498, "grad_norm": 1.8200981161848504, "learning_rate": 8.220250914190263e-06, "loss": 0.5763, "step": 23018 }, { "epoch": 1.710813823857302, "grad_norm": 1.8759224328705224, "learning_rate": 8.219461361456624e-06, "loss": 0.4752, "step": 23019 }, { "epoch": 1.7108881456707543, "grad_norm": 2.3471873537544377, "learning_rate": 8.218671820186006e-06, "loss": 0.6098, "step": 23020 }, { "epoch": 1.7109624674842068, "grad_norm": 1.8503069373738938, "learning_rate": 8.217882290383483e-06, "loss": 0.5845, "step": 23021 }, { "epoch": 1.7110367892976588, "grad_norm": 2.0218044831392, "learning_rate": 8.21709277205414e-06, "loss": 0.5027, "step": 23022 }, { "epoch": 1.7111111111111112, "grad_norm": 1.8339899198084753, "learning_rate": 8.216303265203064e-06, "loss": 0.4668, "step": 23023 }, { "epoch": 1.7111854329245633, "grad_norm": 2.5079376015904202, "learning_rate": 8.215513769835335e-06, "loss": 0.5055, "step": 23024 }, { "epoch": 1.7112597547380157, "grad_norm": 1.787453971085068, "learning_rate": 8.214724285956033e-06, "loss": 0.5435, "step": 23025 }, { "epoch": 1.7113340765514677, "grad_norm": 2.8152856711078664, "learning_rate": 8.213934813570244e-06, "loss": 0.6754, "step": 23026 }, { "epoch": 1.7114083983649202, "grad_norm": 2.4598488452149767, "learning_rate": 8.213145352683051e-06, "loss": 0.5918, "step": 23027 }, { "epoch": 1.7114827201783722, "grad_norm": 1.67250653985186, "learning_rate": 8.212355903299536e-06, "loss": 0.5287, "step": 23028 }, { "epoch": 1.7115570419918247, "grad_norm": 1.994578847288848, "learning_rate": 8.21156646542478e-06, "loss": 0.5976, "step": 23029 }, { "epoch": 1.7116313638052767, "grad_norm": 2.0672120236238722, "learning_rate": 8.210777039063867e-06, "loss": 0.6463, "step": 23030 }, { "epoch": 1.7117056856187292, "grad_norm": 1.7431678934861021, "learning_rate": 8.209987624221881e-06, "loss": 0.5851, "step": 23031 }, { "epoch": 1.7117800074321814, "grad_norm": 1.990624130710982, "learning_rate": 8.209198220903898e-06, "loss": 0.6485, "step": 23032 }, { "epoch": 1.7118543292456336, "grad_norm": 2.4914652474660612, "learning_rate": 8.208408829115005e-06, "loss": 0.508, "step": 23033 }, { "epoch": 1.7119286510590859, "grad_norm": 1.9078754531732827, "learning_rate": 8.207619448860285e-06, "loss": 0.5192, "step": 23034 }, { "epoch": 1.712002972872538, "grad_norm": 1.9771733845407125, "learning_rate": 8.206830080144814e-06, "loss": 0.5343, "step": 23035 }, { "epoch": 1.7120772946859903, "grad_norm": 2.070241431093147, "learning_rate": 8.20604072297368e-06, "loss": 0.5021, "step": 23036 }, { "epoch": 1.7121516164994426, "grad_norm": 2.0449015476798262, "learning_rate": 8.205251377351962e-06, "loss": 0.4723, "step": 23037 }, { "epoch": 1.7122259383128948, "grad_norm": 1.88939545319953, "learning_rate": 8.204462043284742e-06, "loss": 0.7129, "step": 23038 }, { "epoch": 1.712300260126347, "grad_norm": 2.1489945593908035, "learning_rate": 8.203672720777105e-06, "loss": 0.4916, "step": 23039 }, { "epoch": 1.7123745819397993, "grad_norm": 2.162793358121901, "learning_rate": 8.202883409834128e-06, "loss": 0.6166, "step": 23040 }, { "epoch": 1.7124489037532515, "grad_norm": 2.3311215598594277, "learning_rate": 8.202094110460892e-06, "loss": 0.4442, "step": 23041 }, { "epoch": 1.7125232255667038, "grad_norm": 2.547040388172092, "learning_rate": 8.201304822662488e-06, "loss": 0.7592, "step": 23042 }, { "epoch": 1.712597547380156, "grad_norm": 1.7886203652861514, "learning_rate": 8.200515546443985e-06, "loss": 0.5687, "step": 23043 }, { "epoch": 1.7126718691936085, "grad_norm": 1.987235401657587, "learning_rate": 8.199726281810469e-06, "loss": 0.4868, "step": 23044 }, { "epoch": 1.7127461910070605, "grad_norm": 2.0530780623534817, "learning_rate": 8.198937028767024e-06, "loss": 0.5928, "step": 23045 }, { "epoch": 1.712820512820513, "grad_norm": 1.8814030857670359, "learning_rate": 8.198147787318728e-06, "loss": 0.613, "step": 23046 }, { "epoch": 1.712894834633965, "grad_norm": 1.706194551619607, "learning_rate": 8.197358557470662e-06, "loss": 0.5576, "step": 23047 }, { "epoch": 1.7129691564474174, "grad_norm": 1.70463375959342, "learning_rate": 8.196569339227907e-06, "loss": 0.6048, "step": 23048 }, { "epoch": 1.7130434782608694, "grad_norm": 1.727461834061354, "learning_rate": 8.195780132595546e-06, "loss": 0.5021, "step": 23049 }, { "epoch": 1.713117800074322, "grad_norm": 1.924370192808339, "learning_rate": 8.194990937578662e-06, "loss": 0.5932, "step": 23050 }, { "epoch": 1.713192121887774, "grad_norm": 2.253697031534476, "learning_rate": 8.19420175418233e-06, "loss": 0.7173, "step": 23051 }, { "epoch": 1.7132664437012264, "grad_norm": 2.587419567617708, "learning_rate": 8.193412582411633e-06, "loss": 0.5374, "step": 23052 }, { "epoch": 1.7133407655146784, "grad_norm": 2.125459094470412, "learning_rate": 8.192623422271657e-06, "loss": 0.7067, "step": 23053 }, { "epoch": 1.7134150873281309, "grad_norm": 2.001345533741004, "learning_rate": 8.191834273767472e-06, "loss": 0.7417, "step": 23054 }, { "epoch": 1.713489409141583, "grad_norm": 2.520598578730984, "learning_rate": 8.191045136904165e-06, "loss": 0.5523, "step": 23055 }, { "epoch": 1.7135637309550353, "grad_norm": 2.28434463944984, "learning_rate": 8.190256011686818e-06, "loss": 0.683, "step": 23056 }, { "epoch": 1.7136380527684876, "grad_norm": 1.9245201971242727, "learning_rate": 8.189466898120507e-06, "loss": 0.6181, "step": 23057 }, { "epoch": 1.7137123745819398, "grad_norm": 1.8940028264506295, "learning_rate": 8.188677796210314e-06, "loss": 0.6301, "step": 23058 }, { "epoch": 1.713786696395392, "grad_norm": 2.244330382094179, "learning_rate": 8.187888705961318e-06, "loss": 0.5855, "step": 23059 }, { "epoch": 1.7138610182088443, "grad_norm": 1.8438967382045206, "learning_rate": 8.187099627378605e-06, "loss": 0.532, "step": 23060 }, { "epoch": 1.7139353400222965, "grad_norm": 1.7045108944071676, "learning_rate": 8.186310560467246e-06, "loss": 0.4999, "step": 23061 }, { "epoch": 1.7140096618357488, "grad_norm": 1.588496981474891, "learning_rate": 8.185521505232328e-06, "loss": 0.5907, "step": 23062 }, { "epoch": 1.714083983649201, "grad_norm": 1.6027567021650921, "learning_rate": 8.184732461678925e-06, "loss": 0.3709, "step": 23063 }, { "epoch": 1.7141583054626532, "grad_norm": 1.6363134191032669, "learning_rate": 8.183943429812123e-06, "loss": 0.6058, "step": 23064 }, { "epoch": 1.7142326272761057, "grad_norm": 2.2577458211999337, "learning_rate": 8.183154409637003e-06, "loss": 0.6347, "step": 23065 }, { "epoch": 1.7143069490895577, "grad_norm": 1.9890020230858285, "learning_rate": 8.182365401158637e-06, "loss": 0.6009, "step": 23066 }, { "epoch": 1.7143812709030102, "grad_norm": 2.2476581459334497, "learning_rate": 8.181576404382109e-06, "loss": 0.6848, "step": 23067 }, { "epoch": 1.7144555927164622, "grad_norm": 1.7761742451685882, "learning_rate": 8.180787419312494e-06, "loss": 0.518, "step": 23068 }, { "epoch": 1.7145299145299147, "grad_norm": 1.9439194534758995, "learning_rate": 8.179998445954877e-06, "loss": 0.6476, "step": 23069 }, { "epoch": 1.7146042363433667, "grad_norm": 2.138901906078124, "learning_rate": 8.179209484314336e-06, "loss": 0.6433, "step": 23070 }, { "epoch": 1.7146785581568191, "grad_norm": 1.958065018265544, "learning_rate": 8.178420534395952e-06, "loss": 0.6706, "step": 23071 }, { "epoch": 1.7147528799702711, "grad_norm": 2.0079928878843045, "learning_rate": 8.1776315962048e-06, "loss": 0.634, "step": 23072 }, { "epoch": 1.7148272017837236, "grad_norm": 1.9114104714506859, "learning_rate": 8.17684266974596e-06, "loss": 0.5857, "step": 23073 }, { "epoch": 1.7149015235971756, "grad_norm": 1.7611195998078442, "learning_rate": 8.176053755024513e-06, "loss": 0.5762, "step": 23074 }, { "epoch": 1.714975845410628, "grad_norm": 2.099046237877684, "learning_rate": 8.175264852045538e-06, "loss": 0.5697, "step": 23075 }, { "epoch": 1.71505016722408, "grad_norm": 2.124448826510807, "learning_rate": 8.174475960814117e-06, "loss": 0.7303, "step": 23076 }, { "epoch": 1.7151244890375326, "grad_norm": 1.9211918911223052, "learning_rate": 8.173687081335321e-06, "loss": 0.5472, "step": 23077 }, { "epoch": 1.7151988108509848, "grad_norm": 2.2072827765840155, "learning_rate": 8.172898213614234e-06, "loss": 0.6037, "step": 23078 }, { "epoch": 1.715273132664437, "grad_norm": 2.0324880718417493, "learning_rate": 8.172109357655932e-06, "loss": 0.5867, "step": 23079 }, { "epoch": 1.7153474544778893, "grad_norm": 2.093190070776876, "learning_rate": 8.171320513465495e-06, "loss": 0.7268, "step": 23080 }, { "epoch": 1.7154217762913415, "grad_norm": 1.5847137751414524, "learning_rate": 8.170531681048002e-06, "loss": 0.5242, "step": 23081 }, { "epoch": 1.7154960981047938, "grad_norm": 1.9604376675474262, "learning_rate": 8.169742860408533e-06, "loss": 0.6344, "step": 23082 }, { "epoch": 1.715570419918246, "grad_norm": 2.137686898885328, "learning_rate": 8.168954051552163e-06, "loss": 0.6868, "step": 23083 }, { "epoch": 1.7156447417316982, "grad_norm": 1.7020328943606808, "learning_rate": 8.168165254483971e-06, "loss": 0.5695, "step": 23084 }, { "epoch": 1.7157190635451505, "grad_norm": 1.9053631518394452, "learning_rate": 8.167376469209036e-06, "loss": 0.6632, "step": 23085 }, { "epoch": 1.7157933853586027, "grad_norm": 2.025964632621324, "learning_rate": 8.166587695732436e-06, "loss": 0.5454, "step": 23086 }, { "epoch": 1.715867707172055, "grad_norm": 2.3177325493124297, "learning_rate": 8.165798934059254e-06, "loss": 0.6744, "step": 23087 }, { "epoch": 1.7159420289855074, "grad_norm": 1.888211417128738, "learning_rate": 8.165010184194558e-06, "loss": 0.4403, "step": 23088 }, { "epoch": 1.7160163507989594, "grad_norm": 2.0302660476394694, "learning_rate": 8.164221446143434e-06, "loss": 0.6082, "step": 23089 }, { "epoch": 1.7160906726124119, "grad_norm": 1.9720760862998303, "learning_rate": 8.163432719910955e-06, "loss": 0.666, "step": 23090 }, { "epoch": 1.716164994425864, "grad_norm": 1.6383011384134938, "learning_rate": 8.162644005502201e-06, "loss": 0.4721, "step": 23091 }, { "epoch": 1.7162393162393164, "grad_norm": 2.6307435133092203, "learning_rate": 8.16185530292225e-06, "loss": 0.4381, "step": 23092 }, { "epoch": 1.7163136380527684, "grad_norm": 1.9842026700385609, "learning_rate": 8.161066612176182e-06, "loss": 0.8264, "step": 23093 }, { "epoch": 1.7163879598662208, "grad_norm": 1.7592506581140792, "learning_rate": 8.16027793326907e-06, "loss": 0.6486, "step": 23094 }, { "epoch": 1.7164622816796729, "grad_norm": 2.054526189970741, "learning_rate": 8.15948926620599e-06, "loss": 0.7179, "step": 23095 }, { "epoch": 1.7165366034931253, "grad_norm": 2.2000485263705034, "learning_rate": 8.158700610992024e-06, "loss": 0.5724, "step": 23096 }, { "epoch": 1.7166109253065773, "grad_norm": 1.9132881621169362, "learning_rate": 8.157911967632248e-06, "loss": 0.5652, "step": 23097 }, { "epoch": 1.7166852471200298, "grad_norm": 2.380741482768613, "learning_rate": 8.157123336131742e-06, "loss": 0.6191, "step": 23098 }, { "epoch": 1.716759568933482, "grad_norm": 1.5876787429645174, "learning_rate": 8.15633471649558e-06, "loss": 0.5223, "step": 23099 }, { "epoch": 1.7168338907469343, "grad_norm": 2.002777837755298, "learning_rate": 8.15554610872884e-06, "loss": 0.7056, "step": 23100 }, { "epoch": 1.7169082125603865, "grad_norm": 1.9380620636366466, "learning_rate": 8.154757512836595e-06, "loss": 0.5394, "step": 23101 }, { "epoch": 1.7169825343738387, "grad_norm": 1.8648998646625388, "learning_rate": 8.153968928823927e-06, "loss": 0.5816, "step": 23102 }, { "epoch": 1.717056856187291, "grad_norm": 1.9496796938850534, "learning_rate": 8.15318035669591e-06, "loss": 0.5451, "step": 23103 }, { "epoch": 1.7171311780007432, "grad_norm": 1.5147401210074558, "learning_rate": 8.152391796457623e-06, "loss": 0.3197, "step": 23104 }, { "epoch": 1.7172054998141955, "grad_norm": 2.3685828542362635, "learning_rate": 8.151603248114141e-06, "loss": 0.5962, "step": 23105 }, { "epoch": 1.7172798216276477, "grad_norm": 1.7088246569521068, "learning_rate": 8.150814711670543e-06, "loss": 0.4431, "step": 23106 }, { "epoch": 1.7173541434411, "grad_norm": 2.125843622566594, "learning_rate": 8.150026187131902e-06, "loss": 0.6762, "step": 23107 }, { "epoch": 1.7174284652545522, "grad_norm": 2.0234043600620457, "learning_rate": 8.149237674503296e-06, "loss": 0.6554, "step": 23108 }, { "epoch": 1.7175027870680044, "grad_norm": 1.8414346288450476, "learning_rate": 8.148449173789806e-06, "loss": 0.742, "step": 23109 }, { "epoch": 1.7175771088814566, "grad_norm": 1.8685785978828977, "learning_rate": 8.147660684996504e-06, "loss": 0.6638, "step": 23110 }, { "epoch": 1.717651430694909, "grad_norm": 1.7951929070688193, "learning_rate": 8.146872208128465e-06, "loss": 0.6035, "step": 23111 }, { "epoch": 1.7177257525083611, "grad_norm": 2.4106979631174057, "learning_rate": 8.146083743190765e-06, "loss": 0.7161, "step": 23112 }, { "epoch": 1.7178000743218136, "grad_norm": 1.7149987953771348, "learning_rate": 8.145295290188481e-06, "loss": 0.4956, "step": 23113 }, { "epoch": 1.7178743961352656, "grad_norm": 1.7587944996603482, "learning_rate": 8.14450684912669e-06, "loss": 0.5247, "step": 23114 }, { "epoch": 1.717948717948718, "grad_norm": 2.2700663424407597, "learning_rate": 8.14371842001047e-06, "loss": 0.5598, "step": 23115 }, { "epoch": 1.71802303976217, "grad_norm": 2.234661065351807, "learning_rate": 8.142930002844892e-06, "loss": 0.7817, "step": 23116 }, { "epoch": 1.7180973615756225, "grad_norm": 1.5280890354229675, "learning_rate": 8.142141597635035e-06, "loss": 0.4519, "step": 23117 }, { "epoch": 1.7181716833890746, "grad_norm": 2.127610171953036, "learning_rate": 8.141353204385973e-06, "loss": 0.5626, "step": 23118 }, { "epoch": 1.718246005202527, "grad_norm": 2.13809644328666, "learning_rate": 8.140564823102784e-06, "loss": 0.7154, "step": 23119 }, { "epoch": 1.718320327015979, "grad_norm": 1.9672045736733876, "learning_rate": 8.139776453790541e-06, "loss": 0.6245, "step": 23120 }, { "epoch": 1.7183946488294315, "grad_norm": 2.3829959816961135, "learning_rate": 8.138988096454325e-06, "loss": 0.6219, "step": 23121 }, { "epoch": 1.7184689706428837, "grad_norm": 1.6699792312820638, "learning_rate": 8.138199751099205e-06, "loss": 0.3574, "step": 23122 }, { "epoch": 1.718543292456336, "grad_norm": 2.251100488709544, "learning_rate": 8.137411417730257e-06, "loss": 0.5749, "step": 23123 }, { "epoch": 1.7186176142697882, "grad_norm": 2.16849973698828, "learning_rate": 8.136623096352556e-06, "loss": 0.6532, "step": 23124 }, { "epoch": 1.7186919360832404, "grad_norm": 1.731272297861733, "learning_rate": 8.13583478697118e-06, "loss": 0.4514, "step": 23125 }, { "epoch": 1.7187662578966927, "grad_norm": 2.049017655719143, "learning_rate": 8.135046489591207e-06, "loss": 0.6086, "step": 23126 }, { "epoch": 1.718840579710145, "grad_norm": 2.236083081344253, "learning_rate": 8.134258204217703e-06, "loss": 0.5841, "step": 23127 }, { "epoch": 1.7189149015235972, "grad_norm": 1.6860889512978579, "learning_rate": 8.133469930855749e-06, "loss": 0.4959, "step": 23128 }, { "epoch": 1.7189892233370494, "grad_norm": 1.7226979095133368, "learning_rate": 8.132681669510419e-06, "loss": 0.585, "step": 23129 }, { "epoch": 1.7190635451505016, "grad_norm": 2.144310470274803, "learning_rate": 8.131893420186785e-06, "loss": 0.6123, "step": 23130 }, { "epoch": 1.7191378669639539, "grad_norm": 1.868301310045843, "learning_rate": 8.131105182889929e-06, "loss": 0.5713, "step": 23131 }, { "epoch": 1.7192121887774061, "grad_norm": 1.9743699781180934, "learning_rate": 8.13031695762492e-06, "loss": 0.6531, "step": 23132 }, { "epoch": 1.7192865105908584, "grad_norm": 7.726051636347394, "learning_rate": 8.129528744396834e-06, "loss": 0.4463, "step": 23133 }, { "epoch": 1.7193608324043108, "grad_norm": 1.754352919249566, "learning_rate": 8.128740543210742e-06, "loss": 0.5383, "step": 23134 }, { "epoch": 1.7194351542177628, "grad_norm": 1.8881464714041785, "learning_rate": 8.127952354071721e-06, "loss": 0.4545, "step": 23135 }, { "epoch": 1.7195094760312153, "grad_norm": 1.9983853407492587, "learning_rate": 8.127164176984848e-06, "loss": 0.6266, "step": 23136 }, { "epoch": 1.7195837978446673, "grad_norm": 2.587280178297879, "learning_rate": 8.126376011955194e-06, "loss": 0.5399, "step": 23137 }, { "epoch": 1.7196581196581198, "grad_norm": 1.3445604555640736, "learning_rate": 8.125587858987835e-06, "loss": 0.3028, "step": 23138 }, { "epoch": 1.7197324414715718, "grad_norm": 2.817262535360629, "learning_rate": 8.124799718087842e-06, "loss": 0.6734, "step": 23139 }, { "epoch": 1.7198067632850242, "grad_norm": 2.159758503181852, "learning_rate": 8.124011589260292e-06, "loss": 0.5658, "step": 23140 }, { "epoch": 1.7198810850984763, "grad_norm": 2.1840656654739012, "learning_rate": 8.123223472510257e-06, "loss": 0.6839, "step": 23141 }, { "epoch": 1.7199554069119287, "grad_norm": 2.0130773843622265, "learning_rate": 8.122435367842813e-06, "loss": 0.6087, "step": 23142 }, { "epoch": 1.7200297287253807, "grad_norm": 1.733076710263369, "learning_rate": 8.121647275263033e-06, "loss": 0.554, "step": 23143 }, { "epoch": 1.7201040505388332, "grad_norm": 2.7142764798920127, "learning_rate": 8.120859194775994e-06, "loss": 0.7866, "step": 23144 }, { "epoch": 1.7201783723522854, "grad_norm": 2.005616539585507, "learning_rate": 8.120071126386761e-06, "loss": 0.612, "step": 23145 }, { "epoch": 1.7202526941657377, "grad_norm": 1.778752570124398, "learning_rate": 8.119283070100413e-06, "loss": 0.4364, "step": 23146 }, { "epoch": 1.72032701597919, "grad_norm": 2.143141384193223, "learning_rate": 8.118495025922024e-06, "loss": 0.558, "step": 23147 }, { "epoch": 1.7204013377926421, "grad_norm": 2.368537961058747, "learning_rate": 8.117706993856666e-06, "loss": 0.5163, "step": 23148 }, { "epoch": 1.7204756596060944, "grad_norm": 1.7172660319759994, "learning_rate": 8.116918973909412e-06, "loss": 0.4958, "step": 23149 }, { "epoch": 1.7205499814195466, "grad_norm": 1.6682271348931064, "learning_rate": 8.116130966085336e-06, "loss": 0.5054, "step": 23150 }, { "epoch": 1.7206243032329989, "grad_norm": 2.128377424799583, "learning_rate": 8.115342970389511e-06, "loss": 0.7392, "step": 23151 }, { "epoch": 1.720698625046451, "grad_norm": 2.212428244412163, "learning_rate": 8.11455498682701e-06, "loss": 0.6152, "step": 23152 }, { "epoch": 1.7207729468599033, "grad_norm": 2.1899577431626818, "learning_rate": 8.11376701540291e-06, "loss": 0.6886, "step": 23153 }, { "epoch": 1.7208472686733556, "grad_norm": 2.364265922648787, "learning_rate": 8.112979056122276e-06, "loss": 0.4967, "step": 23154 }, { "epoch": 1.720921590486808, "grad_norm": 2.0346667542799617, "learning_rate": 8.11219110899019e-06, "loss": 0.5271, "step": 23155 }, { "epoch": 1.72099591230026, "grad_norm": 2.0392285074316376, "learning_rate": 8.111403174011714e-06, "loss": 0.6256, "step": 23156 }, { "epoch": 1.7210702341137125, "grad_norm": 2.137385725996862, "learning_rate": 8.11061525119193e-06, "loss": 0.6444, "step": 23157 }, { "epoch": 1.7211445559271645, "grad_norm": 1.9676529613480562, "learning_rate": 8.109827340535904e-06, "loss": 0.6201, "step": 23158 }, { "epoch": 1.721218877740617, "grad_norm": 1.9517078754497952, "learning_rate": 8.109039442048712e-06, "loss": 0.555, "step": 23159 }, { "epoch": 1.721293199554069, "grad_norm": 2.1074929978239454, "learning_rate": 8.108251555735428e-06, "loss": 0.5643, "step": 23160 }, { "epoch": 1.7213675213675215, "grad_norm": 1.978563382086665, "learning_rate": 8.10746368160112e-06, "loss": 0.5044, "step": 23161 }, { "epoch": 1.7214418431809735, "grad_norm": 1.8648166626107965, "learning_rate": 8.106675819650862e-06, "loss": 0.5924, "step": 23162 }, { "epoch": 1.721516164994426, "grad_norm": 2.2229394750750027, "learning_rate": 8.105887969889727e-06, "loss": 0.6413, "step": 23163 }, { "epoch": 1.721590486807878, "grad_norm": 2.571020637175566, "learning_rate": 8.10510013232279e-06, "loss": 0.6625, "step": 23164 }, { "epoch": 1.7216648086213304, "grad_norm": 2.175214468568576, "learning_rate": 8.104312306955119e-06, "loss": 0.6413, "step": 23165 }, { "epoch": 1.7217391304347827, "grad_norm": 2.064865664895311, "learning_rate": 8.10352449379179e-06, "loss": 0.5596, "step": 23166 }, { "epoch": 1.721813452248235, "grad_norm": 1.7941976415542524, "learning_rate": 8.102736692837866e-06, "loss": 0.684, "step": 23167 }, { "epoch": 1.7218877740616871, "grad_norm": 1.696651962572531, "learning_rate": 8.101948904098428e-06, "loss": 0.4349, "step": 23168 }, { "epoch": 1.7219620958751394, "grad_norm": 1.9798998832667936, "learning_rate": 8.101161127578544e-06, "loss": 0.664, "step": 23169 }, { "epoch": 1.7220364176885916, "grad_norm": 2.0255435200341223, "learning_rate": 8.100373363283286e-06, "loss": 0.6415, "step": 23170 }, { "epoch": 1.7221107395020439, "grad_norm": 1.7321728626679613, "learning_rate": 8.099585611217725e-06, "loss": 0.5704, "step": 23171 }, { "epoch": 1.722185061315496, "grad_norm": 2.7176519140514106, "learning_rate": 8.098797871386931e-06, "loss": 0.6694, "step": 23172 }, { "epoch": 1.7222593831289483, "grad_norm": 1.9859334447253825, "learning_rate": 8.098010143795981e-06, "loss": 0.6732, "step": 23173 }, { "epoch": 1.7223337049424006, "grad_norm": 2.2289444386419963, "learning_rate": 8.097222428449942e-06, "loss": 0.6682, "step": 23174 }, { "epoch": 1.7224080267558528, "grad_norm": 2.1900841122441714, "learning_rate": 8.096434725353888e-06, "loss": 0.5658, "step": 23175 }, { "epoch": 1.722482348569305, "grad_norm": 2.1326760974791994, "learning_rate": 8.095647034512885e-06, "loss": 0.5942, "step": 23176 }, { "epoch": 1.7225566703827573, "grad_norm": 2.084757259028554, "learning_rate": 8.09485935593201e-06, "loss": 0.5595, "step": 23177 }, { "epoch": 1.7226309921962097, "grad_norm": 1.9327547280193154, "learning_rate": 8.094071689616334e-06, "loss": 0.4567, "step": 23178 }, { "epoch": 1.7227053140096618, "grad_norm": 1.8452875597128728, "learning_rate": 8.093284035570924e-06, "loss": 0.5695, "step": 23179 }, { "epoch": 1.7227796358231142, "grad_norm": 1.8932073729851933, "learning_rate": 8.09249639380085e-06, "loss": 0.6701, "step": 23180 }, { "epoch": 1.7228539576365662, "grad_norm": 2.1858750606327555, "learning_rate": 8.091708764311187e-06, "loss": 0.6125, "step": 23181 }, { "epoch": 1.7229282794500187, "grad_norm": 2.049015583255677, "learning_rate": 8.090921147107002e-06, "loss": 0.6132, "step": 23182 }, { "epoch": 1.7230026012634707, "grad_norm": 2.2464817335996092, "learning_rate": 8.09013354219337e-06, "loss": 0.6455, "step": 23183 }, { "epoch": 1.7230769230769232, "grad_norm": 2.118526975452664, "learning_rate": 8.089345949575358e-06, "loss": 0.5133, "step": 23184 }, { "epoch": 1.7231512448903752, "grad_norm": 2.1707895064545926, "learning_rate": 8.088558369258036e-06, "loss": 0.53, "step": 23185 }, { "epoch": 1.7232255667038276, "grad_norm": 2.039507980249292, "learning_rate": 8.08777080124648e-06, "loss": 0.5373, "step": 23186 }, { "epoch": 1.7232998885172797, "grad_norm": 2.4913509826955016, "learning_rate": 8.086983245545753e-06, "loss": 0.6231, "step": 23187 }, { "epoch": 1.7233742103307321, "grad_norm": 2.048753816102114, "learning_rate": 8.08619570216093e-06, "loss": 0.6883, "step": 23188 }, { "epoch": 1.7234485321441844, "grad_norm": 1.8498558360418662, "learning_rate": 8.085408171097082e-06, "loss": 0.5032, "step": 23189 }, { "epoch": 1.7235228539576366, "grad_norm": 2.048732012279132, "learning_rate": 8.084620652359274e-06, "loss": 0.5707, "step": 23190 }, { "epoch": 1.7235971757710888, "grad_norm": 2.229835197828962, "learning_rate": 8.083833145952578e-06, "loss": 0.6067, "step": 23191 }, { "epoch": 1.723671497584541, "grad_norm": 2.194629513124744, "learning_rate": 8.083045651882069e-06, "loss": 0.5317, "step": 23192 }, { "epoch": 1.7237458193979933, "grad_norm": 1.807340107590587, "learning_rate": 8.082258170152807e-06, "loss": 0.6098, "step": 23193 }, { "epoch": 1.7238201412114456, "grad_norm": 1.8963506990640473, "learning_rate": 8.081470700769871e-06, "loss": 0.5954, "step": 23194 }, { "epoch": 1.7238944630248978, "grad_norm": 2.1251268012120286, "learning_rate": 8.080683243738325e-06, "loss": 0.6253, "step": 23195 }, { "epoch": 1.72396878483835, "grad_norm": 1.8438084338974012, "learning_rate": 8.079895799063244e-06, "loss": 0.5074, "step": 23196 }, { "epoch": 1.7240431066518023, "grad_norm": 2.420948431067384, "learning_rate": 8.079108366749691e-06, "loss": 0.6054, "step": 23197 }, { "epoch": 1.7241174284652545, "grad_norm": 1.833643991713714, "learning_rate": 8.078320946802739e-06, "loss": 0.5607, "step": 23198 }, { "epoch": 1.7241917502787067, "grad_norm": 1.7686781764308883, "learning_rate": 8.077533539227458e-06, "loss": 0.5126, "step": 23199 }, { "epoch": 1.724266072092159, "grad_norm": 2.070851240933183, "learning_rate": 8.07674614402892e-06, "loss": 0.6968, "step": 23200 }, { "epoch": 1.7243403939056114, "grad_norm": 2.1274696948411775, "learning_rate": 8.075958761212186e-06, "loss": 0.5672, "step": 23201 }, { "epoch": 1.7244147157190635, "grad_norm": 2.661532902376951, "learning_rate": 8.075171390782329e-06, "loss": 0.4319, "step": 23202 }, { "epoch": 1.724489037532516, "grad_norm": 1.9403216321785792, "learning_rate": 8.074384032744422e-06, "loss": 0.5272, "step": 23203 }, { "epoch": 1.724563359345968, "grad_norm": 1.5091342980372344, "learning_rate": 8.073596687103527e-06, "loss": 0.4621, "step": 23204 }, { "epoch": 1.7246376811594204, "grad_norm": 2.028997851630565, "learning_rate": 8.072809353864719e-06, "loss": 0.7363, "step": 23205 }, { "epoch": 1.7247120029728724, "grad_norm": 1.714795655002064, "learning_rate": 8.072022033033062e-06, "loss": 0.4606, "step": 23206 }, { "epoch": 1.7247863247863249, "grad_norm": 2.1065693330561888, "learning_rate": 8.07123472461363e-06, "loss": 0.6239, "step": 23207 }, { "epoch": 1.724860646599777, "grad_norm": 1.472263605221274, "learning_rate": 8.070447428611487e-06, "loss": 0.4901, "step": 23208 }, { "epoch": 1.7249349684132294, "grad_norm": 1.9191333516832607, "learning_rate": 8.069660145031704e-06, "loss": 0.5216, "step": 23209 }, { "epoch": 1.7250092902266814, "grad_norm": 1.7188042442966933, "learning_rate": 8.068872873879348e-06, "loss": 0.53, "step": 23210 }, { "epoch": 1.7250836120401338, "grad_norm": 1.7906714205574834, "learning_rate": 8.06808561515949e-06, "loss": 0.5185, "step": 23211 }, { "epoch": 1.725157933853586, "grad_norm": 2.2282589107955464, "learning_rate": 8.067298368877198e-06, "loss": 0.6728, "step": 23212 }, { "epoch": 1.7252322556670383, "grad_norm": 2.0368334347514057, "learning_rate": 8.066511135037536e-06, "loss": 0.5642, "step": 23213 }, { "epoch": 1.7253065774804905, "grad_norm": 2.0409834541910126, "learning_rate": 8.065723913645576e-06, "loss": 0.514, "step": 23214 }, { "epoch": 1.7253808992939428, "grad_norm": 2.6758395987188455, "learning_rate": 8.064936704706384e-06, "loss": 0.712, "step": 23215 }, { "epoch": 1.725455221107395, "grad_norm": 1.7755198884338206, "learning_rate": 8.064149508225029e-06, "loss": 0.505, "step": 23216 }, { "epoch": 1.7255295429208473, "grad_norm": 1.9746184489691256, "learning_rate": 8.063362324206578e-06, "loss": 0.6197, "step": 23217 }, { "epoch": 1.7256038647342995, "grad_norm": 1.7146252600069951, "learning_rate": 8.062575152656104e-06, "loss": 0.5442, "step": 23218 }, { "epoch": 1.7256781865477517, "grad_norm": 2.117544996464447, "learning_rate": 8.061787993578667e-06, "loss": 0.5408, "step": 23219 }, { "epoch": 1.725752508361204, "grad_norm": 2.2120023815956937, "learning_rate": 8.061000846979338e-06, "loss": 0.6122, "step": 23220 }, { "epoch": 1.7258268301746562, "grad_norm": 2.20033955389932, "learning_rate": 8.060213712863186e-06, "loss": 0.66, "step": 23221 }, { "epoch": 1.7259011519881087, "grad_norm": 2.351491255877759, "learning_rate": 8.059426591235278e-06, "loss": 0.5305, "step": 23222 }, { "epoch": 1.7259754738015607, "grad_norm": 2.3658129714477796, "learning_rate": 8.058639482100684e-06, "loss": 0.6104, "step": 23223 }, { "epoch": 1.7260497956150131, "grad_norm": 1.5323531024576909, "learning_rate": 8.057852385464464e-06, "loss": 0.3825, "step": 23224 }, { "epoch": 1.7261241174284652, "grad_norm": 2.4520763654418283, "learning_rate": 8.057065301331692e-06, "loss": 0.698, "step": 23225 }, { "epoch": 1.7261984392419176, "grad_norm": 1.8592246219267254, "learning_rate": 8.05627822970743e-06, "loss": 0.4551, "step": 23226 }, { "epoch": 1.7262727610553696, "grad_norm": 4.394640684218364, "learning_rate": 8.05549117059675e-06, "loss": 0.7022, "step": 23227 }, { "epoch": 1.726347082868822, "grad_norm": 1.6240964957305544, "learning_rate": 8.054704124004716e-06, "loss": 0.631, "step": 23228 }, { "epoch": 1.7264214046822741, "grad_norm": 1.7424292977077829, "learning_rate": 8.053917089936395e-06, "loss": 0.5153, "step": 23229 }, { "epoch": 1.7264957264957266, "grad_norm": 2.002357493260907, "learning_rate": 8.053130068396856e-06, "loss": 0.7409, "step": 23230 }, { "epoch": 1.7265700483091786, "grad_norm": 1.8738322009118566, "learning_rate": 8.052343059391164e-06, "loss": 0.4634, "step": 23231 }, { "epoch": 1.726644370122631, "grad_norm": 1.8185896794635756, "learning_rate": 8.051556062924388e-06, "loss": 0.5872, "step": 23232 }, { "epoch": 1.7267186919360833, "grad_norm": 2.5635507381238494, "learning_rate": 8.05076907900159e-06, "loss": 0.6183, "step": 23233 }, { "epoch": 1.7267930137495355, "grad_norm": 1.870698455180109, "learning_rate": 8.049982107627845e-06, "loss": 0.5375, "step": 23234 }, { "epoch": 1.7268673355629878, "grad_norm": 1.994355025169176, "learning_rate": 8.04919514880821e-06, "loss": 0.6348, "step": 23235 }, { "epoch": 1.72694165737644, "grad_norm": 2.2380482775983235, "learning_rate": 8.048408202547757e-06, "loss": 0.6245, "step": 23236 }, { "epoch": 1.7270159791898922, "grad_norm": 1.9228201386686326, "learning_rate": 8.047621268851549e-06, "loss": 0.5465, "step": 23237 }, { "epoch": 1.7270903010033445, "grad_norm": 8.492985689833066, "learning_rate": 8.046834347724654e-06, "loss": 0.5874, "step": 23238 }, { "epoch": 1.7271646228167967, "grad_norm": 1.5743902780858454, "learning_rate": 8.046047439172137e-06, "loss": 0.3833, "step": 23239 }, { "epoch": 1.727238944630249, "grad_norm": 2.0046728332469352, "learning_rate": 8.04526054319907e-06, "loss": 0.6302, "step": 23240 }, { "epoch": 1.7273132664437012, "grad_norm": 2.2317020642716514, "learning_rate": 8.04447365981051e-06, "loss": 0.7498, "step": 23241 }, { "epoch": 1.7273875882571534, "grad_norm": 1.8069547237582362, "learning_rate": 8.043686789011526e-06, "loss": 0.559, "step": 23242 }, { "epoch": 1.7274619100706057, "grad_norm": 2.136076747701066, "learning_rate": 8.042899930807187e-06, "loss": 0.6572, "step": 23243 }, { "epoch": 1.727536231884058, "grad_norm": 1.7353567601270252, "learning_rate": 8.042113085202556e-06, "loss": 0.5934, "step": 23244 }, { "epoch": 1.7276105536975104, "grad_norm": 2.274311587572235, "learning_rate": 8.041326252202701e-06, "loss": 0.6775, "step": 23245 }, { "epoch": 1.7276848755109624, "grad_norm": 2.3236433759013484, "learning_rate": 8.040539431812687e-06, "loss": 0.6627, "step": 23246 }, { "epoch": 1.7277591973244149, "grad_norm": 1.9325461114552396, "learning_rate": 8.03975262403758e-06, "loss": 0.7169, "step": 23247 }, { "epoch": 1.7278335191378669, "grad_norm": 1.9530545968858704, "learning_rate": 8.038965828882439e-06, "loss": 0.6374, "step": 23248 }, { "epoch": 1.7279078409513193, "grad_norm": 1.830481043213466, "learning_rate": 8.038179046352334e-06, "loss": 0.5475, "step": 23249 }, { "epoch": 1.7279821627647713, "grad_norm": 1.723305609564701, "learning_rate": 8.037392276452332e-06, "loss": 0.5906, "step": 23250 }, { "epoch": 1.7280564845782238, "grad_norm": 1.6976398281016951, "learning_rate": 8.036605519187499e-06, "loss": 0.65, "step": 23251 }, { "epoch": 1.7281308063916758, "grad_norm": 2.398792339349023, "learning_rate": 8.035818774562895e-06, "loss": 0.67, "step": 23252 }, { "epoch": 1.7282051282051283, "grad_norm": 2.2310704731746465, "learning_rate": 8.035032042583586e-06, "loss": 0.6878, "step": 23253 }, { "epoch": 1.7282794500185803, "grad_norm": 5.479760431412687, "learning_rate": 8.034245323254642e-06, "loss": 0.6014, "step": 23254 }, { "epoch": 1.7283537718320328, "grad_norm": 1.5315629888658473, "learning_rate": 8.033458616581122e-06, "loss": 0.412, "step": 23255 }, { "epoch": 1.728428093645485, "grad_norm": 1.8785427975894449, "learning_rate": 8.032671922568099e-06, "loss": 0.6302, "step": 23256 }, { "epoch": 1.7285024154589372, "grad_norm": 2.3201064215727314, "learning_rate": 8.031885241220629e-06, "loss": 0.7645, "step": 23257 }, { "epoch": 1.7285767372723895, "grad_norm": 2.0828065404408487, "learning_rate": 8.03109857254378e-06, "loss": 0.7221, "step": 23258 }, { "epoch": 1.7286510590858417, "grad_norm": 3.6667811138720308, "learning_rate": 8.030311916542614e-06, "loss": 0.6165, "step": 23259 }, { "epoch": 1.728725380899294, "grad_norm": 1.4338940754264107, "learning_rate": 8.029525273222198e-06, "loss": 0.5474, "step": 23260 }, { "epoch": 1.7287997027127462, "grad_norm": 2.10340527731126, "learning_rate": 8.028738642587597e-06, "loss": 0.7496, "step": 23261 }, { "epoch": 1.7288740245261984, "grad_norm": 1.7749749323951645, "learning_rate": 8.027952024643877e-06, "loss": 0.5051, "step": 23262 }, { "epoch": 1.7289483463396507, "grad_norm": 1.9025011186598269, "learning_rate": 8.027165419396096e-06, "loss": 0.5686, "step": 23263 }, { "epoch": 1.729022668153103, "grad_norm": 2.1018806105160754, "learning_rate": 8.02637882684932e-06, "loss": 0.5574, "step": 23264 }, { "epoch": 1.7290969899665551, "grad_norm": 1.4973857141811293, "learning_rate": 8.025592247008617e-06, "loss": 0.4224, "step": 23265 }, { "epoch": 1.7291713117800074, "grad_norm": 1.681145298643268, "learning_rate": 8.024805679879049e-06, "loss": 0.388, "step": 23266 }, { "epoch": 1.7292456335934596, "grad_norm": 2.0099108669597854, "learning_rate": 8.024019125465681e-06, "loss": 0.5571, "step": 23267 }, { "epoch": 1.729319955406912, "grad_norm": 2.7826995252074425, "learning_rate": 8.023232583773576e-06, "loss": 0.5475, "step": 23268 }, { "epoch": 1.729394277220364, "grad_norm": 1.890553727811264, "learning_rate": 8.022446054807796e-06, "loss": 0.5814, "step": 23269 }, { "epoch": 1.7294685990338166, "grad_norm": 2.160742219395034, "learning_rate": 8.021659538573403e-06, "loss": 0.5204, "step": 23270 }, { "epoch": 1.7295429208472686, "grad_norm": 1.7311698375188154, "learning_rate": 8.020873035075464e-06, "loss": 0.5564, "step": 23271 }, { "epoch": 1.729617242660721, "grad_norm": 2.1088231617538415, "learning_rate": 8.020086544319043e-06, "loss": 0.7231, "step": 23272 }, { "epoch": 1.729691564474173, "grad_norm": 1.7667265090202617, "learning_rate": 8.019300066309203e-06, "loss": 0.4872, "step": 23273 }, { "epoch": 1.7297658862876255, "grad_norm": 1.8577455395026774, "learning_rate": 8.018513601051005e-06, "loss": 0.6569, "step": 23274 }, { "epoch": 1.7298402081010775, "grad_norm": 1.8493843023559458, "learning_rate": 8.017727148549513e-06, "loss": 0.5288, "step": 23275 }, { "epoch": 1.72991452991453, "grad_norm": 1.6279001818042405, "learning_rate": 8.01694070880979e-06, "loss": 0.4585, "step": 23276 }, { "epoch": 1.729988851727982, "grad_norm": 1.8044455974873332, "learning_rate": 8.016154281836902e-06, "loss": 0.515, "step": 23277 }, { "epoch": 1.7300631735414345, "grad_norm": 2.126229665513266, "learning_rate": 8.01536786763591e-06, "loss": 0.6316, "step": 23278 }, { "epoch": 1.7301374953548867, "grad_norm": 2.0854702264477263, "learning_rate": 8.014581466211877e-06, "loss": 0.651, "step": 23279 }, { "epoch": 1.730211817168339, "grad_norm": 2.081165577540758, "learning_rate": 8.013795077569865e-06, "loss": 0.6335, "step": 23280 }, { "epoch": 1.7302861389817912, "grad_norm": 2.598215686510593, "learning_rate": 8.013008701714936e-06, "loss": 0.7726, "step": 23281 }, { "epoch": 1.7303604607952434, "grad_norm": 2.6436287450494227, "learning_rate": 8.012222338652154e-06, "loss": 0.6791, "step": 23282 }, { "epoch": 1.7304347826086957, "grad_norm": 2.393043193058867, "learning_rate": 8.01143598838658e-06, "loss": 0.7045, "step": 23283 }, { "epoch": 1.730509104422148, "grad_norm": 1.8515436763563717, "learning_rate": 8.010649650923281e-06, "loss": 0.6212, "step": 23284 }, { "epoch": 1.7305834262356001, "grad_norm": 1.683382317863563, "learning_rate": 8.009863326267315e-06, "loss": 0.5225, "step": 23285 }, { "epoch": 1.7306577480490524, "grad_norm": 2.1308861109114776, "learning_rate": 8.009077014423744e-06, "loss": 0.6505, "step": 23286 }, { "epoch": 1.7307320698625046, "grad_norm": 2.5343574846830466, "learning_rate": 8.008290715397634e-06, "loss": 0.7197, "step": 23287 }, { "epoch": 1.7308063916759568, "grad_norm": 1.879361118235869, "learning_rate": 8.007504429194044e-06, "loss": 0.458, "step": 23288 }, { "epoch": 1.7308807134894093, "grad_norm": 1.7575093245008406, "learning_rate": 8.00671815581804e-06, "loss": 0.4125, "step": 23289 }, { "epoch": 1.7309550353028613, "grad_norm": 1.9809835253106252, "learning_rate": 8.005931895274676e-06, "loss": 0.68, "step": 23290 }, { "epoch": 1.7310293571163138, "grad_norm": 2.424326588449267, "learning_rate": 8.005145647569026e-06, "loss": 0.7272, "step": 23291 }, { "epoch": 1.7311036789297658, "grad_norm": 1.8269334573445886, "learning_rate": 8.004359412706141e-06, "loss": 0.5768, "step": 23292 }, { "epoch": 1.7311780007432183, "grad_norm": 1.51521756185942, "learning_rate": 8.003573190691086e-06, "loss": 0.4423, "step": 23293 }, { "epoch": 1.7312523225566703, "grad_norm": 2.02792885763869, "learning_rate": 8.002786981528922e-06, "loss": 0.4815, "step": 23294 }, { "epoch": 1.7313266443701227, "grad_norm": 1.9509833894704929, "learning_rate": 8.002000785224715e-06, "loss": 0.5529, "step": 23295 }, { "epoch": 1.7314009661835748, "grad_norm": 1.960828740326386, "learning_rate": 8.00121460178352e-06, "loss": 0.5299, "step": 23296 }, { "epoch": 1.7314752879970272, "grad_norm": 7.215999321381602, "learning_rate": 8.000428431210403e-06, "loss": 0.6246, "step": 23297 }, { "epoch": 1.7315496098104792, "grad_norm": 1.4137184814654569, "learning_rate": 7.999642273510424e-06, "loss": 0.4396, "step": 23298 }, { "epoch": 1.7316239316239317, "grad_norm": 1.5419315387432464, "learning_rate": 7.998856128688644e-06, "loss": 0.4474, "step": 23299 }, { "epoch": 1.731698253437384, "grad_norm": 1.7881013118401319, "learning_rate": 7.998069996750124e-06, "loss": 0.4117, "step": 23300 }, { "epoch": 1.7317725752508362, "grad_norm": 2.409656666374286, "learning_rate": 7.997283877699927e-06, "loss": 0.6561, "step": 23301 }, { "epoch": 1.7318468970642884, "grad_norm": 2.999897761728558, "learning_rate": 7.996497771543115e-06, "loss": 0.6436, "step": 23302 }, { "epoch": 1.7319212188777406, "grad_norm": 2.3706976508371964, "learning_rate": 7.995711678284743e-06, "loss": 0.5289, "step": 23303 }, { "epoch": 1.7319955406911929, "grad_norm": 2.3111517949112694, "learning_rate": 7.994925597929875e-06, "loss": 0.7632, "step": 23304 }, { "epoch": 1.7320698625046451, "grad_norm": 1.8672301246836949, "learning_rate": 7.99413953048357e-06, "loss": 0.5181, "step": 23305 }, { "epoch": 1.7321441843180974, "grad_norm": 1.7746913768182435, "learning_rate": 7.993353475950896e-06, "loss": 0.4779, "step": 23306 }, { "epoch": 1.7322185061315496, "grad_norm": 1.9753305638503107, "learning_rate": 7.992567434336905e-06, "loss": 0.5518, "step": 23307 }, { "epoch": 1.7322928279450018, "grad_norm": 2.65838356226487, "learning_rate": 7.991781405646658e-06, "loss": 0.69, "step": 23308 }, { "epoch": 1.732367149758454, "grad_norm": 2.082044411626195, "learning_rate": 7.990995389885222e-06, "loss": 0.6143, "step": 23309 }, { "epoch": 1.7324414715719063, "grad_norm": 1.906924474545272, "learning_rate": 7.990209387057651e-06, "loss": 0.6389, "step": 23310 }, { "epoch": 1.7325157933853585, "grad_norm": 1.632320456760187, "learning_rate": 7.98942339716901e-06, "loss": 0.539, "step": 23311 }, { "epoch": 1.732590115198811, "grad_norm": 1.8046109097382028, "learning_rate": 7.988637420224356e-06, "loss": 0.6341, "step": 23312 }, { "epoch": 1.732664437012263, "grad_norm": 1.838858369605015, "learning_rate": 7.987851456228753e-06, "loss": 0.4907, "step": 23313 }, { "epoch": 1.7327387588257155, "grad_norm": 2.041392762283304, "learning_rate": 7.987065505187254e-06, "loss": 0.62, "step": 23314 }, { "epoch": 1.7328130806391675, "grad_norm": 2.5683393023141776, "learning_rate": 7.986279567104924e-06, "loss": 0.6802, "step": 23315 }, { "epoch": 1.73288740245262, "grad_norm": 1.897353442421259, "learning_rate": 7.98549364198682e-06, "loss": 0.5284, "step": 23316 }, { "epoch": 1.732961724266072, "grad_norm": 2.0194708605510803, "learning_rate": 7.984707729838006e-06, "loss": 0.6218, "step": 23317 }, { "epoch": 1.7330360460795244, "grad_norm": 1.9881636553681676, "learning_rate": 7.983921830663537e-06, "loss": 0.6778, "step": 23318 }, { "epoch": 1.7331103678929765, "grad_norm": 1.9734503344521983, "learning_rate": 7.983135944468476e-06, "loss": 0.5511, "step": 23319 }, { "epoch": 1.733184689706429, "grad_norm": 1.677123025405097, "learning_rate": 7.98235007125788e-06, "loss": 0.5276, "step": 23320 }, { "epoch": 1.733259011519881, "grad_norm": 1.6637232664709998, "learning_rate": 7.98156421103681e-06, "loss": 0.5257, "step": 23321 }, { "epoch": 1.7333333333333334, "grad_norm": 1.835309182704102, "learning_rate": 7.980778363810325e-06, "loss": 0.533, "step": 23322 }, { "epoch": 1.7334076551467856, "grad_norm": 1.9407004617369172, "learning_rate": 7.979992529583484e-06, "loss": 0.6017, "step": 23323 }, { "epoch": 1.7334819769602379, "grad_norm": 1.8660118017356961, "learning_rate": 7.979206708361344e-06, "loss": 0.5496, "step": 23324 }, { "epoch": 1.73355629877369, "grad_norm": 2.068816082436279, "learning_rate": 7.978420900148973e-06, "loss": 0.5871, "step": 23325 }, { "epoch": 1.7336306205871423, "grad_norm": 2.145396986458304, "learning_rate": 7.97763510495142e-06, "loss": 0.7084, "step": 23326 }, { "epoch": 1.7337049424005946, "grad_norm": 1.6361684195160635, "learning_rate": 7.976849322773745e-06, "loss": 0.5021, "step": 23327 }, { "epoch": 1.7337792642140468, "grad_norm": 1.9467670489432742, "learning_rate": 7.976063553621012e-06, "loss": 0.607, "step": 23328 }, { "epoch": 1.733853586027499, "grad_norm": 2.2410144747529195, "learning_rate": 7.975277797498274e-06, "loss": 0.644, "step": 23329 }, { "epoch": 1.7339279078409513, "grad_norm": 7.518873999122612, "learning_rate": 7.974492054410592e-06, "loss": 0.515, "step": 23330 }, { "epoch": 1.7340022296544035, "grad_norm": 1.3689962620682543, "learning_rate": 7.973706324363028e-06, "loss": 0.4064, "step": 23331 }, { "epoch": 1.7340765514678558, "grad_norm": 1.5358447563231032, "learning_rate": 7.972920607360637e-06, "loss": 0.4611, "step": 23332 }, { "epoch": 1.734150873281308, "grad_norm": 2.022359655972328, "learning_rate": 7.972134903408478e-06, "loss": 0.5282, "step": 23333 }, { "epoch": 1.7342251950947603, "grad_norm": 2.031242048791553, "learning_rate": 7.971349212511607e-06, "loss": 0.7078, "step": 23334 }, { "epoch": 1.7342995169082127, "grad_norm": 2.2877016076803844, "learning_rate": 7.970563534675086e-06, "loss": 0.5688, "step": 23335 }, { "epoch": 1.7343738387216647, "grad_norm": 1.8366795432948575, "learning_rate": 7.969777869903973e-06, "loss": 0.6408, "step": 23336 }, { "epoch": 1.7344481605351172, "grad_norm": 2.5524114826517366, "learning_rate": 7.968992218203325e-06, "loss": 0.6476, "step": 23337 }, { "epoch": 1.7345224823485692, "grad_norm": 2.7719256746154857, "learning_rate": 7.968206579578197e-06, "loss": 0.5757, "step": 23338 }, { "epoch": 1.7345968041620217, "grad_norm": 2.0100997184576626, "learning_rate": 7.967420954033651e-06, "loss": 0.5132, "step": 23339 }, { "epoch": 1.7346711259754737, "grad_norm": 2.367235815886199, "learning_rate": 7.966635341574745e-06, "loss": 0.5157, "step": 23340 }, { "epoch": 1.7347454477889261, "grad_norm": 1.8371376890025068, "learning_rate": 7.965849742206531e-06, "loss": 0.587, "step": 23341 }, { "epoch": 1.7348197696023782, "grad_norm": 1.632646661592881, "learning_rate": 7.965064155934074e-06, "loss": 0.5102, "step": 23342 }, { "epoch": 1.7348940914158306, "grad_norm": 2.215432943961415, "learning_rate": 7.964278582762428e-06, "loss": 0.7197, "step": 23343 }, { "epoch": 1.7349684132292826, "grad_norm": 2.19557588917079, "learning_rate": 7.96349302269665e-06, "loss": 0.4823, "step": 23344 }, { "epoch": 1.735042735042735, "grad_norm": 2.0454470055068437, "learning_rate": 7.962707475741798e-06, "loss": 0.5546, "step": 23345 }, { "epoch": 1.7351170568561873, "grad_norm": 2.124339926373947, "learning_rate": 7.961921941902931e-06, "loss": 0.5699, "step": 23346 }, { "epoch": 1.7351913786696396, "grad_norm": 2.078776856067853, "learning_rate": 7.961136421185108e-06, "loss": 0.6195, "step": 23347 }, { "epoch": 1.7352657004830918, "grad_norm": 1.8052145665878003, "learning_rate": 7.960350913593376e-06, "loss": 0.5627, "step": 23348 }, { "epoch": 1.735340022296544, "grad_norm": 2.3049028026914495, "learning_rate": 7.959565419132802e-06, "loss": 0.6315, "step": 23349 }, { "epoch": 1.7354143441099963, "grad_norm": 1.8046937215642291, "learning_rate": 7.958779937808442e-06, "loss": 0.5093, "step": 23350 }, { "epoch": 1.7354886659234485, "grad_norm": 1.869833733962776, "learning_rate": 7.957994469625347e-06, "loss": 0.7415, "step": 23351 }, { "epoch": 1.7355629877369008, "grad_norm": 2.0299258243386267, "learning_rate": 7.95720901458858e-06, "loss": 0.6374, "step": 23352 }, { "epoch": 1.735637309550353, "grad_norm": 1.8851162388433527, "learning_rate": 7.956423572703194e-06, "loss": 0.5635, "step": 23353 }, { "epoch": 1.7357116313638052, "grad_norm": 11.70486398519621, "learning_rate": 7.955638143974248e-06, "loss": 0.6726, "step": 23354 }, { "epoch": 1.7357859531772575, "grad_norm": 1.6764291895028505, "learning_rate": 7.954852728406797e-06, "loss": 0.5688, "step": 23355 }, { "epoch": 1.73586027499071, "grad_norm": 2.256861108939576, "learning_rate": 7.954067326005899e-06, "loss": 0.6894, "step": 23356 }, { "epoch": 1.735934596804162, "grad_norm": 2.226787053534417, "learning_rate": 7.953281936776608e-06, "loss": 0.4387, "step": 23357 }, { "epoch": 1.7360089186176144, "grad_norm": 1.4564421451019816, "learning_rate": 7.952496560723986e-06, "loss": 0.4643, "step": 23358 }, { "epoch": 1.7360832404310664, "grad_norm": 1.8674550273464683, "learning_rate": 7.95171119785308e-06, "loss": 0.5509, "step": 23359 }, { "epoch": 1.736157562244519, "grad_norm": 1.7366656351415417, "learning_rate": 7.950925848168953e-06, "loss": 0.4589, "step": 23360 }, { "epoch": 1.736231884057971, "grad_norm": 2.779802785207423, "learning_rate": 7.950140511676659e-06, "loss": 0.52, "step": 23361 }, { "epoch": 1.7363062058714234, "grad_norm": 1.9394450295411283, "learning_rate": 7.949355188381253e-06, "loss": 0.6571, "step": 23362 }, { "epoch": 1.7363805276848754, "grad_norm": 1.8274170571945403, "learning_rate": 7.948569878287791e-06, "loss": 0.6126, "step": 23363 }, { "epoch": 1.7364548494983278, "grad_norm": 1.6824167526754203, "learning_rate": 7.947784581401333e-06, "loss": 0.5332, "step": 23364 }, { "epoch": 1.7365291713117799, "grad_norm": 2.226376478856333, "learning_rate": 7.94699929772693e-06, "loss": 0.6603, "step": 23365 }, { "epoch": 1.7366034931252323, "grad_norm": 1.4275226910854582, "learning_rate": 7.946214027269638e-06, "loss": 0.4504, "step": 23366 }, { "epoch": 1.7366778149386843, "grad_norm": 2.131011477338253, "learning_rate": 7.945428770034512e-06, "loss": 0.723, "step": 23367 }, { "epoch": 1.7367521367521368, "grad_norm": 1.7438110202189316, "learning_rate": 7.944643526026611e-06, "loss": 0.5679, "step": 23368 }, { "epoch": 1.736826458565589, "grad_norm": 1.6696075391673442, "learning_rate": 7.943858295250988e-06, "loss": 0.5793, "step": 23369 }, { "epoch": 1.7369007803790413, "grad_norm": 1.7952310238604283, "learning_rate": 7.943073077712701e-06, "loss": 0.5051, "step": 23370 }, { "epoch": 1.7369751021924935, "grad_norm": 1.943555410881955, "learning_rate": 7.9422878734168e-06, "loss": 0.4661, "step": 23371 }, { "epoch": 1.7370494240059458, "grad_norm": 1.9489050329064443, "learning_rate": 7.941502682368345e-06, "loss": 0.5142, "step": 23372 }, { "epoch": 1.737123745819398, "grad_norm": 3.7218414022765365, "learning_rate": 7.940717504572388e-06, "loss": 0.47, "step": 23373 }, { "epoch": 1.7371980676328502, "grad_norm": 1.8338322774464386, "learning_rate": 7.939932340033985e-06, "loss": 0.5806, "step": 23374 }, { "epoch": 1.7372723894463025, "grad_norm": 1.9213029260171406, "learning_rate": 7.939147188758189e-06, "loss": 0.7165, "step": 23375 }, { "epoch": 1.7373467112597547, "grad_norm": 1.7852475728246995, "learning_rate": 7.93836205075006e-06, "loss": 0.6195, "step": 23376 }, { "epoch": 1.737421033073207, "grad_norm": 1.627475712630326, "learning_rate": 7.937576926014646e-06, "loss": 0.5131, "step": 23377 }, { "epoch": 1.7374953548866592, "grad_norm": 1.7784091262289161, "learning_rate": 7.936791814557003e-06, "loss": 0.7139, "step": 23378 }, { "epoch": 1.7375696767001116, "grad_norm": 1.75410649335852, "learning_rate": 7.936006716382189e-06, "loss": 0.5341, "step": 23379 }, { "epoch": 1.7376439985135637, "grad_norm": 1.8152918035914558, "learning_rate": 7.935221631495259e-06, "loss": 0.5731, "step": 23380 }, { "epoch": 1.7377183203270161, "grad_norm": 1.8406819346880439, "learning_rate": 7.934436559901265e-06, "loss": 0.5565, "step": 23381 }, { "epoch": 1.7377926421404681, "grad_norm": 1.8068346533376558, "learning_rate": 7.93365150160526e-06, "loss": 0.6188, "step": 23382 }, { "epoch": 1.7378669639539206, "grad_norm": 1.7162972483064558, "learning_rate": 7.9328664566123e-06, "loss": 0.5699, "step": 23383 }, { "epoch": 1.7379412857673726, "grad_norm": 2.040953703522487, "learning_rate": 7.932081424927437e-06, "loss": 0.649, "step": 23384 }, { "epoch": 1.738015607580825, "grad_norm": 2.2257029815969216, "learning_rate": 7.931296406555727e-06, "loss": 0.7095, "step": 23385 }, { "epoch": 1.738089929394277, "grad_norm": 2.1247493831778783, "learning_rate": 7.930511401502223e-06, "loss": 0.5965, "step": 23386 }, { "epoch": 1.7381642512077295, "grad_norm": 2.1280899645630784, "learning_rate": 7.92972640977198e-06, "loss": 0.6773, "step": 23387 }, { "epoch": 1.7382385730211816, "grad_norm": 1.8314726535737977, "learning_rate": 7.92894143137005e-06, "loss": 0.6338, "step": 23388 }, { "epoch": 1.738312894834634, "grad_norm": 1.795580802086009, "learning_rate": 7.928156466301488e-06, "loss": 0.5161, "step": 23389 }, { "epoch": 1.7383872166480863, "grad_norm": 2.184268556454939, "learning_rate": 7.927371514571347e-06, "loss": 0.5766, "step": 23390 }, { "epoch": 1.7384615384615385, "grad_norm": 2.0659891371303636, "learning_rate": 7.926586576184679e-06, "loss": 0.4237, "step": 23391 }, { "epoch": 1.7385358602749907, "grad_norm": 1.492718441766228, "learning_rate": 7.925801651146545e-06, "loss": 0.562, "step": 23392 }, { "epoch": 1.738610182088443, "grad_norm": 2.275176937316627, "learning_rate": 7.925016739461989e-06, "loss": 0.5596, "step": 23393 }, { "epoch": 1.7386845039018952, "grad_norm": 1.5864714059994265, "learning_rate": 7.924231841136067e-06, "loss": 0.4523, "step": 23394 }, { "epoch": 1.7387588257153475, "grad_norm": 2.3139661015170985, "learning_rate": 7.923446956173832e-06, "loss": 0.7095, "step": 23395 }, { "epoch": 1.7388331475287997, "grad_norm": 2.22746046113801, "learning_rate": 7.922662084580338e-06, "loss": 0.6344, "step": 23396 }, { "epoch": 1.738907469342252, "grad_norm": 1.873353081288193, "learning_rate": 7.921877226360638e-06, "loss": 0.6488, "step": 23397 }, { "epoch": 1.7389817911557042, "grad_norm": 2.8478566043301305, "learning_rate": 7.921092381519786e-06, "loss": 0.7481, "step": 23398 }, { "epoch": 1.7390561129691564, "grad_norm": 2.1178573688640037, "learning_rate": 7.920307550062832e-06, "loss": 0.5989, "step": 23399 }, { "epoch": 1.7391304347826086, "grad_norm": 1.7684864041747455, "learning_rate": 7.919522731994829e-06, "loss": 0.5033, "step": 23400 }, { "epoch": 1.7392047565960609, "grad_norm": 1.5486991659031424, "learning_rate": 7.918737927320833e-06, "loss": 0.5011, "step": 23401 }, { "epoch": 1.7392790784095133, "grad_norm": 2.654608952526706, "learning_rate": 7.917953136045891e-06, "loss": 0.5842, "step": 23402 }, { "epoch": 1.7393534002229654, "grad_norm": 2.030450633976403, "learning_rate": 7.917168358175063e-06, "loss": 0.5382, "step": 23403 }, { "epoch": 1.7394277220364178, "grad_norm": 1.5999809493852941, "learning_rate": 7.916383593713395e-06, "loss": 0.5238, "step": 23404 }, { "epoch": 1.7395020438498698, "grad_norm": 1.5787932753706893, "learning_rate": 7.915598842665944e-06, "loss": 0.4139, "step": 23405 }, { "epoch": 1.7395763656633223, "grad_norm": 1.4237961996259434, "learning_rate": 7.914814105037756e-06, "loss": 0.3584, "step": 23406 }, { "epoch": 1.7396506874767743, "grad_norm": 2.131363612462065, "learning_rate": 7.914029380833887e-06, "loss": 0.4939, "step": 23407 }, { "epoch": 1.7397250092902268, "grad_norm": 2.138564918538265, "learning_rate": 7.913244670059388e-06, "loss": 0.7203, "step": 23408 }, { "epoch": 1.7397993311036788, "grad_norm": 2.2214984477915114, "learning_rate": 7.912459972719313e-06, "loss": 0.6721, "step": 23409 }, { "epoch": 1.7398736529171313, "grad_norm": 2.078701163589803, "learning_rate": 7.911675288818712e-06, "loss": 0.5873, "step": 23410 }, { "epoch": 1.7399479747305833, "grad_norm": 2.156435928334328, "learning_rate": 7.910890618362636e-06, "loss": 0.5511, "step": 23411 }, { "epoch": 1.7400222965440357, "grad_norm": 2.5348001018325728, "learning_rate": 7.910105961356138e-06, "loss": 0.6897, "step": 23412 }, { "epoch": 1.740096618357488, "grad_norm": 2.4894022950317876, "learning_rate": 7.90932131780427e-06, "loss": 0.626, "step": 23413 }, { "epoch": 1.7401709401709402, "grad_norm": 1.6748787650685102, "learning_rate": 7.908536687712083e-06, "loss": 0.4738, "step": 23414 }, { "epoch": 1.7402452619843924, "grad_norm": 2.122766046091731, "learning_rate": 7.90775207108463e-06, "loss": 0.6485, "step": 23415 }, { "epoch": 1.7403195837978447, "grad_norm": 2.1752318752750726, "learning_rate": 7.90696746792696e-06, "loss": 0.6485, "step": 23416 }, { "epoch": 1.740393905611297, "grad_norm": 2.174993875005704, "learning_rate": 7.906182878244124e-06, "loss": 0.6274, "step": 23417 }, { "epoch": 1.7404682274247492, "grad_norm": 2.0152983734258125, "learning_rate": 7.905398302041173e-06, "loss": 0.5994, "step": 23418 }, { "epoch": 1.7405425492382014, "grad_norm": 1.9228747872533705, "learning_rate": 7.90461373932316e-06, "loss": 0.6182, "step": 23419 }, { "epoch": 1.7406168710516536, "grad_norm": 1.9644198002007567, "learning_rate": 7.903829190095135e-06, "loss": 0.5118, "step": 23420 }, { "epoch": 1.7406911928651059, "grad_norm": 1.7423652217297225, "learning_rate": 7.903044654362148e-06, "loss": 0.5175, "step": 23421 }, { "epoch": 1.7407655146785581, "grad_norm": 1.8595586160602442, "learning_rate": 7.902260132129252e-06, "loss": 0.4928, "step": 23422 }, { "epoch": 1.7408398364920104, "grad_norm": 1.6394224714639178, "learning_rate": 7.901475623401494e-06, "loss": 0.6113, "step": 23423 }, { "epoch": 1.7409141583054626, "grad_norm": 2.7599278677320225, "learning_rate": 7.900691128183928e-06, "loss": 0.5889, "step": 23424 }, { "epoch": 1.740988480118915, "grad_norm": 2.0609676902245107, "learning_rate": 7.899906646481607e-06, "loss": 0.5198, "step": 23425 }, { "epoch": 1.741062801932367, "grad_norm": 1.7833520423824747, "learning_rate": 7.899122178299577e-06, "loss": 0.5257, "step": 23426 }, { "epoch": 1.7411371237458195, "grad_norm": 1.5874980542762138, "learning_rate": 7.89833772364289e-06, "loss": 0.4018, "step": 23427 }, { "epoch": 1.7412114455592715, "grad_norm": 2.0815211738514727, "learning_rate": 7.897553282516593e-06, "loss": 0.5013, "step": 23428 }, { "epoch": 1.741285767372724, "grad_norm": 1.7676571661189544, "learning_rate": 7.896768854925739e-06, "loss": 0.551, "step": 23429 }, { "epoch": 1.741360089186176, "grad_norm": 1.6589828539567426, "learning_rate": 7.895984440875377e-06, "loss": 0.5609, "step": 23430 }, { "epoch": 1.7414344109996285, "grad_norm": 1.6625942097205277, "learning_rate": 7.895200040370561e-06, "loss": 0.4463, "step": 23431 }, { "epoch": 1.7415087328130805, "grad_norm": 2.0854927229298297, "learning_rate": 7.894415653416336e-06, "loss": 0.6337, "step": 23432 }, { "epoch": 1.741583054626533, "grad_norm": 1.98300787878049, "learning_rate": 7.893631280017752e-06, "loss": 0.577, "step": 23433 }, { "epoch": 1.741657376439985, "grad_norm": 2.1556886860846878, "learning_rate": 7.892846920179862e-06, "loss": 0.546, "step": 23434 }, { "epoch": 1.7417316982534374, "grad_norm": 2.1505753916071972, "learning_rate": 7.892062573907715e-06, "loss": 0.5877, "step": 23435 }, { "epoch": 1.7418060200668897, "grad_norm": 4.004721822101809, "learning_rate": 7.891278241206362e-06, "loss": 0.506, "step": 23436 }, { "epoch": 1.741880341880342, "grad_norm": 2.2199137004622718, "learning_rate": 7.890493922080847e-06, "loss": 0.5996, "step": 23437 }, { "epoch": 1.7419546636937941, "grad_norm": 1.7045540714621064, "learning_rate": 7.889709616536227e-06, "loss": 0.447, "step": 23438 }, { "epoch": 1.7420289855072464, "grad_norm": 2.55558873218828, "learning_rate": 7.888925324577543e-06, "loss": 0.5682, "step": 23439 }, { "epoch": 1.7421033073206986, "grad_norm": 2.1706895180161805, "learning_rate": 7.888141046209849e-06, "loss": 0.6188, "step": 23440 }, { "epoch": 1.7421776291341509, "grad_norm": 1.9273385369881182, "learning_rate": 7.887356781438192e-06, "loss": 0.6259, "step": 23441 }, { "epoch": 1.742251950947603, "grad_norm": 2.2113274034315524, "learning_rate": 7.886572530267626e-06, "loss": 0.5845, "step": 23442 }, { "epoch": 1.7423262727610553, "grad_norm": 1.569733310423346, "learning_rate": 7.885788292703193e-06, "loss": 0.4719, "step": 23443 }, { "epoch": 1.7424005945745076, "grad_norm": 1.6553822654080113, "learning_rate": 7.885004068749945e-06, "loss": 0.4365, "step": 23444 }, { "epoch": 1.7424749163879598, "grad_norm": 2.140009240757747, "learning_rate": 7.884219858412932e-06, "loss": 0.7133, "step": 23445 }, { "epoch": 1.7425492382014123, "grad_norm": 1.9073995561358843, "learning_rate": 7.883435661697202e-06, "loss": 0.4735, "step": 23446 }, { "epoch": 1.7426235600148643, "grad_norm": 1.9508996667794616, "learning_rate": 7.882651478607804e-06, "loss": 0.6093, "step": 23447 }, { "epoch": 1.7426978818283168, "grad_norm": 2.238293352613904, "learning_rate": 7.881867309149785e-06, "loss": 0.6678, "step": 23448 }, { "epoch": 1.7427722036417688, "grad_norm": 2.028368517632035, "learning_rate": 7.8810831533282e-06, "loss": 0.5182, "step": 23449 }, { "epoch": 1.7428465254552212, "grad_norm": 2.4289106337210358, "learning_rate": 7.880299011148086e-06, "loss": 0.6459, "step": 23450 }, { "epoch": 1.7429208472686732, "grad_norm": 2.1419131040159374, "learning_rate": 7.879514882614497e-06, "loss": 0.5712, "step": 23451 }, { "epoch": 1.7429951690821257, "grad_norm": 2.230706234939423, "learning_rate": 7.87873076773248e-06, "loss": 0.6806, "step": 23452 }, { "epoch": 1.7430694908955777, "grad_norm": 2.1097159220849755, "learning_rate": 7.877946666507087e-06, "loss": 0.5231, "step": 23453 }, { "epoch": 1.7431438127090302, "grad_norm": 2.1450170278914547, "learning_rate": 7.877162578943361e-06, "loss": 0.5861, "step": 23454 }, { "epoch": 1.7432181345224822, "grad_norm": 1.6739120189044274, "learning_rate": 7.876378505046354e-06, "loss": 0.6054, "step": 23455 }, { "epoch": 1.7432924563359347, "grad_norm": 1.958463715273968, "learning_rate": 7.875594444821109e-06, "loss": 0.5451, "step": 23456 }, { "epoch": 1.743366778149387, "grad_norm": 1.8617667103548654, "learning_rate": 7.874810398272681e-06, "loss": 0.5724, "step": 23457 }, { "epoch": 1.7434410999628391, "grad_norm": 1.9438139352385622, "learning_rate": 7.874026365406111e-06, "loss": 0.6145, "step": 23458 }, { "epoch": 1.7435154217762914, "grad_norm": 2.02627995382401, "learning_rate": 7.87324234622645e-06, "loss": 0.4387, "step": 23459 }, { "epoch": 1.7435897435897436, "grad_norm": 1.945136177436312, "learning_rate": 7.872458340738747e-06, "loss": 0.5714, "step": 23460 }, { "epoch": 1.7436640654031959, "grad_norm": 1.9580937286987883, "learning_rate": 7.871674348948043e-06, "loss": 0.5836, "step": 23461 }, { "epoch": 1.743738387216648, "grad_norm": 1.9021061948251552, "learning_rate": 7.870890370859389e-06, "loss": 0.5048, "step": 23462 }, { "epoch": 1.7438127090301003, "grad_norm": 1.959838208569557, "learning_rate": 7.870106406477832e-06, "loss": 0.6526, "step": 23463 }, { "epoch": 1.7438870308435526, "grad_norm": 1.8175359297651836, "learning_rate": 7.869322455808422e-06, "loss": 0.5457, "step": 23464 }, { "epoch": 1.7439613526570048, "grad_norm": 1.8878572116158512, "learning_rate": 7.868538518856202e-06, "loss": 0.5478, "step": 23465 }, { "epoch": 1.744035674470457, "grad_norm": 2.244793220566816, "learning_rate": 7.86775459562622e-06, "loss": 0.5842, "step": 23466 }, { "epoch": 1.7441099962839093, "grad_norm": 1.8194188234520612, "learning_rate": 7.866970686123524e-06, "loss": 0.5341, "step": 23467 }, { "epoch": 1.7441843180973615, "grad_norm": 1.9576885940969189, "learning_rate": 7.86618679035316e-06, "loss": 0.6679, "step": 23468 }, { "epoch": 1.744258639910814, "grad_norm": 2.493262157386578, "learning_rate": 7.865402908320173e-06, "loss": 0.7057, "step": 23469 }, { "epoch": 1.744332961724266, "grad_norm": 1.754192241923806, "learning_rate": 7.864619040029613e-06, "loss": 0.7254, "step": 23470 }, { "epoch": 1.7444072835377185, "grad_norm": 2.2241435542105794, "learning_rate": 7.863835185486527e-06, "loss": 0.5101, "step": 23471 }, { "epoch": 1.7444816053511705, "grad_norm": 2.2677172130818577, "learning_rate": 7.863051344695959e-06, "loss": 0.56, "step": 23472 }, { "epoch": 1.744555927164623, "grad_norm": 2.471739240691693, "learning_rate": 7.862267517662951e-06, "loss": 0.5695, "step": 23473 }, { "epoch": 1.744630248978075, "grad_norm": 1.7367883829127928, "learning_rate": 7.861483704392556e-06, "loss": 0.5887, "step": 23474 }, { "epoch": 1.7447045707915274, "grad_norm": 1.8801507376840523, "learning_rate": 7.860699904889819e-06, "loss": 0.6001, "step": 23475 }, { "epoch": 1.7447788926049794, "grad_norm": 2.1282346066014517, "learning_rate": 7.859916119159785e-06, "loss": 0.6023, "step": 23476 }, { "epoch": 1.7448532144184319, "grad_norm": 1.8125860366249202, "learning_rate": 7.859132347207497e-06, "loss": 0.4459, "step": 23477 }, { "epoch": 1.744927536231884, "grad_norm": 1.5575578855630001, "learning_rate": 7.858348589038004e-06, "loss": 0.4662, "step": 23478 }, { "epoch": 1.7450018580453364, "grad_norm": 1.914631377834109, "learning_rate": 7.857564844656356e-06, "loss": 0.5027, "step": 23479 }, { "epoch": 1.7450761798587886, "grad_norm": 2.0466745404184894, "learning_rate": 7.856781114067591e-06, "loss": 0.563, "step": 23480 }, { "epoch": 1.7451505016722408, "grad_norm": 2.2523528585197363, "learning_rate": 7.85599739727676e-06, "loss": 0.4652, "step": 23481 }, { "epoch": 1.745224823485693, "grad_norm": 2.6392997476828204, "learning_rate": 7.855213694288905e-06, "loss": 0.6163, "step": 23482 }, { "epoch": 1.7452991452991453, "grad_norm": 2.6104397168802698, "learning_rate": 7.854430005109077e-06, "loss": 0.632, "step": 23483 }, { "epoch": 1.7453734671125976, "grad_norm": 2.3875539254673983, "learning_rate": 7.853646329742313e-06, "loss": 0.6382, "step": 23484 }, { "epoch": 1.7454477889260498, "grad_norm": 1.9841225864514629, "learning_rate": 7.852862668193663e-06, "loss": 0.6403, "step": 23485 }, { "epoch": 1.745522110739502, "grad_norm": 1.665675794754976, "learning_rate": 7.852079020468173e-06, "loss": 0.6046, "step": 23486 }, { "epoch": 1.7455964325529543, "grad_norm": 2.643258455702277, "learning_rate": 7.851295386570885e-06, "loss": 0.7876, "step": 23487 }, { "epoch": 1.7456707543664065, "grad_norm": 2.1769418572008785, "learning_rate": 7.850511766506847e-06, "loss": 0.4631, "step": 23488 }, { "epoch": 1.7457450761798587, "grad_norm": 1.8760957815636659, "learning_rate": 7.849728160281102e-06, "loss": 0.5979, "step": 23489 }, { "epoch": 1.745819397993311, "grad_norm": 2.087443178217792, "learning_rate": 7.848944567898697e-06, "loss": 0.4083, "step": 23490 }, { "epoch": 1.7458937198067632, "grad_norm": 1.9158553677614953, "learning_rate": 7.848160989364673e-06, "loss": 0.5958, "step": 23491 }, { "epoch": 1.7459680416202157, "grad_norm": 2.0326369637890065, "learning_rate": 7.847377424684079e-06, "loss": 0.5862, "step": 23492 }, { "epoch": 1.7460423634336677, "grad_norm": 1.6439996213697405, "learning_rate": 7.846593873861955e-06, "loss": 0.4822, "step": 23493 }, { "epoch": 1.7461166852471202, "grad_norm": 1.567215206044093, "learning_rate": 7.845810336903353e-06, "loss": 0.4146, "step": 23494 }, { "epoch": 1.7461910070605722, "grad_norm": 1.6933523601790212, "learning_rate": 7.845026813813308e-06, "loss": 0.5427, "step": 23495 }, { "epoch": 1.7462653288740246, "grad_norm": 1.6648402630784982, "learning_rate": 7.844243304596869e-06, "loss": 0.4885, "step": 23496 }, { "epoch": 1.7463396506874767, "grad_norm": 2.3906134005360387, "learning_rate": 7.843459809259083e-06, "loss": 0.7754, "step": 23497 }, { "epoch": 1.7464139725009291, "grad_norm": 2.252830210823119, "learning_rate": 7.842676327804986e-06, "loss": 0.6589, "step": 23498 }, { "epoch": 1.7464882943143811, "grad_norm": 2.218020713170994, "learning_rate": 7.841892860239628e-06, "loss": 0.7515, "step": 23499 }, { "epoch": 1.7465626161278336, "grad_norm": 1.869626629218773, "learning_rate": 7.841109406568052e-06, "loss": 0.3799, "step": 23500 }, { "epoch": 1.7466369379412856, "grad_norm": 1.9096335026802167, "learning_rate": 7.840325966795303e-06, "loss": 0.4249, "step": 23501 }, { "epoch": 1.746711259754738, "grad_norm": 2.0122376370600543, "learning_rate": 7.839542540926423e-06, "loss": 0.5884, "step": 23502 }, { "epoch": 1.7467855815681903, "grad_norm": 1.6360132141457218, "learning_rate": 7.838759128966455e-06, "loss": 0.4871, "step": 23503 }, { "epoch": 1.7468599033816425, "grad_norm": 1.7574364335630903, "learning_rate": 7.837975730920444e-06, "loss": 0.5032, "step": 23504 }, { "epoch": 1.7469342251950948, "grad_norm": 1.773419932544883, "learning_rate": 7.837192346793437e-06, "loss": 0.4531, "step": 23505 }, { "epoch": 1.747008547008547, "grad_norm": 1.8899681362402458, "learning_rate": 7.836408976590469e-06, "loss": 0.497, "step": 23506 }, { "epoch": 1.7470828688219993, "grad_norm": 2.291353030932688, "learning_rate": 7.835625620316586e-06, "loss": 0.5917, "step": 23507 }, { "epoch": 1.7471571906354515, "grad_norm": 1.9479517041652292, "learning_rate": 7.834842277976838e-06, "loss": 0.4704, "step": 23508 }, { "epoch": 1.7472315124489037, "grad_norm": 2.4600181191113815, "learning_rate": 7.83405894957626e-06, "loss": 0.6026, "step": 23509 }, { "epoch": 1.747305834262356, "grad_norm": 2.0361884974531685, "learning_rate": 7.833275635119898e-06, "loss": 0.5961, "step": 23510 }, { "epoch": 1.7473801560758082, "grad_norm": 2.8092773652908525, "learning_rate": 7.832492334612795e-06, "loss": 0.637, "step": 23511 }, { "epoch": 1.7474544778892604, "grad_norm": 2.087473812437329, "learning_rate": 7.831709048059995e-06, "loss": 0.4892, "step": 23512 }, { "epoch": 1.747528799702713, "grad_norm": 1.6217358426277708, "learning_rate": 7.830925775466539e-06, "loss": 0.5718, "step": 23513 }, { "epoch": 1.747603121516165, "grad_norm": 1.939588540515636, "learning_rate": 7.83014251683747e-06, "loss": 0.6121, "step": 23514 }, { "epoch": 1.7476774433296174, "grad_norm": 2.392708407576232, "learning_rate": 7.829359272177832e-06, "loss": 0.63, "step": 23515 }, { "epoch": 1.7477517651430694, "grad_norm": 1.8761503163136555, "learning_rate": 7.828576041492664e-06, "loss": 0.5732, "step": 23516 }, { "epoch": 1.7478260869565219, "grad_norm": 1.6411124144281726, "learning_rate": 7.827792824787016e-06, "loss": 0.4947, "step": 23517 }, { "epoch": 1.7479004087699739, "grad_norm": 1.7886958281814787, "learning_rate": 7.82700962206592e-06, "loss": 0.5726, "step": 23518 }, { "epoch": 1.7479747305834263, "grad_norm": 2.0401949725619155, "learning_rate": 7.826226433334427e-06, "loss": 0.5713, "step": 23519 }, { "epoch": 1.7480490523968784, "grad_norm": 2.424861986069305, "learning_rate": 7.825443258597574e-06, "loss": 0.7292, "step": 23520 }, { "epoch": 1.7481233742103308, "grad_norm": 1.8738201176581641, "learning_rate": 7.824660097860402e-06, "loss": 0.5271, "step": 23521 }, { "epoch": 1.7481976960237828, "grad_norm": 2.2279230808658648, "learning_rate": 7.823876951127958e-06, "loss": 0.5534, "step": 23522 }, { "epoch": 1.7482720178372353, "grad_norm": 1.929180370772042, "learning_rate": 7.82309381840528e-06, "loss": 0.5133, "step": 23523 }, { "epoch": 1.7483463396506875, "grad_norm": 2.033439533678107, "learning_rate": 7.822310699697413e-06, "loss": 0.5896, "step": 23524 }, { "epoch": 1.7484206614641398, "grad_norm": 1.9055209414222103, "learning_rate": 7.821527595009393e-06, "loss": 0.5414, "step": 23525 }, { "epoch": 1.748494983277592, "grad_norm": 1.675840906623004, "learning_rate": 7.820744504346268e-06, "loss": 0.5945, "step": 23526 }, { "epoch": 1.7485693050910442, "grad_norm": 1.9402425850895157, "learning_rate": 7.819961427713076e-06, "loss": 0.5306, "step": 23527 }, { "epoch": 1.7486436269044965, "grad_norm": 2.141892440412693, "learning_rate": 7.819178365114863e-06, "loss": 0.6274, "step": 23528 }, { "epoch": 1.7487179487179487, "grad_norm": 1.7971134689576491, "learning_rate": 7.818395316556662e-06, "loss": 0.6043, "step": 23529 }, { "epoch": 1.748792270531401, "grad_norm": 2.5415456545144233, "learning_rate": 7.817612282043522e-06, "loss": 0.5851, "step": 23530 }, { "epoch": 1.7488665923448532, "grad_norm": 1.954187236453999, "learning_rate": 7.816829261580478e-06, "loss": 0.6338, "step": 23531 }, { "epoch": 1.7489409141583054, "grad_norm": 1.9200306322779042, "learning_rate": 7.816046255172574e-06, "loss": 0.5273, "step": 23532 }, { "epoch": 1.7490152359717577, "grad_norm": 1.4986261643482444, "learning_rate": 7.81526326282485e-06, "loss": 0.4429, "step": 23533 }, { "epoch": 1.74908955778521, "grad_norm": 1.579622456280534, "learning_rate": 7.814480284542351e-06, "loss": 0.5059, "step": 23534 }, { "epoch": 1.7491638795986622, "grad_norm": 2.01521281514705, "learning_rate": 7.813697320330113e-06, "loss": 0.5851, "step": 23535 }, { "epoch": 1.7492382014121146, "grad_norm": 1.6613303597819382, "learning_rate": 7.812914370193177e-06, "loss": 0.428, "step": 23536 }, { "epoch": 1.7493125232255666, "grad_norm": 2.3861803525250465, "learning_rate": 7.812131434136585e-06, "loss": 0.4259, "step": 23537 }, { "epoch": 1.749386845039019, "grad_norm": 5.376646788200805, "learning_rate": 7.811348512165377e-06, "loss": 0.7127, "step": 23538 }, { "epoch": 1.749461166852471, "grad_norm": 2.3581617351114854, "learning_rate": 7.810565604284601e-06, "loss": 0.5611, "step": 23539 }, { "epoch": 1.7495354886659236, "grad_norm": 1.7660622403094905, "learning_rate": 7.809782710499281e-06, "loss": 0.4934, "step": 23540 }, { "epoch": 1.7496098104793756, "grad_norm": 2.41998004065873, "learning_rate": 7.808999830814472e-06, "loss": 0.6836, "step": 23541 }, { "epoch": 1.749684132292828, "grad_norm": 2.3451961323013313, "learning_rate": 7.808216965235205e-06, "loss": 0.5597, "step": 23542 }, { "epoch": 1.74975845410628, "grad_norm": 2.3883399756143877, "learning_rate": 7.807434113766524e-06, "loss": 0.5665, "step": 23543 }, { "epoch": 1.7498327759197325, "grad_norm": 1.9926707737858542, "learning_rate": 7.806651276413466e-06, "loss": 0.4802, "step": 23544 }, { "epoch": 1.7499070977331845, "grad_norm": 2.114166787705297, "learning_rate": 7.805868453181079e-06, "loss": 0.6283, "step": 23545 }, { "epoch": 1.749981419546637, "grad_norm": 1.8383618520135747, "learning_rate": 7.805085644074393e-06, "loss": 0.5524, "step": 23546 }, { "epoch": 1.7500557413600892, "grad_norm": 2.3609889747869555, "learning_rate": 7.804302849098452e-06, "loss": 0.53, "step": 23547 }, { "epoch": 1.7501300631735415, "grad_norm": 2.6215084390499013, "learning_rate": 7.803520068258295e-06, "loss": 0.7906, "step": 23548 }, { "epoch": 1.7502043849869937, "grad_norm": 2.5387391001062776, "learning_rate": 7.802737301558962e-06, "loss": 0.6566, "step": 23549 }, { "epoch": 1.750278706800446, "grad_norm": 1.8787598635805864, "learning_rate": 7.801954549005494e-06, "loss": 0.5693, "step": 23550 }, { "epoch": 1.7503530286138982, "grad_norm": 1.9213458773375633, "learning_rate": 7.80117181060293e-06, "loss": 0.51, "step": 23551 }, { "epoch": 1.7504273504273504, "grad_norm": 2.150294429051647, "learning_rate": 7.800389086356306e-06, "loss": 0.52, "step": 23552 }, { "epoch": 1.7505016722408027, "grad_norm": 1.9169052336766352, "learning_rate": 7.799606376270662e-06, "loss": 0.5438, "step": 23553 }, { "epoch": 1.750575994054255, "grad_norm": 2.1937356105978183, "learning_rate": 7.798823680351038e-06, "loss": 0.6257, "step": 23554 }, { "epoch": 1.7506503158677071, "grad_norm": 1.7521448337868806, "learning_rate": 7.798040998602471e-06, "loss": 0.4505, "step": 23555 }, { "epoch": 1.7507246376811594, "grad_norm": 2.009964885448244, "learning_rate": 7.797258331030006e-06, "loss": 0.5517, "step": 23556 }, { "epoch": 1.7507989594946116, "grad_norm": 1.99288370771314, "learning_rate": 7.796475677638676e-06, "loss": 0.5701, "step": 23557 }, { "epoch": 1.7508732813080639, "grad_norm": 2.2467856379079567, "learning_rate": 7.79569303843352e-06, "loss": 0.6113, "step": 23558 }, { "epoch": 1.7509476031215163, "grad_norm": 2.0207848237692048, "learning_rate": 7.794910413419578e-06, "loss": 0.6145, "step": 23559 }, { "epoch": 1.7510219249349683, "grad_norm": 2.3314355956807016, "learning_rate": 7.79412780260189e-06, "loss": 0.6529, "step": 23560 }, { "epoch": 1.7510962467484208, "grad_norm": 1.7398687211169477, "learning_rate": 7.793345205985492e-06, "loss": 0.5857, "step": 23561 }, { "epoch": 1.7511705685618728, "grad_norm": 1.949475021791619, "learning_rate": 7.792562623575424e-06, "loss": 0.5662, "step": 23562 }, { "epoch": 1.7512448903753253, "grad_norm": 2.2977765006024384, "learning_rate": 7.791780055376723e-06, "loss": 0.6643, "step": 23563 }, { "epoch": 1.7513192121887773, "grad_norm": 2.157868649727722, "learning_rate": 7.790997501394427e-06, "loss": 0.5912, "step": 23564 }, { "epoch": 1.7513935340022297, "grad_norm": 1.5530572102871436, "learning_rate": 7.790214961633573e-06, "loss": 0.5166, "step": 23565 }, { "epoch": 1.7514678558156818, "grad_norm": 1.9519117493639313, "learning_rate": 7.7894324360992e-06, "loss": 0.6036, "step": 23566 }, { "epoch": 1.7515421776291342, "grad_norm": 1.8855350068879766, "learning_rate": 7.78864992479635e-06, "loss": 0.463, "step": 23567 }, { "epoch": 1.7516164994425862, "grad_norm": 1.7842545848430866, "learning_rate": 7.787867427730054e-06, "loss": 0.5535, "step": 23568 }, { "epoch": 1.7516908212560387, "grad_norm": 2.3493720545847143, "learning_rate": 7.787084944905353e-06, "loss": 0.4917, "step": 23569 }, { "epoch": 1.751765143069491, "grad_norm": 1.8103005911704397, "learning_rate": 7.786302476327283e-06, "loss": 0.5976, "step": 23570 }, { "epoch": 1.7518394648829432, "grad_norm": 2.170875614101636, "learning_rate": 7.785520022000885e-06, "loss": 0.559, "step": 23571 }, { "epoch": 1.7519137866963954, "grad_norm": 1.7228755960769635, "learning_rate": 7.784737581931196e-06, "loss": 0.4205, "step": 23572 }, { "epoch": 1.7519881085098477, "grad_norm": 1.6995568440441091, "learning_rate": 7.783955156123252e-06, "loss": 0.5118, "step": 23573 }, { "epoch": 1.7520624303233, "grad_norm": 2.434937912280024, "learning_rate": 7.783172744582087e-06, "loss": 0.6065, "step": 23574 }, { "epoch": 1.7521367521367521, "grad_norm": 1.593850157116551, "learning_rate": 7.782390347312739e-06, "loss": 0.4841, "step": 23575 }, { "epoch": 1.7522110739502044, "grad_norm": 1.9963144175094718, "learning_rate": 7.781607964320249e-06, "loss": 0.6674, "step": 23576 }, { "epoch": 1.7522853957636566, "grad_norm": 1.5502295944961293, "learning_rate": 7.780825595609652e-06, "loss": 0.5109, "step": 23577 }, { "epoch": 1.7523597175771088, "grad_norm": 1.8634200177020401, "learning_rate": 7.780043241185984e-06, "loss": 0.5364, "step": 23578 }, { "epoch": 1.752434039390561, "grad_norm": 1.92090361359556, "learning_rate": 7.779260901054282e-06, "loss": 0.6385, "step": 23579 }, { "epoch": 1.7525083612040135, "grad_norm": 1.904121874453541, "learning_rate": 7.778478575219583e-06, "loss": 0.6034, "step": 23580 }, { "epoch": 1.7525826830174656, "grad_norm": 1.872830442261957, "learning_rate": 7.777696263686924e-06, "loss": 0.5074, "step": 23581 }, { "epoch": 1.752657004830918, "grad_norm": 1.758824776475689, "learning_rate": 7.776913966461344e-06, "loss": 0.4718, "step": 23582 }, { "epoch": 1.75273132664437, "grad_norm": 1.4962893756687197, "learning_rate": 7.776131683547873e-06, "loss": 0.3554, "step": 23583 }, { "epoch": 1.7528056484578225, "grad_norm": 1.9980139162276356, "learning_rate": 7.775349414951555e-06, "loss": 0.5865, "step": 23584 }, { "epoch": 1.7528799702712745, "grad_norm": 2.330676518407488, "learning_rate": 7.774567160677418e-06, "loss": 0.6292, "step": 23585 }, { "epoch": 1.752954292084727, "grad_norm": 1.7965040804945571, "learning_rate": 7.773784920730502e-06, "loss": 0.6047, "step": 23586 }, { "epoch": 1.753028613898179, "grad_norm": 1.9716678599835182, "learning_rate": 7.773002695115844e-06, "loss": 0.5053, "step": 23587 }, { "epoch": 1.7531029357116314, "grad_norm": 1.558519647538419, "learning_rate": 7.77222048383848e-06, "loss": 0.4382, "step": 23588 }, { "epoch": 1.7531772575250835, "grad_norm": 1.9308345092014596, "learning_rate": 7.771438286903446e-06, "loss": 0.5896, "step": 23589 }, { "epoch": 1.753251579338536, "grad_norm": 1.6033579751201432, "learning_rate": 7.770656104315773e-06, "loss": 0.5579, "step": 23590 }, { "epoch": 1.753325901151988, "grad_norm": 1.8596156648047846, "learning_rate": 7.769873936080502e-06, "loss": 0.6208, "step": 23591 }, { "epoch": 1.7534002229654404, "grad_norm": 1.9224846339231316, "learning_rate": 7.769091782202667e-06, "loss": 0.4773, "step": 23592 }, { "epoch": 1.7534745447788926, "grad_norm": 2.214128403367298, "learning_rate": 7.768309642687305e-06, "loss": 0.6932, "step": 23593 }, { "epoch": 1.7535488665923449, "grad_norm": 2.136895464819827, "learning_rate": 7.767527517539449e-06, "loss": 0.5059, "step": 23594 }, { "epoch": 1.7536231884057971, "grad_norm": 1.9717389116891133, "learning_rate": 7.766745406764132e-06, "loss": 0.5007, "step": 23595 }, { "epoch": 1.7536975102192494, "grad_norm": 2.209557092313945, "learning_rate": 7.765963310366398e-06, "loss": 0.5838, "step": 23596 }, { "epoch": 1.7537718320327016, "grad_norm": 2.238912915463932, "learning_rate": 7.765181228351273e-06, "loss": 0.7716, "step": 23597 }, { "epoch": 1.7538461538461538, "grad_norm": 1.9855686098265224, "learning_rate": 7.764399160723794e-06, "loss": 0.6983, "step": 23598 }, { "epoch": 1.753920475659606, "grad_norm": 1.8873002175418312, "learning_rate": 7.763617107488997e-06, "loss": 0.5146, "step": 23599 }, { "epoch": 1.7539947974730583, "grad_norm": 1.806584263646086, "learning_rate": 7.76283506865192e-06, "loss": 0.6683, "step": 23600 }, { "epoch": 1.7540691192865105, "grad_norm": 1.970307169871481, "learning_rate": 7.762053044217593e-06, "loss": 0.6081, "step": 23601 }, { "epoch": 1.7541434410999628, "grad_norm": 1.764367068400404, "learning_rate": 7.761271034191051e-06, "loss": 0.4746, "step": 23602 }, { "epoch": 1.7542177629134152, "grad_norm": 1.8748689245242898, "learning_rate": 7.76048903857733e-06, "loss": 0.5504, "step": 23603 }, { "epoch": 1.7542920847268673, "grad_norm": 2.262501129329168, "learning_rate": 7.759707057381468e-06, "loss": 0.5654, "step": 23604 }, { "epoch": 1.7543664065403197, "grad_norm": 2.254607975628805, "learning_rate": 7.758925090608493e-06, "loss": 0.6002, "step": 23605 }, { "epoch": 1.7544407283537717, "grad_norm": 1.9404685982483902, "learning_rate": 7.758143138263442e-06, "loss": 0.4874, "step": 23606 }, { "epoch": 1.7545150501672242, "grad_norm": 1.7336932099150915, "learning_rate": 7.757361200351354e-06, "loss": 0.5952, "step": 23607 }, { "epoch": 1.7545893719806762, "grad_norm": 2.920890890429456, "learning_rate": 7.756579276877251e-06, "loss": 0.5887, "step": 23608 }, { "epoch": 1.7546636937941287, "grad_norm": 2.1594507979251114, "learning_rate": 7.755797367846178e-06, "loss": 0.6651, "step": 23609 }, { "epoch": 1.7547380156075807, "grad_norm": 1.8973472887403542, "learning_rate": 7.755015473263163e-06, "loss": 0.5698, "step": 23610 }, { "epoch": 1.7548123374210332, "grad_norm": 1.9222092335545353, "learning_rate": 7.754233593133244e-06, "loss": 0.4343, "step": 23611 }, { "epoch": 1.7548866592344852, "grad_norm": 2.3396251814869222, "learning_rate": 7.753451727461452e-06, "loss": 0.6935, "step": 23612 }, { "epoch": 1.7549609810479376, "grad_norm": 1.673585555834269, "learning_rate": 7.75266987625282e-06, "loss": 0.5532, "step": 23613 }, { "epoch": 1.7550353028613899, "grad_norm": 1.9592548873637525, "learning_rate": 7.751888039512383e-06, "loss": 0.5337, "step": 23614 }, { "epoch": 1.755109624674842, "grad_norm": 2.1974152784330947, "learning_rate": 7.751106217245176e-06, "loss": 0.6136, "step": 23615 }, { "epoch": 1.7551839464882943, "grad_norm": 1.7717850654481848, "learning_rate": 7.75032440945623e-06, "loss": 0.5382, "step": 23616 }, { "epoch": 1.7552582683017466, "grad_norm": 1.9471317629309104, "learning_rate": 7.749542616150576e-06, "loss": 0.4765, "step": 23617 }, { "epoch": 1.7553325901151988, "grad_norm": 2.624406507937793, "learning_rate": 7.748760837333256e-06, "loss": 0.8754, "step": 23618 }, { "epoch": 1.755406911928651, "grad_norm": 2.6005482086164466, "learning_rate": 7.747979073009292e-06, "loss": 0.6696, "step": 23619 }, { "epoch": 1.7554812337421033, "grad_norm": 2.3741751929756307, "learning_rate": 7.747197323183721e-06, "loss": 0.4605, "step": 23620 }, { "epoch": 1.7555555555555555, "grad_norm": 1.9284045114541641, "learning_rate": 7.746415587861579e-06, "loss": 0.5046, "step": 23621 }, { "epoch": 1.7556298773690078, "grad_norm": 1.5675771101313263, "learning_rate": 7.745633867047896e-06, "loss": 0.4672, "step": 23622 }, { "epoch": 1.75570419918246, "grad_norm": 1.745494473370337, "learning_rate": 7.744852160747705e-06, "loss": 0.5807, "step": 23623 }, { "epoch": 1.7557785209959123, "grad_norm": 1.7869031712134924, "learning_rate": 7.744070468966038e-06, "loss": 0.6004, "step": 23624 }, { "epoch": 1.7558528428093645, "grad_norm": 1.655567741951238, "learning_rate": 7.74328879170793e-06, "loss": 0.5321, "step": 23625 }, { "epoch": 1.755927164622817, "grad_norm": 1.9970056701598426, "learning_rate": 7.742507128978412e-06, "loss": 0.7103, "step": 23626 }, { "epoch": 1.756001486436269, "grad_norm": 1.888772054685712, "learning_rate": 7.741725480782514e-06, "loss": 0.5864, "step": 23627 }, { "epoch": 1.7560758082497214, "grad_norm": 1.8533092007230496, "learning_rate": 7.740943847125269e-06, "loss": 0.519, "step": 23628 }, { "epoch": 1.7561501300631734, "grad_norm": 1.7612199895932448, "learning_rate": 7.740162228011712e-06, "loss": 0.4893, "step": 23629 }, { "epoch": 1.756224451876626, "grad_norm": 2.048488894951976, "learning_rate": 7.739380623446877e-06, "loss": 0.5803, "step": 23630 }, { "epoch": 1.756298773690078, "grad_norm": 2.067470032692515, "learning_rate": 7.738599033435788e-06, "loss": 0.5529, "step": 23631 }, { "epoch": 1.7563730955035304, "grad_norm": 2.1250241046203064, "learning_rate": 7.737817457983481e-06, "loss": 0.6515, "step": 23632 }, { "epoch": 1.7564474173169824, "grad_norm": 2.0646561795827387, "learning_rate": 7.737035897094991e-06, "loss": 0.6014, "step": 23633 }, { "epoch": 1.7565217391304349, "grad_norm": 2.2708496372898765, "learning_rate": 7.736254350775342e-06, "loss": 0.5924, "step": 23634 }, { "epoch": 1.7565960609438869, "grad_norm": 1.910132024655364, "learning_rate": 7.735472819029573e-06, "loss": 0.6139, "step": 23635 }, { "epoch": 1.7566703827573393, "grad_norm": 2.033251359394676, "learning_rate": 7.734691301862711e-06, "loss": 0.5993, "step": 23636 }, { "epoch": 1.7567447045707916, "grad_norm": 2.1003165186157275, "learning_rate": 7.733909799279791e-06, "loss": 0.6598, "step": 23637 }, { "epoch": 1.7568190263842438, "grad_norm": 1.9445171913884336, "learning_rate": 7.73312831128584e-06, "loss": 0.6535, "step": 23638 }, { "epoch": 1.756893348197696, "grad_norm": 1.6061185667733342, "learning_rate": 7.73234683788589e-06, "loss": 0.6071, "step": 23639 }, { "epoch": 1.7569676700111483, "grad_norm": 2.824990657952118, "learning_rate": 7.731565379084976e-06, "loss": 0.6196, "step": 23640 }, { "epoch": 1.7570419918246005, "grad_norm": 2.5821759608729593, "learning_rate": 7.73078393488813e-06, "loss": 0.6611, "step": 23641 }, { "epoch": 1.7571163136380528, "grad_norm": 1.6771963236384493, "learning_rate": 7.730002505300375e-06, "loss": 0.4994, "step": 23642 }, { "epoch": 1.757190635451505, "grad_norm": 1.7416227101335584, "learning_rate": 7.729221090326744e-06, "loss": 0.4886, "step": 23643 }, { "epoch": 1.7572649572649572, "grad_norm": 1.9597443345914418, "learning_rate": 7.728439689972275e-06, "loss": 0.5372, "step": 23644 }, { "epoch": 1.7573392790784095, "grad_norm": 2.2803726018786534, "learning_rate": 7.727658304241989e-06, "loss": 0.5199, "step": 23645 }, { "epoch": 1.7574136008918617, "grad_norm": 1.785927837169372, "learning_rate": 7.72687693314092e-06, "loss": 0.5388, "step": 23646 }, { "epoch": 1.7574879227053142, "grad_norm": 1.901362763390402, "learning_rate": 7.726095576674103e-06, "loss": 0.4999, "step": 23647 }, { "epoch": 1.7575622445187662, "grad_norm": 1.886086611289694, "learning_rate": 7.725314234846565e-06, "loss": 0.5821, "step": 23648 }, { "epoch": 1.7576365663322187, "grad_norm": 1.5522964264592378, "learning_rate": 7.724532907663333e-06, "loss": 0.5029, "step": 23649 }, { "epoch": 1.7577108881456707, "grad_norm": 2.1043066114689943, "learning_rate": 7.723751595129441e-06, "loss": 0.6983, "step": 23650 }, { "epoch": 1.7577852099591231, "grad_norm": 2.9079089455350142, "learning_rate": 7.722970297249918e-06, "loss": 0.6534, "step": 23651 }, { "epoch": 1.7578595317725751, "grad_norm": 2.0341379971412503, "learning_rate": 7.722189014029798e-06, "loss": 0.6156, "step": 23652 }, { "epoch": 1.7579338535860276, "grad_norm": 2.2690833601484814, "learning_rate": 7.721407745474103e-06, "loss": 0.4881, "step": 23653 }, { "epoch": 1.7580081753994796, "grad_norm": 1.7829459690622407, "learning_rate": 7.720626491587866e-06, "loss": 0.5661, "step": 23654 }, { "epoch": 1.758082497212932, "grad_norm": 1.799222360962786, "learning_rate": 7.71984525237612e-06, "loss": 0.4429, "step": 23655 }, { "epoch": 1.758156819026384, "grad_norm": 1.695447189108506, "learning_rate": 7.71906402784389e-06, "loss": 0.4571, "step": 23656 }, { "epoch": 1.7582311408398366, "grad_norm": 1.8686242492498768, "learning_rate": 7.718282817996208e-06, "loss": 0.5505, "step": 23657 }, { "epoch": 1.7583054626532886, "grad_norm": 1.8091830647504414, "learning_rate": 7.717501622838101e-06, "loss": 0.6238, "step": 23658 }, { "epoch": 1.758379784466741, "grad_norm": 2.069420840926371, "learning_rate": 7.716720442374605e-06, "loss": 0.5794, "step": 23659 }, { "epoch": 1.7584541062801933, "grad_norm": 1.8012057754750008, "learning_rate": 7.71593927661074e-06, "loss": 0.6002, "step": 23660 }, { "epoch": 1.7585284280936455, "grad_norm": 1.8920233386807987, "learning_rate": 7.71515812555154e-06, "loss": 0.5595, "step": 23661 }, { "epoch": 1.7586027499070978, "grad_norm": 1.967915433951895, "learning_rate": 7.714376989202033e-06, "loss": 0.5177, "step": 23662 }, { "epoch": 1.75867707172055, "grad_norm": 2.177419847989486, "learning_rate": 7.713595867567253e-06, "loss": 0.708, "step": 23663 }, { "epoch": 1.7587513935340022, "grad_norm": 2.295606658356261, "learning_rate": 7.71281476065222e-06, "loss": 0.5223, "step": 23664 }, { "epoch": 1.7588257153474545, "grad_norm": 1.7129452525449376, "learning_rate": 7.712033668461968e-06, "loss": 0.4836, "step": 23665 }, { "epoch": 1.7589000371609067, "grad_norm": 2.067223947375694, "learning_rate": 7.711252591001525e-06, "loss": 0.6155, "step": 23666 }, { "epoch": 1.758974358974359, "grad_norm": 2.6139779222614994, "learning_rate": 7.710471528275919e-06, "loss": 0.6896, "step": 23667 }, { "epoch": 1.7590486807878112, "grad_norm": 2.2093022533323956, "learning_rate": 7.709690480290176e-06, "loss": 0.7159, "step": 23668 }, { "epoch": 1.7591230026012634, "grad_norm": 1.8415580290153857, "learning_rate": 7.708909447049327e-06, "loss": 0.55, "step": 23669 }, { "epoch": 1.7591973244147159, "grad_norm": 1.8703599105069935, "learning_rate": 7.708128428558403e-06, "loss": 0.6698, "step": 23670 }, { "epoch": 1.759271646228168, "grad_norm": 1.8949572095085656, "learning_rate": 7.707347424822427e-06, "loss": 0.6943, "step": 23671 }, { "epoch": 1.7593459680416204, "grad_norm": 2.286967329886015, "learning_rate": 7.706566435846431e-06, "loss": 0.6372, "step": 23672 }, { "epoch": 1.7594202898550724, "grad_norm": 1.9765097361427866, "learning_rate": 7.70578546163544e-06, "loss": 0.5889, "step": 23673 }, { "epoch": 1.7594946116685248, "grad_norm": 2.044129304540563, "learning_rate": 7.705004502194482e-06, "loss": 0.6629, "step": 23674 }, { "epoch": 1.7595689334819768, "grad_norm": 1.9186032827437387, "learning_rate": 7.704223557528591e-06, "loss": 0.6166, "step": 23675 }, { "epoch": 1.7596432552954293, "grad_norm": 1.8623922425950978, "learning_rate": 7.703442627642786e-06, "loss": 0.522, "step": 23676 }, { "epoch": 1.7597175771088813, "grad_norm": 1.8793097366442122, "learning_rate": 7.7026617125421e-06, "loss": 0.6781, "step": 23677 }, { "epoch": 1.7597918989223338, "grad_norm": 1.9063647328268223, "learning_rate": 7.701880812231557e-06, "loss": 0.5212, "step": 23678 }, { "epoch": 1.7598662207357858, "grad_norm": 1.7530991353959855, "learning_rate": 7.701099926716185e-06, "loss": 0.573, "step": 23679 }, { "epoch": 1.7599405425492383, "grad_norm": 1.8912798718567088, "learning_rate": 7.700319056001013e-06, "loss": 0.5816, "step": 23680 }, { "epoch": 1.7600148643626905, "grad_norm": 1.760259127851251, "learning_rate": 7.69953820009107e-06, "loss": 0.6137, "step": 23681 }, { "epoch": 1.7600891861761427, "grad_norm": 1.8932102535455777, "learning_rate": 7.698757358991378e-06, "loss": 0.6086, "step": 23682 }, { "epoch": 1.760163507989595, "grad_norm": 1.9602779629534486, "learning_rate": 7.697976532706967e-06, "loss": 0.7308, "step": 23683 }, { "epoch": 1.7602378298030472, "grad_norm": 2.044953036895366, "learning_rate": 7.697195721242862e-06, "loss": 0.6265, "step": 23684 }, { "epoch": 1.7603121516164995, "grad_norm": 2.146174903675106, "learning_rate": 7.696414924604094e-06, "loss": 0.6692, "step": 23685 }, { "epoch": 1.7603864734299517, "grad_norm": 2.2282525049639954, "learning_rate": 7.695634142795692e-06, "loss": 0.5948, "step": 23686 }, { "epoch": 1.760460795243404, "grad_norm": 2.1489785539385333, "learning_rate": 7.69485337582267e-06, "loss": 0.5437, "step": 23687 }, { "epoch": 1.7605351170568562, "grad_norm": 1.8408358069739572, "learning_rate": 7.694072623690068e-06, "loss": 0.5342, "step": 23688 }, { "epoch": 1.7606094388703084, "grad_norm": 1.9241701292987576, "learning_rate": 7.693291886402903e-06, "loss": 0.5327, "step": 23689 }, { "epoch": 1.7606837606837606, "grad_norm": 2.2733378281851833, "learning_rate": 7.692511163966205e-06, "loss": 0.7695, "step": 23690 }, { "epoch": 1.7607580824972129, "grad_norm": 1.9260054836123646, "learning_rate": 7.691730456385001e-06, "loss": 0.47, "step": 23691 }, { "epoch": 1.7608324043106651, "grad_norm": 2.1858126267639415, "learning_rate": 7.690949763664317e-06, "loss": 0.5506, "step": 23692 }, { "epoch": 1.7609067261241176, "grad_norm": 2.208665535828386, "learning_rate": 7.690169085809179e-06, "loss": 0.7183, "step": 23693 }, { "epoch": 1.7609810479375696, "grad_norm": 2.574682770094358, "learning_rate": 7.689388422824612e-06, "loss": 0.6665, "step": 23694 }, { "epoch": 1.761055369751022, "grad_norm": 2.221180382599053, "learning_rate": 7.688607774715641e-06, "loss": 0.5574, "step": 23695 }, { "epoch": 1.761129691564474, "grad_norm": 1.5545355845400226, "learning_rate": 7.687827141487296e-06, "loss": 0.5119, "step": 23696 }, { "epoch": 1.7612040133779265, "grad_norm": 2.191974872052122, "learning_rate": 7.687046523144601e-06, "loss": 0.5542, "step": 23697 }, { "epoch": 1.7612783351913786, "grad_norm": 2.2063454802808464, "learning_rate": 7.68626591969258e-06, "loss": 0.6295, "step": 23698 }, { "epoch": 1.761352657004831, "grad_norm": 2.0203548981520285, "learning_rate": 7.685485331136257e-06, "loss": 0.4941, "step": 23699 }, { "epoch": 1.761426978818283, "grad_norm": 1.967381526969074, "learning_rate": 7.684704757480659e-06, "loss": 0.5127, "step": 23700 }, { "epoch": 1.7615013006317355, "grad_norm": 1.6943529635976973, "learning_rate": 7.68392419873081e-06, "loss": 0.6068, "step": 23701 }, { "epoch": 1.7615756224451875, "grad_norm": 2.2190857719447243, "learning_rate": 7.68314365489174e-06, "loss": 0.643, "step": 23702 }, { "epoch": 1.76164994425864, "grad_norm": 1.6982004593808198, "learning_rate": 7.68236312596847e-06, "loss": 0.4953, "step": 23703 }, { "epoch": 1.7617242660720922, "grad_norm": 1.688807926862187, "learning_rate": 7.681582611966026e-06, "loss": 0.5166, "step": 23704 }, { "epoch": 1.7617985878855444, "grad_norm": 1.818955077405366, "learning_rate": 7.680802112889432e-06, "loss": 0.5385, "step": 23705 }, { "epoch": 1.7618729096989967, "grad_norm": 1.6694642549193373, "learning_rate": 7.680021628743714e-06, "loss": 0.632, "step": 23706 }, { "epoch": 1.761947231512449, "grad_norm": 2.39369685656376, "learning_rate": 7.679241159533895e-06, "loss": 0.6051, "step": 23707 }, { "epoch": 1.7620215533259012, "grad_norm": 2.268731419592471, "learning_rate": 7.678460705265006e-06, "loss": 0.5428, "step": 23708 }, { "epoch": 1.7620958751393534, "grad_norm": 2.2112910615697885, "learning_rate": 7.677680265942065e-06, "loss": 0.537, "step": 23709 }, { "epoch": 1.7621701969528056, "grad_norm": 2.372976395953893, "learning_rate": 7.676899841570096e-06, "loss": 0.6738, "step": 23710 }, { "epoch": 1.7622445187662579, "grad_norm": 2.099885528731211, "learning_rate": 7.676119432154126e-06, "loss": 0.5542, "step": 23711 }, { "epoch": 1.76231884057971, "grad_norm": 1.8942405856220867, "learning_rate": 7.675339037699176e-06, "loss": 0.541, "step": 23712 }, { "epoch": 1.7623931623931623, "grad_norm": 1.989255064779412, "learning_rate": 7.674558658210273e-06, "loss": 0.6065, "step": 23713 }, { "epoch": 1.7624674842066146, "grad_norm": 2.0969718333765712, "learning_rate": 7.673778293692446e-06, "loss": 0.6415, "step": 23714 }, { "epoch": 1.7625418060200668, "grad_norm": 2.2622281222448937, "learning_rate": 7.672997944150709e-06, "loss": 0.5043, "step": 23715 }, { "epoch": 1.7626161278335193, "grad_norm": 2.35456822983643, "learning_rate": 7.67221760959009e-06, "loss": 0.6323, "step": 23716 }, { "epoch": 1.7626904496469713, "grad_norm": 1.8524777522560312, "learning_rate": 7.671437290015615e-06, "loss": 0.5177, "step": 23717 }, { "epoch": 1.7627647714604238, "grad_norm": 2.2054422272215977, "learning_rate": 7.670656985432307e-06, "loss": 0.6398, "step": 23718 }, { "epoch": 1.7628390932738758, "grad_norm": 2.5881036380458355, "learning_rate": 7.669876695845186e-06, "loss": 0.4476, "step": 23719 }, { "epoch": 1.7629134150873282, "grad_norm": 1.4629502572522026, "learning_rate": 7.669096421259283e-06, "loss": 0.4498, "step": 23720 }, { "epoch": 1.7629877369007803, "grad_norm": 1.776621831363755, "learning_rate": 7.668316161679612e-06, "loss": 0.6045, "step": 23721 }, { "epoch": 1.7630620587142327, "grad_norm": 1.606612251416932, "learning_rate": 7.6675359171112e-06, "loss": 0.459, "step": 23722 }, { "epoch": 1.7631363805276847, "grad_norm": 1.9452687337092378, "learning_rate": 7.66675568755907e-06, "loss": 0.6044, "step": 23723 }, { "epoch": 1.7632107023411372, "grad_norm": 1.8559980629440658, "learning_rate": 7.665975473028247e-06, "loss": 0.4532, "step": 23724 }, { "epoch": 1.7632850241545892, "grad_norm": 2.239121080410543, "learning_rate": 7.665195273523756e-06, "loss": 0.4855, "step": 23725 }, { "epoch": 1.7633593459680417, "grad_norm": 1.9645194939389745, "learning_rate": 7.664415089050612e-06, "loss": 0.5525, "step": 23726 }, { "epoch": 1.763433667781494, "grad_norm": 1.8614379293654073, "learning_rate": 7.663634919613846e-06, "loss": 0.4379, "step": 23727 }, { "epoch": 1.7635079895949461, "grad_norm": 1.9160660478908917, "learning_rate": 7.662854765218472e-06, "loss": 0.6865, "step": 23728 }, { "epoch": 1.7635823114083984, "grad_norm": 2.248187027165725, "learning_rate": 7.662074625869524e-06, "loss": 0.6, "step": 23729 }, { "epoch": 1.7636566332218506, "grad_norm": 1.8139189997705063, "learning_rate": 7.661294501572016e-06, "loss": 0.5777, "step": 23730 }, { "epoch": 1.7637309550353029, "grad_norm": 1.766860300433536, "learning_rate": 7.660514392330976e-06, "loss": 0.4825, "step": 23731 }, { "epoch": 1.763805276848755, "grad_norm": 1.9254826978810222, "learning_rate": 7.659734298151418e-06, "loss": 0.5521, "step": 23732 }, { "epoch": 1.7638795986622073, "grad_norm": 2.285180477991981, "learning_rate": 7.65895421903837e-06, "loss": 0.6356, "step": 23733 }, { "epoch": 1.7639539204756596, "grad_norm": 2.462621130386275, "learning_rate": 7.658174154996853e-06, "loss": 0.4109, "step": 23734 }, { "epoch": 1.7640282422891118, "grad_norm": 2.03500758414054, "learning_rate": 7.657394106031888e-06, "loss": 0.7361, "step": 23735 }, { "epoch": 1.764102564102564, "grad_norm": 2.1172147367211425, "learning_rate": 7.656614072148503e-06, "loss": 0.528, "step": 23736 }, { "epoch": 1.7641768859160165, "grad_norm": 2.3856374999639463, "learning_rate": 7.65583405335171e-06, "loss": 0.6698, "step": 23737 }, { "epoch": 1.7642512077294685, "grad_norm": 2.058584736839801, "learning_rate": 7.655054049646537e-06, "loss": 0.5456, "step": 23738 }, { "epoch": 1.764325529542921, "grad_norm": 1.967584906909324, "learning_rate": 7.654274061038005e-06, "loss": 0.5491, "step": 23739 }, { "epoch": 1.764399851356373, "grad_norm": 2.053059843546343, "learning_rate": 7.653494087531137e-06, "loss": 0.5797, "step": 23740 }, { "epoch": 1.7644741731698255, "grad_norm": 2.1389253390813594, "learning_rate": 7.652714129130949e-06, "loss": 0.5327, "step": 23741 }, { "epoch": 1.7645484949832775, "grad_norm": 1.5795512880480316, "learning_rate": 7.651934185842466e-06, "loss": 0.4489, "step": 23742 }, { "epoch": 1.76462281679673, "grad_norm": 2.144373709514204, "learning_rate": 7.651154257670714e-06, "loss": 0.5554, "step": 23743 }, { "epoch": 1.764697138610182, "grad_norm": 2.002242953828393, "learning_rate": 7.650374344620705e-06, "loss": 0.4406, "step": 23744 }, { "epoch": 1.7647714604236344, "grad_norm": 1.9222526917813865, "learning_rate": 7.649594446697464e-06, "loss": 0.5399, "step": 23745 }, { "epoch": 1.7648457822370864, "grad_norm": 1.9886167141400715, "learning_rate": 7.64881456390601e-06, "loss": 0.5476, "step": 23746 }, { "epoch": 1.764920104050539, "grad_norm": 2.185308144830658, "learning_rate": 7.648034696251369e-06, "loss": 0.7438, "step": 23747 }, { "epoch": 1.7649944258639911, "grad_norm": 2.0745865846106684, "learning_rate": 7.647254843738558e-06, "loss": 0.6358, "step": 23748 }, { "epoch": 1.7650687476774434, "grad_norm": 2.2393173299452642, "learning_rate": 7.646475006372597e-06, "loss": 0.7748, "step": 23749 }, { "epoch": 1.7651430694908956, "grad_norm": 2.1865944501107024, "learning_rate": 7.645695184158508e-06, "loss": 0.7181, "step": 23750 }, { "epoch": 1.7652173913043478, "grad_norm": 2.0477210403919597, "learning_rate": 7.644915377101312e-06, "loss": 0.6076, "step": 23751 }, { "epoch": 1.7652917131178, "grad_norm": 2.4367455971862952, "learning_rate": 7.644135585206028e-06, "loss": 0.6866, "step": 23752 }, { "epoch": 1.7653660349312523, "grad_norm": 1.8857337289762945, "learning_rate": 7.643355808477677e-06, "loss": 0.6698, "step": 23753 }, { "epoch": 1.7654403567447046, "grad_norm": 1.9122629658926062, "learning_rate": 7.642576046921283e-06, "loss": 0.6167, "step": 23754 }, { "epoch": 1.7655146785581568, "grad_norm": 2.258398609048109, "learning_rate": 7.641796300541858e-06, "loss": 0.6055, "step": 23755 }, { "epoch": 1.765589000371609, "grad_norm": 1.9272021899929495, "learning_rate": 7.641016569344426e-06, "loss": 0.5118, "step": 23756 }, { "epoch": 1.7656633221850613, "grad_norm": 2.4325088390522986, "learning_rate": 7.640236853334007e-06, "loss": 0.5763, "step": 23757 }, { "epoch": 1.7657376439985135, "grad_norm": 1.7137483968535219, "learning_rate": 7.639457152515622e-06, "loss": 0.6102, "step": 23758 }, { "epoch": 1.7658119658119658, "grad_norm": 1.6008979134612074, "learning_rate": 7.638677466894287e-06, "loss": 0.6221, "step": 23759 }, { "epoch": 1.7658862876254182, "grad_norm": 1.8021869369690366, "learning_rate": 7.637897796475025e-06, "loss": 0.3997, "step": 23760 }, { "epoch": 1.7659606094388702, "grad_norm": 1.8762677921164728, "learning_rate": 7.637118141262854e-06, "loss": 0.5756, "step": 23761 }, { "epoch": 1.7660349312523227, "grad_norm": 1.957378679547448, "learning_rate": 7.636338501262796e-06, "loss": 0.6306, "step": 23762 }, { "epoch": 1.7661092530657747, "grad_norm": 1.83580957360593, "learning_rate": 7.635558876479867e-06, "loss": 0.5216, "step": 23763 }, { "epoch": 1.7661835748792272, "grad_norm": 1.646836203301788, "learning_rate": 7.634779266919085e-06, "loss": 0.3652, "step": 23764 }, { "epoch": 1.7662578966926792, "grad_norm": 2.041130394913527, "learning_rate": 7.633999672585477e-06, "loss": 0.595, "step": 23765 }, { "epoch": 1.7663322185061316, "grad_norm": 1.8002910595766044, "learning_rate": 7.633220093484053e-06, "loss": 0.6181, "step": 23766 }, { "epoch": 1.7664065403195837, "grad_norm": 2.0949072149072747, "learning_rate": 7.632440529619833e-06, "loss": 0.6256, "step": 23767 }, { "epoch": 1.7664808621330361, "grad_norm": 1.9298089908755467, "learning_rate": 7.63166098099784e-06, "loss": 0.7295, "step": 23768 }, { "epoch": 1.7665551839464881, "grad_norm": 2.9004853457067368, "learning_rate": 7.630881447623092e-06, "loss": 0.6673, "step": 23769 }, { "epoch": 1.7666295057599406, "grad_norm": 1.8382485526762757, "learning_rate": 7.630101929500605e-06, "loss": 0.5848, "step": 23770 }, { "epoch": 1.7667038275733928, "grad_norm": 1.8815775222709106, "learning_rate": 7.629322426635397e-06, "loss": 0.5022, "step": 23771 }, { "epoch": 1.766778149386845, "grad_norm": 1.644789688484363, "learning_rate": 7.6285429390324895e-06, "loss": 0.4474, "step": 23772 }, { "epoch": 1.7668524712002973, "grad_norm": 1.8529974093639987, "learning_rate": 7.627763466696902e-06, "loss": 0.5554, "step": 23773 }, { "epoch": 1.7669267930137496, "grad_norm": 1.939572856271949, "learning_rate": 7.626984009633647e-06, "loss": 0.5961, "step": 23774 }, { "epoch": 1.7670011148272018, "grad_norm": 2.0255450558568158, "learning_rate": 7.626204567847746e-06, "loss": 0.7293, "step": 23775 }, { "epoch": 1.767075436640654, "grad_norm": 4.1593070329544926, "learning_rate": 7.625425141344222e-06, "loss": 0.5411, "step": 23776 }, { "epoch": 1.7671497584541063, "grad_norm": 2.238490759909695, "learning_rate": 7.624645730128082e-06, "loss": 0.6964, "step": 23777 }, { "epoch": 1.7672240802675585, "grad_norm": 2.0654554953159274, "learning_rate": 7.623866334204351e-06, "loss": 0.6351, "step": 23778 }, { "epoch": 1.7672984020810107, "grad_norm": 1.6785160202662932, "learning_rate": 7.623086953578045e-06, "loss": 0.4024, "step": 23779 }, { "epoch": 1.767372723894463, "grad_norm": 1.742713135819616, "learning_rate": 7.622307588254183e-06, "loss": 0.5847, "step": 23780 }, { "epoch": 1.7674470457079152, "grad_norm": 1.8562123968502746, "learning_rate": 7.62152823823778e-06, "loss": 0.5645, "step": 23781 }, { "epoch": 1.7675213675213675, "grad_norm": 1.7991404659121126, "learning_rate": 7.620748903533854e-06, "loss": 0.535, "step": 23782 }, { "epoch": 1.76759568933482, "grad_norm": 2.2151500763460863, "learning_rate": 7.619969584147424e-06, "loss": 0.6436, "step": 23783 }, { "epoch": 1.767670011148272, "grad_norm": 1.9308249784239215, "learning_rate": 7.619190280083508e-06, "loss": 0.5232, "step": 23784 }, { "epoch": 1.7677443329617244, "grad_norm": 2.0300447929177126, "learning_rate": 7.61841099134712e-06, "loss": 0.5613, "step": 23785 }, { "epoch": 1.7678186547751764, "grad_norm": 1.7582403606810326, "learning_rate": 7.617631717943279e-06, "loss": 0.4099, "step": 23786 }, { "epoch": 1.7678929765886289, "grad_norm": 1.9713669561014673, "learning_rate": 7.616852459877e-06, "loss": 0.6701, "step": 23787 }, { "epoch": 1.767967298402081, "grad_norm": 2.082765043874796, "learning_rate": 7.616073217153306e-06, "loss": 0.5707, "step": 23788 }, { "epoch": 1.7680416202155333, "grad_norm": 1.786655381187713, "learning_rate": 7.615293989777203e-06, "loss": 0.4564, "step": 23789 }, { "epoch": 1.7681159420289854, "grad_norm": 1.8468780115135215, "learning_rate": 7.614514777753716e-06, "loss": 0.6652, "step": 23790 }, { "epoch": 1.7681902638424378, "grad_norm": 2.0266991542756996, "learning_rate": 7.613735581087861e-06, "loss": 0.6091, "step": 23791 }, { "epoch": 1.7682645856558898, "grad_norm": 1.6372632741082016, "learning_rate": 7.61295639978465e-06, "loss": 0.4835, "step": 23792 }, { "epoch": 1.7683389074693423, "grad_norm": 1.948691353337767, "learning_rate": 7.612177233849101e-06, "loss": 0.5743, "step": 23793 }, { "epoch": 1.7684132292827945, "grad_norm": 1.9723032650908148, "learning_rate": 7.611398083286232e-06, "loss": 0.5634, "step": 23794 }, { "epoch": 1.7684875510962468, "grad_norm": 1.6975531243590745, "learning_rate": 7.61061894810106e-06, "loss": 0.5411, "step": 23795 }, { "epoch": 1.768561872909699, "grad_norm": 1.8718746240776183, "learning_rate": 7.609839828298597e-06, "loss": 0.6761, "step": 23796 }, { "epoch": 1.7686361947231513, "grad_norm": 1.7892249504275661, "learning_rate": 7.609060723883861e-06, "loss": 0.5782, "step": 23797 }, { "epoch": 1.7687105165366035, "grad_norm": 1.587018632261683, "learning_rate": 7.6082816348618685e-06, "loss": 0.5997, "step": 23798 }, { "epoch": 1.7687848383500557, "grad_norm": 1.4432663712736857, "learning_rate": 7.607502561237638e-06, "loss": 0.4307, "step": 23799 }, { "epoch": 1.768859160163508, "grad_norm": 2.2702198802539204, "learning_rate": 7.606723503016179e-06, "loss": 0.7506, "step": 23800 }, { "epoch": 1.7689334819769602, "grad_norm": 2.542853460948746, "learning_rate": 7.605944460202509e-06, "loss": 0.4999, "step": 23801 }, { "epoch": 1.7690078037904124, "grad_norm": 2.063896645209237, "learning_rate": 7.605165432801647e-06, "loss": 0.4495, "step": 23802 }, { "epoch": 1.7690821256038647, "grad_norm": 1.7892333133382443, "learning_rate": 7.604386420818604e-06, "loss": 0.6507, "step": 23803 }, { "epoch": 1.7691564474173171, "grad_norm": 1.9463497657440783, "learning_rate": 7.603607424258396e-06, "loss": 0.5408, "step": 23804 }, { "epoch": 1.7692307692307692, "grad_norm": 2.099857958052353, "learning_rate": 7.602828443126039e-06, "loss": 0.6785, "step": 23805 }, { "epoch": 1.7693050910442216, "grad_norm": 2.3922851304273896, "learning_rate": 7.6020494774265506e-06, "loss": 0.73, "step": 23806 }, { "epoch": 1.7693794128576736, "grad_norm": 1.6734912515537008, "learning_rate": 7.601270527164941e-06, "loss": 0.5503, "step": 23807 }, { "epoch": 1.769453734671126, "grad_norm": 1.8785249522625926, "learning_rate": 7.600491592346228e-06, "loss": 0.572, "step": 23808 }, { "epoch": 1.7695280564845781, "grad_norm": 1.9199406419698126, "learning_rate": 7.599712672975424e-06, "loss": 0.6859, "step": 23809 }, { "epoch": 1.7696023782980306, "grad_norm": 1.7822472289820839, "learning_rate": 7.59893376905755e-06, "loss": 0.5267, "step": 23810 }, { "epoch": 1.7696767001114826, "grad_norm": 1.9283848632409484, "learning_rate": 7.598154880597612e-06, "loss": 0.6224, "step": 23811 }, { "epoch": 1.769751021924935, "grad_norm": 2.1401338337777385, "learning_rate": 7.597376007600629e-06, "loss": 0.6167, "step": 23812 }, { "epoch": 1.769825343738387, "grad_norm": 1.7829743424445554, "learning_rate": 7.596597150071616e-06, "loss": 0.6469, "step": 23813 }, { "epoch": 1.7698996655518395, "grad_norm": 2.517855949068455, "learning_rate": 7.595818308015584e-06, "loss": 0.7343, "step": 23814 }, { "epoch": 1.7699739873652918, "grad_norm": 2.051755036907206, "learning_rate": 7.595039481437548e-06, "loss": 0.5659, "step": 23815 }, { "epoch": 1.770048309178744, "grad_norm": 1.7258158384417914, "learning_rate": 7.594260670342523e-06, "loss": 0.4967, "step": 23816 }, { "epoch": 1.7701226309921962, "grad_norm": 2.232421747727209, "learning_rate": 7.5934818747355265e-06, "loss": 0.401, "step": 23817 }, { "epoch": 1.7701969528056485, "grad_norm": 1.6295464001676154, "learning_rate": 7.592703094621566e-06, "loss": 0.5698, "step": 23818 }, { "epoch": 1.7702712746191007, "grad_norm": 1.568360167296842, "learning_rate": 7.591924330005659e-06, "loss": 0.4548, "step": 23819 }, { "epoch": 1.770345596432553, "grad_norm": 2.0215653786357075, "learning_rate": 7.5911455808928166e-06, "loss": 0.6774, "step": 23820 }, { "epoch": 1.7704199182460052, "grad_norm": 1.8942027099822005, "learning_rate": 7.5903668472880555e-06, "loss": 0.5724, "step": 23821 }, { "epoch": 1.7704942400594574, "grad_norm": 2.127829744305717, "learning_rate": 7.58958812919639e-06, "loss": 0.6762, "step": 23822 }, { "epoch": 1.7705685618729097, "grad_norm": 1.7559614671931385, "learning_rate": 7.588809426622829e-06, "loss": 0.6941, "step": 23823 }, { "epoch": 1.770642883686362, "grad_norm": 2.0218010572045544, "learning_rate": 7.588030739572388e-06, "loss": 0.4407, "step": 23824 }, { "epoch": 1.7707172054998142, "grad_norm": 2.4356731020318745, "learning_rate": 7.5872520680500795e-06, "loss": 0.6414, "step": 23825 }, { "epoch": 1.7707915273132664, "grad_norm": 2.071682998109691, "learning_rate": 7.586473412060915e-06, "loss": 0.6246, "step": 23826 }, { "epoch": 1.7708658491267188, "grad_norm": 1.8797117341029737, "learning_rate": 7.585694771609912e-06, "loss": 0.5181, "step": 23827 }, { "epoch": 1.7709401709401709, "grad_norm": 2.2291543305742754, "learning_rate": 7.58491614670208e-06, "loss": 0.7511, "step": 23828 }, { "epoch": 1.7710144927536233, "grad_norm": 2.0202769078763296, "learning_rate": 7.5841375373424335e-06, "loss": 0.5218, "step": 23829 }, { "epoch": 1.7710888145670753, "grad_norm": 1.5250192061147005, "learning_rate": 7.583358943535981e-06, "loss": 0.5369, "step": 23830 }, { "epoch": 1.7711631363805278, "grad_norm": 2.250033355985874, "learning_rate": 7.58258036528774e-06, "loss": 0.5775, "step": 23831 }, { "epoch": 1.7712374581939798, "grad_norm": 1.9042491655487643, "learning_rate": 7.581801802602722e-06, "loss": 0.6301, "step": 23832 }, { "epoch": 1.7713117800074323, "grad_norm": 1.7324095585199086, "learning_rate": 7.581023255485942e-06, "loss": 0.4969, "step": 23833 }, { "epoch": 1.7713861018208843, "grad_norm": 2.1231251808130303, "learning_rate": 7.580244723942406e-06, "loss": 0.7191, "step": 23834 }, { "epoch": 1.7714604236343368, "grad_norm": 2.031582789801936, "learning_rate": 7.579466207977126e-06, "loss": 0.6811, "step": 23835 }, { "epoch": 1.7715347454477888, "grad_norm": 1.723549350785072, "learning_rate": 7.578687707595119e-06, "loss": 0.5363, "step": 23836 }, { "epoch": 1.7716090672612412, "grad_norm": 6.905250004114151, "learning_rate": 7.577909222801394e-06, "loss": 0.7719, "step": 23837 }, { "epoch": 1.7716833890746935, "grad_norm": 3.1128805335692054, "learning_rate": 7.577130753600964e-06, "loss": 0.6318, "step": 23838 }, { "epoch": 1.7717577108881457, "grad_norm": 1.8381949596466558, "learning_rate": 7.576352299998842e-06, "loss": 0.5951, "step": 23839 }, { "epoch": 1.771832032701598, "grad_norm": 2.664935105550105, "learning_rate": 7.575573862000037e-06, "loss": 0.7693, "step": 23840 }, { "epoch": 1.7719063545150502, "grad_norm": 1.7536614297283653, "learning_rate": 7.574795439609561e-06, "loss": 0.559, "step": 23841 }, { "epoch": 1.7719806763285024, "grad_norm": 2.7630334241328827, "learning_rate": 7.574017032832426e-06, "loss": 0.6193, "step": 23842 }, { "epoch": 1.7720549981419547, "grad_norm": 1.8023349645486848, "learning_rate": 7.573238641673646e-06, "loss": 0.618, "step": 23843 }, { "epoch": 1.772129319955407, "grad_norm": 1.9259447910906942, "learning_rate": 7.572460266138229e-06, "loss": 0.5393, "step": 23844 }, { "epoch": 1.7722036417688591, "grad_norm": 2.10900342195842, "learning_rate": 7.5716819062311876e-06, "loss": 0.6645, "step": 23845 }, { "epoch": 1.7722779635823114, "grad_norm": 1.8690566526708166, "learning_rate": 7.5709035619575296e-06, "loss": 0.5873, "step": 23846 }, { "epoch": 1.7723522853957636, "grad_norm": 1.9920618015371636, "learning_rate": 7.57012523332227e-06, "loss": 0.5471, "step": 23847 }, { "epoch": 1.7724266072092159, "grad_norm": 1.8256384177125566, "learning_rate": 7.569346920330416e-06, "loss": 0.4847, "step": 23848 }, { "epoch": 1.772500929022668, "grad_norm": 1.9800100297942307, "learning_rate": 7.56856862298698e-06, "loss": 0.5726, "step": 23849 }, { "epoch": 1.7725752508361206, "grad_norm": 2.14290687248172, "learning_rate": 7.567790341296977e-06, "loss": 0.5897, "step": 23850 }, { "epoch": 1.7726495726495726, "grad_norm": 1.6732655573064346, "learning_rate": 7.567012075265411e-06, "loss": 0.5094, "step": 23851 }, { "epoch": 1.772723894463025, "grad_norm": 2.051176103478179, "learning_rate": 7.5662338248972946e-06, "loss": 0.5613, "step": 23852 }, { "epoch": 1.772798216276477, "grad_norm": 1.8617252917507192, "learning_rate": 7.565455590197639e-06, "loss": 0.5295, "step": 23853 }, { "epoch": 1.7728725380899295, "grad_norm": 1.9535748869249867, "learning_rate": 7.564677371171456e-06, "loss": 0.5776, "step": 23854 }, { "epoch": 1.7729468599033815, "grad_norm": 2.052356606161276, "learning_rate": 7.56389916782375e-06, "loss": 0.6643, "step": 23855 }, { "epoch": 1.773021181716834, "grad_norm": 2.280252923952717, "learning_rate": 7.563120980159542e-06, "loss": 0.7121, "step": 23856 }, { "epoch": 1.773095503530286, "grad_norm": 2.127731330245725, "learning_rate": 7.562342808183828e-06, "loss": 0.7216, "step": 23857 }, { "epoch": 1.7731698253437385, "grad_norm": 1.8044513277382037, "learning_rate": 7.5615646519016254e-06, "loss": 0.4895, "step": 23858 }, { "epoch": 1.7732441471571905, "grad_norm": 2.0827583189462335, "learning_rate": 7.5607865113179435e-06, "loss": 0.6198, "step": 23859 }, { "epoch": 1.773318468970643, "grad_norm": 2.0562265188464663, "learning_rate": 7.560008386437791e-06, "loss": 0.5969, "step": 23860 }, { "epoch": 1.7733927907840952, "grad_norm": 1.9553467513260285, "learning_rate": 7.559230277266179e-06, "loss": 0.539, "step": 23861 }, { "epoch": 1.7734671125975474, "grad_norm": 2.2619163310957435, "learning_rate": 7.558452183808116e-06, "loss": 0.5296, "step": 23862 }, { "epoch": 1.7735414344109997, "grad_norm": 1.5750797618436794, "learning_rate": 7.557674106068608e-06, "loss": 0.4561, "step": 23863 }, { "epoch": 1.773615756224452, "grad_norm": 2.4185724193132927, "learning_rate": 7.55689604405267e-06, "loss": 0.6687, "step": 23864 }, { "epoch": 1.7736900780379041, "grad_norm": 2.7211799078168304, "learning_rate": 7.556117997765311e-06, "loss": 0.692, "step": 23865 }, { "epoch": 1.7737643998513564, "grad_norm": 2.886789614881431, "learning_rate": 7.555339967211534e-06, "loss": 0.5664, "step": 23866 }, { "epoch": 1.7738387216648086, "grad_norm": 2.545756224327342, "learning_rate": 7.554561952396355e-06, "loss": 0.621, "step": 23867 }, { "epoch": 1.7739130434782608, "grad_norm": 2.023227836578826, "learning_rate": 7.553783953324777e-06, "loss": 0.6296, "step": 23868 }, { "epoch": 1.773987365291713, "grad_norm": 2.7441782899422567, "learning_rate": 7.55300597000181e-06, "loss": 0.7154, "step": 23869 }, { "epoch": 1.7740616871051653, "grad_norm": 1.8718949892753989, "learning_rate": 7.552228002432464e-06, "loss": 0.5168, "step": 23870 }, { "epoch": 1.7741360089186178, "grad_norm": 1.8438637336235912, "learning_rate": 7.551450050621746e-06, "loss": 0.5115, "step": 23871 }, { "epoch": 1.7742103307320698, "grad_norm": 1.728783486262296, "learning_rate": 7.5506721145746685e-06, "loss": 0.5061, "step": 23872 }, { "epoch": 1.7742846525455223, "grad_norm": 1.784634367445549, "learning_rate": 7.549894194296235e-06, "loss": 0.4993, "step": 23873 }, { "epoch": 1.7743589743589743, "grad_norm": 1.662556806249888, "learning_rate": 7.549116289791455e-06, "loss": 0.5429, "step": 23874 }, { "epoch": 1.7744332961724267, "grad_norm": 2.512803813128953, "learning_rate": 7.548338401065337e-06, "loss": 0.707, "step": 23875 }, { "epoch": 1.7745076179858787, "grad_norm": 3.7090487053187284, "learning_rate": 7.547560528122891e-06, "loss": 0.617, "step": 23876 }, { "epoch": 1.7745819397993312, "grad_norm": 1.9398029027815766, "learning_rate": 7.546782670969121e-06, "loss": 0.5463, "step": 23877 }, { "epoch": 1.7746562616127832, "grad_norm": 2.051212688105219, "learning_rate": 7.546004829609043e-06, "loss": 0.6202, "step": 23878 }, { "epoch": 1.7747305834262357, "grad_norm": 1.6317561010354966, "learning_rate": 7.545227004047653e-06, "loss": 0.515, "step": 23879 }, { "epoch": 1.7748049052396877, "grad_norm": 1.7091989146872708, "learning_rate": 7.5444491942899644e-06, "loss": 0.482, "step": 23880 }, { "epoch": 1.7748792270531402, "grad_norm": 1.9870803241978474, "learning_rate": 7.543671400340984e-06, "loss": 0.6696, "step": 23881 }, { "epoch": 1.7749535488665922, "grad_norm": 1.9327175458414, "learning_rate": 7.542893622205719e-06, "loss": 0.6509, "step": 23882 }, { "epoch": 1.7750278706800446, "grad_norm": 2.2410130377063555, "learning_rate": 7.542115859889181e-06, "loss": 0.7447, "step": 23883 }, { "epoch": 1.7751021924934969, "grad_norm": 2.0484991002648094, "learning_rate": 7.5413381133963705e-06, "loss": 0.5676, "step": 23884 }, { "epoch": 1.7751765143069491, "grad_norm": 1.9883025253416975, "learning_rate": 7.540560382732298e-06, "loss": 0.5894, "step": 23885 }, { "epoch": 1.7752508361204014, "grad_norm": 1.901978153950195, "learning_rate": 7.53978266790197e-06, "loss": 0.6175, "step": 23886 }, { "epoch": 1.7753251579338536, "grad_norm": 1.7618163314942146, "learning_rate": 7.5390049689103965e-06, "loss": 0.5621, "step": 23887 }, { "epoch": 1.7753994797473058, "grad_norm": 1.5244829303822305, "learning_rate": 7.5382272857625785e-06, "loss": 0.4506, "step": 23888 }, { "epoch": 1.775473801560758, "grad_norm": 2.0653112813817387, "learning_rate": 7.53744961846353e-06, "loss": 0.632, "step": 23889 }, { "epoch": 1.7755481233742103, "grad_norm": 2.071604578017213, "learning_rate": 7.5366719670182475e-06, "loss": 0.5195, "step": 23890 }, { "epoch": 1.7756224451876625, "grad_norm": 2.275554156625957, "learning_rate": 7.5358943314317446e-06, "loss": 0.7564, "step": 23891 }, { "epoch": 1.7756967670011148, "grad_norm": 1.6568306864310496, "learning_rate": 7.535116711709025e-06, "loss": 0.4421, "step": 23892 }, { "epoch": 1.775771088814567, "grad_norm": 1.655091291651195, "learning_rate": 7.534339107855097e-06, "loss": 0.5039, "step": 23893 }, { "epoch": 1.7758454106280195, "grad_norm": 2.3701255971340225, "learning_rate": 7.533561519874968e-06, "loss": 0.5957, "step": 23894 }, { "epoch": 1.7759197324414715, "grad_norm": 2.541653585720507, "learning_rate": 7.53278394777364e-06, "loss": 0.631, "step": 23895 }, { "epoch": 1.775994054254924, "grad_norm": 2.1462025625662355, "learning_rate": 7.53200639155612e-06, "loss": 0.7084, "step": 23896 }, { "epoch": 1.776068376068376, "grad_norm": 1.7925652650331212, "learning_rate": 7.531228851227415e-06, "loss": 0.6258, "step": 23897 }, { "epoch": 1.7761426978818284, "grad_norm": 1.9444327145477902, "learning_rate": 7.530451326792534e-06, "loss": 0.5203, "step": 23898 }, { "epoch": 1.7762170196952805, "grad_norm": 1.9577352094795606, "learning_rate": 7.529673818256475e-06, "loss": 0.5624, "step": 23899 }, { "epoch": 1.776291341508733, "grad_norm": 1.8820000535899795, "learning_rate": 7.52889632562425e-06, "loss": 0.6388, "step": 23900 }, { "epoch": 1.776365663322185, "grad_norm": 1.8337999699259866, "learning_rate": 7.528118848900865e-06, "loss": 0.5429, "step": 23901 }, { "epoch": 1.7764399851356374, "grad_norm": 1.9686903869355568, "learning_rate": 7.527341388091319e-06, "loss": 0.6224, "step": 23902 }, { "epoch": 1.7765143069490894, "grad_norm": 1.8383843935952082, "learning_rate": 7.526563943200621e-06, "loss": 0.6492, "step": 23903 }, { "epoch": 1.7765886287625419, "grad_norm": 2.285960206252492, "learning_rate": 7.525786514233775e-06, "loss": 0.6017, "step": 23904 }, { "epoch": 1.776662950575994, "grad_norm": 2.178343477369977, "learning_rate": 7.525009101195791e-06, "loss": 0.6619, "step": 23905 }, { "epoch": 1.7767372723894463, "grad_norm": 1.710920612202215, "learning_rate": 7.524231704091666e-06, "loss": 0.4743, "step": 23906 }, { "epoch": 1.7768115942028986, "grad_norm": 2.001900994778733, "learning_rate": 7.5234543229264086e-06, "loss": 0.4527, "step": 23907 }, { "epoch": 1.7768859160163508, "grad_norm": 1.8523943204041091, "learning_rate": 7.5226769577050255e-06, "loss": 0.5749, "step": 23908 }, { "epoch": 1.776960237829803, "grad_norm": 3.5410485981930635, "learning_rate": 7.52189960843252e-06, "loss": 0.5721, "step": 23909 }, { "epoch": 1.7770345596432553, "grad_norm": 1.956090986133815, "learning_rate": 7.521122275113896e-06, "loss": 0.5257, "step": 23910 }, { "epoch": 1.7771088814567075, "grad_norm": 1.9130348851554746, "learning_rate": 7.520344957754157e-06, "loss": 0.5565, "step": 23911 }, { "epoch": 1.7771832032701598, "grad_norm": 1.7985380743424695, "learning_rate": 7.519567656358313e-06, "loss": 0.6074, "step": 23912 }, { "epoch": 1.777257525083612, "grad_norm": 2.0649684569469398, "learning_rate": 7.518790370931359e-06, "loss": 0.6082, "step": 23913 }, { "epoch": 1.7773318468970642, "grad_norm": 1.8996170246518695, "learning_rate": 7.518013101478304e-06, "loss": 0.5099, "step": 23914 }, { "epoch": 1.7774061687105165, "grad_norm": 1.6439487982808756, "learning_rate": 7.5172358480041515e-06, "loss": 0.3968, "step": 23915 }, { "epoch": 1.7774804905239687, "grad_norm": 1.9409707474229092, "learning_rate": 7.51645861051391e-06, "loss": 0.6412, "step": 23916 }, { "epoch": 1.7775548123374212, "grad_norm": 2.305470147875614, "learning_rate": 7.515681389012574e-06, "loss": 0.6743, "step": 23917 }, { "epoch": 1.7776291341508732, "grad_norm": 2.6191956226814277, "learning_rate": 7.514904183505155e-06, "loss": 0.6682, "step": 23918 }, { "epoch": 1.7777034559643257, "grad_norm": 2.264159546375589, "learning_rate": 7.514126993996652e-06, "loss": 0.6896, "step": 23919 }, { "epoch": 1.7777777777777777, "grad_norm": 1.9918453031450711, "learning_rate": 7.513349820492074e-06, "loss": 0.6599, "step": 23920 }, { "epoch": 1.7778520995912301, "grad_norm": 1.7342939777973243, "learning_rate": 7.512572662996418e-06, "loss": 0.5924, "step": 23921 }, { "epoch": 1.7779264214046822, "grad_norm": 1.8957299619892376, "learning_rate": 7.51179552151469e-06, "loss": 0.5243, "step": 23922 }, { "epoch": 1.7780007432181346, "grad_norm": 2.2058370780960654, "learning_rate": 7.511018396051898e-06, "loss": 0.6104, "step": 23923 }, { "epoch": 1.7780750650315866, "grad_norm": 2.098158415606958, "learning_rate": 7.510241286613036e-06, "loss": 0.6606, "step": 23924 }, { "epoch": 1.778149386845039, "grad_norm": 2.2038655762178614, "learning_rate": 7.509464193203111e-06, "loss": 0.5429, "step": 23925 }, { "epoch": 1.778223708658491, "grad_norm": 1.715792923139201, "learning_rate": 7.5086871158271266e-06, "loss": 0.5639, "step": 23926 }, { "epoch": 1.7782980304719436, "grad_norm": 2.668690721686812, "learning_rate": 7.507910054490087e-06, "loss": 0.6558, "step": 23927 }, { "epoch": 1.7783723522853958, "grad_norm": 2.4364668189923044, "learning_rate": 7.507133009196992e-06, "loss": 0.6181, "step": 23928 }, { "epoch": 1.778446674098848, "grad_norm": 1.4326935384276884, "learning_rate": 7.506355979952844e-06, "loss": 0.4845, "step": 23929 }, { "epoch": 1.7785209959123003, "grad_norm": 1.828784598387127, "learning_rate": 7.505578966762648e-06, "loss": 0.5299, "step": 23930 }, { "epoch": 1.7785953177257525, "grad_norm": 3.0752423914289806, "learning_rate": 7.504801969631407e-06, "loss": 0.6806, "step": 23931 }, { "epoch": 1.7786696395392048, "grad_norm": 1.8413573442699283, "learning_rate": 7.504024988564119e-06, "loss": 0.6591, "step": 23932 }, { "epoch": 1.778743961352657, "grad_norm": 1.7407580674481633, "learning_rate": 7.503248023565789e-06, "loss": 0.4332, "step": 23933 }, { "epoch": 1.7788182831661092, "grad_norm": 1.9351275672477788, "learning_rate": 7.502471074641419e-06, "loss": 0.6631, "step": 23934 }, { "epoch": 1.7788926049795615, "grad_norm": 2.003264109313345, "learning_rate": 7.501694141796014e-06, "loss": 0.5661, "step": 23935 }, { "epoch": 1.7789669267930137, "grad_norm": 2.7339240101202553, "learning_rate": 7.500917225034569e-06, "loss": 0.4942, "step": 23936 }, { "epoch": 1.779041248606466, "grad_norm": 1.8203734429158642, "learning_rate": 7.5001403243620885e-06, "loss": 0.6521, "step": 23937 }, { "epoch": 1.7791155704199182, "grad_norm": 1.304793853093924, "learning_rate": 7.499363439783578e-06, "loss": 0.4114, "step": 23938 }, { "epoch": 1.7791898922333704, "grad_norm": 1.6766940620798332, "learning_rate": 7.498586571304032e-06, "loss": 0.5554, "step": 23939 }, { "epoch": 1.779264214046823, "grad_norm": 2.046335752536468, "learning_rate": 7.497809718928457e-06, "loss": 0.5527, "step": 23940 }, { "epoch": 1.779338535860275, "grad_norm": 2.7697633646917668, "learning_rate": 7.497032882661855e-06, "loss": 0.6257, "step": 23941 }, { "epoch": 1.7794128576737274, "grad_norm": 1.9417741071834025, "learning_rate": 7.496256062509224e-06, "loss": 0.5335, "step": 23942 }, { "epoch": 1.7794871794871794, "grad_norm": 1.7529187830122737, "learning_rate": 7.495479258475566e-06, "loss": 0.6446, "step": 23943 }, { "epoch": 1.7795615013006318, "grad_norm": 2.0686886501002646, "learning_rate": 7.4947024705658835e-06, "loss": 0.6067, "step": 23944 }, { "epoch": 1.7796358231140839, "grad_norm": 1.9295208518080043, "learning_rate": 7.493925698785176e-06, "loss": 0.5751, "step": 23945 }, { "epoch": 1.7797101449275363, "grad_norm": 1.9596795987854312, "learning_rate": 7.493148943138449e-06, "loss": 0.5147, "step": 23946 }, { "epoch": 1.7797844667409883, "grad_norm": 1.4729339750115489, "learning_rate": 7.4923722036306945e-06, "loss": 0.3893, "step": 23947 }, { "epoch": 1.7798587885544408, "grad_norm": 2.188173374320678, "learning_rate": 7.491595480266918e-06, "loss": 0.636, "step": 23948 }, { "epoch": 1.7799331103678928, "grad_norm": 1.6900472845349952, "learning_rate": 7.490818773052122e-06, "loss": 0.3113, "step": 23949 }, { "epoch": 1.7800074321813453, "grad_norm": 2.3799917699070754, "learning_rate": 7.490042081991301e-06, "loss": 0.6618, "step": 23950 }, { "epoch": 1.7800817539947975, "grad_norm": 2.5140640203710123, "learning_rate": 7.489265407089461e-06, "loss": 0.5593, "step": 23951 }, { "epoch": 1.7801560758082497, "grad_norm": 2.8698084723816906, "learning_rate": 7.488488748351599e-06, "loss": 0.6706, "step": 23952 }, { "epoch": 1.780230397621702, "grad_norm": 2.0922795115035147, "learning_rate": 7.487712105782717e-06, "loss": 0.5632, "step": 23953 }, { "epoch": 1.7803047194351542, "grad_norm": 1.794464004340572, "learning_rate": 7.486935479387814e-06, "loss": 0.5801, "step": 23954 }, { "epoch": 1.7803790412486065, "grad_norm": 2.2271094623422716, "learning_rate": 7.486158869171889e-06, "loss": 0.5206, "step": 23955 }, { "epoch": 1.7804533630620587, "grad_norm": 1.8792680199529919, "learning_rate": 7.485382275139943e-06, "loss": 0.516, "step": 23956 }, { "epoch": 1.780527684875511, "grad_norm": 2.1146484075366585, "learning_rate": 7.484605697296978e-06, "loss": 0.6284, "step": 23957 }, { "epoch": 1.7806020066889632, "grad_norm": 1.6939632266452345, "learning_rate": 7.483829135647988e-06, "loss": 0.446, "step": 23958 }, { "epoch": 1.7806763285024154, "grad_norm": 1.8564668896764416, "learning_rate": 7.483052590197977e-06, "loss": 0.6054, "step": 23959 }, { "epoch": 1.7807506503158677, "grad_norm": 1.871584713318947, "learning_rate": 7.482276060951944e-06, "loss": 0.552, "step": 23960 }, { "epoch": 1.7808249721293201, "grad_norm": 2.1117118630396416, "learning_rate": 7.481499547914884e-06, "loss": 0.5404, "step": 23961 }, { "epoch": 1.7808992939427721, "grad_norm": 1.9639622816219187, "learning_rate": 7.480723051091801e-06, "loss": 0.5306, "step": 23962 }, { "epoch": 1.7809736157562246, "grad_norm": 2.74760257197324, "learning_rate": 7.4799465704876906e-06, "loss": 0.5827, "step": 23963 }, { "epoch": 1.7810479375696766, "grad_norm": 1.8808476339477402, "learning_rate": 7.479170106107556e-06, "loss": 0.6033, "step": 23964 }, { "epoch": 1.781122259383129, "grad_norm": 1.946473931208774, "learning_rate": 7.478393657956392e-06, "loss": 0.7231, "step": 23965 }, { "epoch": 1.781196581196581, "grad_norm": 1.7292504351148756, "learning_rate": 7.477617226039198e-06, "loss": 0.3774, "step": 23966 }, { "epoch": 1.7812709030100335, "grad_norm": 1.9334852401557887, "learning_rate": 7.476840810360974e-06, "loss": 0.5678, "step": 23967 }, { "epoch": 1.7813452248234856, "grad_norm": 2.000738334006548, "learning_rate": 7.476064410926722e-06, "loss": 0.5057, "step": 23968 }, { "epoch": 1.781419546636938, "grad_norm": 2.101742396657198, "learning_rate": 7.475288027741432e-06, "loss": 0.5752, "step": 23969 }, { "epoch": 1.78149386845039, "grad_norm": 1.8330878001906599, "learning_rate": 7.474511660810109e-06, "loss": 0.5435, "step": 23970 }, { "epoch": 1.7815681902638425, "grad_norm": 1.612650126344237, "learning_rate": 7.473735310137747e-06, "loss": 0.4972, "step": 23971 }, { "epoch": 1.7816425120772947, "grad_norm": 1.639269458176037, "learning_rate": 7.4729589757293456e-06, "loss": 0.4953, "step": 23972 }, { "epoch": 1.781716833890747, "grad_norm": 2.007155858689356, "learning_rate": 7.472182657589903e-06, "loss": 0.5357, "step": 23973 }, { "epoch": 1.7817911557041992, "grad_norm": 2.890117023636767, "learning_rate": 7.471406355724418e-06, "loss": 0.646, "step": 23974 }, { "epoch": 1.7818654775176515, "grad_norm": 2.2255344472119387, "learning_rate": 7.470630070137888e-06, "loss": 0.7576, "step": 23975 }, { "epoch": 1.7819397993311037, "grad_norm": 2.0981792361696994, "learning_rate": 7.4698538008353095e-06, "loss": 0.5312, "step": 23976 }, { "epoch": 1.782014121144556, "grad_norm": 1.893041883218626, "learning_rate": 7.469077547821681e-06, "loss": 0.5044, "step": 23977 }, { "epoch": 1.7820884429580082, "grad_norm": 1.5508590899167032, "learning_rate": 7.468301311102e-06, "loss": 0.4859, "step": 23978 }, { "epoch": 1.7821627647714604, "grad_norm": 2.278086902482604, "learning_rate": 7.4675250906812655e-06, "loss": 0.7866, "step": 23979 }, { "epoch": 1.7822370865849126, "grad_norm": 2.2727108740517745, "learning_rate": 7.466748886564474e-06, "loss": 0.6729, "step": 23980 }, { "epoch": 1.7823114083983649, "grad_norm": 2.1364204785110346, "learning_rate": 7.465972698756621e-06, "loss": 0.7148, "step": 23981 }, { "epoch": 1.7823857302118171, "grad_norm": 2.3532146356168866, "learning_rate": 7.465196527262701e-06, "loss": 0.4979, "step": 23982 }, { "epoch": 1.7824600520252694, "grad_norm": 1.699376747892165, "learning_rate": 7.464420372087718e-06, "loss": 0.5458, "step": 23983 }, { "epoch": 1.7825343738387218, "grad_norm": 1.9591208861658764, "learning_rate": 7.463644233236661e-06, "loss": 0.5192, "step": 23984 }, { "epoch": 1.7826086956521738, "grad_norm": 2.525096261971597, "learning_rate": 7.462868110714533e-06, "loss": 0.6256, "step": 23985 }, { "epoch": 1.7826830174656263, "grad_norm": 2.823010673369974, "learning_rate": 7.462092004526329e-06, "loss": 0.7367, "step": 23986 }, { "epoch": 1.7827573392790783, "grad_norm": 1.8258726249869228, "learning_rate": 7.461315914677045e-06, "loss": 0.5763, "step": 23987 }, { "epoch": 1.7828316610925308, "grad_norm": 3.3612526466091794, "learning_rate": 7.460539841171676e-06, "loss": 0.5477, "step": 23988 }, { "epoch": 1.7829059829059828, "grad_norm": 2.042703215670396, "learning_rate": 7.4597637840152195e-06, "loss": 0.6105, "step": 23989 }, { "epoch": 1.7829803047194352, "grad_norm": 1.7966814125778532, "learning_rate": 7.458987743212676e-06, "loss": 0.5262, "step": 23990 }, { "epoch": 1.7830546265328873, "grad_norm": 1.8468791495063914, "learning_rate": 7.458211718769035e-06, "loss": 0.5435, "step": 23991 }, { "epoch": 1.7831289483463397, "grad_norm": 2.8661109734717316, "learning_rate": 7.457435710689296e-06, "loss": 0.6045, "step": 23992 }, { "epoch": 1.7832032701597917, "grad_norm": 2.0967007359521257, "learning_rate": 7.456659718978452e-06, "loss": 0.5376, "step": 23993 }, { "epoch": 1.7832775919732442, "grad_norm": 2.1202160971849278, "learning_rate": 7.455883743641501e-06, "loss": 0.5686, "step": 23994 }, { "epoch": 1.7833519137866964, "grad_norm": 2.1225886510186527, "learning_rate": 7.455107784683438e-06, "loss": 0.7549, "step": 23995 }, { "epoch": 1.7834262356001487, "grad_norm": 2.183275550637862, "learning_rate": 7.454331842109259e-06, "loss": 0.6161, "step": 23996 }, { "epoch": 1.783500557413601, "grad_norm": 2.295200009634264, "learning_rate": 7.4535559159239625e-06, "loss": 0.6597, "step": 23997 }, { "epoch": 1.7835748792270532, "grad_norm": 2.2928944170182293, "learning_rate": 7.452780006132538e-06, "loss": 0.5994, "step": 23998 }, { "epoch": 1.7836492010405054, "grad_norm": 2.0988475306098993, "learning_rate": 7.4520041127399846e-06, "loss": 0.6636, "step": 23999 }, { "epoch": 1.7837235228539576, "grad_norm": 2.0348935285621335, "learning_rate": 7.451228235751295e-06, "loss": 0.586, "step": 24000 }, { "epoch": 1.7837978446674099, "grad_norm": 1.7541033147655802, "learning_rate": 7.45045237517147e-06, "loss": 0.4823, "step": 24001 }, { "epoch": 1.783872166480862, "grad_norm": 2.192940570016267, "learning_rate": 7.449676531005499e-06, "loss": 0.5983, "step": 24002 }, { "epoch": 1.7839464882943143, "grad_norm": 1.9522628130192912, "learning_rate": 7.448900703258377e-06, "loss": 0.5922, "step": 24003 }, { "epoch": 1.7840208101077666, "grad_norm": 1.8262526836413484, "learning_rate": 7.4481248919351e-06, "loss": 0.6564, "step": 24004 }, { "epoch": 1.7840951319212188, "grad_norm": 2.299160415886341, "learning_rate": 7.447349097040661e-06, "loss": 0.6456, "step": 24005 }, { "epoch": 1.784169453734671, "grad_norm": 2.1082635810376074, "learning_rate": 7.4465733185800575e-06, "loss": 0.6185, "step": 24006 }, { "epoch": 1.7842437755481235, "grad_norm": 1.9601950119679619, "learning_rate": 7.445797556558281e-06, "loss": 0.5891, "step": 24007 }, { "epoch": 1.7843180973615755, "grad_norm": 1.6319029262170115, "learning_rate": 7.4450218109803305e-06, "loss": 0.5258, "step": 24008 }, { "epoch": 1.784392419175028, "grad_norm": 1.7786020502561264, "learning_rate": 7.444246081851194e-06, "loss": 0.4911, "step": 24009 }, { "epoch": 1.78446674098848, "grad_norm": 2.11613587010326, "learning_rate": 7.443470369175869e-06, "loss": 0.6542, "step": 24010 }, { "epoch": 1.7845410628019325, "grad_norm": 1.8306630457563493, "learning_rate": 7.44269467295935e-06, "loss": 0.6125, "step": 24011 }, { "epoch": 1.7846153846153845, "grad_norm": 1.822608036822418, "learning_rate": 7.441918993206629e-06, "loss": 0.5919, "step": 24012 }, { "epoch": 1.784689706428837, "grad_norm": 2.2611444247997565, "learning_rate": 7.441143329922702e-06, "loss": 0.5763, "step": 24013 }, { "epoch": 1.784764028242289, "grad_norm": 2.30878149883299, "learning_rate": 7.4403676831125645e-06, "loss": 0.6308, "step": 24014 }, { "epoch": 1.7848383500557414, "grad_norm": 2.5364581034100615, "learning_rate": 7.4395920527812035e-06, "loss": 0.7514, "step": 24015 }, { "epoch": 1.7849126718691934, "grad_norm": 1.6813451100599652, "learning_rate": 7.4388164389336145e-06, "loss": 0.5415, "step": 24016 }, { "epoch": 1.784986993682646, "grad_norm": 1.6608388660813769, "learning_rate": 7.438040841574794e-06, "loss": 0.4868, "step": 24017 }, { "epoch": 1.7850613154960981, "grad_norm": 1.6375276327895225, "learning_rate": 7.437265260709733e-06, "loss": 0.5443, "step": 24018 }, { "epoch": 1.7851356373095504, "grad_norm": 2.2286198682771245, "learning_rate": 7.4364896963434276e-06, "loss": 0.6431, "step": 24019 }, { "epoch": 1.7852099591230026, "grad_norm": 1.7958389213774284, "learning_rate": 7.4357141484808656e-06, "loss": 0.5502, "step": 24020 }, { "epoch": 1.7852842809364549, "grad_norm": 2.4364066774395576, "learning_rate": 7.4349386171270435e-06, "loss": 0.5451, "step": 24021 }, { "epoch": 1.785358602749907, "grad_norm": 2.516378873397013, "learning_rate": 7.434163102286953e-06, "loss": 0.7073, "step": 24022 }, { "epoch": 1.7854329245633593, "grad_norm": 1.9670158261311008, "learning_rate": 7.4333876039655895e-06, "loss": 0.5121, "step": 24023 }, { "epoch": 1.7855072463768116, "grad_norm": 1.870161553028503, "learning_rate": 7.432612122167942e-06, "loss": 0.6196, "step": 24024 }, { "epoch": 1.7855815681902638, "grad_norm": 1.5881574320515295, "learning_rate": 7.431836656899008e-06, "loss": 0.4925, "step": 24025 }, { "epoch": 1.785655890003716, "grad_norm": 2.3903109262011815, "learning_rate": 7.431061208163774e-06, "loss": 0.7432, "step": 24026 }, { "epoch": 1.7857302118171683, "grad_norm": 2.1146624823860254, "learning_rate": 7.430285775967232e-06, "loss": 0.6633, "step": 24027 }, { "epoch": 1.7858045336306207, "grad_norm": 3.2003205991212123, "learning_rate": 7.429510360314378e-06, "loss": 0.6335, "step": 24028 }, { "epoch": 1.7858788554440728, "grad_norm": 2.3345969827585242, "learning_rate": 7.428734961210204e-06, "loss": 0.5752, "step": 24029 }, { "epoch": 1.7859531772575252, "grad_norm": 2.076342291543344, "learning_rate": 7.4279595786597e-06, "loss": 0.6202, "step": 24030 }, { "epoch": 1.7860274990709772, "grad_norm": 2.1412908458875624, "learning_rate": 7.42718421266786e-06, "loss": 0.7421, "step": 24031 }, { "epoch": 1.7861018208844297, "grad_norm": 1.650570577791616, "learning_rate": 7.4264088632396724e-06, "loss": 0.5456, "step": 24032 }, { "epoch": 1.7861761426978817, "grad_norm": 2.0593727278387397, "learning_rate": 7.425633530380132e-06, "loss": 0.5329, "step": 24033 }, { "epoch": 1.7862504645113342, "grad_norm": 2.1278838726682014, "learning_rate": 7.424858214094231e-06, "loss": 0.6025, "step": 24034 }, { "epoch": 1.7863247863247862, "grad_norm": 1.90847944384129, "learning_rate": 7.424082914386957e-06, "loss": 0.5206, "step": 24035 }, { "epoch": 1.7863991081382387, "grad_norm": 1.5587855147958642, "learning_rate": 7.423307631263308e-06, "loss": 0.5418, "step": 24036 }, { "epoch": 1.7864734299516907, "grad_norm": 1.7087157311861048, "learning_rate": 7.422532364728268e-06, "loss": 0.5642, "step": 24037 }, { "epoch": 1.7865477517651431, "grad_norm": 1.6652525116135293, "learning_rate": 7.421757114786828e-06, "loss": 0.5865, "step": 24038 }, { "epoch": 1.7866220735785954, "grad_norm": 1.7410392939772914, "learning_rate": 7.420981881443984e-06, "loss": 0.5124, "step": 24039 }, { "epoch": 1.7866963953920476, "grad_norm": 1.9135074058010761, "learning_rate": 7.420206664704723e-06, "loss": 0.4986, "step": 24040 }, { "epoch": 1.7867707172054998, "grad_norm": 2.0549002725701295, "learning_rate": 7.419431464574043e-06, "loss": 0.4052, "step": 24041 }, { "epoch": 1.786845039018952, "grad_norm": 1.9509437885826357, "learning_rate": 7.418656281056924e-06, "loss": 0.5184, "step": 24042 }, { "epoch": 1.7869193608324043, "grad_norm": 1.5166371469163407, "learning_rate": 7.417881114158363e-06, "loss": 0.4516, "step": 24043 }, { "epoch": 1.7869936826458566, "grad_norm": 1.8208458201808384, "learning_rate": 7.41710596388335e-06, "loss": 0.5978, "step": 24044 }, { "epoch": 1.7870680044593088, "grad_norm": 1.6173422140302525, "learning_rate": 7.416330830236876e-06, "loss": 0.5467, "step": 24045 }, { "epoch": 1.787142326272761, "grad_norm": 2.1694621020506086, "learning_rate": 7.415555713223929e-06, "loss": 0.6226, "step": 24046 }, { "epoch": 1.7872166480862133, "grad_norm": 1.7795987478624995, "learning_rate": 7.4147806128495e-06, "loss": 0.5372, "step": 24047 }, { "epoch": 1.7872909698996655, "grad_norm": 2.2436609314184244, "learning_rate": 7.414005529118582e-06, "loss": 0.7333, "step": 24048 }, { "epoch": 1.7873652917131178, "grad_norm": 2.0501968780328554, "learning_rate": 7.413230462036159e-06, "loss": 0.6719, "step": 24049 }, { "epoch": 1.78743961352657, "grad_norm": 2.5373712433869846, "learning_rate": 7.412455411607224e-06, "loss": 0.5732, "step": 24050 }, { "epoch": 1.7875139353400225, "grad_norm": 1.7398113760837515, "learning_rate": 7.411680377836768e-06, "loss": 0.5416, "step": 24051 }, { "epoch": 1.7875882571534745, "grad_norm": 1.5555169633975283, "learning_rate": 7.410905360729781e-06, "loss": 0.3979, "step": 24052 }, { "epoch": 1.787662578966927, "grad_norm": 1.8877032655091346, "learning_rate": 7.410130360291248e-06, "loss": 0.4381, "step": 24053 }, { "epoch": 1.787736900780379, "grad_norm": 2.2352055542215252, "learning_rate": 7.409355376526161e-06, "loss": 0.5775, "step": 24054 }, { "epoch": 1.7878112225938314, "grad_norm": 2.1800053162137027, "learning_rate": 7.408580409439509e-06, "loss": 0.6529, "step": 24055 }, { "epoch": 1.7878855444072834, "grad_norm": 2.0631551522152236, "learning_rate": 7.407805459036287e-06, "loss": 0.495, "step": 24056 }, { "epoch": 1.7879598662207359, "grad_norm": 3.003308816065652, "learning_rate": 7.4070305253214745e-06, "loss": 0.7107, "step": 24057 }, { "epoch": 1.788034188034188, "grad_norm": 2.2442855121408645, "learning_rate": 7.406255608300066e-06, "loss": 0.4177, "step": 24058 }, { "epoch": 1.7881085098476404, "grad_norm": 2.0132857723082607, "learning_rate": 7.405480707977053e-06, "loss": 0.5987, "step": 24059 }, { "epoch": 1.7881828316610924, "grad_norm": 1.7690415465807983, "learning_rate": 7.404705824357417e-06, "loss": 0.5746, "step": 24060 }, { "epoch": 1.7882571534745448, "grad_norm": 2.0193512537533573, "learning_rate": 7.403930957446151e-06, "loss": 0.5523, "step": 24061 }, { "epoch": 1.788331475287997, "grad_norm": 2.369268919583287, "learning_rate": 7.403156107248241e-06, "loss": 0.6777, "step": 24062 }, { "epoch": 1.7884057971014493, "grad_norm": 1.7965504561461072, "learning_rate": 7.40238127376868e-06, "loss": 0.645, "step": 24063 }, { "epoch": 1.7884801189149016, "grad_norm": 1.8492032721811418, "learning_rate": 7.401606457012451e-06, "loss": 0.4702, "step": 24064 }, { "epoch": 1.7885544407283538, "grad_norm": 1.8682325672429148, "learning_rate": 7.400831656984544e-06, "loss": 0.5781, "step": 24065 }, { "epoch": 1.788628762541806, "grad_norm": 1.7692403378641577, "learning_rate": 7.400056873689949e-06, "loss": 0.5827, "step": 24066 }, { "epoch": 1.7887030843552583, "grad_norm": 2.0213110247465, "learning_rate": 7.399282107133656e-06, "loss": 0.5464, "step": 24067 }, { "epoch": 1.7887774061687105, "grad_norm": 2.374588745160143, "learning_rate": 7.398507357320647e-06, "loss": 0.7205, "step": 24068 }, { "epoch": 1.7888517279821627, "grad_norm": 1.8949167049666962, "learning_rate": 7.397732624255913e-06, "loss": 0.4873, "step": 24069 }, { "epoch": 1.788926049795615, "grad_norm": 2.1273983901250837, "learning_rate": 7.396957907944445e-06, "loss": 0.531, "step": 24070 }, { "epoch": 1.7890003716090672, "grad_norm": 2.153894351039836, "learning_rate": 7.396183208391223e-06, "loss": 0.5478, "step": 24071 }, { "epoch": 1.7890746934225195, "grad_norm": 2.370274763653884, "learning_rate": 7.395408525601237e-06, "loss": 0.5079, "step": 24072 }, { "epoch": 1.7891490152359717, "grad_norm": 2.055267982438578, "learning_rate": 7.394633859579478e-06, "loss": 0.6823, "step": 24073 }, { "epoch": 1.7892233370494242, "grad_norm": 2.2275946084738547, "learning_rate": 7.393859210330931e-06, "loss": 0.6278, "step": 24074 }, { "epoch": 1.7892976588628762, "grad_norm": 1.9024359439956038, "learning_rate": 7.393084577860583e-06, "loss": 0.5919, "step": 24075 }, { "epoch": 1.7893719806763286, "grad_norm": 2.1061250761028996, "learning_rate": 7.3923099621734205e-06, "loss": 0.5289, "step": 24076 }, { "epoch": 1.7894463024897806, "grad_norm": 1.9038205728266264, "learning_rate": 7.391535363274431e-06, "loss": 0.53, "step": 24077 }, { "epoch": 1.789520624303233, "grad_norm": 2.254642320956184, "learning_rate": 7.390760781168604e-06, "loss": 0.6236, "step": 24078 }, { "epoch": 1.7895949461166851, "grad_norm": 1.9238047915581327, "learning_rate": 7.38998621586092e-06, "loss": 0.5267, "step": 24079 }, { "epoch": 1.7896692679301376, "grad_norm": 2.2198844398446056, "learning_rate": 7.389211667356372e-06, "loss": 0.6187, "step": 24080 }, { "epoch": 1.7897435897435896, "grad_norm": 2.1171071572150035, "learning_rate": 7.388437135659947e-06, "loss": 0.7344, "step": 24081 }, { "epoch": 1.789817911557042, "grad_norm": 2.3355972872874116, "learning_rate": 7.387662620776623e-06, "loss": 0.619, "step": 24082 }, { "epoch": 1.789892233370494, "grad_norm": 1.996387180562712, "learning_rate": 7.386888122711392e-06, "loss": 0.4891, "step": 24083 }, { "epoch": 1.7899665551839465, "grad_norm": 2.2578796848812, "learning_rate": 7.38611364146924e-06, "loss": 0.614, "step": 24084 }, { "epoch": 1.7900408769973988, "grad_norm": 1.933504119138161, "learning_rate": 7.385339177055155e-06, "loss": 0.644, "step": 24085 }, { "epoch": 1.790115198810851, "grad_norm": 1.858279376466319, "learning_rate": 7.384564729474117e-06, "loss": 0.5933, "step": 24086 }, { "epoch": 1.7901895206243033, "grad_norm": 1.8224014372099366, "learning_rate": 7.383790298731117e-06, "loss": 0.6561, "step": 24087 }, { "epoch": 1.7902638424377555, "grad_norm": 3.1088824650288065, "learning_rate": 7.383015884831139e-06, "loss": 0.5365, "step": 24088 }, { "epoch": 1.7903381642512077, "grad_norm": 2.868272076453782, "learning_rate": 7.3822414877791725e-06, "loss": 0.7895, "step": 24089 }, { "epoch": 1.79041248606466, "grad_norm": 2.2002944575778076, "learning_rate": 7.381467107580196e-06, "loss": 0.6908, "step": 24090 }, { "epoch": 1.7904868078781122, "grad_norm": 2.990666193149072, "learning_rate": 7.380692744239198e-06, "loss": 0.5223, "step": 24091 }, { "epoch": 1.7905611296915644, "grad_norm": 1.8664980806799925, "learning_rate": 7.379918397761166e-06, "loss": 0.54, "step": 24092 }, { "epoch": 1.7906354515050167, "grad_norm": 1.7670508150876567, "learning_rate": 7.379144068151085e-06, "loss": 0.5201, "step": 24093 }, { "epoch": 1.790709773318469, "grad_norm": 1.898460453924143, "learning_rate": 7.3783697554139366e-06, "loss": 0.5882, "step": 24094 }, { "epoch": 1.7907840951319214, "grad_norm": 2.0138827717993255, "learning_rate": 7.377595459554708e-06, "loss": 0.6079, "step": 24095 }, { "epoch": 1.7908584169453734, "grad_norm": 1.4959143875473098, "learning_rate": 7.376821180578383e-06, "loss": 0.4732, "step": 24096 }, { "epoch": 1.7909327387588259, "grad_norm": 1.9630016611809682, "learning_rate": 7.376046918489946e-06, "loss": 0.6203, "step": 24097 }, { "epoch": 1.7910070605722779, "grad_norm": 2.4079255208106076, "learning_rate": 7.3752726732943836e-06, "loss": 0.4792, "step": 24098 }, { "epoch": 1.7910813823857303, "grad_norm": 1.8651752186274717, "learning_rate": 7.374498444996679e-06, "loss": 0.6005, "step": 24099 }, { "epoch": 1.7911557041991824, "grad_norm": 2.241234626554262, "learning_rate": 7.37372423360182e-06, "loss": 0.5062, "step": 24100 }, { "epoch": 1.7912300260126348, "grad_norm": 1.8859090780811363, "learning_rate": 7.372950039114786e-06, "loss": 0.4468, "step": 24101 }, { "epoch": 1.7913043478260868, "grad_norm": 1.9950658328329993, "learning_rate": 7.372175861540563e-06, "loss": 0.6892, "step": 24102 }, { "epoch": 1.7913786696395393, "grad_norm": 2.15332759600072, "learning_rate": 7.371401700884136e-06, "loss": 0.7139, "step": 24103 }, { "epoch": 1.7914529914529913, "grad_norm": 2.230387687307957, "learning_rate": 7.370627557150492e-06, "loss": 0.4883, "step": 24104 }, { "epoch": 1.7915273132664438, "grad_norm": 2.974288622108899, "learning_rate": 7.369853430344607e-06, "loss": 0.6327, "step": 24105 }, { "epoch": 1.791601635079896, "grad_norm": 1.7522143318006778, "learning_rate": 7.36907932047147e-06, "loss": 0.6348, "step": 24106 }, { "epoch": 1.7916759568933482, "grad_norm": 1.487745409495312, "learning_rate": 7.368305227536063e-06, "loss": 0.3201, "step": 24107 }, { "epoch": 1.7917502787068005, "grad_norm": 1.8537898404365278, "learning_rate": 7.367531151543373e-06, "loss": 0.4921, "step": 24108 }, { "epoch": 1.7918246005202527, "grad_norm": 2.272106903329781, "learning_rate": 7.366757092498377e-06, "loss": 0.4827, "step": 24109 }, { "epoch": 1.791898922333705, "grad_norm": 1.9376115075867009, "learning_rate": 7.365983050406064e-06, "loss": 0.5278, "step": 24110 }, { "epoch": 1.7919732441471572, "grad_norm": 2.2107485757678402, "learning_rate": 7.365209025271417e-06, "loss": 0.5818, "step": 24111 }, { "epoch": 1.7920475659606094, "grad_norm": 1.9278376199405636, "learning_rate": 7.364435017099416e-06, "loss": 0.6247, "step": 24112 }, { "epoch": 1.7921218877740617, "grad_norm": 2.2029031874912834, "learning_rate": 7.363661025895046e-06, "loss": 0.6034, "step": 24113 }, { "epoch": 1.792196209587514, "grad_norm": 2.044003582448804, "learning_rate": 7.362887051663288e-06, "loss": 0.5116, "step": 24114 }, { "epoch": 1.7922705314009661, "grad_norm": 2.0428955795441874, "learning_rate": 7.362113094409131e-06, "loss": 0.6424, "step": 24115 }, { "epoch": 1.7923448532144184, "grad_norm": 1.8321976148076333, "learning_rate": 7.36133915413755e-06, "loss": 0.5874, "step": 24116 }, { "epoch": 1.7924191750278706, "grad_norm": 1.6208497908880515, "learning_rate": 7.360565230853531e-06, "loss": 0.3529, "step": 24117 }, { "epoch": 1.792493496841323, "grad_norm": 1.8450280983622978, "learning_rate": 7.359791324562055e-06, "loss": 0.6335, "step": 24118 }, { "epoch": 1.792567818654775, "grad_norm": 1.955940899797263, "learning_rate": 7.359017435268105e-06, "loss": 0.5496, "step": 24119 }, { "epoch": 1.7926421404682276, "grad_norm": 1.904950325689161, "learning_rate": 7.358243562976665e-06, "loss": 0.4751, "step": 24120 }, { "epoch": 1.7927164622816796, "grad_norm": 2.145131138976786, "learning_rate": 7.357469707692715e-06, "loss": 0.482, "step": 24121 }, { "epoch": 1.792790784095132, "grad_norm": 1.9493841251222959, "learning_rate": 7.35669586942124e-06, "loss": 0.5248, "step": 24122 }, { "epoch": 1.792865105908584, "grad_norm": 1.6222800271290962, "learning_rate": 7.355922048167217e-06, "loss": 0.485, "step": 24123 }, { "epoch": 1.7929394277220365, "grad_norm": 2.1180190549027436, "learning_rate": 7.3551482439356325e-06, "loss": 0.6736, "step": 24124 }, { "epoch": 1.7930137495354885, "grad_norm": 1.8818775813143473, "learning_rate": 7.3543744567314645e-06, "loss": 0.7067, "step": 24125 }, { "epoch": 1.793088071348941, "grad_norm": 2.090690891088897, "learning_rate": 7.3536006865596986e-06, "loss": 0.6815, "step": 24126 }, { "epoch": 1.793162393162393, "grad_norm": 1.9880659006736303, "learning_rate": 7.352826933425316e-06, "loss": 0.6556, "step": 24127 }, { "epoch": 1.7932367149758455, "grad_norm": 1.8632383814864895, "learning_rate": 7.3520531973332954e-06, "loss": 0.4877, "step": 24128 }, { "epoch": 1.7933110367892977, "grad_norm": 1.4894083279502417, "learning_rate": 7.351279478288615e-06, "loss": 0.3667, "step": 24129 }, { "epoch": 1.79338535860275, "grad_norm": 1.6893902358090909, "learning_rate": 7.350505776296262e-06, "loss": 0.5376, "step": 24130 }, { "epoch": 1.7934596804162022, "grad_norm": 2.5704631787284025, "learning_rate": 7.349732091361214e-06, "loss": 0.4022, "step": 24131 }, { "epoch": 1.7935340022296544, "grad_norm": 1.2994213216525148, "learning_rate": 7.348958423488455e-06, "loss": 0.4152, "step": 24132 }, { "epoch": 1.7936083240431067, "grad_norm": 2.9445747781045943, "learning_rate": 7.348184772682966e-06, "loss": 0.7762, "step": 24133 }, { "epoch": 1.793682645856559, "grad_norm": 2.026813057086496, "learning_rate": 7.347411138949723e-06, "loss": 0.5044, "step": 24134 }, { "epoch": 1.7937569676700111, "grad_norm": 1.789754245871915, "learning_rate": 7.346637522293709e-06, "loss": 0.5988, "step": 24135 }, { "epoch": 1.7938312894834634, "grad_norm": 2.223955985559621, "learning_rate": 7.345863922719906e-06, "loss": 0.6611, "step": 24136 }, { "epoch": 1.7939056112969156, "grad_norm": 1.8509796350595533, "learning_rate": 7.345090340233294e-06, "loss": 0.6555, "step": 24137 }, { "epoch": 1.7939799331103679, "grad_norm": 2.0751890407651326, "learning_rate": 7.344316774838853e-06, "loss": 0.5904, "step": 24138 }, { "epoch": 1.79405425492382, "grad_norm": 2.68463488724356, "learning_rate": 7.343543226541565e-06, "loss": 0.6605, "step": 24139 }, { "epoch": 1.7941285767372723, "grad_norm": 2.4047377776826484, "learning_rate": 7.3427696953464035e-06, "loss": 0.5666, "step": 24140 }, { "epoch": 1.7942028985507248, "grad_norm": 2.091275005271481, "learning_rate": 7.341996181258353e-06, "loss": 0.6465, "step": 24141 }, { "epoch": 1.7942772203641768, "grad_norm": 1.9145216467313189, "learning_rate": 7.341222684282394e-06, "loss": 0.4862, "step": 24142 }, { "epoch": 1.7943515421776293, "grad_norm": 2.5798953262813016, "learning_rate": 7.340449204423506e-06, "loss": 0.6858, "step": 24143 }, { "epoch": 1.7944258639910813, "grad_norm": 2.0269739125531467, "learning_rate": 7.339675741686668e-06, "loss": 0.5406, "step": 24144 }, { "epoch": 1.7945001858045337, "grad_norm": 1.9896919263792634, "learning_rate": 7.338902296076859e-06, "loss": 0.7484, "step": 24145 }, { "epoch": 1.7945745076179858, "grad_norm": 1.7147467709801936, "learning_rate": 7.338128867599058e-06, "loss": 0.5306, "step": 24146 }, { "epoch": 1.7946488294314382, "grad_norm": 1.816448655310158, "learning_rate": 7.337355456258246e-06, "loss": 0.6626, "step": 24147 }, { "epoch": 1.7947231512448902, "grad_norm": 1.9689948134729907, "learning_rate": 7.336582062059403e-06, "loss": 0.5849, "step": 24148 }, { "epoch": 1.7947974730583427, "grad_norm": 1.7291024450019088, "learning_rate": 7.3358086850075074e-06, "loss": 0.4863, "step": 24149 }, { "epoch": 1.7948717948717947, "grad_norm": 2.3021735866466138, "learning_rate": 7.335035325107535e-06, "loss": 0.6811, "step": 24150 }, { "epoch": 1.7949461166852472, "grad_norm": 2.627866924465015, "learning_rate": 7.334261982364468e-06, "loss": 0.6003, "step": 24151 }, { "epoch": 1.7950204384986994, "grad_norm": 2.1311067184739754, "learning_rate": 7.333488656783283e-06, "loss": 0.5199, "step": 24152 }, { "epoch": 1.7950947603121516, "grad_norm": 1.4691669003023362, "learning_rate": 7.3327153483689585e-06, "loss": 0.4394, "step": 24153 }, { "epoch": 1.7951690821256039, "grad_norm": 3.1648692578584634, "learning_rate": 7.331942057126475e-06, "loss": 0.7378, "step": 24154 }, { "epoch": 1.7952434039390561, "grad_norm": 1.6153653082397013, "learning_rate": 7.331168783060811e-06, "loss": 0.4586, "step": 24155 }, { "epoch": 1.7953177257525084, "grad_norm": 1.8370354964501392, "learning_rate": 7.330395526176943e-06, "loss": 0.5039, "step": 24156 }, { "epoch": 1.7953920475659606, "grad_norm": 2.3837021732460344, "learning_rate": 7.329622286479851e-06, "loss": 0.662, "step": 24157 }, { "epoch": 1.7954663693794128, "grad_norm": 2.377237006857719, "learning_rate": 7.32884906397451e-06, "loss": 0.7825, "step": 24158 }, { "epoch": 1.795540691192865, "grad_norm": 2.1030021573095294, "learning_rate": 7.328075858665904e-06, "loss": 0.6423, "step": 24159 }, { "epoch": 1.7956150130063173, "grad_norm": 2.4432395660710564, "learning_rate": 7.327302670559004e-06, "loss": 0.6214, "step": 24160 }, { "epoch": 1.7956893348197696, "grad_norm": 1.8876100082481089, "learning_rate": 7.326529499658796e-06, "loss": 0.5155, "step": 24161 }, { "epoch": 1.795763656633222, "grad_norm": 2.1609035812477693, "learning_rate": 7.325756345970246e-06, "loss": 0.6045, "step": 24162 }, { "epoch": 1.795837978446674, "grad_norm": 1.9284702649606527, "learning_rate": 7.32498320949834e-06, "loss": 0.4712, "step": 24163 }, { "epoch": 1.7959123002601265, "grad_norm": 3.2409003529438176, "learning_rate": 7.324210090248052e-06, "loss": 0.6589, "step": 24164 }, { "epoch": 1.7959866220735785, "grad_norm": 1.7594646813918786, "learning_rate": 7.323436988224362e-06, "loss": 0.483, "step": 24165 }, { "epoch": 1.796060943887031, "grad_norm": 2.016387640005919, "learning_rate": 7.322663903432245e-06, "loss": 0.5615, "step": 24166 }, { "epoch": 1.796135265700483, "grad_norm": 2.7607370724217937, "learning_rate": 7.321890835876679e-06, "loss": 0.7517, "step": 24167 }, { "epoch": 1.7962095875139354, "grad_norm": 1.7006628817380576, "learning_rate": 7.321117785562641e-06, "loss": 0.465, "step": 24168 }, { "epoch": 1.7962839093273875, "grad_norm": 1.8636390049828875, "learning_rate": 7.3203447524951074e-06, "loss": 0.4925, "step": 24169 }, { "epoch": 1.79635823114084, "grad_norm": 1.846840156320502, "learning_rate": 7.319571736679057e-06, "loss": 0.4563, "step": 24170 }, { "epoch": 1.796432552954292, "grad_norm": 2.055989835025891, "learning_rate": 7.318798738119463e-06, "loss": 0.5537, "step": 24171 }, { "epoch": 1.7965068747677444, "grad_norm": 2.4862823596054997, "learning_rate": 7.318025756821308e-06, "loss": 0.5667, "step": 24172 }, { "epoch": 1.7965811965811964, "grad_norm": 2.055918980210728, "learning_rate": 7.31725279278956e-06, "loss": 0.5723, "step": 24173 }, { "epoch": 1.7966555183946489, "grad_norm": 1.904701128334403, "learning_rate": 7.316479846029198e-06, "loss": 0.5453, "step": 24174 }, { "epoch": 1.7967298402081011, "grad_norm": 2.210283051147211, "learning_rate": 7.3157069165452e-06, "loss": 0.623, "step": 24175 }, { "epoch": 1.7968041620215534, "grad_norm": 1.9225424597352807, "learning_rate": 7.314934004342542e-06, "loss": 0.6247, "step": 24176 }, { "epoch": 1.7968784838350056, "grad_norm": 2.2598419582885527, "learning_rate": 7.314161109426201e-06, "loss": 0.6403, "step": 24177 }, { "epoch": 1.7969528056484578, "grad_norm": 2.049531490630245, "learning_rate": 7.31338823180115e-06, "loss": 0.704, "step": 24178 }, { "epoch": 1.79702712746191, "grad_norm": 2.078060470544245, "learning_rate": 7.312615371472367e-06, "loss": 0.5379, "step": 24179 }, { "epoch": 1.7971014492753623, "grad_norm": 1.8124555251283507, "learning_rate": 7.311842528444825e-06, "loss": 0.4396, "step": 24180 }, { "epoch": 1.7971757710888145, "grad_norm": 2.1185106620898795, "learning_rate": 7.311069702723505e-06, "loss": 0.5725, "step": 24181 }, { "epoch": 1.7972500929022668, "grad_norm": 2.709844695452288, "learning_rate": 7.310296894313376e-06, "loss": 0.6917, "step": 24182 }, { "epoch": 1.797324414715719, "grad_norm": 2.2637120535916906, "learning_rate": 7.30952410321942e-06, "loss": 0.6372, "step": 24183 }, { "epoch": 1.7973987365291713, "grad_norm": 2.161025618531029, "learning_rate": 7.308751329446604e-06, "loss": 0.6433, "step": 24184 }, { "epoch": 1.7974730583426237, "grad_norm": 2.2187755228247954, "learning_rate": 7.307978572999908e-06, "loss": 0.6668, "step": 24185 }, { "epoch": 1.7975473801560757, "grad_norm": 2.442696913766212, "learning_rate": 7.307205833884305e-06, "loss": 0.6487, "step": 24186 }, { "epoch": 1.7976217019695282, "grad_norm": 2.1890086418797043, "learning_rate": 7.306433112104772e-06, "loss": 0.5862, "step": 24187 }, { "epoch": 1.7976960237829802, "grad_norm": 2.1187005669603045, "learning_rate": 7.305660407666284e-06, "loss": 0.5794, "step": 24188 }, { "epoch": 1.7977703455964327, "grad_norm": 1.9318157180059867, "learning_rate": 7.304887720573814e-06, "loss": 0.5644, "step": 24189 }, { "epoch": 1.7978446674098847, "grad_norm": 1.7690605403599329, "learning_rate": 7.3041150508323355e-06, "loss": 0.5321, "step": 24190 }, { "epoch": 1.7979189892233371, "grad_norm": 1.9938178663785782, "learning_rate": 7.303342398446826e-06, "loss": 0.4301, "step": 24191 }, { "epoch": 1.7979933110367892, "grad_norm": 2.0057849723094114, "learning_rate": 7.3025697634222585e-06, "loss": 0.5904, "step": 24192 }, { "epoch": 1.7980676328502416, "grad_norm": 1.9081506692396901, "learning_rate": 7.301797145763606e-06, "loss": 0.5924, "step": 24193 }, { "epoch": 1.7981419546636936, "grad_norm": 2.0312584006812364, "learning_rate": 7.3010245454758475e-06, "loss": 0.641, "step": 24194 }, { "epoch": 1.798216276477146, "grad_norm": 2.0943946008507903, "learning_rate": 7.300251962563949e-06, "loss": 0.5259, "step": 24195 }, { "epoch": 1.7982905982905983, "grad_norm": 1.8795301469397703, "learning_rate": 7.299479397032887e-06, "loss": 0.6458, "step": 24196 }, { "epoch": 1.7983649201040506, "grad_norm": 2.5328038825245227, "learning_rate": 7.2987068488876375e-06, "loss": 0.6217, "step": 24197 }, { "epoch": 1.7984392419175028, "grad_norm": 1.698259850730081, "learning_rate": 7.297934318133173e-06, "loss": 0.5312, "step": 24198 }, { "epoch": 1.798513563730955, "grad_norm": 2.5445790533385426, "learning_rate": 7.2971618047744695e-06, "loss": 0.7884, "step": 24199 }, { "epoch": 1.7985878855444073, "grad_norm": 2.372095887605976, "learning_rate": 7.2963893088164965e-06, "loss": 0.6887, "step": 24200 }, { "epoch": 1.7986622073578595, "grad_norm": 2.053892135542979, "learning_rate": 7.295616830264227e-06, "loss": 0.6152, "step": 24201 }, { "epoch": 1.7987365291713118, "grad_norm": 1.9653641087880622, "learning_rate": 7.294844369122638e-06, "loss": 0.4953, "step": 24202 }, { "epoch": 1.798810850984764, "grad_norm": 1.9289100731876148, "learning_rate": 7.294071925396702e-06, "loss": 0.7349, "step": 24203 }, { "epoch": 1.7988851727982162, "grad_norm": 2.1257156198184046, "learning_rate": 7.293299499091388e-06, "loss": 0.7301, "step": 24204 }, { "epoch": 1.7989594946116685, "grad_norm": 2.0120002169420963, "learning_rate": 7.292527090211672e-06, "loss": 0.6571, "step": 24205 }, { "epoch": 1.7990338164251207, "grad_norm": 2.0932070673853675, "learning_rate": 7.2917546987625296e-06, "loss": 0.581, "step": 24206 }, { "epoch": 1.799108138238573, "grad_norm": 1.7336575055210162, "learning_rate": 7.290982324748926e-06, "loss": 0.481, "step": 24207 }, { "epoch": 1.7991824600520254, "grad_norm": 1.924769067533151, "learning_rate": 7.290209968175839e-06, "loss": 0.4465, "step": 24208 }, { "epoch": 1.7992567818654774, "grad_norm": 2.1298151213740097, "learning_rate": 7.289437629048237e-06, "loss": 0.7284, "step": 24209 }, { "epoch": 1.79933110367893, "grad_norm": 2.1538530093609296, "learning_rate": 7.2886653073710985e-06, "loss": 0.4916, "step": 24210 }, { "epoch": 1.799405425492382, "grad_norm": 1.6778671568225667, "learning_rate": 7.2878930031493915e-06, "loss": 0.5311, "step": 24211 }, { "epoch": 1.7994797473058344, "grad_norm": 2.3521201958390567, "learning_rate": 7.287120716388085e-06, "loss": 0.5306, "step": 24212 }, { "epoch": 1.7995540691192864, "grad_norm": 1.8573728448430489, "learning_rate": 7.286348447092157e-06, "loss": 0.5164, "step": 24213 }, { "epoch": 1.7996283909327389, "grad_norm": 2.221214609179016, "learning_rate": 7.285576195266579e-06, "loss": 0.5526, "step": 24214 }, { "epoch": 1.7997027127461909, "grad_norm": 2.096014413614642, "learning_rate": 7.284803960916318e-06, "loss": 0.6666, "step": 24215 }, { "epoch": 1.7997770345596433, "grad_norm": 3.8782094075958424, "learning_rate": 7.284031744046348e-06, "loss": 0.4937, "step": 24216 }, { "epoch": 1.7998513563730953, "grad_norm": 2.1729393304649935, "learning_rate": 7.283259544661645e-06, "loss": 0.7329, "step": 24217 }, { "epoch": 1.7999256781865478, "grad_norm": 1.883754520465563, "learning_rate": 7.282487362767172e-06, "loss": 0.6124, "step": 24218 }, { "epoch": 1.8, "grad_norm": 1.6503339569607278, "learning_rate": 7.281715198367905e-06, "loss": 0.5793, "step": 24219 }, { "epoch": 1.8000743218134523, "grad_norm": 1.9402482091246271, "learning_rate": 7.280943051468814e-06, "loss": 0.5568, "step": 24220 }, { "epoch": 1.8001486436269045, "grad_norm": 1.7725520611981354, "learning_rate": 7.280170922074873e-06, "loss": 0.563, "step": 24221 }, { "epoch": 1.8002229654403568, "grad_norm": 1.8826498767679458, "learning_rate": 7.279398810191047e-06, "loss": 0.6238, "step": 24222 }, { "epoch": 1.800297287253809, "grad_norm": 1.6036890047242698, "learning_rate": 7.278626715822312e-06, "loss": 0.5042, "step": 24223 }, { "epoch": 1.8003716090672612, "grad_norm": 1.7837249324611837, "learning_rate": 7.277854638973636e-06, "loss": 0.5605, "step": 24224 }, { "epoch": 1.8004459308807135, "grad_norm": 1.7550359591765623, "learning_rate": 7.277082579649993e-06, "loss": 0.5776, "step": 24225 }, { "epoch": 1.8005202526941657, "grad_norm": 1.8059302889065096, "learning_rate": 7.276310537856349e-06, "loss": 0.4474, "step": 24226 }, { "epoch": 1.800594574507618, "grad_norm": 2.193345270096689, "learning_rate": 7.275538513597676e-06, "loss": 0.6201, "step": 24227 }, { "epoch": 1.8006688963210702, "grad_norm": 2.2663135888602235, "learning_rate": 7.27476650687895e-06, "loss": 0.6549, "step": 24228 }, { "epoch": 1.8007432181345224, "grad_norm": 2.0346780592240963, "learning_rate": 7.273994517705131e-06, "loss": 0.5675, "step": 24229 }, { "epoch": 1.8008175399479747, "grad_norm": 1.7750663605604198, "learning_rate": 7.2732225460811935e-06, "loss": 0.4612, "step": 24230 }, { "epoch": 1.8008918617614271, "grad_norm": 2.276613862896358, "learning_rate": 7.272450592012111e-06, "loss": 0.5374, "step": 24231 }, { "epoch": 1.8009661835748791, "grad_norm": 1.806777574473841, "learning_rate": 7.2716786555028466e-06, "loss": 0.6269, "step": 24232 }, { "epoch": 1.8010405053883316, "grad_norm": 2.50614077946446, "learning_rate": 7.270906736558374e-06, "loss": 0.6722, "step": 24233 }, { "epoch": 1.8011148272017836, "grad_norm": 2.0254939895558723, "learning_rate": 7.270134835183662e-06, "loss": 0.6091, "step": 24234 }, { "epoch": 1.801189149015236, "grad_norm": 2.4028626653766234, "learning_rate": 7.269362951383681e-06, "loss": 0.756, "step": 24235 }, { "epoch": 1.801263470828688, "grad_norm": 1.9918388211347062, "learning_rate": 7.2685910851634e-06, "loss": 0.6708, "step": 24236 }, { "epoch": 1.8013377926421406, "grad_norm": 1.8472170609228058, "learning_rate": 7.267819236527786e-06, "loss": 0.3284, "step": 24237 }, { "epoch": 1.8014121144555926, "grad_norm": 2.3669122015785224, "learning_rate": 7.267047405481812e-06, "loss": 0.6991, "step": 24238 }, { "epoch": 1.801486436269045, "grad_norm": 2.3142417033983462, "learning_rate": 7.266275592030444e-06, "loss": 0.6848, "step": 24239 }, { "epoch": 1.801560758082497, "grad_norm": 2.432254293033504, "learning_rate": 7.265503796178657e-06, "loss": 0.4957, "step": 24240 }, { "epoch": 1.8016350798959495, "grad_norm": 2.1195062999555785, "learning_rate": 7.264732017931408e-06, "loss": 0.5273, "step": 24241 }, { "epoch": 1.8017094017094017, "grad_norm": 1.5112043912442346, "learning_rate": 7.263960257293676e-06, "loss": 0.4114, "step": 24242 }, { "epoch": 1.801783723522854, "grad_norm": 1.5472419559986408, "learning_rate": 7.263188514270425e-06, "loss": 0.4558, "step": 24243 }, { "epoch": 1.8018580453363062, "grad_norm": 2.210819823813184, "learning_rate": 7.262416788866622e-06, "loss": 0.617, "step": 24244 }, { "epoch": 1.8019323671497585, "grad_norm": 1.7439909314917776, "learning_rate": 7.261645081087239e-06, "loss": 0.4822, "step": 24245 }, { "epoch": 1.8020066889632107, "grad_norm": 1.9584497384559292, "learning_rate": 7.260873390937242e-06, "loss": 0.6068, "step": 24246 }, { "epoch": 1.802081010776663, "grad_norm": 1.7199875557297462, "learning_rate": 7.260101718421604e-06, "loss": 0.5601, "step": 24247 }, { "epoch": 1.8021553325901152, "grad_norm": 1.7078893052052464, "learning_rate": 7.259330063545285e-06, "loss": 0.4875, "step": 24248 }, { "epoch": 1.8022296544035674, "grad_norm": 1.9889299276815997, "learning_rate": 7.258558426313258e-06, "loss": 0.6809, "step": 24249 }, { "epoch": 1.8023039762170197, "grad_norm": 1.760743720234563, "learning_rate": 7.257786806730489e-06, "loss": 0.5482, "step": 24250 }, { "epoch": 1.802378298030472, "grad_norm": 1.725523332215382, "learning_rate": 7.25701520480195e-06, "loss": 0.5136, "step": 24251 }, { "epoch": 1.8024526198439244, "grad_norm": 2.38811001377966, "learning_rate": 7.256243620532601e-06, "loss": 0.688, "step": 24252 }, { "epoch": 1.8025269416573764, "grad_norm": 1.894883843979424, "learning_rate": 7.255472053927416e-06, "loss": 0.5791, "step": 24253 }, { "epoch": 1.8026012634708288, "grad_norm": 2.337761544876037, "learning_rate": 7.254700504991357e-06, "loss": 0.7372, "step": 24254 }, { "epoch": 1.8026755852842808, "grad_norm": 1.8861649008047285, "learning_rate": 7.253928973729394e-06, "loss": 0.5966, "step": 24255 }, { "epoch": 1.8027499070977333, "grad_norm": 4.034159383820283, "learning_rate": 7.253157460146492e-06, "loss": 0.7716, "step": 24256 }, { "epoch": 1.8028242289111853, "grad_norm": 2.2851319221288073, "learning_rate": 7.252385964247621e-06, "loss": 0.6231, "step": 24257 }, { "epoch": 1.8028985507246378, "grad_norm": 2.2848613761114756, "learning_rate": 7.251614486037749e-06, "loss": 0.552, "step": 24258 }, { "epoch": 1.8029728725380898, "grad_norm": 1.8481650668238716, "learning_rate": 7.250843025521839e-06, "loss": 0.5062, "step": 24259 }, { "epoch": 1.8030471943515423, "grad_norm": 3.5950413251637334, "learning_rate": 7.2500715827048575e-06, "loss": 0.5295, "step": 24260 }, { "epoch": 1.8031215161649943, "grad_norm": 1.699071905038258, "learning_rate": 7.249300157591772e-06, "loss": 0.5955, "step": 24261 }, { "epoch": 1.8031958379784467, "grad_norm": 1.8992304332223366, "learning_rate": 7.2485287501875556e-06, "loss": 0.4543, "step": 24262 }, { "epoch": 1.803270159791899, "grad_norm": 2.0057917813407737, "learning_rate": 7.247757360497163e-06, "loss": 0.6412, "step": 24263 }, { "epoch": 1.8033444816053512, "grad_norm": 1.5601099164217667, "learning_rate": 7.246985988525568e-06, "loss": 0.4677, "step": 24264 }, { "epoch": 1.8034188034188035, "grad_norm": 1.8660258627084116, "learning_rate": 7.246214634277732e-06, "loss": 0.5324, "step": 24265 }, { "epoch": 1.8034931252322557, "grad_norm": 1.8076035032584403, "learning_rate": 7.245443297758623e-06, "loss": 0.5538, "step": 24266 }, { "epoch": 1.803567447045708, "grad_norm": 2.072954785415748, "learning_rate": 7.244671978973208e-06, "loss": 0.5627, "step": 24267 }, { "epoch": 1.8036417688591602, "grad_norm": 2.189296919650548, "learning_rate": 7.243900677926451e-06, "loss": 0.6705, "step": 24268 }, { "epoch": 1.8037160906726124, "grad_norm": 1.8690333503144363, "learning_rate": 7.24312939462332e-06, "loss": 0.6251, "step": 24269 }, { "epoch": 1.8037904124860646, "grad_norm": 1.8906245734893543, "learning_rate": 7.242358129068779e-06, "loss": 0.3837, "step": 24270 }, { "epoch": 1.8038647342995169, "grad_norm": 2.5434875031788353, "learning_rate": 7.241586881267792e-06, "loss": 0.6265, "step": 24271 }, { "epoch": 1.8039390561129691, "grad_norm": 2.095026763000228, "learning_rate": 7.240815651225325e-06, "loss": 0.5623, "step": 24272 }, { "epoch": 1.8040133779264214, "grad_norm": 1.8402010039602246, "learning_rate": 7.240044438946347e-06, "loss": 0.589, "step": 24273 }, { "epoch": 1.8040876997398736, "grad_norm": 1.9090514958331704, "learning_rate": 7.239273244435818e-06, "loss": 0.5353, "step": 24274 }, { "epoch": 1.804162021553326, "grad_norm": 1.9720911875604277, "learning_rate": 7.238502067698706e-06, "loss": 0.5152, "step": 24275 }, { "epoch": 1.804236343366778, "grad_norm": 1.8044938823149448, "learning_rate": 7.2377309087399714e-06, "loss": 0.475, "step": 24276 }, { "epoch": 1.8043106651802305, "grad_norm": 2.3685597867185613, "learning_rate": 7.236959767564582e-06, "loss": 0.4624, "step": 24277 }, { "epoch": 1.8043849869936825, "grad_norm": 2.4002990703077955, "learning_rate": 7.236188644177503e-06, "loss": 0.5848, "step": 24278 }, { "epoch": 1.804459308807135, "grad_norm": 1.4906349328233208, "learning_rate": 7.235417538583698e-06, "loss": 0.4059, "step": 24279 }, { "epoch": 1.804533630620587, "grad_norm": 1.9046233881787464, "learning_rate": 7.234646450788134e-06, "loss": 0.6602, "step": 24280 }, { "epoch": 1.8046079524340395, "grad_norm": 1.9915011939121268, "learning_rate": 7.233875380795769e-06, "loss": 0.5243, "step": 24281 }, { "epoch": 1.8046822742474915, "grad_norm": 2.107240039065337, "learning_rate": 7.233104328611572e-06, "loss": 0.6289, "step": 24282 }, { "epoch": 1.804756596060944, "grad_norm": 1.7519064067275663, "learning_rate": 7.232333294240506e-06, "loss": 0.608, "step": 24283 }, { "epoch": 1.804830917874396, "grad_norm": 1.8387395759277008, "learning_rate": 7.2315622776875364e-06, "loss": 0.4891, "step": 24284 }, { "epoch": 1.8049052396878484, "grad_norm": 2.3042045327525273, "learning_rate": 7.230791278957624e-06, "loss": 0.714, "step": 24285 }, { "epoch": 1.8049795615013007, "grad_norm": 2.2450050563846333, "learning_rate": 7.230020298055735e-06, "loss": 0.5965, "step": 24286 }, { "epoch": 1.805053883314753, "grad_norm": 2.3830065663475497, "learning_rate": 7.229249334986829e-06, "loss": 0.7802, "step": 24287 }, { "epoch": 1.8051282051282052, "grad_norm": 3.196286788492857, "learning_rate": 7.228478389755872e-06, "loss": 0.5449, "step": 24288 }, { "epoch": 1.8052025269416574, "grad_norm": 2.1300012427602204, "learning_rate": 7.227707462367828e-06, "loss": 0.5364, "step": 24289 }, { "epoch": 1.8052768487551096, "grad_norm": 1.7152744605796533, "learning_rate": 7.226936552827658e-06, "loss": 0.5211, "step": 24290 }, { "epoch": 1.8053511705685619, "grad_norm": 2.0919721425601834, "learning_rate": 7.22616566114033e-06, "loss": 0.551, "step": 24291 }, { "epoch": 1.805425492382014, "grad_norm": 2.023260042499868, "learning_rate": 7.225394787310801e-06, "loss": 0.618, "step": 24292 }, { "epoch": 1.8054998141954663, "grad_norm": 1.7427118489440212, "learning_rate": 7.224623931344037e-06, "loss": 0.5202, "step": 24293 }, { "epoch": 1.8055741360089186, "grad_norm": 1.9507779987965363, "learning_rate": 7.223853093244999e-06, "loss": 0.5313, "step": 24294 }, { "epoch": 1.8056484578223708, "grad_norm": 1.694481045153862, "learning_rate": 7.223082273018653e-06, "loss": 0.561, "step": 24295 }, { "epoch": 1.805722779635823, "grad_norm": 2.3837839585761746, "learning_rate": 7.2223114706699605e-06, "loss": 0.696, "step": 24296 }, { "epoch": 1.8057971014492753, "grad_norm": 2.720391541881771, "learning_rate": 7.2215406862038815e-06, "loss": 0.4769, "step": 24297 }, { "epoch": 1.8058714232627278, "grad_norm": 2.4353199472879985, "learning_rate": 7.220769919625377e-06, "loss": 0.7113, "step": 24298 }, { "epoch": 1.8059457450761798, "grad_norm": 2.1215508859357937, "learning_rate": 7.219999170939413e-06, "loss": 0.5239, "step": 24299 }, { "epoch": 1.8060200668896322, "grad_norm": 1.6596473857826726, "learning_rate": 7.219228440150949e-06, "loss": 0.4816, "step": 24300 }, { "epoch": 1.8060943887030843, "grad_norm": 1.859288149118672, "learning_rate": 7.2184577272649494e-06, "loss": 0.6705, "step": 24301 }, { "epoch": 1.8061687105165367, "grad_norm": 1.8129481748581266, "learning_rate": 7.217687032286375e-06, "loss": 0.515, "step": 24302 }, { "epoch": 1.8062430323299887, "grad_norm": 1.9708497964444172, "learning_rate": 7.216916355220185e-06, "loss": 0.5326, "step": 24303 }, { "epoch": 1.8063173541434412, "grad_norm": 2.2828314191613677, "learning_rate": 7.216145696071344e-06, "loss": 0.465, "step": 24304 }, { "epoch": 1.8063916759568932, "grad_norm": 2.25538169023945, "learning_rate": 7.215375054844813e-06, "loss": 0.5704, "step": 24305 }, { "epoch": 1.8064659977703457, "grad_norm": 2.330496498818661, "learning_rate": 7.214604431545554e-06, "loss": 0.6095, "step": 24306 }, { "epoch": 1.8065403195837977, "grad_norm": 1.821393404720197, "learning_rate": 7.2138338261785266e-06, "loss": 0.5804, "step": 24307 }, { "epoch": 1.8066146413972501, "grad_norm": 1.462696910999356, "learning_rate": 7.213063238748694e-06, "loss": 0.4973, "step": 24308 }, { "epoch": 1.8066889632107024, "grad_norm": 2.0236851774247513, "learning_rate": 7.212292669261013e-06, "loss": 0.4384, "step": 24309 }, { "epoch": 1.8067632850241546, "grad_norm": 1.7986940811078262, "learning_rate": 7.211522117720446e-06, "loss": 0.4186, "step": 24310 }, { "epoch": 1.8068376068376069, "grad_norm": 1.8191662334549799, "learning_rate": 7.210751584131956e-06, "loss": 0.5823, "step": 24311 }, { "epoch": 1.806911928651059, "grad_norm": 2.0656469138065345, "learning_rate": 7.209981068500503e-06, "loss": 0.7243, "step": 24312 }, { "epoch": 1.8069862504645113, "grad_norm": 1.6649936152468023, "learning_rate": 7.209210570831049e-06, "loss": 0.3716, "step": 24313 }, { "epoch": 1.8070605722779636, "grad_norm": 1.9030818489315389, "learning_rate": 7.20844009112855e-06, "loss": 0.513, "step": 24314 }, { "epoch": 1.8071348940914158, "grad_norm": 2.573933984788049, "learning_rate": 7.2076696293979686e-06, "loss": 0.5539, "step": 24315 }, { "epoch": 1.807209215904868, "grad_norm": 1.868651046738147, "learning_rate": 7.206899185644267e-06, "loss": 0.627, "step": 24316 }, { "epoch": 1.8072835377183203, "grad_norm": 2.2028452065504567, "learning_rate": 7.206128759872404e-06, "loss": 0.5661, "step": 24317 }, { "epoch": 1.8073578595317725, "grad_norm": 1.9016355668127243, "learning_rate": 7.205358352087337e-06, "loss": 0.6609, "step": 24318 }, { "epoch": 1.807432181345225, "grad_norm": 1.9375508093657787, "learning_rate": 7.204587962294032e-06, "loss": 0.4772, "step": 24319 }, { "epoch": 1.807506503158677, "grad_norm": 1.9174347742535278, "learning_rate": 7.203817590497441e-06, "loss": 0.4854, "step": 24320 }, { "epoch": 1.8075808249721295, "grad_norm": 2.1032234055746155, "learning_rate": 7.203047236702528e-06, "loss": 0.6505, "step": 24321 }, { "epoch": 1.8076551467855815, "grad_norm": 1.9808870230498656, "learning_rate": 7.20227690091425e-06, "loss": 0.5847, "step": 24322 }, { "epoch": 1.807729468599034, "grad_norm": 1.9167764154412341, "learning_rate": 7.2015065831375694e-06, "loss": 0.5901, "step": 24323 }, { "epoch": 1.807803790412486, "grad_norm": 1.7734402357886723, "learning_rate": 7.2007362833774455e-06, "loss": 0.582, "step": 24324 }, { "epoch": 1.8078781122259384, "grad_norm": 1.8060832493142223, "learning_rate": 7.199966001638835e-06, "loss": 0.6737, "step": 24325 }, { "epoch": 1.8079524340393904, "grad_norm": 1.907202966538485, "learning_rate": 7.199195737926698e-06, "loss": 0.5371, "step": 24326 }, { "epoch": 1.808026755852843, "grad_norm": 4.380988148486233, "learning_rate": 7.198425492245993e-06, "loss": 0.7265, "step": 24327 }, { "epoch": 1.808101077666295, "grad_norm": 2.1848804579348546, "learning_rate": 7.1976552646016814e-06, "loss": 0.4722, "step": 24328 }, { "epoch": 1.8081753994797474, "grad_norm": 1.9932219083156644, "learning_rate": 7.196885054998718e-06, "loss": 0.5411, "step": 24329 }, { "epoch": 1.8082497212931996, "grad_norm": 1.7205953029475225, "learning_rate": 7.196114863442067e-06, "loss": 0.6039, "step": 24330 }, { "epoch": 1.8083240431066518, "grad_norm": 2.007979928424987, "learning_rate": 7.195344689936679e-06, "loss": 0.6982, "step": 24331 }, { "epoch": 1.808398364920104, "grad_norm": 1.661041071163964, "learning_rate": 7.194574534487517e-06, "loss": 0.5304, "step": 24332 }, { "epoch": 1.8084726867335563, "grad_norm": 5.42523338191108, "learning_rate": 7.193804397099538e-06, "loss": 0.5992, "step": 24333 }, { "epoch": 1.8085470085470086, "grad_norm": 1.894985565972503, "learning_rate": 7.193034277777699e-06, "loss": 0.6209, "step": 24334 }, { "epoch": 1.8086213303604608, "grad_norm": 1.578198226867556, "learning_rate": 7.192264176526964e-06, "loss": 0.4821, "step": 24335 }, { "epoch": 1.808695652173913, "grad_norm": 2.114720204938391, "learning_rate": 7.1914940933522846e-06, "loss": 0.7062, "step": 24336 }, { "epoch": 1.8087699739873653, "grad_norm": 2.179737252511731, "learning_rate": 7.19072402825862e-06, "loss": 0.6764, "step": 24337 }, { "epoch": 1.8088442958008175, "grad_norm": 2.1432260111452455, "learning_rate": 7.189953981250928e-06, "loss": 0.459, "step": 24338 }, { "epoch": 1.8089186176142698, "grad_norm": 2.246456478108516, "learning_rate": 7.189183952334169e-06, "loss": 0.5495, "step": 24339 }, { "epoch": 1.808992939427722, "grad_norm": 2.16552448516611, "learning_rate": 7.1884139415132945e-06, "loss": 0.7427, "step": 24340 }, { "epoch": 1.8090672612411742, "grad_norm": 1.8208363355160766, "learning_rate": 7.18764394879327e-06, "loss": 0.5846, "step": 24341 }, { "epoch": 1.8091415830546267, "grad_norm": 2.8363148158077913, "learning_rate": 7.186873974179045e-06, "loss": 0.7252, "step": 24342 }, { "epoch": 1.8092159048680787, "grad_norm": 1.9740157877599964, "learning_rate": 7.186104017675579e-06, "loss": 0.5259, "step": 24343 }, { "epoch": 1.8092902266815312, "grad_norm": 1.979033657175734, "learning_rate": 7.185334079287828e-06, "loss": 0.7079, "step": 24344 }, { "epoch": 1.8093645484949832, "grad_norm": 1.9106140217102303, "learning_rate": 7.1845641590207505e-06, "loss": 0.4494, "step": 24345 }, { "epoch": 1.8094388703084356, "grad_norm": 1.9908897274054365, "learning_rate": 7.183794256879306e-06, "loss": 0.6523, "step": 24346 }, { "epoch": 1.8095131921218877, "grad_norm": 1.8034174269020333, "learning_rate": 7.1830243728684455e-06, "loss": 0.6594, "step": 24347 }, { "epoch": 1.8095875139353401, "grad_norm": 1.8072616809897604, "learning_rate": 7.182254506993127e-06, "loss": 0.4666, "step": 24348 }, { "epoch": 1.8096618357487921, "grad_norm": 1.6482766923493979, "learning_rate": 7.181484659258309e-06, "loss": 0.5025, "step": 24349 }, { "epoch": 1.8097361575622446, "grad_norm": 1.8895190833735487, "learning_rate": 7.180714829668948e-06, "loss": 0.5675, "step": 24350 }, { "epoch": 1.8098104793756966, "grad_norm": 2.402243053715778, "learning_rate": 7.179945018229997e-06, "loss": 0.6004, "step": 24351 }, { "epoch": 1.809884801189149, "grad_norm": 5.530566367592933, "learning_rate": 7.179175224946413e-06, "loss": 0.6424, "step": 24352 }, { "epoch": 1.8099591230026013, "grad_norm": 1.9252576925736116, "learning_rate": 7.178405449823156e-06, "loss": 0.5606, "step": 24353 }, { "epoch": 1.8100334448160535, "grad_norm": 1.555190415534911, "learning_rate": 7.177635692865175e-06, "loss": 0.6214, "step": 24354 }, { "epoch": 1.8101077666295058, "grad_norm": 1.9890790641622134, "learning_rate": 7.17686595407743e-06, "loss": 0.656, "step": 24355 }, { "epoch": 1.810182088442958, "grad_norm": 2.0591770369164357, "learning_rate": 7.176096233464875e-06, "loss": 0.5809, "step": 24356 }, { "epoch": 1.8102564102564103, "grad_norm": 2.1563715836278035, "learning_rate": 7.175326531032467e-06, "loss": 0.682, "step": 24357 }, { "epoch": 1.8103307320698625, "grad_norm": 1.7070773521489457, "learning_rate": 7.1745568467851574e-06, "loss": 0.462, "step": 24358 }, { "epoch": 1.8104050538833147, "grad_norm": 2.2172333654726644, "learning_rate": 7.1737871807279066e-06, "loss": 0.6768, "step": 24359 }, { "epoch": 1.810479375696767, "grad_norm": 1.7353347978131843, "learning_rate": 7.173017532865666e-06, "loss": 0.4928, "step": 24360 }, { "epoch": 1.8105536975102192, "grad_norm": 1.8519245150115673, "learning_rate": 7.172247903203393e-06, "loss": 0.5785, "step": 24361 }, { "epoch": 1.8106280193236715, "grad_norm": 2.1189252882574516, "learning_rate": 7.171478291746041e-06, "loss": 0.5267, "step": 24362 }, { "epoch": 1.8107023411371237, "grad_norm": 1.4535329316837449, "learning_rate": 7.170708698498565e-06, "loss": 0.4098, "step": 24363 }, { "epoch": 1.810776662950576, "grad_norm": 2.1335435785799373, "learning_rate": 7.1699391234659235e-06, "loss": 0.6505, "step": 24364 }, { "epoch": 1.8108509847640284, "grad_norm": 1.5756243650322352, "learning_rate": 7.169169566653061e-06, "loss": 0.4661, "step": 24365 }, { "epoch": 1.8109253065774804, "grad_norm": 2.6019701026542887, "learning_rate": 7.16840002806494e-06, "loss": 0.6224, "step": 24366 }, { "epoch": 1.8109996283909329, "grad_norm": 2.113844257044597, "learning_rate": 7.167630507706515e-06, "loss": 0.575, "step": 24367 }, { "epoch": 1.8110739502043849, "grad_norm": 1.8843158848076824, "learning_rate": 7.166861005582735e-06, "loss": 0.6428, "step": 24368 }, { "epoch": 1.8111482720178373, "grad_norm": 1.7962373726856602, "learning_rate": 7.166091521698556e-06, "loss": 0.6412, "step": 24369 }, { "epoch": 1.8112225938312894, "grad_norm": 2.272805278645155, "learning_rate": 7.165322056058934e-06, "loss": 0.5902, "step": 24370 }, { "epoch": 1.8112969156447418, "grad_norm": 1.8172130266284496, "learning_rate": 7.16455260866882e-06, "loss": 0.5524, "step": 24371 }, { "epoch": 1.8113712374581938, "grad_norm": 1.698568681175458, "learning_rate": 7.1637831795331725e-06, "loss": 0.5063, "step": 24372 }, { "epoch": 1.8114455592716463, "grad_norm": 1.8390481127484393, "learning_rate": 7.16301376865694e-06, "loss": 0.5471, "step": 24373 }, { "epoch": 1.8115198810850983, "grad_norm": 1.7530505253778734, "learning_rate": 7.162244376045078e-06, "loss": 0.5462, "step": 24374 }, { "epoch": 1.8115942028985508, "grad_norm": 2.069777902396786, "learning_rate": 7.161475001702543e-06, "loss": 0.59, "step": 24375 }, { "epoch": 1.811668524712003, "grad_norm": 2.4037589226505314, "learning_rate": 7.160705645634279e-06, "loss": 0.4715, "step": 24376 }, { "epoch": 1.8117428465254553, "grad_norm": 2.052234554789736, "learning_rate": 7.159936307845245e-06, "loss": 0.6038, "step": 24377 }, { "epoch": 1.8118171683389075, "grad_norm": 2.1525477252952374, "learning_rate": 7.159166988340398e-06, "loss": 0.6298, "step": 24378 }, { "epoch": 1.8118914901523597, "grad_norm": 1.4488703581262112, "learning_rate": 7.158397687124683e-06, "loss": 0.4537, "step": 24379 }, { "epoch": 1.811965811965812, "grad_norm": 1.4619880097612823, "learning_rate": 7.157628404203057e-06, "loss": 0.4175, "step": 24380 }, { "epoch": 1.8120401337792642, "grad_norm": 1.8961235943235573, "learning_rate": 7.156859139580471e-06, "loss": 0.6645, "step": 24381 }, { "epoch": 1.8121144555927164, "grad_norm": 2.1676056061017754, "learning_rate": 7.1560898932618785e-06, "loss": 0.6628, "step": 24382 }, { "epoch": 1.8121887774061687, "grad_norm": 2.294234427893307, "learning_rate": 7.155320665252234e-06, "loss": 0.5189, "step": 24383 }, { "epoch": 1.812263099219621, "grad_norm": 1.976658653046772, "learning_rate": 7.154551455556485e-06, "loss": 0.6316, "step": 24384 }, { "epoch": 1.8123374210330732, "grad_norm": 2.1178055125937028, "learning_rate": 7.153782264179587e-06, "loss": 0.6409, "step": 24385 }, { "epoch": 1.8124117428465256, "grad_norm": 2.213377938249955, "learning_rate": 7.153013091126495e-06, "loss": 0.6875, "step": 24386 }, { "epoch": 1.8124860646599776, "grad_norm": 1.6520938741842421, "learning_rate": 7.152243936402153e-06, "loss": 0.5913, "step": 24387 }, { "epoch": 1.81256038647343, "grad_norm": 1.4138336097020276, "learning_rate": 7.1514748000115155e-06, "loss": 0.3076, "step": 24388 }, { "epoch": 1.8126347082868821, "grad_norm": 1.9606292935214884, "learning_rate": 7.150705681959538e-06, "loss": 0.635, "step": 24389 }, { "epoch": 1.8127090301003346, "grad_norm": 2.016047697640383, "learning_rate": 7.149936582251168e-06, "loss": 0.5898, "step": 24390 }, { "epoch": 1.8127833519137866, "grad_norm": 2.008320600720772, "learning_rate": 7.1491675008913585e-06, "loss": 0.5866, "step": 24391 }, { "epoch": 1.812857673727239, "grad_norm": 1.7213013039952618, "learning_rate": 7.14839843788506e-06, "loss": 0.5719, "step": 24392 }, { "epoch": 1.812931995540691, "grad_norm": 2.0032186855336747, "learning_rate": 7.1476293932372245e-06, "loss": 0.6013, "step": 24393 }, { "epoch": 1.8130063173541435, "grad_norm": 2.4252634149060013, "learning_rate": 7.146860366952806e-06, "loss": 0.6814, "step": 24394 }, { "epoch": 1.8130806391675955, "grad_norm": 1.6473457972241665, "learning_rate": 7.146091359036748e-06, "loss": 0.4186, "step": 24395 }, { "epoch": 1.813154960981048, "grad_norm": 1.9056991656769178, "learning_rate": 7.1453223694940076e-06, "loss": 0.5161, "step": 24396 }, { "epoch": 1.8132292827945002, "grad_norm": 1.806827367163882, "learning_rate": 7.144553398329535e-06, "loss": 0.5197, "step": 24397 }, { "epoch": 1.8133036046079525, "grad_norm": 1.704042514887824, "learning_rate": 7.1437844455482805e-06, "loss": 0.4967, "step": 24398 }, { "epoch": 1.8133779264214047, "grad_norm": 1.7715753968541992, "learning_rate": 7.143015511155191e-06, "loss": 0.5137, "step": 24399 }, { "epoch": 1.813452248234857, "grad_norm": 2.010343385960958, "learning_rate": 7.142246595155222e-06, "loss": 0.6269, "step": 24400 }, { "epoch": 1.8135265700483092, "grad_norm": 1.8429088531559068, "learning_rate": 7.141477697553318e-06, "loss": 0.5292, "step": 24401 }, { "epoch": 1.8136008918617614, "grad_norm": 1.9239294661305517, "learning_rate": 7.140708818354434e-06, "loss": 0.5937, "step": 24402 }, { "epoch": 1.8136752136752137, "grad_norm": 1.9786112735401284, "learning_rate": 7.139939957563515e-06, "loss": 0.5463, "step": 24403 }, { "epoch": 1.813749535488666, "grad_norm": 2.2672568305835066, "learning_rate": 7.139171115185517e-06, "loss": 0.7038, "step": 24404 }, { "epoch": 1.8138238573021181, "grad_norm": 1.456398824582507, "learning_rate": 7.138402291225388e-06, "loss": 0.442, "step": 24405 }, { "epoch": 1.8138981791155704, "grad_norm": 1.5775373797597272, "learning_rate": 7.137633485688076e-06, "loss": 0.5028, "step": 24406 }, { "epoch": 1.8139725009290226, "grad_norm": 2.1043712820712437, "learning_rate": 7.136864698578529e-06, "loss": 0.446, "step": 24407 }, { "epoch": 1.8140468227424749, "grad_norm": 2.2669713259718334, "learning_rate": 7.136095929901699e-06, "loss": 0.691, "step": 24408 }, { "epoch": 1.8141211445559273, "grad_norm": 1.8447586720318843, "learning_rate": 7.135327179662539e-06, "loss": 0.5241, "step": 24409 }, { "epoch": 1.8141954663693793, "grad_norm": 1.839135451714501, "learning_rate": 7.13455844786599e-06, "loss": 0.4931, "step": 24410 }, { "epoch": 1.8142697881828318, "grad_norm": 1.7221473546486008, "learning_rate": 7.133789734517007e-06, "loss": 0.5377, "step": 24411 }, { "epoch": 1.8143441099962838, "grad_norm": 2.07445677712395, "learning_rate": 7.133021039620535e-06, "loss": 0.5541, "step": 24412 }, { "epoch": 1.8144184318097363, "grad_norm": 2.1873291500954384, "learning_rate": 7.1322523631815255e-06, "loss": 0.6219, "step": 24413 }, { "epoch": 1.8144927536231883, "grad_norm": 1.9218594170099905, "learning_rate": 7.131483705204926e-06, "loss": 0.5348, "step": 24414 }, { "epoch": 1.8145670754366408, "grad_norm": 1.967686882772772, "learning_rate": 7.130715065695686e-06, "loss": 0.5987, "step": 24415 }, { "epoch": 1.8146413972500928, "grad_norm": 2.057366384028972, "learning_rate": 7.129946444658754e-06, "loss": 0.6576, "step": 24416 }, { "epoch": 1.8147157190635452, "grad_norm": 2.1003544385097297, "learning_rate": 7.129177842099078e-06, "loss": 0.5684, "step": 24417 }, { "epoch": 1.8147900408769972, "grad_norm": 1.9104328781428597, "learning_rate": 7.128409258021603e-06, "loss": 0.4091, "step": 24418 }, { "epoch": 1.8148643626904497, "grad_norm": 1.981312904301064, "learning_rate": 7.127640692431284e-06, "loss": 0.5433, "step": 24419 }, { "epoch": 1.814938684503902, "grad_norm": 1.8370247673247133, "learning_rate": 7.126872145333066e-06, "loss": 0.4807, "step": 24420 }, { "epoch": 1.8150130063173542, "grad_norm": 2.4035793323773826, "learning_rate": 7.126103616731894e-06, "loss": 0.6028, "step": 24421 }, { "epoch": 1.8150873281308064, "grad_norm": 2.2179127287177423, "learning_rate": 7.1253351066327195e-06, "loss": 0.6232, "step": 24422 }, { "epoch": 1.8151616499442587, "grad_norm": 2.166647900639724, "learning_rate": 7.124566615040486e-06, "loss": 0.5322, "step": 24423 }, { "epoch": 1.815235971757711, "grad_norm": 2.110458682609622, "learning_rate": 7.123798141960143e-06, "loss": 0.658, "step": 24424 }, { "epoch": 1.8153102935711631, "grad_norm": 1.623935444659383, "learning_rate": 7.1230296873966385e-06, "loss": 0.483, "step": 24425 }, { "epoch": 1.8153846153846154, "grad_norm": 1.7470777834490434, "learning_rate": 7.122261251354921e-06, "loss": 0.3651, "step": 24426 }, { "epoch": 1.8154589371980676, "grad_norm": 1.804356144405798, "learning_rate": 7.121492833839936e-06, "loss": 0.6393, "step": 24427 }, { "epoch": 1.8155332590115199, "grad_norm": 1.761951765111466, "learning_rate": 7.12072443485663e-06, "loss": 0.5525, "step": 24428 }, { "epoch": 1.815607580824972, "grad_norm": 1.5576757530763636, "learning_rate": 7.119956054409951e-06, "loss": 0.4971, "step": 24429 }, { "epoch": 1.8156819026384243, "grad_norm": 1.8964805358518748, "learning_rate": 7.119187692504845e-06, "loss": 0.6147, "step": 24430 }, { "epoch": 1.8157562244518766, "grad_norm": 1.7781663490289064, "learning_rate": 7.1184193491462615e-06, "loss": 0.5739, "step": 24431 }, { "epoch": 1.815830546265329, "grad_norm": 2.266928859998898, "learning_rate": 7.117651024339145e-06, "loss": 0.5873, "step": 24432 }, { "epoch": 1.815904868078781, "grad_norm": 2.1628390430264224, "learning_rate": 7.11688271808844e-06, "loss": 0.6039, "step": 24433 }, { "epoch": 1.8159791898922335, "grad_norm": 2.5840840380877794, "learning_rate": 7.116114430399094e-06, "loss": 0.6314, "step": 24434 }, { "epoch": 1.8160535117056855, "grad_norm": 2.328056936413929, "learning_rate": 7.1153461612760535e-06, "loss": 0.6834, "step": 24435 }, { "epoch": 1.816127833519138, "grad_norm": 2.1987004446963705, "learning_rate": 7.114577910724265e-06, "loss": 0.6621, "step": 24436 }, { "epoch": 1.81620215533259, "grad_norm": 2.0416117526935156, "learning_rate": 7.113809678748673e-06, "loss": 0.5733, "step": 24437 }, { "epoch": 1.8162764771460425, "grad_norm": 2.1368559002854366, "learning_rate": 7.113041465354228e-06, "loss": 0.5098, "step": 24438 }, { "epoch": 1.8163507989594945, "grad_norm": 1.667937547245157, "learning_rate": 7.112273270545869e-06, "loss": 0.4821, "step": 24439 }, { "epoch": 1.816425120772947, "grad_norm": 2.081824991500801, "learning_rate": 7.111505094328546e-06, "loss": 0.6241, "step": 24440 }, { "epoch": 1.816499442586399, "grad_norm": 1.958464517441028, "learning_rate": 7.110736936707202e-06, "loss": 0.5171, "step": 24441 }, { "epoch": 1.8165737643998514, "grad_norm": 2.821248597181395, "learning_rate": 7.109968797686787e-06, "loss": 0.4335, "step": 24442 }, { "epoch": 1.8166480862133036, "grad_norm": 1.6184073526660845, "learning_rate": 7.109200677272242e-06, "loss": 0.4589, "step": 24443 }, { "epoch": 1.8167224080267559, "grad_norm": 1.7758218593403272, "learning_rate": 7.108432575468514e-06, "loss": 0.4564, "step": 24444 }, { "epoch": 1.8167967298402081, "grad_norm": 2.0312022255007007, "learning_rate": 7.107664492280544e-06, "loss": 0.5556, "step": 24445 }, { "epoch": 1.8168710516536604, "grad_norm": 1.736107661543977, "learning_rate": 7.10689642771328e-06, "loss": 0.5067, "step": 24446 }, { "epoch": 1.8169453734671126, "grad_norm": 1.6668300372920215, "learning_rate": 7.106128381771667e-06, "loss": 0.4535, "step": 24447 }, { "epoch": 1.8170196952805648, "grad_norm": 1.7292665616966418, "learning_rate": 7.10536035446065e-06, "loss": 0.4892, "step": 24448 }, { "epoch": 1.817094017094017, "grad_norm": 1.9641942653078897, "learning_rate": 7.104592345785174e-06, "loss": 0.56, "step": 24449 }, { "epoch": 1.8171683389074693, "grad_norm": 1.9710648228061085, "learning_rate": 7.10382435575018e-06, "loss": 0.537, "step": 24450 }, { "epoch": 1.8172426607209216, "grad_norm": 2.045115066934659, "learning_rate": 7.103056384360616e-06, "loss": 0.6194, "step": 24451 }, { "epoch": 1.8173169825343738, "grad_norm": 2.2160569659365352, "learning_rate": 7.102288431621422e-06, "loss": 0.7332, "step": 24452 }, { "epoch": 1.8173913043478263, "grad_norm": 2.0356239815757733, "learning_rate": 7.10152049753755e-06, "loss": 0.6201, "step": 24453 }, { "epoch": 1.8174656261612783, "grad_norm": 1.8457523115876662, "learning_rate": 7.100752582113937e-06, "loss": 0.5469, "step": 24454 }, { "epoch": 1.8175399479747307, "grad_norm": 1.6442478103138578, "learning_rate": 7.099984685355528e-06, "loss": 0.4641, "step": 24455 }, { "epoch": 1.8176142697881827, "grad_norm": 2.1192514232295254, "learning_rate": 7.099216807267266e-06, "loss": 0.551, "step": 24456 }, { "epoch": 1.8176885916016352, "grad_norm": 1.8124748843625316, "learning_rate": 7.098448947854095e-06, "loss": 0.5166, "step": 24457 }, { "epoch": 1.8177629134150872, "grad_norm": 1.795951861953078, "learning_rate": 7.09768110712096e-06, "loss": 0.4931, "step": 24458 }, { "epoch": 1.8178372352285397, "grad_norm": 1.928722558219167, "learning_rate": 7.096913285072802e-06, "loss": 0.5921, "step": 24459 }, { "epoch": 1.8179115570419917, "grad_norm": 2.0990852947428578, "learning_rate": 7.0961454817145695e-06, "loss": 0.4848, "step": 24460 }, { "epoch": 1.8179858788554442, "grad_norm": 1.737862587082629, "learning_rate": 7.095377697051198e-06, "loss": 0.5323, "step": 24461 }, { "epoch": 1.8180602006688962, "grad_norm": 4.142086950938418, "learning_rate": 7.094609931087635e-06, "loss": 0.6475, "step": 24462 }, { "epoch": 1.8181345224823486, "grad_norm": 3.8522525169629027, "learning_rate": 7.093842183828822e-06, "loss": 0.5819, "step": 24463 }, { "epoch": 1.8182088442958007, "grad_norm": 1.9946527018240734, "learning_rate": 7.093074455279705e-06, "loss": 0.6141, "step": 24464 }, { "epoch": 1.8182831661092531, "grad_norm": 2.0017134867069384, "learning_rate": 7.092306745445221e-06, "loss": 0.6313, "step": 24465 }, { "epoch": 1.8183574879227054, "grad_norm": 2.049900396672021, "learning_rate": 7.091539054330319e-06, "loss": 0.6422, "step": 24466 }, { "epoch": 1.8184318097361576, "grad_norm": 2.011235763304603, "learning_rate": 7.090771381939936e-06, "loss": 0.6922, "step": 24467 }, { "epoch": 1.8185061315496098, "grad_norm": 1.7057992324803197, "learning_rate": 7.090003728279013e-06, "loss": 0.5988, "step": 24468 }, { "epoch": 1.818580453363062, "grad_norm": 1.6480517595936794, "learning_rate": 7.089236093352496e-06, "loss": 0.4356, "step": 24469 }, { "epoch": 1.8186547751765143, "grad_norm": 1.767193272928849, "learning_rate": 7.088468477165326e-06, "loss": 0.4942, "step": 24470 }, { "epoch": 1.8187290969899665, "grad_norm": 1.6058685811305302, "learning_rate": 7.087700879722448e-06, "loss": 0.5556, "step": 24471 }, { "epoch": 1.8188034188034188, "grad_norm": 1.2687803652047884, "learning_rate": 7.086933301028798e-06, "loss": 0.4786, "step": 24472 }, { "epoch": 1.818877740616871, "grad_norm": 2.150001140128765, "learning_rate": 7.086165741089321e-06, "loss": 0.6037, "step": 24473 }, { "epoch": 1.8189520624303233, "grad_norm": 1.924011707227439, "learning_rate": 7.085398199908956e-06, "loss": 0.5178, "step": 24474 }, { "epoch": 1.8190263842437755, "grad_norm": 1.8775475438522125, "learning_rate": 7.084630677492649e-06, "loss": 0.5417, "step": 24475 }, { "epoch": 1.819100706057228, "grad_norm": 1.7403471646277655, "learning_rate": 7.083863173845336e-06, "loss": 0.5833, "step": 24476 }, { "epoch": 1.81917502787068, "grad_norm": 1.5227646842060607, "learning_rate": 7.083095688971966e-06, "loss": 0.4182, "step": 24477 }, { "epoch": 1.8192493496841324, "grad_norm": 1.598533782959775, "learning_rate": 7.082328222877469e-06, "loss": 0.5052, "step": 24478 }, { "epoch": 1.8193236714975844, "grad_norm": 2.014460119871326, "learning_rate": 7.081560775566791e-06, "loss": 0.4825, "step": 24479 }, { "epoch": 1.819397993311037, "grad_norm": 1.726274642753264, "learning_rate": 7.080793347044876e-06, "loss": 0.4914, "step": 24480 }, { "epoch": 1.819472315124489, "grad_norm": 2.0398422044953897, "learning_rate": 7.08002593731666e-06, "loss": 0.5221, "step": 24481 }, { "epoch": 1.8195466369379414, "grad_norm": 1.4604663995617448, "learning_rate": 7.079258546387088e-06, "loss": 0.4621, "step": 24482 }, { "epoch": 1.8196209587513934, "grad_norm": 2.00160859920745, "learning_rate": 7.078491174261095e-06, "loss": 0.5734, "step": 24483 }, { "epoch": 1.8196952805648459, "grad_norm": 2.826295209485846, "learning_rate": 7.077723820943625e-06, "loss": 0.5943, "step": 24484 }, { "epoch": 1.8197696023782979, "grad_norm": 2.1823640419589867, "learning_rate": 7.076956486439619e-06, "loss": 0.6788, "step": 24485 }, { "epoch": 1.8198439241917503, "grad_norm": 1.8689415029823062, "learning_rate": 7.076189170754016e-06, "loss": 0.6418, "step": 24486 }, { "epoch": 1.8199182460052026, "grad_norm": 1.6379155523937237, "learning_rate": 7.075421873891752e-06, "loss": 0.4677, "step": 24487 }, { "epoch": 1.8199925678186548, "grad_norm": 2.2290163819803395, "learning_rate": 7.074654595857776e-06, "loss": 0.7625, "step": 24488 }, { "epoch": 1.820066889632107, "grad_norm": 2.1646837128204406, "learning_rate": 7.073887336657017e-06, "loss": 0.6817, "step": 24489 }, { "epoch": 1.8201412114455593, "grad_norm": 1.756086563698846, "learning_rate": 7.073120096294421e-06, "loss": 0.4954, "step": 24490 }, { "epoch": 1.8202155332590115, "grad_norm": 2.4700505827084567, "learning_rate": 7.072352874774925e-06, "loss": 0.7443, "step": 24491 }, { "epoch": 1.8202898550724638, "grad_norm": 2.344405594096612, "learning_rate": 7.071585672103472e-06, "loss": 0.5917, "step": 24492 }, { "epoch": 1.820364176885916, "grad_norm": 1.8226693019211677, "learning_rate": 7.070818488284995e-06, "loss": 0.6157, "step": 24493 }, { "epoch": 1.8204384986993682, "grad_norm": 1.6531124119853715, "learning_rate": 7.070051323324437e-06, "loss": 0.6215, "step": 24494 }, { "epoch": 1.8205128205128205, "grad_norm": 2.117093692160657, "learning_rate": 7.069284177226736e-06, "loss": 0.5795, "step": 24495 }, { "epoch": 1.8205871423262727, "grad_norm": 1.7878340578516316, "learning_rate": 7.068517049996833e-06, "loss": 0.4334, "step": 24496 }, { "epoch": 1.820661464139725, "grad_norm": 1.8681426520856195, "learning_rate": 7.0677499416396665e-06, "loss": 0.5218, "step": 24497 }, { "epoch": 1.8207357859531772, "grad_norm": 1.9532858750413877, "learning_rate": 7.066982852160172e-06, "loss": 0.5177, "step": 24498 }, { "epoch": 1.8208101077666297, "grad_norm": 2.115750076877702, "learning_rate": 7.066215781563294e-06, "loss": 0.592, "step": 24499 }, { "epoch": 1.8208844295800817, "grad_norm": 2.3871481776741574, "learning_rate": 7.065448729853961e-06, "loss": 0.5343, "step": 24500 }, { "epoch": 1.8209587513935341, "grad_norm": 2.8910933721920125, "learning_rate": 7.064681697037118e-06, "loss": 0.5373, "step": 24501 }, { "epoch": 1.8210330732069862, "grad_norm": 2.0522769564140204, "learning_rate": 7.063914683117702e-06, "loss": 0.5464, "step": 24502 }, { "epoch": 1.8211073950204386, "grad_norm": 1.8773727995787397, "learning_rate": 7.063147688100653e-06, "loss": 0.6243, "step": 24503 }, { "epoch": 1.8211817168338906, "grad_norm": 1.7229119388211125, "learning_rate": 7.0623807119909036e-06, "loss": 0.5081, "step": 24504 }, { "epoch": 1.821256038647343, "grad_norm": 1.8256610098997905, "learning_rate": 7.061613754793396e-06, "loss": 0.6682, "step": 24505 }, { "epoch": 1.821330360460795, "grad_norm": 1.847205109888529, "learning_rate": 7.060846816513066e-06, "loss": 0.5056, "step": 24506 }, { "epoch": 1.8214046822742476, "grad_norm": 1.5462748981493246, "learning_rate": 7.060079897154853e-06, "loss": 0.5437, "step": 24507 }, { "epoch": 1.8214790040876996, "grad_norm": 1.94313720177255, "learning_rate": 7.059312996723693e-06, "loss": 0.5377, "step": 24508 }, { "epoch": 1.821553325901152, "grad_norm": 2.479416976979699, "learning_rate": 7.058546115224523e-06, "loss": 0.7063, "step": 24509 }, { "epoch": 1.8216276477146043, "grad_norm": 1.7095217950803794, "learning_rate": 7.057779252662279e-06, "loss": 0.459, "step": 24510 }, { "epoch": 1.8217019695280565, "grad_norm": 1.9951577726980352, "learning_rate": 7.057012409041904e-06, "loss": 0.6152, "step": 24511 }, { "epoch": 1.8217762913415088, "grad_norm": 2.1359146431451603, "learning_rate": 7.056245584368326e-06, "loss": 0.594, "step": 24512 }, { "epoch": 1.821850613154961, "grad_norm": 2.096968771976655, "learning_rate": 7.055478778646487e-06, "loss": 0.5394, "step": 24513 }, { "epoch": 1.8219249349684132, "grad_norm": 1.8397442803322481, "learning_rate": 7.0547119918813255e-06, "loss": 0.5369, "step": 24514 }, { "epoch": 1.8219992567818655, "grad_norm": 1.923181721513127, "learning_rate": 7.053945224077773e-06, "loss": 0.565, "step": 24515 }, { "epoch": 1.8220735785953177, "grad_norm": 1.7527987346926217, "learning_rate": 7.053178475240767e-06, "loss": 0.6009, "step": 24516 }, { "epoch": 1.82214790040877, "grad_norm": 1.9334775355828377, "learning_rate": 7.052411745375247e-06, "loss": 0.4783, "step": 24517 }, { "epoch": 1.8222222222222222, "grad_norm": 1.6014839303416368, "learning_rate": 7.051645034486145e-06, "loss": 0.4706, "step": 24518 }, { "epoch": 1.8222965440356744, "grad_norm": 2.1631234410345574, "learning_rate": 7.050878342578403e-06, "loss": 0.6435, "step": 24519 }, { "epoch": 1.8223708658491267, "grad_norm": 2.1735637386412003, "learning_rate": 7.0501116696569515e-06, "loss": 0.5816, "step": 24520 }, { "epoch": 1.822445187662579, "grad_norm": 2.025090306502622, "learning_rate": 7.049345015726726e-06, "loss": 0.7038, "step": 24521 }, { "epoch": 1.8225195094760314, "grad_norm": 1.685574621772759, "learning_rate": 7.04857838079267e-06, "loss": 0.4497, "step": 24522 }, { "epoch": 1.8225938312894834, "grad_norm": 2.1297099702873274, "learning_rate": 7.047811764859709e-06, "loss": 0.5581, "step": 24523 }, { "epoch": 1.8226681531029358, "grad_norm": 2.131932033309923, "learning_rate": 7.047045167932782e-06, "loss": 0.5844, "step": 24524 }, { "epoch": 1.8227424749163879, "grad_norm": 2.225882418358002, "learning_rate": 7.046278590016827e-06, "loss": 0.6903, "step": 24525 }, { "epoch": 1.8228167967298403, "grad_norm": 1.8497082942548733, "learning_rate": 7.045512031116776e-06, "loss": 0.5702, "step": 24526 }, { "epoch": 1.8228911185432923, "grad_norm": 2.3114541853572503, "learning_rate": 7.044745491237564e-06, "loss": 0.6223, "step": 24527 }, { "epoch": 1.8229654403567448, "grad_norm": 2.4442631766243723, "learning_rate": 7.043978970384129e-06, "loss": 0.719, "step": 24528 }, { "epoch": 1.8230397621701968, "grad_norm": 1.8060581370332844, "learning_rate": 7.043212468561402e-06, "loss": 0.6599, "step": 24529 }, { "epoch": 1.8231140839836493, "grad_norm": 2.4216450446099134, "learning_rate": 7.042445985774324e-06, "loss": 0.8137, "step": 24530 }, { "epoch": 1.8231884057971013, "grad_norm": 1.7000193122964744, "learning_rate": 7.0416795220278215e-06, "loss": 0.518, "step": 24531 }, { "epoch": 1.8232627276105537, "grad_norm": 1.883301605240525, "learning_rate": 7.040913077326832e-06, "loss": 0.4928, "step": 24532 }, { "epoch": 1.823337049424006, "grad_norm": 2.2317169895021745, "learning_rate": 7.040146651676296e-06, "loss": 0.7612, "step": 24533 }, { "epoch": 1.8234113712374582, "grad_norm": 1.9911094012777206, "learning_rate": 7.039380245081139e-06, "loss": 0.7046, "step": 24534 }, { "epoch": 1.8234856930509105, "grad_norm": 1.8148133663188593, "learning_rate": 7.038613857546297e-06, "loss": 0.528, "step": 24535 }, { "epoch": 1.8235600148643627, "grad_norm": 1.8894135937213317, "learning_rate": 7.037847489076708e-06, "loss": 0.4581, "step": 24536 }, { "epoch": 1.823634336677815, "grad_norm": 2.2655198039808755, "learning_rate": 7.0370811396773005e-06, "loss": 0.5659, "step": 24537 }, { "epoch": 1.8237086584912672, "grad_norm": 1.938318840870517, "learning_rate": 7.036314809353011e-06, "loss": 0.4593, "step": 24538 }, { "epoch": 1.8237829803047194, "grad_norm": 2.2310372088823924, "learning_rate": 7.035548498108773e-06, "loss": 0.6447, "step": 24539 }, { "epoch": 1.8238573021181717, "grad_norm": 2.034203835665472, "learning_rate": 7.034782205949521e-06, "loss": 0.618, "step": 24540 }, { "epoch": 1.823931623931624, "grad_norm": 1.6960802305021736, "learning_rate": 7.034015932880188e-06, "loss": 0.5972, "step": 24541 }, { "epoch": 1.8240059457450761, "grad_norm": 2.044028593361457, "learning_rate": 7.033249678905705e-06, "loss": 0.5437, "step": 24542 }, { "epoch": 1.8240802675585286, "grad_norm": 1.7505548724360278, "learning_rate": 7.032483444031006e-06, "loss": 0.5379, "step": 24543 }, { "epoch": 1.8241545893719806, "grad_norm": 2.0849282274666145, "learning_rate": 7.031717228261026e-06, "loss": 0.7076, "step": 24544 }, { "epoch": 1.824228911185433, "grad_norm": 4.268846780897333, "learning_rate": 7.0309510316007006e-06, "loss": 0.4817, "step": 24545 }, { "epoch": 1.824303232998885, "grad_norm": 2.2186478902229783, "learning_rate": 7.030184854054953e-06, "loss": 0.6239, "step": 24546 }, { "epoch": 1.8243775548123375, "grad_norm": 1.7594407568979291, "learning_rate": 7.029418695628724e-06, "loss": 0.4043, "step": 24547 }, { "epoch": 1.8244518766257896, "grad_norm": 2.7308674735004157, "learning_rate": 7.028652556326942e-06, "loss": 0.5815, "step": 24548 }, { "epoch": 1.824526198439242, "grad_norm": 1.9995646716740914, "learning_rate": 7.027886436154538e-06, "loss": 0.6393, "step": 24549 }, { "epoch": 1.824600520252694, "grad_norm": 2.051308956416881, "learning_rate": 7.02712033511645e-06, "loss": 0.6423, "step": 24550 }, { "epoch": 1.8246748420661465, "grad_norm": 2.3016016713431307, "learning_rate": 7.0263542532176055e-06, "loss": 0.5915, "step": 24551 }, { "epoch": 1.8247491638795985, "grad_norm": 2.517513449238319, "learning_rate": 7.02558819046294e-06, "loss": 0.7335, "step": 24552 }, { "epoch": 1.824823485693051, "grad_norm": 1.8198555281757858, "learning_rate": 7.024822146857382e-06, "loss": 0.5861, "step": 24553 }, { "epoch": 1.8248978075065032, "grad_norm": 1.682384738412686, "learning_rate": 7.024056122405864e-06, "loss": 0.4919, "step": 24554 }, { "epoch": 1.8249721293199554, "grad_norm": 2.504071027827875, "learning_rate": 7.023290117113318e-06, "loss": 0.681, "step": 24555 }, { "epoch": 1.8250464511334077, "grad_norm": 2.221303670089552, "learning_rate": 7.02252413098468e-06, "loss": 0.633, "step": 24556 }, { "epoch": 1.82512077294686, "grad_norm": 2.1834210484701555, "learning_rate": 7.0217581640248745e-06, "loss": 0.5033, "step": 24557 }, { "epoch": 1.8251950947603122, "grad_norm": 2.013923144651836, "learning_rate": 7.020992216238835e-06, "loss": 0.5759, "step": 24558 }, { "epoch": 1.8252694165737644, "grad_norm": 1.9500823491615988, "learning_rate": 7.020226287631493e-06, "loss": 0.5537, "step": 24559 }, { "epoch": 1.8253437383872166, "grad_norm": 2.296323650221121, "learning_rate": 7.01946037820778e-06, "loss": 0.7254, "step": 24560 }, { "epoch": 1.8254180602006689, "grad_norm": 2.351977507309938, "learning_rate": 7.0186944879726245e-06, "loss": 0.6094, "step": 24561 }, { "epoch": 1.8254923820141211, "grad_norm": 1.9107981966745362, "learning_rate": 7.01792861693096e-06, "loss": 0.5406, "step": 24562 }, { "epoch": 1.8255667038275734, "grad_norm": 1.9926562580285323, "learning_rate": 7.017162765087719e-06, "loss": 0.4406, "step": 24563 }, { "epoch": 1.8256410256410256, "grad_norm": 2.4082858802699327, "learning_rate": 7.016396932447827e-06, "loss": 0.6908, "step": 24564 }, { "epoch": 1.8257153474544778, "grad_norm": 2.0526952015507938, "learning_rate": 7.015631119016217e-06, "loss": 0.5521, "step": 24565 }, { "epoch": 1.8257896692679303, "grad_norm": 2.124587529751195, "learning_rate": 7.014865324797818e-06, "loss": 0.5948, "step": 24566 }, { "epoch": 1.8258639910813823, "grad_norm": 2.376753381645633, "learning_rate": 7.0140995497975665e-06, "loss": 0.4876, "step": 24567 }, { "epoch": 1.8259383128948348, "grad_norm": 1.9892639220393487, "learning_rate": 7.013333794020382e-06, "loss": 0.6094, "step": 24568 }, { "epoch": 1.8260126347082868, "grad_norm": 2.0331940289563937, "learning_rate": 7.012568057471202e-06, "loss": 0.6372, "step": 24569 }, { "epoch": 1.8260869565217392, "grad_norm": 1.6986960038527927, "learning_rate": 7.011802340154952e-06, "loss": 0.571, "step": 24570 }, { "epoch": 1.8261612783351913, "grad_norm": 1.9643490289163124, "learning_rate": 7.011036642076563e-06, "loss": 0.5846, "step": 24571 }, { "epoch": 1.8262356001486437, "grad_norm": 1.927791670985348, "learning_rate": 7.010270963240965e-06, "loss": 0.3748, "step": 24572 }, { "epoch": 1.8263099219620957, "grad_norm": 1.8349446143863553, "learning_rate": 7.009505303653088e-06, "loss": 0.3922, "step": 24573 }, { "epoch": 1.8263842437755482, "grad_norm": 1.6992928128511664, "learning_rate": 7.0087396633178626e-06, "loss": 0.4345, "step": 24574 }, { "epoch": 1.8264585655890002, "grad_norm": 1.8120902301475985, "learning_rate": 7.007974042240213e-06, "loss": 0.509, "step": 24575 }, { "epoch": 1.8265328874024527, "grad_norm": 2.4265825699014525, "learning_rate": 7.007208440425073e-06, "loss": 0.7229, "step": 24576 }, { "epoch": 1.826607209215905, "grad_norm": 1.9848619715412423, "learning_rate": 7.006442857877368e-06, "loss": 0.4976, "step": 24577 }, { "epoch": 1.8266815310293572, "grad_norm": 2.298744481704202, "learning_rate": 7.005677294602031e-06, "loss": 0.6455, "step": 24578 }, { "epoch": 1.8267558528428094, "grad_norm": 1.7106233516859666, "learning_rate": 7.004911750603989e-06, "loss": 0.3702, "step": 24579 }, { "epoch": 1.8268301746562616, "grad_norm": 1.759199445211977, "learning_rate": 7.00414622588817e-06, "loss": 0.5338, "step": 24580 }, { "epoch": 1.8269044964697139, "grad_norm": 2.1087939158293887, "learning_rate": 7.003380720459499e-06, "loss": 0.6094, "step": 24581 }, { "epoch": 1.826978818283166, "grad_norm": 2.1341056943073204, "learning_rate": 7.002615234322908e-06, "loss": 0.5437, "step": 24582 }, { "epoch": 1.8270531400966183, "grad_norm": 1.6371459661754277, "learning_rate": 7.001849767483324e-06, "loss": 0.3471, "step": 24583 }, { "epoch": 1.8271274619100706, "grad_norm": 2.075618906608877, "learning_rate": 7.001084319945677e-06, "loss": 0.704, "step": 24584 }, { "epoch": 1.8272017837235228, "grad_norm": 2.247921145522681, "learning_rate": 7.000318891714895e-06, "loss": 0.6536, "step": 24585 }, { "epoch": 1.827276105536975, "grad_norm": 2.1125961254722205, "learning_rate": 6.999553482795902e-06, "loss": 0.6221, "step": 24586 }, { "epoch": 1.8273504273504273, "grad_norm": 2.142602050695389, "learning_rate": 6.998788093193627e-06, "loss": 0.5147, "step": 24587 }, { "epoch": 1.8274247491638795, "grad_norm": 2.366502084643571, "learning_rate": 6.9980227229130005e-06, "loss": 0.6486, "step": 24588 }, { "epoch": 1.827499070977332, "grad_norm": 2.080372725737501, "learning_rate": 6.997257371958948e-06, "loss": 0.6152, "step": 24589 }, { "epoch": 1.827573392790784, "grad_norm": 1.6917606747997174, "learning_rate": 6.996492040336399e-06, "loss": 0.561, "step": 24590 }, { "epoch": 1.8276477146042365, "grad_norm": 2.298960584552342, "learning_rate": 6.9957267280502775e-06, "loss": 0.4801, "step": 24591 }, { "epoch": 1.8277220364176885, "grad_norm": 2.0517797692877395, "learning_rate": 6.994961435105508e-06, "loss": 0.6463, "step": 24592 }, { "epoch": 1.827796358231141, "grad_norm": 1.5686070290454426, "learning_rate": 6.994196161507023e-06, "loss": 0.4494, "step": 24593 }, { "epoch": 1.827870680044593, "grad_norm": 1.680037824272457, "learning_rate": 6.993430907259746e-06, "loss": 0.4132, "step": 24594 }, { "epoch": 1.8279450018580454, "grad_norm": 2.3057538735611374, "learning_rate": 6.992665672368605e-06, "loss": 0.643, "step": 24595 }, { "epoch": 1.8280193236714974, "grad_norm": 2.127212799743478, "learning_rate": 6.991900456838528e-06, "loss": 0.5857, "step": 24596 }, { "epoch": 1.82809364548495, "grad_norm": 2.0142963961300118, "learning_rate": 6.9911352606744376e-06, "loss": 0.5964, "step": 24597 }, { "epoch": 1.828167967298402, "grad_norm": 2.1139076273411868, "learning_rate": 6.990370083881264e-06, "loss": 0.635, "step": 24598 }, { "epoch": 1.8282422891118544, "grad_norm": 1.8138469145674374, "learning_rate": 6.9896049264639295e-06, "loss": 0.6818, "step": 24599 }, { "epoch": 1.8283166109253066, "grad_norm": 1.6389375593654725, "learning_rate": 6.988839788427366e-06, "loss": 0.6139, "step": 24600 }, { "epoch": 1.8283909327387589, "grad_norm": 2.5111357868496142, "learning_rate": 6.988074669776494e-06, "loss": 0.4746, "step": 24601 }, { "epoch": 1.828465254552211, "grad_norm": 1.9071621662304077, "learning_rate": 6.987309570516243e-06, "loss": 0.703, "step": 24602 }, { "epoch": 1.8285395763656633, "grad_norm": 1.6849759079973328, "learning_rate": 6.986544490651532e-06, "loss": 0.4466, "step": 24603 }, { "epoch": 1.8286138981791156, "grad_norm": 2.3911426925214565, "learning_rate": 6.985779430187294e-06, "loss": 0.6468, "step": 24604 }, { "epoch": 1.8286882199925678, "grad_norm": 1.8863739445995593, "learning_rate": 6.9850143891284505e-06, "loss": 0.6009, "step": 24605 }, { "epoch": 1.82876254180602, "grad_norm": 2.1527027256661815, "learning_rate": 6.984249367479927e-06, "loss": 0.5651, "step": 24606 }, { "epoch": 1.8288368636194723, "grad_norm": 2.557789534637582, "learning_rate": 6.9834843652466535e-06, "loss": 0.6489, "step": 24607 }, { "epoch": 1.8289111854329245, "grad_norm": 2.0851190932422505, "learning_rate": 6.982719382433547e-06, "loss": 0.6301, "step": 24608 }, { "epoch": 1.8289855072463768, "grad_norm": 2.36232904736079, "learning_rate": 6.981954419045538e-06, "loss": 0.753, "step": 24609 }, { "epoch": 1.8290598290598292, "grad_norm": 1.903971028538972, "learning_rate": 6.9811894750875495e-06, "loss": 0.5718, "step": 24610 }, { "epoch": 1.8291341508732812, "grad_norm": 2.1156741758821314, "learning_rate": 6.980424550564509e-06, "loss": 0.5752, "step": 24611 }, { "epoch": 1.8292084726867337, "grad_norm": 1.7237745525228576, "learning_rate": 6.979659645481338e-06, "loss": 0.5205, "step": 24612 }, { "epoch": 1.8292827945001857, "grad_norm": 1.8749707454618372, "learning_rate": 6.978894759842962e-06, "loss": 0.5852, "step": 24613 }, { "epoch": 1.8293571163136382, "grad_norm": 2.233875576765695, "learning_rate": 6.978129893654302e-06, "loss": 0.6624, "step": 24614 }, { "epoch": 1.8294314381270902, "grad_norm": 2.037976386350268, "learning_rate": 6.977365046920286e-06, "loss": 0.4919, "step": 24615 }, { "epoch": 1.8295057599405427, "grad_norm": 2.521959533832402, "learning_rate": 6.976600219645835e-06, "loss": 0.8868, "step": 24616 }, { "epoch": 1.8295800817539947, "grad_norm": 1.732368757910425, "learning_rate": 6.975835411835878e-06, "loss": 0.5836, "step": 24617 }, { "epoch": 1.8296544035674471, "grad_norm": 2.565243083201964, "learning_rate": 6.975070623495335e-06, "loss": 0.5671, "step": 24618 }, { "epoch": 1.8297287253808991, "grad_norm": 2.108798616335525, "learning_rate": 6.974305854629128e-06, "loss": 0.6782, "step": 24619 }, { "epoch": 1.8298030471943516, "grad_norm": 1.8821200343500668, "learning_rate": 6.973541105242185e-06, "loss": 0.5783, "step": 24620 }, { "epoch": 1.8298773690078038, "grad_norm": 1.7264156985265395, "learning_rate": 6.972776375339426e-06, "loss": 0.55, "step": 24621 }, { "epoch": 1.829951690821256, "grad_norm": 1.8962317932055923, "learning_rate": 6.9720116649257785e-06, "loss": 0.5468, "step": 24622 }, { "epoch": 1.8300260126347083, "grad_norm": 1.888718993532849, "learning_rate": 6.97124697400616e-06, "loss": 0.6086, "step": 24623 }, { "epoch": 1.8301003344481606, "grad_norm": 2.013580319536089, "learning_rate": 6.970482302585501e-06, "loss": 0.5619, "step": 24624 }, { "epoch": 1.8301746562616128, "grad_norm": 1.9166548625627249, "learning_rate": 6.969717650668716e-06, "loss": 0.599, "step": 24625 }, { "epoch": 1.830248978075065, "grad_norm": 2.1172812861704986, "learning_rate": 6.968953018260731e-06, "loss": 0.5651, "step": 24626 }, { "epoch": 1.8303232998885173, "grad_norm": 1.7700565231164793, "learning_rate": 6.9681884053664695e-06, "loss": 0.5233, "step": 24627 }, { "epoch": 1.8303976217019695, "grad_norm": 1.6096075687822557, "learning_rate": 6.967423811990855e-06, "loss": 0.4274, "step": 24628 }, { "epoch": 1.8304719435154218, "grad_norm": 2.266533607106538, "learning_rate": 6.966659238138808e-06, "loss": 0.6422, "step": 24629 }, { "epoch": 1.830546265328874, "grad_norm": 2.3936820988311434, "learning_rate": 6.96589468381525e-06, "loss": 0.6934, "step": 24630 }, { "epoch": 1.8306205871423262, "grad_norm": 2.0709719233320723, "learning_rate": 6.965130149025106e-06, "loss": 0.6381, "step": 24631 }, { "epoch": 1.8306949089557785, "grad_norm": 2.4808912193639596, "learning_rate": 6.964365633773294e-06, "loss": 0.7173, "step": 24632 }, { "epoch": 1.830769230769231, "grad_norm": 1.8577341653312076, "learning_rate": 6.963601138064744e-06, "loss": 0.5467, "step": 24633 }, { "epoch": 1.830843552582683, "grad_norm": 2.1498772346688932, "learning_rate": 6.962836661904369e-06, "loss": 0.6081, "step": 24634 }, { "epoch": 1.8309178743961354, "grad_norm": 2.07465972572171, "learning_rate": 6.9620722052970965e-06, "loss": 0.5642, "step": 24635 }, { "epoch": 1.8309921962095874, "grad_norm": 2.0304243701650595, "learning_rate": 6.961307768247843e-06, "loss": 0.6037, "step": 24636 }, { "epoch": 1.8310665180230399, "grad_norm": 1.7153660341076657, "learning_rate": 6.960543350761533e-06, "loss": 0.4621, "step": 24637 }, { "epoch": 1.831140839836492, "grad_norm": 1.6632474786826197, "learning_rate": 6.9597789528430855e-06, "loss": 0.4648, "step": 24638 }, { "epoch": 1.8312151616499444, "grad_norm": 2.0428835548580677, "learning_rate": 6.9590145744974255e-06, "loss": 0.5036, "step": 24639 }, { "epoch": 1.8312894834633964, "grad_norm": 2.2257959664548688, "learning_rate": 6.958250215729471e-06, "loss": 0.5901, "step": 24640 }, { "epoch": 1.8313638052768488, "grad_norm": 1.5244898497015356, "learning_rate": 6.957485876544143e-06, "loss": 0.4264, "step": 24641 }, { "epoch": 1.8314381270903008, "grad_norm": 2.1894765970411556, "learning_rate": 6.956721556946364e-06, "loss": 0.5833, "step": 24642 }, { "epoch": 1.8315124489037533, "grad_norm": 1.6245703278842634, "learning_rate": 6.955957256941053e-06, "loss": 0.5241, "step": 24643 }, { "epoch": 1.8315867707172055, "grad_norm": 1.8802509618798526, "learning_rate": 6.9551929765331326e-06, "loss": 0.6077, "step": 24644 }, { "epoch": 1.8316610925306578, "grad_norm": 1.707438350187719, "learning_rate": 6.954428715727522e-06, "loss": 0.5444, "step": 24645 }, { "epoch": 1.83173541434411, "grad_norm": 1.6549831663964985, "learning_rate": 6.953664474529143e-06, "loss": 0.5396, "step": 24646 }, { "epoch": 1.8318097361575623, "grad_norm": 1.8273460937982822, "learning_rate": 6.952900252942912e-06, "loss": 0.55, "step": 24647 }, { "epoch": 1.8318840579710145, "grad_norm": 2.134095632126572, "learning_rate": 6.952136050973749e-06, "loss": 0.5339, "step": 24648 }, { "epoch": 1.8319583797844667, "grad_norm": 2.2191663774995307, "learning_rate": 6.9513718686265775e-06, "loss": 0.6969, "step": 24649 }, { "epoch": 1.832032701597919, "grad_norm": 2.1010021458954213, "learning_rate": 6.950607705906316e-06, "loss": 0.664, "step": 24650 }, { "epoch": 1.8321070234113712, "grad_norm": 1.954499026016828, "learning_rate": 6.949843562817884e-06, "loss": 0.5224, "step": 24651 }, { "epoch": 1.8321813452248235, "grad_norm": 1.8937370362829782, "learning_rate": 6.9490794393662e-06, "loss": 0.6465, "step": 24652 }, { "epoch": 1.8322556670382757, "grad_norm": 1.632066108567922, "learning_rate": 6.948315335556184e-06, "loss": 0.5007, "step": 24653 }, { "epoch": 1.832329988851728, "grad_norm": 2.1515406025770845, "learning_rate": 6.9475512513927544e-06, "loss": 0.5306, "step": 24654 }, { "epoch": 1.8324043106651802, "grad_norm": 2.2397230607882483, "learning_rate": 6.9467871868808355e-06, "loss": 0.6849, "step": 24655 }, { "epoch": 1.8324786324786326, "grad_norm": 2.360917393817502, "learning_rate": 6.94602314202534e-06, "loss": 0.6758, "step": 24656 }, { "epoch": 1.8325529542920846, "grad_norm": 1.979131942485004, "learning_rate": 6.945259116831189e-06, "loss": 0.5554, "step": 24657 }, { "epoch": 1.832627276105537, "grad_norm": 2.0469028368439774, "learning_rate": 6.944495111303305e-06, "loss": 0.499, "step": 24658 }, { "epoch": 1.8327015979189891, "grad_norm": 2.058324211132858, "learning_rate": 6.9437311254465975e-06, "loss": 0.6575, "step": 24659 }, { "epoch": 1.8327759197324416, "grad_norm": 2.1939795752927522, "learning_rate": 6.942967159265992e-06, "loss": 0.6029, "step": 24660 }, { "epoch": 1.8328502415458936, "grad_norm": 2.295749882670891, "learning_rate": 6.942203212766408e-06, "loss": 0.5852, "step": 24661 }, { "epoch": 1.832924563359346, "grad_norm": 1.7940055981042038, "learning_rate": 6.941439285952758e-06, "loss": 0.6073, "step": 24662 }, { "epoch": 1.832998885172798, "grad_norm": 1.9329206697462982, "learning_rate": 6.940675378829963e-06, "loss": 0.6548, "step": 24663 }, { "epoch": 1.8330732069862505, "grad_norm": 1.9573680157134683, "learning_rate": 6.939911491402942e-06, "loss": 0.6255, "step": 24664 }, { "epoch": 1.8331475287997026, "grad_norm": 1.8762302068180972, "learning_rate": 6.939147623676611e-06, "loss": 0.5243, "step": 24665 }, { "epoch": 1.833221850613155, "grad_norm": 2.163577375933301, "learning_rate": 6.938383775655891e-06, "loss": 0.606, "step": 24666 }, { "epoch": 1.8332961724266073, "grad_norm": 1.99251257265921, "learning_rate": 6.937619947345695e-06, "loss": 0.6048, "step": 24667 }, { "epoch": 1.8333704942400595, "grad_norm": 1.99728142160154, "learning_rate": 6.936856138750945e-06, "loss": 0.6814, "step": 24668 }, { "epoch": 1.8334448160535117, "grad_norm": 1.6615995157791188, "learning_rate": 6.9360923498765585e-06, "loss": 0.5535, "step": 24669 }, { "epoch": 1.833519137866964, "grad_norm": 1.6795506705769059, "learning_rate": 6.935328580727447e-06, "loss": 0.4481, "step": 24670 }, { "epoch": 1.8335934596804162, "grad_norm": 1.5638909958662448, "learning_rate": 6.93456483130853e-06, "loss": 0.5336, "step": 24671 }, { "epoch": 1.8336677814938684, "grad_norm": 1.7108553410589813, "learning_rate": 6.933801101624728e-06, "loss": 0.4146, "step": 24672 }, { "epoch": 1.8337421033073207, "grad_norm": 2.2035077508724896, "learning_rate": 6.9330373916809545e-06, "loss": 0.5919, "step": 24673 }, { "epoch": 1.833816425120773, "grad_norm": 1.916539089540641, "learning_rate": 6.932273701482126e-06, "loss": 0.5889, "step": 24674 }, { "epoch": 1.8338907469342252, "grad_norm": 2.146800352758309, "learning_rate": 6.9315100310331615e-06, "loss": 0.5766, "step": 24675 }, { "epoch": 1.8339650687476774, "grad_norm": 2.511769818683705, "learning_rate": 6.9307463803389755e-06, "loss": 0.557, "step": 24676 }, { "epoch": 1.8340393905611299, "grad_norm": 1.9640055573320419, "learning_rate": 6.929982749404487e-06, "loss": 0.5795, "step": 24677 }, { "epoch": 1.8341137123745819, "grad_norm": 2.4780480755873464, "learning_rate": 6.929219138234609e-06, "loss": 0.7484, "step": 24678 }, { "epoch": 1.8341880341880343, "grad_norm": 1.697578814858132, "learning_rate": 6.928455546834257e-06, "loss": 0.5044, "step": 24679 }, { "epoch": 1.8342623560014863, "grad_norm": 2.0118030016123654, "learning_rate": 6.927691975208355e-06, "loss": 0.5068, "step": 24680 }, { "epoch": 1.8343366778149388, "grad_norm": 2.26796504961346, "learning_rate": 6.9269284233618076e-06, "loss": 0.635, "step": 24681 }, { "epoch": 1.8344109996283908, "grad_norm": 1.5946849016032023, "learning_rate": 6.926164891299535e-06, "loss": 0.4618, "step": 24682 }, { "epoch": 1.8344853214418433, "grad_norm": 2.3490420359093926, "learning_rate": 6.925401379026456e-06, "loss": 0.5858, "step": 24683 }, { "epoch": 1.8345596432552953, "grad_norm": 2.1618796790246098, "learning_rate": 6.924637886547481e-06, "loss": 0.4996, "step": 24684 }, { "epoch": 1.8346339650687478, "grad_norm": 2.1239701331283016, "learning_rate": 6.923874413867529e-06, "loss": 0.6789, "step": 24685 }, { "epoch": 1.8347082868821998, "grad_norm": 2.326302771417452, "learning_rate": 6.923110960991513e-06, "loss": 0.5312, "step": 24686 }, { "epoch": 1.8347826086956522, "grad_norm": 2.4001552298175657, "learning_rate": 6.922347527924348e-06, "loss": 0.5952, "step": 24687 }, { "epoch": 1.8348569305091043, "grad_norm": 1.7199859221701759, "learning_rate": 6.921584114670953e-06, "loss": 0.5482, "step": 24688 }, { "epoch": 1.8349312523225567, "grad_norm": 2.2439813421464736, "learning_rate": 6.9208207212362375e-06, "loss": 0.6898, "step": 24689 }, { "epoch": 1.835005574136009, "grad_norm": 2.0716950135830716, "learning_rate": 6.9200573476251175e-06, "loss": 0.6322, "step": 24690 }, { "epoch": 1.8350798959494612, "grad_norm": 2.2083141678710554, "learning_rate": 6.919293993842515e-06, "loss": 0.6528, "step": 24691 }, { "epoch": 1.8351542177629134, "grad_norm": 1.9626074536187077, "learning_rate": 6.918530659893332e-06, "loss": 0.503, "step": 24692 }, { "epoch": 1.8352285395763657, "grad_norm": 3.8710242003171755, "learning_rate": 6.917767345782489e-06, "loss": 0.6258, "step": 24693 }, { "epoch": 1.835302861389818, "grad_norm": 1.845365851179213, "learning_rate": 6.917004051514902e-06, "loss": 0.5132, "step": 24694 }, { "epoch": 1.8353771832032701, "grad_norm": 1.8409824684484775, "learning_rate": 6.916240777095481e-06, "loss": 0.4952, "step": 24695 }, { "epoch": 1.8354515050167224, "grad_norm": 2.1155396909537614, "learning_rate": 6.915477522529141e-06, "loss": 0.5864, "step": 24696 }, { "epoch": 1.8355258268301746, "grad_norm": 1.885506433278932, "learning_rate": 6.9147142878207985e-06, "loss": 0.471, "step": 24697 }, { "epoch": 1.8356001486436269, "grad_norm": 1.5110090598410444, "learning_rate": 6.913951072975364e-06, "loss": 0.4762, "step": 24698 }, { "epoch": 1.835674470457079, "grad_norm": 2.7383880385838544, "learning_rate": 6.913187877997756e-06, "loss": 0.7403, "step": 24699 }, { "epoch": 1.8357487922705316, "grad_norm": 1.809556112667993, "learning_rate": 6.9124247028928796e-06, "loss": 0.5732, "step": 24700 }, { "epoch": 1.8358231140839836, "grad_norm": 1.8164702116075389, "learning_rate": 6.911661547665654e-06, "loss": 0.5576, "step": 24701 }, { "epoch": 1.835897435897436, "grad_norm": 4.021024585593533, "learning_rate": 6.910898412320992e-06, "loss": 0.5367, "step": 24702 }, { "epoch": 1.835971757710888, "grad_norm": 2.043357895103611, "learning_rate": 6.9101352968638095e-06, "loss": 0.5073, "step": 24703 }, { "epoch": 1.8360460795243405, "grad_norm": 2.138680472267343, "learning_rate": 6.909372201299011e-06, "loss": 0.5473, "step": 24704 }, { "epoch": 1.8361204013377925, "grad_norm": 2.00693462514267, "learning_rate": 6.908609125631516e-06, "loss": 0.527, "step": 24705 }, { "epoch": 1.836194723151245, "grad_norm": 1.9744008472131804, "learning_rate": 6.907846069866233e-06, "loss": 0.5861, "step": 24706 }, { "epoch": 1.836269044964697, "grad_norm": 1.696912872301058, "learning_rate": 6.907083034008076e-06, "loss": 0.4434, "step": 24707 }, { "epoch": 1.8363433667781495, "grad_norm": 2.896977597565902, "learning_rate": 6.906320018061959e-06, "loss": 0.6481, "step": 24708 }, { "epoch": 1.8364176885916015, "grad_norm": 2.200717090688211, "learning_rate": 6.905557022032793e-06, "loss": 0.6186, "step": 24709 }, { "epoch": 1.836492010405054, "grad_norm": 2.0122564492822086, "learning_rate": 6.904794045925492e-06, "loss": 0.5534, "step": 24710 }, { "epoch": 1.8365663322185062, "grad_norm": 1.6781299706462867, "learning_rate": 6.9040310897449645e-06, "loss": 0.4058, "step": 24711 }, { "epoch": 1.8366406540319584, "grad_norm": 2.2243359873629505, "learning_rate": 6.903268153496125e-06, "loss": 0.6631, "step": 24712 }, { "epoch": 1.8367149758454107, "grad_norm": 1.9425174297955425, "learning_rate": 6.902505237183883e-06, "loss": 0.5593, "step": 24713 }, { "epoch": 1.836789297658863, "grad_norm": 2.0989094368150587, "learning_rate": 6.901742340813156e-06, "loss": 0.5107, "step": 24714 }, { "epoch": 1.8368636194723151, "grad_norm": 2.1961428508498515, "learning_rate": 6.900979464388847e-06, "loss": 0.5386, "step": 24715 }, { "epoch": 1.8369379412857674, "grad_norm": 1.9239548682104408, "learning_rate": 6.900216607915874e-06, "loss": 0.6242, "step": 24716 }, { "epoch": 1.8370122630992196, "grad_norm": 1.8670053260260795, "learning_rate": 6.899453771399143e-06, "loss": 0.5497, "step": 24717 }, { "epoch": 1.8370865849126718, "grad_norm": 1.8023831822408383, "learning_rate": 6.8986909548435674e-06, "loss": 0.6613, "step": 24718 }, { "epoch": 1.837160906726124, "grad_norm": 1.8301534138463391, "learning_rate": 6.89792815825406e-06, "loss": 0.5808, "step": 24719 }, { "epoch": 1.8372352285395763, "grad_norm": 2.3799761712120238, "learning_rate": 6.897165381635529e-06, "loss": 0.4588, "step": 24720 }, { "epoch": 1.8373095503530286, "grad_norm": 1.8907027704610657, "learning_rate": 6.8964026249928886e-06, "loss": 0.4901, "step": 24721 }, { "epoch": 1.8373838721664808, "grad_norm": 1.812242926714971, "learning_rate": 6.8956398883310446e-06, "loss": 0.6751, "step": 24722 }, { "epoch": 1.8374581939799333, "grad_norm": 1.5653147676251453, "learning_rate": 6.894877171654912e-06, "loss": 0.4707, "step": 24723 }, { "epoch": 1.8375325157933853, "grad_norm": 1.9141447345205127, "learning_rate": 6.8941144749693966e-06, "loss": 0.6017, "step": 24724 }, { "epoch": 1.8376068376068377, "grad_norm": 1.7043022649769362, "learning_rate": 6.8933517982794165e-06, "loss": 0.4911, "step": 24725 }, { "epoch": 1.8376811594202898, "grad_norm": 1.9768054248139497, "learning_rate": 6.892589141589871e-06, "loss": 0.5018, "step": 24726 }, { "epoch": 1.8377554812337422, "grad_norm": 1.6415944819350021, "learning_rate": 6.891826504905678e-06, "loss": 0.5111, "step": 24727 }, { "epoch": 1.8378298030471942, "grad_norm": 1.7550130482342288, "learning_rate": 6.891063888231743e-06, "loss": 0.5366, "step": 24728 }, { "epoch": 1.8379041248606467, "grad_norm": 1.8789008423982017, "learning_rate": 6.890301291572978e-06, "loss": 0.5955, "step": 24729 }, { "epoch": 1.8379784466740987, "grad_norm": 1.9534632337190292, "learning_rate": 6.8895387149342895e-06, "loss": 0.6171, "step": 24730 }, { "epoch": 1.8380527684875512, "grad_norm": 2.4687612538018744, "learning_rate": 6.888776158320591e-06, "loss": 0.7667, "step": 24731 }, { "epoch": 1.8381270903010032, "grad_norm": 2.560488100660763, "learning_rate": 6.8880136217367934e-06, "loss": 0.5717, "step": 24732 }, { "epoch": 1.8382014121144556, "grad_norm": 2.11978228681484, "learning_rate": 6.887251105187799e-06, "loss": 0.5951, "step": 24733 }, { "epoch": 1.8382757339279079, "grad_norm": 2.276178929614351, "learning_rate": 6.886488608678521e-06, "loss": 0.7033, "step": 24734 }, { "epoch": 1.8383500557413601, "grad_norm": 1.7227044412014794, "learning_rate": 6.885726132213868e-06, "loss": 0.5923, "step": 24735 }, { "epoch": 1.8384243775548124, "grad_norm": 2.027299232448763, "learning_rate": 6.88496367579875e-06, "loss": 0.6336, "step": 24736 }, { "epoch": 1.8384986993682646, "grad_norm": 1.8265944661040574, "learning_rate": 6.8842012394380745e-06, "loss": 0.3684, "step": 24737 }, { "epoch": 1.8385730211817168, "grad_norm": 2.047147426149881, "learning_rate": 6.88343882313675e-06, "loss": 0.6545, "step": 24738 }, { "epoch": 1.838647342995169, "grad_norm": 2.219417054041758, "learning_rate": 6.882676426899681e-06, "loss": 0.5627, "step": 24739 }, { "epoch": 1.8387216648086213, "grad_norm": 2.0223223697106985, "learning_rate": 6.8819140507317815e-06, "loss": 0.5789, "step": 24740 }, { "epoch": 1.8387959866220736, "grad_norm": 1.917213208687275, "learning_rate": 6.881151694637958e-06, "loss": 0.5636, "step": 24741 }, { "epoch": 1.8388703084355258, "grad_norm": 2.2545292144708906, "learning_rate": 6.880389358623116e-06, "loss": 0.677, "step": 24742 }, { "epoch": 1.838944630248978, "grad_norm": 1.871205594308481, "learning_rate": 6.879627042692167e-06, "loss": 0.574, "step": 24743 }, { "epoch": 1.8390189520624305, "grad_norm": 1.989662440917053, "learning_rate": 6.878864746850018e-06, "loss": 0.6314, "step": 24744 }, { "epoch": 1.8390932738758825, "grad_norm": 1.3561603246718357, "learning_rate": 6.878102471101573e-06, "loss": 0.2393, "step": 24745 }, { "epoch": 1.839167595689335, "grad_norm": 1.871510215158848, "learning_rate": 6.877340215451745e-06, "loss": 0.5905, "step": 24746 }, { "epoch": 1.839241917502787, "grad_norm": 1.7960492399664658, "learning_rate": 6.876577979905439e-06, "loss": 0.479, "step": 24747 }, { "epoch": 1.8393162393162394, "grad_norm": 1.9936748103882278, "learning_rate": 6.875815764467562e-06, "loss": 0.7303, "step": 24748 }, { "epoch": 1.8393905611296915, "grad_norm": 2.52493437503578, "learning_rate": 6.875053569143021e-06, "loss": 0.6016, "step": 24749 }, { "epoch": 1.839464882943144, "grad_norm": 2.646019077187589, "learning_rate": 6.874291393936721e-06, "loss": 0.5858, "step": 24750 }, { "epoch": 1.839539204756596, "grad_norm": 6.431256806112055, "learning_rate": 6.873529238853571e-06, "loss": 0.4909, "step": 24751 }, { "epoch": 1.8396135265700484, "grad_norm": 2.4183696175652902, "learning_rate": 6.872767103898478e-06, "loss": 0.5655, "step": 24752 }, { "epoch": 1.8396878483835004, "grad_norm": 1.9076731825259732, "learning_rate": 6.87200498907635e-06, "loss": 0.543, "step": 24753 }, { "epoch": 1.8397621701969529, "grad_norm": 1.9378265031071757, "learning_rate": 6.871242894392089e-06, "loss": 0.6326, "step": 24754 }, { "epoch": 1.839836492010405, "grad_norm": 1.65224455164329, "learning_rate": 6.8704808198506046e-06, "loss": 0.5195, "step": 24755 }, { "epoch": 1.8399108138238573, "grad_norm": 2.412966342991769, "learning_rate": 6.869718765456801e-06, "loss": 0.693, "step": 24756 }, { "epoch": 1.8399851356373096, "grad_norm": 1.948914869603261, "learning_rate": 6.868956731215586e-06, "loss": 0.5548, "step": 24757 }, { "epoch": 1.8400594574507618, "grad_norm": 2.0582583617841284, "learning_rate": 6.8681947171318686e-06, "loss": 0.6043, "step": 24758 }, { "epoch": 1.840133779264214, "grad_norm": 1.925012478502477, "learning_rate": 6.867432723210549e-06, "loss": 0.5267, "step": 24759 }, { "epoch": 1.8402081010776663, "grad_norm": 1.8602692031854646, "learning_rate": 6.866670749456537e-06, "loss": 0.4844, "step": 24760 }, { "epoch": 1.8402824228911185, "grad_norm": 4.887833570041341, "learning_rate": 6.865908795874733e-06, "loss": 0.6855, "step": 24761 }, { "epoch": 1.8403567447045708, "grad_norm": 2.176860223506862, "learning_rate": 6.865146862470045e-06, "loss": 0.6249, "step": 24762 }, { "epoch": 1.840431066518023, "grad_norm": 1.8449233074381417, "learning_rate": 6.8643849492473804e-06, "loss": 0.5022, "step": 24763 }, { "epoch": 1.8405053883314753, "grad_norm": 1.7614318577653896, "learning_rate": 6.8636230562116435e-06, "loss": 0.5041, "step": 24764 }, { "epoch": 1.8405797101449275, "grad_norm": 2.278677307836255, "learning_rate": 6.862861183367737e-06, "loss": 0.6612, "step": 24765 }, { "epoch": 1.8406540319583797, "grad_norm": 1.5608398000306942, "learning_rate": 6.862099330720567e-06, "loss": 0.448, "step": 24766 }, { "epoch": 1.8407283537718322, "grad_norm": 2.2764293631149783, "learning_rate": 6.86133749827504e-06, "loss": 0.6502, "step": 24767 }, { "epoch": 1.8408026755852842, "grad_norm": 1.931465244099436, "learning_rate": 6.860575686036058e-06, "loss": 0.579, "step": 24768 }, { "epoch": 1.8408769973987367, "grad_norm": 1.58083720682788, "learning_rate": 6.85981389400853e-06, "loss": 0.5008, "step": 24769 }, { "epoch": 1.8409513192121887, "grad_norm": 1.4950298807211821, "learning_rate": 6.8590521221973535e-06, "loss": 0.5419, "step": 24770 }, { "epoch": 1.8410256410256411, "grad_norm": 2.2246712723541373, "learning_rate": 6.858290370607443e-06, "loss": 0.6363, "step": 24771 }, { "epoch": 1.8410999628390932, "grad_norm": 2.146968438664487, "learning_rate": 6.85752863924369e-06, "loss": 0.6903, "step": 24772 }, { "epoch": 1.8411742846525456, "grad_norm": 2.177603306211512, "learning_rate": 6.856766928111005e-06, "loss": 0.6065, "step": 24773 }, { "epoch": 1.8412486064659976, "grad_norm": 1.8722010606954405, "learning_rate": 6.856005237214291e-06, "loss": 0.4994, "step": 24774 }, { "epoch": 1.84132292827945, "grad_norm": 1.820670709581771, "learning_rate": 6.855243566558456e-06, "loss": 0.5957, "step": 24775 }, { "epoch": 1.8413972500929021, "grad_norm": 2.120954092755076, "learning_rate": 6.854481916148397e-06, "loss": 0.6396, "step": 24776 }, { "epoch": 1.8414715719063546, "grad_norm": 2.3738593813363913, "learning_rate": 6.853720285989019e-06, "loss": 0.7331, "step": 24777 }, { "epoch": 1.8415458937198068, "grad_norm": 2.1876963259143785, "learning_rate": 6.8529586760852275e-06, "loss": 0.567, "step": 24778 }, { "epoch": 1.841620215533259, "grad_norm": 1.5595415989519823, "learning_rate": 6.852197086441925e-06, "loss": 0.4901, "step": 24779 }, { "epoch": 1.8416945373467113, "grad_norm": 1.847121722223605, "learning_rate": 6.8514355170640155e-06, "loss": 0.5388, "step": 24780 }, { "epoch": 1.8417688591601635, "grad_norm": 1.9215337791572025, "learning_rate": 6.8506739679564e-06, "loss": 0.5521, "step": 24781 }, { "epoch": 1.8418431809736158, "grad_norm": 1.8112534954188133, "learning_rate": 6.849912439123985e-06, "loss": 0.5289, "step": 24782 }, { "epoch": 1.841917502787068, "grad_norm": 7.760965382277503, "learning_rate": 6.849150930571666e-06, "loss": 0.5956, "step": 24783 }, { "epoch": 1.8419918246005202, "grad_norm": 1.8617434177901415, "learning_rate": 6.848389442304351e-06, "loss": 0.5699, "step": 24784 }, { "epoch": 1.8420661464139725, "grad_norm": 2.535720316055985, "learning_rate": 6.847627974326941e-06, "loss": 0.7457, "step": 24785 }, { "epoch": 1.8421404682274247, "grad_norm": 2.1351676224940968, "learning_rate": 6.846866526644339e-06, "loss": 0.6097, "step": 24786 }, { "epoch": 1.842214790040877, "grad_norm": 2.02367953627578, "learning_rate": 6.846105099261446e-06, "loss": 0.5236, "step": 24787 }, { "epoch": 1.8422891118543292, "grad_norm": 2.1549881943875246, "learning_rate": 6.845343692183164e-06, "loss": 0.6221, "step": 24788 }, { "epoch": 1.8423634336677814, "grad_norm": 2.3289616455076056, "learning_rate": 6.844582305414397e-06, "loss": 0.7451, "step": 24789 }, { "epoch": 1.842437755481234, "grad_norm": 2.053112961970854, "learning_rate": 6.843820938960044e-06, "loss": 0.5241, "step": 24790 }, { "epoch": 1.842512077294686, "grad_norm": 1.7718496345710348, "learning_rate": 6.843059592825011e-06, "loss": 0.4924, "step": 24791 }, { "epoch": 1.8425863991081384, "grad_norm": 2.0700151528394266, "learning_rate": 6.842298267014193e-06, "loss": 0.5907, "step": 24792 }, { "epoch": 1.8426607209215904, "grad_norm": 2.036487206943271, "learning_rate": 6.841536961532501e-06, "loss": 0.6764, "step": 24793 }, { "epoch": 1.8427350427350428, "grad_norm": 1.872720144394582, "learning_rate": 6.840775676384826e-06, "loss": 0.4361, "step": 24794 }, { "epoch": 1.8428093645484949, "grad_norm": 1.6735817296245115, "learning_rate": 6.840014411576073e-06, "loss": 0.5374, "step": 24795 }, { "epoch": 1.8428836863619473, "grad_norm": 2.011189360826146, "learning_rate": 6.839253167111142e-06, "loss": 0.6461, "step": 24796 }, { "epoch": 1.8429580081753993, "grad_norm": 1.6497304770659436, "learning_rate": 6.838491942994938e-06, "loss": 0.5744, "step": 24797 }, { "epoch": 1.8430323299888518, "grad_norm": 1.9741549365097466, "learning_rate": 6.837730739232355e-06, "loss": 0.4727, "step": 24798 }, { "epoch": 1.8431066518023038, "grad_norm": 1.7018638164658302, "learning_rate": 6.8369695558283e-06, "loss": 0.4143, "step": 24799 }, { "epoch": 1.8431809736157563, "grad_norm": 1.9463396568854956, "learning_rate": 6.83620839278767e-06, "loss": 0.6019, "step": 24800 }, { "epoch": 1.8432552954292085, "grad_norm": 1.6462783983233413, "learning_rate": 6.835447250115366e-06, "loss": 0.4282, "step": 24801 }, { "epoch": 1.8433296172426608, "grad_norm": 1.8799332161344877, "learning_rate": 6.83468612781629e-06, "loss": 0.5655, "step": 24802 }, { "epoch": 1.843403939056113, "grad_norm": 1.803724730805888, "learning_rate": 6.8339250258953405e-06, "loss": 0.6078, "step": 24803 }, { "epoch": 1.8434782608695652, "grad_norm": 1.869420906450988, "learning_rate": 6.83316394435742e-06, "loss": 0.5391, "step": 24804 }, { "epoch": 1.8435525826830175, "grad_norm": 2.317084045868615, "learning_rate": 6.832402883207421e-06, "loss": 0.6054, "step": 24805 }, { "epoch": 1.8436269044964697, "grad_norm": 2.0009148321348618, "learning_rate": 6.83164184245025e-06, "loss": 0.6074, "step": 24806 }, { "epoch": 1.843701226309922, "grad_norm": 2.206654434036839, "learning_rate": 6.830880822090802e-06, "loss": 0.6972, "step": 24807 }, { "epoch": 1.8437755481233742, "grad_norm": 2.1717168844922203, "learning_rate": 6.830119822133981e-06, "loss": 0.5731, "step": 24808 }, { "epoch": 1.8438498699368264, "grad_norm": 1.7892080191877957, "learning_rate": 6.829358842584684e-06, "loss": 0.5619, "step": 24809 }, { "epoch": 1.8439241917502787, "grad_norm": 1.7290900168244794, "learning_rate": 6.8285978834478096e-06, "loss": 0.5871, "step": 24810 }, { "epoch": 1.843998513563731, "grad_norm": 2.2174385616331045, "learning_rate": 6.827836944728256e-06, "loss": 0.5445, "step": 24811 }, { "epoch": 1.8440728353771831, "grad_norm": 1.346076854285579, "learning_rate": 6.827076026430926e-06, "loss": 0.4386, "step": 24812 }, { "epoch": 1.8441471571906356, "grad_norm": 2.2015268761383475, "learning_rate": 6.8263151285607155e-06, "loss": 0.5979, "step": 24813 }, { "epoch": 1.8442214790040876, "grad_norm": 1.6808456319285718, "learning_rate": 6.825554251122523e-06, "loss": 0.6007, "step": 24814 }, { "epoch": 1.84429580081754, "grad_norm": 2.1467726185283342, "learning_rate": 6.8247933941212475e-06, "loss": 0.5769, "step": 24815 }, { "epoch": 1.844370122630992, "grad_norm": 1.8959586220070466, "learning_rate": 6.824032557561791e-06, "loss": 0.565, "step": 24816 }, { "epoch": 1.8444444444444446, "grad_norm": 2.0542377778832006, "learning_rate": 6.823271741449044e-06, "loss": 0.6646, "step": 24817 }, { "epoch": 1.8445187662578966, "grad_norm": 1.6823628232677512, "learning_rate": 6.822510945787908e-06, "loss": 0.5611, "step": 24818 }, { "epoch": 1.844593088071349, "grad_norm": 1.447407614669096, "learning_rate": 6.821750170583285e-06, "loss": 0.3519, "step": 24819 }, { "epoch": 1.844667409884801, "grad_norm": 1.7460255635266333, "learning_rate": 6.820989415840067e-06, "loss": 0.5583, "step": 24820 }, { "epoch": 1.8447417316982535, "grad_norm": 1.9127925452479024, "learning_rate": 6.820228681563154e-06, "loss": 0.5872, "step": 24821 }, { "epoch": 1.8448160535117055, "grad_norm": 2.3938889230791442, "learning_rate": 6.819467967757442e-06, "loss": 0.4569, "step": 24822 }, { "epoch": 1.844890375325158, "grad_norm": 1.895048008965574, "learning_rate": 6.818707274427834e-06, "loss": 0.5192, "step": 24823 }, { "epoch": 1.8449646971386102, "grad_norm": 1.6510977744450315, "learning_rate": 6.817946601579224e-06, "loss": 0.6026, "step": 24824 }, { "epoch": 1.8450390189520625, "grad_norm": 1.8545528837944314, "learning_rate": 6.817185949216505e-06, "loss": 0.5628, "step": 24825 }, { "epoch": 1.8451133407655147, "grad_norm": 2.123917661159747, "learning_rate": 6.81642531734458e-06, "loss": 0.6593, "step": 24826 }, { "epoch": 1.845187662578967, "grad_norm": 4.919842128394232, "learning_rate": 6.815664705968346e-06, "loss": 0.7481, "step": 24827 }, { "epoch": 1.8452619843924192, "grad_norm": 1.7720378633879486, "learning_rate": 6.814904115092695e-06, "loss": 0.4409, "step": 24828 }, { "epoch": 1.8453363062058714, "grad_norm": 1.7545792264193771, "learning_rate": 6.814143544722524e-06, "loss": 0.4418, "step": 24829 }, { "epoch": 1.8454106280193237, "grad_norm": 1.6154061846846044, "learning_rate": 6.813382994862734e-06, "loss": 0.593, "step": 24830 }, { "epoch": 1.845484949832776, "grad_norm": 1.4723094794517417, "learning_rate": 6.8126224655182184e-06, "loss": 0.4009, "step": 24831 }, { "epoch": 1.8455592716462281, "grad_norm": 2.6873712238511227, "learning_rate": 6.811861956693873e-06, "loss": 0.4969, "step": 24832 }, { "epoch": 1.8456335934596804, "grad_norm": 2.3322409568598697, "learning_rate": 6.811101468394594e-06, "loss": 0.6395, "step": 24833 }, { "epoch": 1.8457079152731328, "grad_norm": 2.06794866208238, "learning_rate": 6.81034100062528e-06, "loss": 0.5304, "step": 24834 }, { "epoch": 1.8457822370865848, "grad_norm": 1.9897842087184623, "learning_rate": 6.809580553390827e-06, "loss": 0.541, "step": 24835 }, { "epoch": 1.8458565589000373, "grad_norm": 1.9728297272260193, "learning_rate": 6.808820126696126e-06, "loss": 0.6756, "step": 24836 }, { "epoch": 1.8459308807134893, "grad_norm": 35.82994021328363, "learning_rate": 6.808059720546077e-06, "loss": 0.7185, "step": 24837 }, { "epoch": 1.8460052025269418, "grad_norm": 2.413684114148856, "learning_rate": 6.807299334945576e-06, "loss": 0.6382, "step": 24838 }, { "epoch": 1.8460795243403938, "grad_norm": 2.166188578353634, "learning_rate": 6.806538969899512e-06, "loss": 0.6759, "step": 24839 }, { "epoch": 1.8461538461538463, "grad_norm": 2.0829223913898245, "learning_rate": 6.805778625412785e-06, "loss": 0.5352, "step": 24840 }, { "epoch": 1.8462281679672983, "grad_norm": 2.0939004853327656, "learning_rate": 6.805018301490293e-06, "loss": 0.5635, "step": 24841 }, { "epoch": 1.8463024897807507, "grad_norm": 1.8124618867907476, "learning_rate": 6.8042579981369226e-06, "loss": 0.5922, "step": 24842 }, { "epoch": 1.8463768115942027, "grad_norm": 2.082589459207386, "learning_rate": 6.803497715357576e-06, "loss": 0.6614, "step": 24843 }, { "epoch": 1.8464511334076552, "grad_norm": 2.6768258463784282, "learning_rate": 6.8027374531571444e-06, "loss": 0.8188, "step": 24844 }, { "epoch": 1.8465254552211074, "grad_norm": 2.0160786045977286, "learning_rate": 6.801977211540522e-06, "loss": 0.5539, "step": 24845 }, { "epoch": 1.8465997770345597, "grad_norm": 1.6561062879964303, "learning_rate": 6.801216990512608e-06, "loss": 0.4485, "step": 24846 }, { "epoch": 1.846674098848012, "grad_norm": 1.7829024656134587, "learning_rate": 6.80045679007829e-06, "loss": 0.6247, "step": 24847 }, { "epoch": 1.8467484206614642, "grad_norm": 1.8827708371147205, "learning_rate": 6.799696610242465e-06, "loss": 0.6266, "step": 24848 }, { "epoch": 1.8468227424749164, "grad_norm": 1.5995997209186887, "learning_rate": 6.7989364510100285e-06, "loss": 0.4405, "step": 24849 }, { "epoch": 1.8468970642883686, "grad_norm": 2.146412029784292, "learning_rate": 6.798176312385877e-06, "loss": 0.7593, "step": 24850 }, { "epoch": 1.8469713861018209, "grad_norm": 2.168898708002607, "learning_rate": 6.797416194374895e-06, "loss": 0.625, "step": 24851 }, { "epoch": 1.8470457079152731, "grad_norm": 1.6678362746461013, "learning_rate": 6.7966560969819836e-06, "loss": 0.5164, "step": 24852 }, { "epoch": 1.8471200297287254, "grad_norm": 2.8486937704374276, "learning_rate": 6.795896020212033e-06, "loss": 0.4611, "step": 24853 }, { "epoch": 1.8471943515421776, "grad_norm": 2.212339222947862, "learning_rate": 6.795135964069937e-06, "loss": 0.3873, "step": 24854 }, { "epoch": 1.8472686733556298, "grad_norm": 1.980052528016113, "learning_rate": 6.794375928560589e-06, "loss": 0.5385, "step": 24855 }, { "epoch": 1.847342995169082, "grad_norm": 2.300912369802206, "learning_rate": 6.793615913688883e-06, "loss": 0.5983, "step": 24856 }, { "epoch": 1.8474173169825345, "grad_norm": 1.9067015471009714, "learning_rate": 6.792855919459713e-06, "loss": 0.6265, "step": 24857 }, { "epoch": 1.8474916387959865, "grad_norm": 2.1943353148630615, "learning_rate": 6.792095945877968e-06, "loss": 0.62, "step": 24858 }, { "epoch": 1.847565960609439, "grad_norm": 2.220461141312942, "learning_rate": 6.791335992948544e-06, "loss": 0.7026, "step": 24859 }, { "epoch": 1.847640282422891, "grad_norm": 1.535414719812914, "learning_rate": 6.79057606067633e-06, "loss": 0.4536, "step": 24860 }, { "epoch": 1.8477146042363435, "grad_norm": 1.5543885551489076, "learning_rate": 6.789816149066228e-06, "loss": 0.4908, "step": 24861 }, { "epoch": 1.8477889260497955, "grad_norm": 1.7000019135428381, "learning_rate": 6.7890562581231165e-06, "loss": 0.5163, "step": 24862 }, { "epoch": 1.847863247863248, "grad_norm": 1.5704234990821626, "learning_rate": 6.788296387851898e-06, "loss": 0.3859, "step": 24863 }, { "epoch": 1.8479375696767, "grad_norm": 2.178087970053176, "learning_rate": 6.787536538257457e-06, "loss": 0.652, "step": 24864 }, { "epoch": 1.8480118914901524, "grad_norm": 2.1030505116472837, "learning_rate": 6.78677670934469e-06, "loss": 0.5185, "step": 24865 }, { "epoch": 1.8480862133036045, "grad_norm": 2.2894881179259357, "learning_rate": 6.786016901118488e-06, "loss": 0.6236, "step": 24866 }, { "epoch": 1.848160535117057, "grad_norm": 2.2823081061803374, "learning_rate": 6.785257113583742e-06, "loss": 0.5533, "step": 24867 }, { "epoch": 1.8482348569305092, "grad_norm": 2.1138149008600204, "learning_rate": 6.784497346745347e-06, "loss": 0.6739, "step": 24868 }, { "epoch": 1.8483091787439614, "grad_norm": 3.0902973120803945, "learning_rate": 6.7837376006081866e-06, "loss": 0.5241, "step": 24869 }, { "epoch": 1.8483835005574136, "grad_norm": 1.6837095655455867, "learning_rate": 6.782977875177159e-06, "loss": 0.5838, "step": 24870 }, { "epoch": 1.8484578223708659, "grad_norm": 1.4687161818749543, "learning_rate": 6.7822181704571535e-06, "loss": 0.4508, "step": 24871 }, { "epoch": 1.848532144184318, "grad_norm": 2.466548117641075, "learning_rate": 6.781458486453063e-06, "loss": 0.6099, "step": 24872 }, { "epoch": 1.8486064659977703, "grad_norm": 1.8655576382188208, "learning_rate": 6.7806988231697715e-06, "loss": 0.6025, "step": 24873 }, { "epoch": 1.8486807878112226, "grad_norm": 1.5728610550996693, "learning_rate": 6.7799391806121765e-06, "loss": 0.4135, "step": 24874 }, { "epoch": 1.8487551096246748, "grad_norm": 1.8532759871897508, "learning_rate": 6.779179558785164e-06, "loss": 0.3733, "step": 24875 }, { "epoch": 1.848829431438127, "grad_norm": 2.0369890116187856, "learning_rate": 6.7784199576936264e-06, "loss": 0.6991, "step": 24876 }, { "epoch": 1.8489037532515793, "grad_norm": 2.1146050148243916, "learning_rate": 6.777660377342453e-06, "loss": 0.5133, "step": 24877 }, { "epoch": 1.8489780750650315, "grad_norm": 1.4568631407228332, "learning_rate": 6.776900817736538e-06, "loss": 0.3833, "step": 24878 }, { "epoch": 1.8490523968784838, "grad_norm": 2.2741342678831713, "learning_rate": 6.7761412788807654e-06, "loss": 0.6544, "step": 24879 }, { "epoch": 1.8491267186919362, "grad_norm": 2.0162152670857405, "learning_rate": 6.775381760780029e-06, "loss": 0.6233, "step": 24880 }, { "epoch": 1.8492010405053882, "grad_norm": 1.9148320744206335, "learning_rate": 6.774622263439218e-06, "loss": 0.5899, "step": 24881 }, { "epoch": 1.8492753623188407, "grad_norm": 2.0510341988248117, "learning_rate": 6.77386278686322e-06, "loss": 0.5846, "step": 24882 }, { "epoch": 1.8493496841322927, "grad_norm": 2.20608516242623, "learning_rate": 6.773103331056929e-06, "loss": 0.7031, "step": 24883 }, { "epoch": 1.8494240059457452, "grad_norm": 2.22883718491186, "learning_rate": 6.772343896025231e-06, "loss": 0.6868, "step": 24884 }, { "epoch": 1.8494983277591972, "grad_norm": 2.1830966637156393, "learning_rate": 6.7715844817730146e-06, "loss": 0.551, "step": 24885 }, { "epoch": 1.8495726495726497, "grad_norm": 2.504520450617786, "learning_rate": 6.77082508830517e-06, "loss": 0.588, "step": 24886 }, { "epoch": 1.8496469713861017, "grad_norm": 2.3200908046893622, "learning_rate": 6.770065715626583e-06, "loss": 0.671, "step": 24887 }, { "epoch": 1.8497212931995541, "grad_norm": 2.1945090243219445, "learning_rate": 6.7693063637421474e-06, "loss": 0.5647, "step": 24888 }, { "epoch": 1.8497956150130062, "grad_norm": 2.0016199508326724, "learning_rate": 6.768547032656751e-06, "loss": 0.611, "step": 24889 }, { "epoch": 1.8498699368264586, "grad_norm": 1.7789521513428208, "learning_rate": 6.7677877223752804e-06, "loss": 0.4769, "step": 24890 }, { "epoch": 1.8499442586399109, "grad_norm": 2.522410460165909, "learning_rate": 6.767028432902623e-06, "loss": 0.6282, "step": 24891 }, { "epoch": 1.850018580453363, "grad_norm": 2.1461626396718367, "learning_rate": 6.7662691642436705e-06, "loss": 0.4778, "step": 24892 }, { "epoch": 1.8500929022668153, "grad_norm": 1.9663191508535456, "learning_rate": 6.765509916403308e-06, "loss": 0.5198, "step": 24893 }, { "epoch": 1.8501672240802676, "grad_norm": 1.7171432522495005, "learning_rate": 6.764750689386428e-06, "loss": 0.5066, "step": 24894 }, { "epoch": 1.8502415458937198, "grad_norm": 2.1167581975028593, "learning_rate": 6.763991483197914e-06, "loss": 0.5516, "step": 24895 }, { "epoch": 1.850315867707172, "grad_norm": 2.173562634435073, "learning_rate": 6.763232297842656e-06, "loss": 0.7168, "step": 24896 }, { "epoch": 1.8503901895206243, "grad_norm": 1.8554407706980456, "learning_rate": 6.7624731333255375e-06, "loss": 0.6088, "step": 24897 }, { "epoch": 1.8504645113340765, "grad_norm": 2.1732561074577443, "learning_rate": 6.7617139896514495e-06, "loss": 0.5448, "step": 24898 }, { "epoch": 1.8505388331475288, "grad_norm": 2.364837565691731, "learning_rate": 6.760954866825279e-06, "loss": 0.4536, "step": 24899 }, { "epoch": 1.850613154960981, "grad_norm": 2.720255767661503, "learning_rate": 6.760195764851915e-06, "loss": 0.6468, "step": 24900 }, { "epoch": 1.8506874767744335, "grad_norm": 2.117559786421223, "learning_rate": 6.75943668373624e-06, "loss": 0.6096, "step": 24901 }, { "epoch": 1.8507617985878855, "grad_norm": 2.0238220539983525, "learning_rate": 6.758677623483143e-06, "loss": 0.5905, "step": 24902 }, { "epoch": 1.850836120401338, "grad_norm": 2.113036205707872, "learning_rate": 6.757918584097513e-06, "loss": 0.5617, "step": 24903 }, { "epoch": 1.85091044221479, "grad_norm": 2.214095716030112, "learning_rate": 6.757159565584233e-06, "loss": 0.6303, "step": 24904 }, { "epoch": 1.8509847640282424, "grad_norm": 1.6685808518844862, "learning_rate": 6.756400567948194e-06, "loss": 0.4804, "step": 24905 }, { "epoch": 1.8510590858416944, "grad_norm": 3.737926734731011, "learning_rate": 6.755641591194282e-06, "loss": 0.5443, "step": 24906 }, { "epoch": 1.851133407655147, "grad_norm": 1.5063805856971904, "learning_rate": 6.754882635327379e-06, "loss": 0.4745, "step": 24907 }, { "epoch": 1.851207729468599, "grad_norm": 1.9082540013372318, "learning_rate": 6.754123700352371e-06, "loss": 0.4656, "step": 24908 }, { "epoch": 1.8512820512820514, "grad_norm": 2.477165793058011, "learning_rate": 6.753364786274148e-06, "loss": 0.6567, "step": 24909 }, { "epoch": 1.8513563730955034, "grad_norm": 2.035449365175883, "learning_rate": 6.752605893097592e-06, "loss": 0.5548, "step": 24910 }, { "epoch": 1.8514306949089558, "grad_norm": 2.026006979140296, "learning_rate": 6.751847020827593e-06, "loss": 0.6529, "step": 24911 }, { "epoch": 1.851505016722408, "grad_norm": 1.9361791241491253, "learning_rate": 6.751088169469034e-06, "loss": 0.6182, "step": 24912 }, { "epoch": 1.8515793385358603, "grad_norm": 2.0697228596140795, "learning_rate": 6.7503293390267995e-06, "loss": 0.5704, "step": 24913 }, { "epoch": 1.8516536603493126, "grad_norm": 1.728408196566829, "learning_rate": 6.749570529505776e-06, "loss": 0.5435, "step": 24914 }, { "epoch": 1.8517279821627648, "grad_norm": 2.1646592533583586, "learning_rate": 6.74881174091085e-06, "loss": 0.5391, "step": 24915 }, { "epoch": 1.851802303976217, "grad_norm": 1.7839530448877918, "learning_rate": 6.748052973246906e-06, "loss": 0.5624, "step": 24916 }, { "epoch": 1.8518766257896693, "grad_norm": 1.9140000925218188, "learning_rate": 6.747294226518828e-06, "loss": 0.541, "step": 24917 }, { "epoch": 1.8519509476031215, "grad_norm": 1.7705906305095398, "learning_rate": 6.746535500731503e-06, "loss": 0.5286, "step": 24918 }, { "epoch": 1.8520252694165737, "grad_norm": 2.323983703703904, "learning_rate": 6.7457767958898094e-06, "loss": 0.5726, "step": 24919 }, { "epoch": 1.852099591230026, "grad_norm": 1.8663610467432588, "learning_rate": 6.7450181119986366e-06, "loss": 0.5314, "step": 24920 }, { "epoch": 1.8521739130434782, "grad_norm": 2.2010964081484725, "learning_rate": 6.744259449062869e-06, "loss": 0.5835, "step": 24921 }, { "epoch": 1.8522482348569305, "grad_norm": 1.621630644320668, "learning_rate": 6.743500807087392e-06, "loss": 0.3769, "step": 24922 }, { "epoch": 1.8523225566703827, "grad_norm": 1.8676603871942228, "learning_rate": 6.742742186077084e-06, "loss": 0.5031, "step": 24923 }, { "epoch": 1.8523968784838352, "grad_norm": 2.171246162270243, "learning_rate": 6.741983586036835e-06, "loss": 0.5302, "step": 24924 }, { "epoch": 1.8524712002972872, "grad_norm": 1.900787651120345, "learning_rate": 6.741225006971526e-06, "loss": 0.3792, "step": 24925 }, { "epoch": 1.8525455221107396, "grad_norm": 2.0711076952612197, "learning_rate": 6.740466448886042e-06, "loss": 0.6262, "step": 24926 }, { "epoch": 1.8526198439241917, "grad_norm": 1.8824367609336996, "learning_rate": 6.739707911785267e-06, "loss": 0.3791, "step": 24927 }, { "epoch": 1.8526941657376441, "grad_norm": 2.2115381057686005, "learning_rate": 6.738949395674081e-06, "loss": 0.56, "step": 24928 }, { "epoch": 1.8527684875510961, "grad_norm": 1.8161080557140585, "learning_rate": 6.738190900557375e-06, "loss": 0.514, "step": 24929 }, { "epoch": 1.8528428093645486, "grad_norm": 2.204272253650694, "learning_rate": 6.7374324264400215e-06, "loss": 0.6473, "step": 24930 }, { "epoch": 1.8529171311780006, "grad_norm": 1.931764474410677, "learning_rate": 6.736673973326908e-06, "loss": 0.606, "step": 24931 }, { "epoch": 1.852991452991453, "grad_norm": 2.4678058750523473, "learning_rate": 6.735915541222919e-06, "loss": 0.7359, "step": 24932 }, { "epoch": 1.853065774804905, "grad_norm": 2.2351108636930292, "learning_rate": 6.735157130132939e-06, "loss": 0.533, "step": 24933 }, { "epoch": 1.8531400966183575, "grad_norm": 3.0496078958391846, "learning_rate": 6.734398740061846e-06, "loss": 0.7849, "step": 24934 }, { "epoch": 1.8532144184318098, "grad_norm": 1.9671158624654426, "learning_rate": 6.733640371014525e-06, "loss": 0.5947, "step": 24935 }, { "epoch": 1.853288740245262, "grad_norm": 1.9225224500373024, "learning_rate": 6.7328820229958555e-06, "loss": 0.6175, "step": 24936 }, { "epoch": 1.8533630620587143, "grad_norm": 1.9726210319460442, "learning_rate": 6.732123696010724e-06, "loss": 0.6061, "step": 24937 }, { "epoch": 1.8534373838721665, "grad_norm": 1.8823205734356128, "learning_rate": 6.731365390064012e-06, "loss": 0.5298, "step": 24938 }, { "epoch": 1.8535117056856187, "grad_norm": 2.3638185717813474, "learning_rate": 6.7306071051605985e-06, "loss": 0.6312, "step": 24939 }, { "epoch": 1.853586027499071, "grad_norm": 2.7938906298065427, "learning_rate": 6.72984884130537e-06, "loss": 0.7671, "step": 24940 }, { "epoch": 1.8536603493125232, "grad_norm": 2.21419594751946, "learning_rate": 6.729090598503201e-06, "loss": 0.7076, "step": 24941 }, { "epoch": 1.8537346711259755, "grad_norm": 2.9436258859781033, "learning_rate": 6.728332376758977e-06, "loss": 0.6615, "step": 24942 }, { "epoch": 1.8538089929394277, "grad_norm": 2.1873662308868855, "learning_rate": 6.727574176077581e-06, "loss": 0.6326, "step": 24943 }, { "epoch": 1.85388331475288, "grad_norm": 1.362721748928191, "learning_rate": 6.726815996463892e-06, "loss": 0.3226, "step": 24944 }, { "epoch": 1.8539576365663322, "grad_norm": 1.9067705767140029, "learning_rate": 6.726057837922792e-06, "loss": 0.4929, "step": 24945 }, { "epoch": 1.8540319583797844, "grad_norm": 2.075164195712051, "learning_rate": 6.7252997004591604e-06, "loss": 0.621, "step": 24946 }, { "epoch": 1.8541062801932369, "grad_norm": 1.6625625216726045, "learning_rate": 6.724541584077879e-06, "loss": 0.4482, "step": 24947 }, { "epoch": 1.8541806020066889, "grad_norm": 1.9524552197558902, "learning_rate": 6.723783488783831e-06, "loss": 0.6773, "step": 24948 }, { "epoch": 1.8542549238201413, "grad_norm": 1.7786730186523232, "learning_rate": 6.7230254145818955e-06, "loss": 0.67, "step": 24949 }, { "epoch": 1.8543292456335934, "grad_norm": 2.0510614098323487, "learning_rate": 6.72226736147695e-06, "loss": 0.6225, "step": 24950 }, { "epoch": 1.8544035674470458, "grad_norm": 2.300482941024811, "learning_rate": 6.721509329473882e-06, "loss": 0.7253, "step": 24951 }, { "epoch": 1.8544778892604978, "grad_norm": 2.0670566731470075, "learning_rate": 6.720751318577563e-06, "loss": 0.6262, "step": 24952 }, { "epoch": 1.8545522110739503, "grad_norm": 1.8773787357982623, "learning_rate": 6.719993328792876e-06, "loss": 0.5225, "step": 24953 }, { "epoch": 1.8546265328874023, "grad_norm": 1.5120045871194654, "learning_rate": 6.719235360124703e-06, "loss": 0.5036, "step": 24954 }, { "epoch": 1.8547008547008548, "grad_norm": 2.1586455378306195, "learning_rate": 6.718477412577923e-06, "loss": 0.5364, "step": 24955 }, { "epoch": 1.8547751765143068, "grad_norm": 1.52889121620142, "learning_rate": 6.717719486157413e-06, "loss": 0.4777, "step": 24956 }, { "epoch": 1.8548494983277592, "grad_norm": 2.5355887500503296, "learning_rate": 6.716961580868056e-06, "loss": 0.4628, "step": 24957 }, { "epoch": 1.8549238201412115, "grad_norm": 1.9078756375398285, "learning_rate": 6.716203696714729e-06, "loss": 0.5331, "step": 24958 }, { "epoch": 1.8549981419546637, "grad_norm": 2.036151174702855, "learning_rate": 6.715445833702313e-06, "loss": 0.595, "step": 24959 }, { "epoch": 1.855072463768116, "grad_norm": 2.3858274620582267, "learning_rate": 6.714687991835689e-06, "loss": 0.707, "step": 24960 }, { "epoch": 1.8551467855815682, "grad_norm": 2.073781584470203, "learning_rate": 6.7139301711197294e-06, "loss": 0.5787, "step": 24961 }, { "epoch": 1.8552211073950204, "grad_norm": 2.0501423535285337, "learning_rate": 6.713172371559318e-06, "loss": 0.6364, "step": 24962 }, { "epoch": 1.8552954292084727, "grad_norm": 2.363144001664968, "learning_rate": 6.7124145931593356e-06, "loss": 0.5932, "step": 24963 }, { "epoch": 1.855369751021925, "grad_norm": 2.2287239039629085, "learning_rate": 6.711656835924655e-06, "loss": 0.4923, "step": 24964 }, { "epoch": 1.8554440728353772, "grad_norm": 2.245512796515844, "learning_rate": 6.710899099860155e-06, "loss": 0.6183, "step": 24965 }, { "epoch": 1.8555183946488294, "grad_norm": 2.020118382833462, "learning_rate": 6.710141384970719e-06, "loss": 0.6197, "step": 24966 }, { "epoch": 1.8555927164622816, "grad_norm": 1.7819184514764836, "learning_rate": 6.70938369126122e-06, "loss": 0.4856, "step": 24967 }, { "epoch": 1.855667038275734, "grad_norm": 1.746539889857727, "learning_rate": 6.708626018736538e-06, "loss": 0.6068, "step": 24968 }, { "epoch": 1.855741360089186, "grad_norm": 2.174204594645971, "learning_rate": 6.707868367401551e-06, "loss": 0.544, "step": 24969 }, { "epoch": 1.8558156819026386, "grad_norm": 2.090013247548321, "learning_rate": 6.707110737261138e-06, "loss": 0.6159, "step": 24970 }, { "epoch": 1.8558900037160906, "grad_norm": 1.8252694073207643, "learning_rate": 6.706353128320175e-06, "loss": 0.4948, "step": 24971 }, { "epoch": 1.855964325529543, "grad_norm": 2.1249288430631594, "learning_rate": 6.705595540583538e-06, "loss": 0.6798, "step": 24972 }, { "epoch": 1.856038647342995, "grad_norm": 1.7415814100493205, "learning_rate": 6.704837974056106e-06, "loss": 0.5524, "step": 24973 }, { "epoch": 1.8561129691564475, "grad_norm": 1.8217292241726288, "learning_rate": 6.704080428742761e-06, "loss": 0.5336, "step": 24974 }, { "epoch": 1.8561872909698995, "grad_norm": 2.4279997172955725, "learning_rate": 6.703322904648371e-06, "loss": 0.6894, "step": 24975 }, { "epoch": 1.856261612783352, "grad_norm": 2.438827603281307, "learning_rate": 6.702565401777816e-06, "loss": 0.702, "step": 24976 }, { "epoch": 1.856335934596804, "grad_norm": 1.8735002700832388, "learning_rate": 6.7018079201359775e-06, "loss": 0.5734, "step": 24977 }, { "epoch": 1.8564102564102565, "grad_norm": 1.5627241398416805, "learning_rate": 6.701050459727726e-06, "loss": 0.469, "step": 24978 }, { "epoch": 1.8564845782237085, "grad_norm": 2.1112891715806854, "learning_rate": 6.7002930205579395e-06, "loss": 0.5326, "step": 24979 }, { "epoch": 1.856558900037161, "grad_norm": 2.0431125175618092, "learning_rate": 6.699535602631497e-06, "loss": 0.5922, "step": 24980 }, { "epoch": 1.8566332218506132, "grad_norm": 1.9999254757751186, "learning_rate": 6.698778205953271e-06, "loss": 0.5356, "step": 24981 }, { "epoch": 1.8567075436640654, "grad_norm": 2.2117700036900914, "learning_rate": 6.698020830528143e-06, "loss": 0.7257, "step": 24982 }, { "epoch": 1.8567818654775177, "grad_norm": 1.875432392692892, "learning_rate": 6.697263476360984e-06, "loss": 0.5098, "step": 24983 }, { "epoch": 1.85685618729097, "grad_norm": 2.162669336199596, "learning_rate": 6.69650614345667e-06, "loss": 0.5113, "step": 24984 }, { "epoch": 1.8569305091044221, "grad_norm": 2.4233183316475797, "learning_rate": 6.695748831820084e-06, "loss": 0.4715, "step": 24985 }, { "epoch": 1.8570048309178744, "grad_norm": 1.8018213745508933, "learning_rate": 6.694991541456088e-06, "loss": 0.6448, "step": 24986 }, { "epoch": 1.8570791527313266, "grad_norm": 2.5427942720580248, "learning_rate": 6.694234272369569e-06, "loss": 0.6069, "step": 24987 }, { "epoch": 1.8571534745447789, "grad_norm": 1.4741136089267093, "learning_rate": 6.693477024565398e-06, "loss": 0.4321, "step": 24988 }, { "epoch": 1.857227796358231, "grad_norm": 2.108243958542922, "learning_rate": 6.692719798048448e-06, "loss": 0.6563, "step": 24989 }, { "epoch": 1.8573021181716833, "grad_norm": 2.0725502145297563, "learning_rate": 6.6919625928235975e-06, "loss": 0.6522, "step": 24990 }, { "epoch": 1.8573764399851358, "grad_norm": 1.8515726456908375, "learning_rate": 6.69120540889572e-06, "loss": 0.5796, "step": 24991 }, { "epoch": 1.8574507617985878, "grad_norm": 1.9848219286780475, "learning_rate": 6.69044824626969e-06, "loss": 0.6524, "step": 24992 }, { "epoch": 1.8575250836120403, "grad_norm": 1.678298831823211, "learning_rate": 6.689691104950384e-06, "loss": 0.4702, "step": 24993 }, { "epoch": 1.8575994054254923, "grad_norm": 1.9868508604318573, "learning_rate": 6.688933984942674e-06, "loss": 0.6235, "step": 24994 }, { "epoch": 1.8576737272389447, "grad_norm": 2.3536870093474302, "learning_rate": 6.688176886251434e-06, "loss": 0.7057, "step": 24995 }, { "epoch": 1.8577480490523968, "grad_norm": 3.118163508069263, "learning_rate": 6.687419808881546e-06, "loss": 0.5976, "step": 24996 }, { "epoch": 1.8578223708658492, "grad_norm": 2.4273802717659136, "learning_rate": 6.686662752837872e-06, "loss": 0.6009, "step": 24997 }, { "epoch": 1.8578966926793012, "grad_norm": 3.5340249342668257, "learning_rate": 6.685905718125289e-06, "loss": 0.7153, "step": 24998 }, { "epoch": 1.8579710144927537, "grad_norm": 1.76297832306392, "learning_rate": 6.685148704748677e-06, "loss": 0.5895, "step": 24999 }, { "epoch": 1.8580453363062057, "grad_norm": 1.735282093576907, "learning_rate": 6.684391712712903e-06, "loss": 0.5388, "step": 25000 }, { "epoch": 1.8581196581196582, "grad_norm": 1.8512671110815306, "learning_rate": 6.683634742022844e-06, "loss": 0.5626, "step": 25001 }, { "epoch": 1.8581939799331104, "grad_norm": 1.7679743816392017, "learning_rate": 6.682877792683372e-06, "loss": 0.5027, "step": 25002 }, { "epoch": 1.8582683017465627, "grad_norm": 1.8535899237250617, "learning_rate": 6.682120864699361e-06, "loss": 0.5448, "step": 25003 }, { "epoch": 1.858342623560015, "grad_norm": 1.7955646783728518, "learning_rate": 6.681363958075686e-06, "loss": 0.5854, "step": 25004 }, { "epoch": 1.8584169453734671, "grad_norm": 1.7067470460579426, "learning_rate": 6.680607072817214e-06, "loss": 0.5043, "step": 25005 }, { "epoch": 1.8584912671869194, "grad_norm": 1.8359470265184246, "learning_rate": 6.679850208928823e-06, "loss": 0.6744, "step": 25006 }, { "epoch": 1.8585655890003716, "grad_norm": 1.6721059019199134, "learning_rate": 6.679093366415383e-06, "loss": 0.5073, "step": 25007 }, { "epoch": 1.8586399108138238, "grad_norm": 1.6361366749168649, "learning_rate": 6.678336545281773e-06, "loss": 0.5456, "step": 25008 }, { "epoch": 1.858714232627276, "grad_norm": 1.8505006004377924, "learning_rate": 6.677579745532855e-06, "loss": 0.567, "step": 25009 }, { "epoch": 1.8587885544407283, "grad_norm": 1.812071224595589, "learning_rate": 6.676822967173509e-06, "loss": 0.5792, "step": 25010 }, { "epoch": 1.8588628762541806, "grad_norm": 2.1712198837632157, "learning_rate": 6.676066210208601e-06, "loss": 0.6221, "step": 25011 }, { "epoch": 1.8589371980676328, "grad_norm": 1.5329844959973329, "learning_rate": 6.675309474643007e-06, "loss": 0.4078, "step": 25012 }, { "epoch": 1.859011519881085, "grad_norm": 1.846061984154547, "learning_rate": 6.6745527604815985e-06, "loss": 0.3464, "step": 25013 }, { "epoch": 1.8590858416945375, "grad_norm": 1.9849502969706465, "learning_rate": 6.673796067729248e-06, "loss": 0.6145, "step": 25014 }, { "epoch": 1.8591601635079895, "grad_norm": 1.8280819645325241, "learning_rate": 6.673039396390823e-06, "loss": 0.5351, "step": 25015 }, { "epoch": 1.859234485321442, "grad_norm": 1.9863859605005918, "learning_rate": 6.672282746471199e-06, "loss": 0.5831, "step": 25016 }, { "epoch": 1.859308807134894, "grad_norm": 2.2549501309408773, "learning_rate": 6.671526117975247e-06, "loss": 0.5207, "step": 25017 }, { "epoch": 1.8593831289483465, "grad_norm": 2.591270776950694, "learning_rate": 6.670769510907835e-06, "loss": 0.6016, "step": 25018 }, { "epoch": 1.8594574507617985, "grad_norm": 1.920371756263051, "learning_rate": 6.670012925273842e-06, "loss": 0.473, "step": 25019 }, { "epoch": 1.859531772575251, "grad_norm": 2.1141156649707216, "learning_rate": 6.669256361078129e-06, "loss": 0.6188, "step": 25020 }, { "epoch": 1.859606094388703, "grad_norm": 2.2692726549677573, "learning_rate": 6.668499818325571e-06, "loss": 0.579, "step": 25021 }, { "epoch": 1.8596804162021554, "grad_norm": 2.1063422961396294, "learning_rate": 6.667743297021037e-06, "loss": 0.5857, "step": 25022 }, { "epoch": 1.8597547380156074, "grad_norm": 1.7693267456036563, "learning_rate": 6.666986797169399e-06, "loss": 0.4589, "step": 25023 }, { "epoch": 1.8598290598290599, "grad_norm": 1.965515624476535, "learning_rate": 6.666230318775528e-06, "loss": 0.63, "step": 25024 }, { "epoch": 1.8599033816425121, "grad_norm": 2.0269851648793957, "learning_rate": 6.6654738618442935e-06, "loss": 0.5956, "step": 25025 }, { "epoch": 1.8599777034559644, "grad_norm": 1.855966596665383, "learning_rate": 6.6647174263805645e-06, "loss": 0.5497, "step": 25026 }, { "epoch": 1.8600520252694166, "grad_norm": 2.226725567964004, "learning_rate": 6.6639610123892105e-06, "loss": 0.5584, "step": 25027 }, { "epoch": 1.8601263470828688, "grad_norm": 2.46308269798697, "learning_rate": 6.6632046198751035e-06, "loss": 0.6609, "step": 25028 }, { "epoch": 1.860200668896321, "grad_norm": 2.1339929225800476, "learning_rate": 6.662448248843112e-06, "loss": 0.5279, "step": 25029 }, { "epoch": 1.8602749907097733, "grad_norm": 1.758061536901056, "learning_rate": 6.6616918992981095e-06, "loss": 0.531, "step": 25030 }, { "epoch": 1.8603493125232256, "grad_norm": 1.9280034246165922, "learning_rate": 6.660935571244957e-06, "loss": 0.5368, "step": 25031 }, { "epoch": 1.8604236343366778, "grad_norm": 1.6148996754821232, "learning_rate": 6.66017926468853e-06, "loss": 0.3874, "step": 25032 }, { "epoch": 1.86049795615013, "grad_norm": 2.341527470689516, "learning_rate": 6.659422979633694e-06, "loss": 0.6698, "step": 25033 }, { "epoch": 1.8605722779635823, "grad_norm": 1.9683712036966237, "learning_rate": 6.658666716085318e-06, "loss": 0.6529, "step": 25034 }, { "epoch": 1.8606465997770345, "grad_norm": 2.2857629568066686, "learning_rate": 6.657910474048275e-06, "loss": 0.7127, "step": 25035 }, { "epoch": 1.8607209215904867, "grad_norm": 1.8963325218039242, "learning_rate": 6.65715425352743e-06, "loss": 0.5372, "step": 25036 }, { "epoch": 1.8607952434039392, "grad_norm": 2.2365456625374476, "learning_rate": 6.656398054527653e-06, "loss": 0.6626, "step": 25037 }, { "epoch": 1.8608695652173912, "grad_norm": 1.910916957059273, "learning_rate": 6.65564187705381e-06, "loss": 0.5331, "step": 25038 }, { "epoch": 1.8609438870308437, "grad_norm": 2.2102467735790787, "learning_rate": 6.6548857211107705e-06, "loss": 0.6613, "step": 25039 }, { "epoch": 1.8610182088442957, "grad_norm": 2.1656557053633563, "learning_rate": 6.654129586703406e-06, "loss": 0.6837, "step": 25040 }, { "epoch": 1.8610925306577482, "grad_norm": 2.11243904484133, "learning_rate": 6.653373473836582e-06, "loss": 0.5922, "step": 25041 }, { "epoch": 1.8611668524712002, "grad_norm": 3.0875780393122505, "learning_rate": 6.652617382515165e-06, "loss": 0.543, "step": 25042 }, { "epoch": 1.8612411742846526, "grad_norm": 2.230144215577006, "learning_rate": 6.651861312744025e-06, "loss": 0.6758, "step": 25043 }, { "epoch": 1.8613154960981046, "grad_norm": 1.80339120602637, "learning_rate": 6.651105264528026e-06, "loss": 0.5023, "step": 25044 }, { "epoch": 1.861389817911557, "grad_norm": 1.9599417470834697, "learning_rate": 6.650349237872038e-06, "loss": 0.555, "step": 25045 }, { "epoch": 1.8614641397250091, "grad_norm": 1.5493835717062436, "learning_rate": 6.649593232780926e-06, "loss": 0.3659, "step": 25046 }, { "epoch": 1.8615384615384616, "grad_norm": 1.722013081564931, "learning_rate": 6.648837249259564e-06, "loss": 0.526, "step": 25047 }, { "epoch": 1.8616127833519138, "grad_norm": 1.7828435959115925, "learning_rate": 6.648081287312809e-06, "loss": 0.5396, "step": 25048 }, { "epoch": 1.861687105165366, "grad_norm": 1.786222452079679, "learning_rate": 6.647325346945534e-06, "loss": 0.5097, "step": 25049 }, { "epoch": 1.8617614269788183, "grad_norm": 2.1619555597224887, "learning_rate": 6.646569428162604e-06, "loss": 0.6453, "step": 25050 }, { "epoch": 1.8618357487922705, "grad_norm": 1.8912666192438565, "learning_rate": 6.645813530968885e-06, "loss": 0.4596, "step": 25051 }, { "epoch": 1.8619100706057228, "grad_norm": 1.9129284734509402, "learning_rate": 6.645057655369248e-06, "loss": 0.5354, "step": 25052 }, { "epoch": 1.861984392419175, "grad_norm": 1.7718189707391265, "learning_rate": 6.644301801368556e-06, "loss": 0.5942, "step": 25053 }, { "epoch": 1.8620587142326273, "grad_norm": 2.6749393365063296, "learning_rate": 6.643545968971674e-06, "loss": 0.6, "step": 25054 }, { "epoch": 1.8621330360460795, "grad_norm": 2.457562637377994, "learning_rate": 6.642790158183468e-06, "loss": 0.5903, "step": 25055 }, { "epoch": 1.8622073578595317, "grad_norm": 1.7667019839562845, "learning_rate": 6.642034369008803e-06, "loss": 0.5058, "step": 25056 }, { "epoch": 1.862281679672984, "grad_norm": 1.7562697474101343, "learning_rate": 6.641278601452548e-06, "loss": 0.5643, "step": 25057 }, { "epoch": 1.8623560014864364, "grad_norm": 1.9020584184059968, "learning_rate": 6.640522855519568e-06, "loss": 0.497, "step": 25058 }, { "epoch": 1.8624303232998884, "grad_norm": 1.9714449146725288, "learning_rate": 6.639767131214726e-06, "loss": 0.4739, "step": 25059 }, { "epoch": 1.862504645113341, "grad_norm": 1.9580913998843301, "learning_rate": 6.639011428542889e-06, "loss": 0.5281, "step": 25060 }, { "epoch": 1.862578966926793, "grad_norm": 2.3178284402673297, "learning_rate": 6.638255747508921e-06, "loss": 0.6562, "step": 25061 }, { "epoch": 1.8626532887402454, "grad_norm": 1.755809876586961, "learning_rate": 6.63750008811769e-06, "loss": 0.5847, "step": 25062 }, { "epoch": 1.8627276105536974, "grad_norm": 1.721634960618858, "learning_rate": 6.6367444503740595e-06, "loss": 0.5329, "step": 25063 }, { "epoch": 1.8628019323671499, "grad_norm": 1.7478765804184615, "learning_rate": 6.635988834282893e-06, "loss": 0.5056, "step": 25064 }, { "epoch": 1.8628762541806019, "grad_norm": 2.4346188993167566, "learning_rate": 6.6352332398490595e-06, "loss": 0.4844, "step": 25065 }, { "epoch": 1.8629505759940543, "grad_norm": 2.121874268526633, "learning_rate": 6.634477667077415e-06, "loss": 0.5846, "step": 25066 }, { "epoch": 1.8630248978075064, "grad_norm": 1.9004731505127352, "learning_rate": 6.633722115972828e-06, "loss": 0.5245, "step": 25067 }, { "epoch": 1.8630992196209588, "grad_norm": 1.9161438916011406, "learning_rate": 6.632966586540165e-06, "loss": 0.5228, "step": 25068 }, { "epoch": 1.863173541434411, "grad_norm": 1.800384648024355, "learning_rate": 6.632211078784289e-06, "loss": 0.6277, "step": 25069 }, { "epoch": 1.8632478632478633, "grad_norm": 1.9306216871775421, "learning_rate": 6.631455592710063e-06, "loss": 0.6075, "step": 25070 }, { "epoch": 1.8633221850613155, "grad_norm": 1.6090358674397582, "learning_rate": 6.630700128322349e-06, "loss": 0.4748, "step": 25071 }, { "epoch": 1.8633965068747678, "grad_norm": 2.0241390910845207, "learning_rate": 6.629944685626015e-06, "loss": 0.6184, "step": 25072 }, { "epoch": 1.86347082868822, "grad_norm": 1.9761712187934923, "learning_rate": 6.629189264625922e-06, "loss": 0.5402, "step": 25073 }, { "epoch": 1.8635451505016722, "grad_norm": 2.474762235333968, "learning_rate": 6.628433865326933e-06, "loss": 0.5594, "step": 25074 }, { "epoch": 1.8636194723151245, "grad_norm": 2.012338357915712, "learning_rate": 6.627678487733913e-06, "loss": 0.6626, "step": 25075 }, { "epoch": 1.8636937941285767, "grad_norm": 1.6171366721708897, "learning_rate": 6.626923131851726e-06, "loss": 0.4794, "step": 25076 }, { "epoch": 1.863768115942029, "grad_norm": 2.2393196228861854, "learning_rate": 6.62616779768523e-06, "loss": 0.5928, "step": 25077 }, { "epoch": 1.8638424377554812, "grad_norm": 1.9617662923310866, "learning_rate": 6.62541248523929e-06, "loss": 0.7151, "step": 25078 }, { "epoch": 1.8639167595689334, "grad_norm": 2.156770719882694, "learning_rate": 6.6246571945187685e-06, "loss": 0.6503, "step": 25079 }, { "epoch": 1.8639910813823857, "grad_norm": 1.8209408561020703, "learning_rate": 6.623901925528533e-06, "loss": 0.5925, "step": 25080 }, { "epoch": 1.8640654031958381, "grad_norm": 2.1073027283201906, "learning_rate": 6.623146678273438e-06, "loss": 0.6326, "step": 25081 }, { "epoch": 1.8641397250092901, "grad_norm": 1.9311046501536762, "learning_rate": 6.62239145275835e-06, "loss": 0.5508, "step": 25082 }, { "epoch": 1.8642140468227426, "grad_norm": 1.842374313076123, "learning_rate": 6.6216362489881305e-06, "loss": 0.6011, "step": 25083 }, { "epoch": 1.8642883686361946, "grad_norm": 2.6412610568065995, "learning_rate": 6.620881066967641e-06, "loss": 0.4973, "step": 25084 }, { "epoch": 1.864362690449647, "grad_norm": 1.8915808024673975, "learning_rate": 6.6201259067017466e-06, "loss": 0.4606, "step": 25085 }, { "epoch": 1.864437012263099, "grad_norm": 2.4295194846138104, "learning_rate": 6.619370768195304e-06, "loss": 0.6075, "step": 25086 }, { "epoch": 1.8645113340765516, "grad_norm": 1.5936201600167679, "learning_rate": 6.618615651453182e-06, "loss": 0.3816, "step": 25087 }, { "epoch": 1.8645856558900036, "grad_norm": 2.036253999374583, "learning_rate": 6.6178605564802315e-06, "loss": 0.7363, "step": 25088 }, { "epoch": 1.864659977703456, "grad_norm": 1.8499425568176275, "learning_rate": 6.617105483281321e-06, "loss": 0.656, "step": 25089 }, { "epoch": 1.864734299516908, "grad_norm": 1.3921475049944925, "learning_rate": 6.616350431861308e-06, "loss": 0.3416, "step": 25090 }, { "epoch": 1.8648086213303605, "grad_norm": 1.6859813172590674, "learning_rate": 6.615595402225058e-06, "loss": 0.5377, "step": 25091 }, { "epoch": 1.8648829431438128, "grad_norm": 2.535087746899545, "learning_rate": 6.614840394377426e-06, "loss": 0.5927, "step": 25092 }, { "epoch": 1.864957264957265, "grad_norm": 2.669693436524638, "learning_rate": 6.614085408323279e-06, "loss": 0.5807, "step": 25093 }, { "epoch": 1.8650315867707172, "grad_norm": 2.062232474096415, "learning_rate": 6.613330444067472e-06, "loss": 0.5426, "step": 25094 }, { "epoch": 1.8651059085841695, "grad_norm": 2.366512433805782, "learning_rate": 6.612575501614869e-06, "loss": 0.5735, "step": 25095 }, { "epoch": 1.8651802303976217, "grad_norm": 1.8586993445017088, "learning_rate": 6.611820580970329e-06, "loss": 0.4569, "step": 25096 }, { "epoch": 1.865254552211074, "grad_norm": 1.9695019390593254, "learning_rate": 6.6110656821387134e-06, "loss": 0.4864, "step": 25097 }, { "epoch": 1.8653288740245262, "grad_norm": 1.7112871307528001, "learning_rate": 6.610310805124883e-06, "loss": 0.587, "step": 25098 }, { "epoch": 1.8654031958379784, "grad_norm": 2.2652588948356653, "learning_rate": 6.6095559499336924e-06, "loss": 0.7514, "step": 25099 }, { "epoch": 1.8654775176514307, "grad_norm": 2.3982418111392265, "learning_rate": 6.608801116570005e-06, "loss": 0.824, "step": 25100 }, { "epoch": 1.865551839464883, "grad_norm": 2.209239935177111, "learning_rate": 6.6080463050386804e-06, "loss": 0.463, "step": 25101 }, { "epoch": 1.8656261612783351, "grad_norm": 2.4187860809233306, "learning_rate": 6.607291515344578e-06, "loss": 0.4877, "step": 25102 }, { "epoch": 1.8657004830917874, "grad_norm": 2.0429483898939753, "learning_rate": 6.606536747492555e-06, "loss": 0.6005, "step": 25103 }, { "epoch": 1.8657748049052398, "grad_norm": 1.8138848486730004, "learning_rate": 6.605782001487474e-06, "loss": 0.4743, "step": 25104 }, { "epoch": 1.8658491267186919, "grad_norm": 2.1921676143899864, "learning_rate": 6.605027277334192e-06, "loss": 0.5625, "step": 25105 }, { "epoch": 1.8659234485321443, "grad_norm": 1.9411622750863766, "learning_rate": 6.6042725750375665e-06, "loss": 0.6401, "step": 25106 }, { "epoch": 1.8659977703455963, "grad_norm": 1.6012146293034553, "learning_rate": 6.603517894602463e-06, "loss": 0.552, "step": 25107 }, { "epoch": 1.8660720921590488, "grad_norm": 1.717974592673651, "learning_rate": 6.602763236033731e-06, "loss": 0.5077, "step": 25108 }, { "epoch": 1.8661464139725008, "grad_norm": 1.7607714912598964, "learning_rate": 6.602008599336237e-06, "loss": 0.3977, "step": 25109 }, { "epoch": 1.8662207357859533, "grad_norm": 1.8864489435339147, "learning_rate": 6.6012539845148314e-06, "loss": 0.5829, "step": 25110 }, { "epoch": 1.8662950575994053, "grad_norm": 1.9466665240053254, "learning_rate": 6.600499391574376e-06, "loss": 0.5849, "step": 25111 }, { "epoch": 1.8663693794128577, "grad_norm": 1.5597798052996472, "learning_rate": 6.59974482051973e-06, "loss": 0.5757, "step": 25112 }, { "epoch": 1.8664437012263098, "grad_norm": 2.7716065254521403, "learning_rate": 6.598990271355752e-06, "loss": 0.622, "step": 25113 }, { "epoch": 1.8665180230397622, "grad_norm": 1.8503082873890282, "learning_rate": 6.598235744087297e-06, "loss": 0.3721, "step": 25114 }, { "epoch": 1.8665923448532145, "grad_norm": 1.9640770872977045, "learning_rate": 6.597481238719223e-06, "loss": 0.4072, "step": 25115 }, { "epoch": 1.8666666666666667, "grad_norm": 1.5065160589697844, "learning_rate": 6.596726755256389e-06, "loss": 0.5155, "step": 25116 }, { "epoch": 1.866740988480119, "grad_norm": 1.9770060677747983, "learning_rate": 6.59597229370365e-06, "loss": 0.6288, "step": 25117 }, { "epoch": 1.8668153102935712, "grad_norm": 1.6961910576769395, "learning_rate": 6.5952178540658676e-06, "loss": 0.5199, "step": 25118 }, { "epoch": 1.8668896321070234, "grad_norm": 1.9406040320016107, "learning_rate": 6.5944634363478934e-06, "loss": 0.5337, "step": 25119 }, { "epoch": 1.8669639539204756, "grad_norm": 1.7496943852214892, "learning_rate": 6.593709040554588e-06, "loss": 0.5493, "step": 25120 }, { "epoch": 1.8670382757339279, "grad_norm": 3.1450590590446255, "learning_rate": 6.5929546666908095e-06, "loss": 0.4797, "step": 25121 }, { "epoch": 1.8671125975473801, "grad_norm": 2.267931395474774, "learning_rate": 6.59220031476141e-06, "loss": 0.5245, "step": 25122 }, { "epoch": 1.8671869193608324, "grad_norm": 1.847054530591224, "learning_rate": 6.591445984771247e-06, "loss": 0.5336, "step": 25123 }, { "epoch": 1.8672612411742846, "grad_norm": 1.8524766066139258, "learning_rate": 6.590691676725181e-06, "loss": 0.5656, "step": 25124 }, { "epoch": 1.867335562987737, "grad_norm": 1.9389050654771671, "learning_rate": 6.58993739062806e-06, "loss": 0.6032, "step": 25125 }, { "epoch": 1.867409884801189, "grad_norm": 1.8450231008745028, "learning_rate": 6.589183126484748e-06, "loss": 0.6369, "step": 25126 }, { "epoch": 1.8674842066146415, "grad_norm": 2.363729491079638, "learning_rate": 6.5884288843000975e-06, "loss": 0.6349, "step": 25127 }, { "epoch": 1.8675585284280936, "grad_norm": 1.7284013529681679, "learning_rate": 6.587674664078965e-06, "loss": 0.5338, "step": 25128 }, { "epoch": 1.867632850241546, "grad_norm": 1.8674962965548854, "learning_rate": 6.586920465826207e-06, "loss": 0.5587, "step": 25129 }, { "epoch": 1.867707172054998, "grad_norm": 2.160564090365984, "learning_rate": 6.586166289546677e-06, "loss": 0.637, "step": 25130 }, { "epoch": 1.8677814938684505, "grad_norm": 2.029560909972885, "learning_rate": 6.585412135245231e-06, "loss": 0.578, "step": 25131 }, { "epoch": 1.8678558156819025, "grad_norm": 2.1259644031211975, "learning_rate": 6.584658002926728e-06, "loss": 0.5376, "step": 25132 }, { "epoch": 1.867930137495355, "grad_norm": 1.844833939049662, "learning_rate": 6.583903892596016e-06, "loss": 0.6463, "step": 25133 }, { "epoch": 1.868004459308807, "grad_norm": 1.852103394779826, "learning_rate": 6.583149804257953e-06, "loss": 0.4948, "step": 25134 }, { "epoch": 1.8680787811222594, "grad_norm": 2.3740942966858065, "learning_rate": 6.582395737917398e-06, "loss": 0.6768, "step": 25135 }, { "epoch": 1.8681531029357117, "grad_norm": 1.8038577505854732, "learning_rate": 6.581641693579198e-06, "loss": 0.5221, "step": 25136 }, { "epoch": 1.868227424749164, "grad_norm": 3.2716855269997063, "learning_rate": 6.580887671248212e-06, "loss": 0.6228, "step": 25137 }, { "epoch": 1.8683017465626162, "grad_norm": 1.9349836196709842, "learning_rate": 6.580133670929294e-06, "loss": 0.6071, "step": 25138 }, { "epoch": 1.8683760683760684, "grad_norm": 2.093968783954998, "learning_rate": 6.579379692627299e-06, "loss": 0.5987, "step": 25139 }, { "epoch": 1.8684503901895206, "grad_norm": 2.1765752161913605, "learning_rate": 6.5786257363470796e-06, "loss": 0.6174, "step": 25140 }, { "epoch": 1.8685247120029729, "grad_norm": 2.1544189697948317, "learning_rate": 6.57787180209349e-06, "loss": 0.5185, "step": 25141 }, { "epoch": 1.8685990338164251, "grad_norm": 1.573034943715298, "learning_rate": 6.577117889871383e-06, "loss": 0.5585, "step": 25142 }, { "epoch": 1.8686733556298774, "grad_norm": 2.1614967842738015, "learning_rate": 6.576363999685618e-06, "loss": 0.5712, "step": 25143 }, { "epoch": 1.8687476774433296, "grad_norm": 2.024286387951578, "learning_rate": 6.5756101315410395e-06, "loss": 0.5764, "step": 25144 }, { "epoch": 1.8688219992567818, "grad_norm": 1.613779353537367, "learning_rate": 6.574856285442506e-06, "loss": 0.5073, "step": 25145 }, { "epoch": 1.868896321070234, "grad_norm": 1.9227804339550456, "learning_rate": 6.574102461394871e-06, "loss": 0.6446, "step": 25146 }, { "epoch": 1.8689706428836863, "grad_norm": 1.9818405939771357, "learning_rate": 6.573348659402986e-06, "loss": 0.5823, "step": 25147 }, { "epoch": 1.8690449646971388, "grad_norm": 2.1350611553982057, "learning_rate": 6.5725948794717035e-06, "loss": 0.6125, "step": 25148 }, { "epoch": 1.8691192865105908, "grad_norm": 1.7471220341524658, "learning_rate": 6.5718411216058775e-06, "loss": 0.4583, "step": 25149 }, { "epoch": 1.8691936083240432, "grad_norm": 1.9100805114889439, "learning_rate": 6.571087385810363e-06, "loss": 0.4876, "step": 25150 }, { "epoch": 1.8692679301374953, "grad_norm": 3.702635317757113, "learning_rate": 6.570333672090008e-06, "loss": 0.623, "step": 25151 }, { "epoch": 1.8693422519509477, "grad_norm": 1.8881453777369934, "learning_rate": 6.5695799804496665e-06, "loss": 0.5787, "step": 25152 }, { "epoch": 1.8694165737643997, "grad_norm": 1.786712604514577, "learning_rate": 6.568826310894191e-06, "loss": 0.4889, "step": 25153 }, { "epoch": 1.8694908955778522, "grad_norm": 1.7864123270141374, "learning_rate": 6.568072663428433e-06, "loss": 0.5291, "step": 25154 }, { "epoch": 1.8695652173913042, "grad_norm": 2.2046428651948964, "learning_rate": 6.567319038057251e-06, "loss": 0.7047, "step": 25155 }, { "epoch": 1.8696395392047567, "grad_norm": 2.1836197563310504, "learning_rate": 6.5665654347854855e-06, "loss": 0.7493, "step": 25156 }, { "epoch": 1.8697138610182087, "grad_norm": 1.6548980253406411, "learning_rate": 6.565811853617996e-06, "loss": 0.5069, "step": 25157 }, { "epoch": 1.8697881828316611, "grad_norm": 1.9471015291457214, "learning_rate": 6.565058294559629e-06, "loss": 0.5935, "step": 25158 }, { "epoch": 1.8698625046451134, "grad_norm": 1.7589329229951194, "learning_rate": 6.56430475761524e-06, "loss": 0.5003, "step": 25159 }, { "epoch": 1.8699368264585656, "grad_norm": 2.063913970269909, "learning_rate": 6.563551242789678e-06, "loss": 0.6044, "step": 25160 }, { "epoch": 1.8700111482720179, "grad_norm": 1.8270837126066615, "learning_rate": 6.5627977500877956e-06, "loss": 0.5935, "step": 25161 }, { "epoch": 1.87008547008547, "grad_norm": 1.5232526762841136, "learning_rate": 6.562044279514442e-06, "loss": 0.457, "step": 25162 }, { "epoch": 1.8701597918989223, "grad_norm": 1.849395478659485, "learning_rate": 6.561290831074469e-06, "loss": 0.562, "step": 25163 }, { "epoch": 1.8702341137123746, "grad_norm": 2.0249099190653883, "learning_rate": 6.560537404772727e-06, "loss": 0.7075, "step": 25164 }, { "epoch": 1.8703084355258268, "grad_norm": 1.632185037465152, "learning_rate": 6.5597840006140665e-06, "loss": 0.4455, "step": 25165 }, { "epoch": 1.870382757339279, "grad_norm": 1.2340752718336583, "learning_rate": 6.5590306186033425e-06, "loss": 0.2943, "step": 25166 }, { "epoch": 1.8704570791527313, "grad_norm": 2.093641186564442, "learning_rate": 6.558277258745397e-06, "loss": 0.6168, "step": 25167 }, { "epoch": 1.8705314009661835, "grad_norm": 1.6286762013538727, "learning_rate": 6.557523921045086e-06, "loss": 0.4968, "step": 25168 }, { "epoch": 1.8706057227796358, "grad_norm": 1.9409831914760751, "learning_rate": 6.556770605507255e-06, "loss": 0.4854, "step": 25169 }, { "epoch": 1.870680044593088, "grad_norm": 1.5903369676557362, "learning_rate": 6.556017312136756e-06, "loss": 0.3476, "step": 25170 }, { "epoch": 1.8707543664065405, "grad_norm": 1.9041729964561005, "learning_rate": 6.5552640409384385e-06, "loss": 0.5493, "step": 25171 }, { "epoch": 1.8708286882199925, "grad_norm": 1.8307974192624612, "learning_rate": 6.554510791917155e-06, "loss": 0.4371, "step": 25172 }, { "epoch": 1.870903010033445, "grad_norm": 2.407048936218815, "learning_rate": 6.55375756507775e-06, "loss": 0.5981, "step": 25173 }, { "epoch": 1.870977331846897, "grad_norm": 2.814903951837499, "learning_rate": 6.5530043604250745e-06, "loss": 0.7913, "step": 25174 }, { "epoch": 1.8710516536603494, "grad_norm": 1.5953063256659576, "learning_rate": 6.5522511779639796e-06, "loss": 0.366, "step": 25175 }, { "epoch": 1.8711259754738014, "grad_norm": 2.3540771714006268, "learning_rate": 6.551498017699312e-06, "loss": 0.5606, "step": 25176 }, { "epoch": 1.871200297287254, "grad_norm": 1.9931221727600479, "learning_rate": 6.550744879635924e-06, "loss": 0.5877, "step": 25177 }, { "epoch": 1.871274619100706, "grad_norm": 2.493824775904326, "learning_rate": 6.5499917637786594e-06, "loss": 0.5675, "step": 25178 }, { "epoch": 1.8713489409141584, "grad_norm": 1.951599686277925, "learning_rate": 6.54923867013237e-06, "loss": 0.5107, "step": 25179 }, { "epoch": 1.8714232627276104, "grad_norm": 2.0750051309596698, "learning_rate": 6.548485598701901e-06, "loss": 0.6756, "step": 25180 }, { "epoch": 1.8714975845410629, "grad_norm": 2.359579319189285, "learning_rate": 6.547732549492103e-06, "loss": 0.8056, "step": 25181 }, { "epoch": 1.871571906354515, "grad_norm": 2.072903677040105, "learning_rate": 6.546979522507823e-06, "loss": 0.5875, "step": 25182 }, { "epoch": 1.8716462281679673, "grad_norm": 2.3152792690482924, "learning_rate": 6.546226517753911e-06, "loss": 0.7079, "step": 25183 }, { "epoch": 1.8717205499814196, "grad_norm": 2.3981046945337545, "learning_rate": 6.545473535235213e-06, "loss": 0.7058, "step": 25184 }, { "epoch": 1.8717948717948718, "grad_norm": 2.1297863420645244, "learning_rate": 6.544720574956577e-06, "loss": 0.57, "step": 25185 }, { "epoch": 1.871869193608324, "grad_norm": 1.7452160920952224, "learning_rate": 6.54396763692285e-06, "loss": 0.5315, "step": 25186 }, { "epoch": 1.8719435154217763, "grad_norm": 2.1371887978490736, "learning_rate": 6.543214721138879e-06, "loss": 0.5911, "step": 25187 }, { "epoch": 1.8720178372352285, "grad_norm": 2.3335726571409507, "learning_rate": 6.542461827609517e-06, "loss": 0.5882, "step": 25188 }, { "epoch": 1.8720921590486808, "grad_norm": 2.013003430843181, "learning_rate": 6.5417089563396054e-06, "loss": 0.6356, "step": 25189 }, { "epoch": 1.872166480862133, "grad_norm": 1.6743019528274006, "learning_rate": 6.540956107333991e-06, "loss": 0.5535, "step": 25190 }, { "epoch": 1.8722408026755852, "grad_norm": 1.8556761390828824, "learning_rate": 6.5402032805975205e-06, "loss": 0.6316, "step": 25191 }, { "epoch": 1.8723151244890377, "grad_norm": 1.8664348378667015, "learning_rate": 6.539450476135041e-06, "loss": 0.5063, "step": 25192 }, { "epoch": 1.8723894463024897, "grad_norm": 2.5208926301455645, "learning_rate": 6.5386976939514e-06, "loss": 0.5818, "step": 25193 }, { "epoch": 1.8724637681159422, "grad_norm": 1.8456190796040552, "learning_rate": 6.5379449340514465e-06, "loss": 0.5439, "step": 25194 }, { "epoch": 1.8725380899293942, "grad_norm": 1.7285412864245047, "learning_rate": 6.537192196440021e-06, "loss": 0.5916, "step": 25195 }, { "epoch": 1.8726124117428466, "grad_norm": 1.89792000269561, "learning_rate": 6.536439481121974e-06, "loss": 0.702, "step": 25196 }, { "epoch": 1.8726867335562987, "grad_norm": 1.8403374428028076, "learning_rate": 6.535686788102148e-06, "loss": 0.4484, "step": 25197 }, { "epoch": 1.8727610553697511, "grad_norm": 1.6965429767844862, "learning_rate": 6.534934117385392e-06, "loss": 0.5154, "step": 25198 }, { "epoch": 1.8728353771832031, "grad_norm": 1.5554995161656793, "learning_rate": 6.53418146897655e-06, "loss": 0.4985, "step": 25199 }, { "epoch": 1.8729096989966556, "grad_norm": 2.064336143540469, "learning_rate": 6.5334288428804715e-06, "loss": 0.5756, "step": 25200 }, { "epoch": 1.8729840208101076, "grad_norm": 2.5960706565377336, "learning_rate": 6.532676239101996e-06, "loss": 0.6762, "step": 25201 }, { "epoch": 1.87305834262356, "grad_norm": 2.192115274799771, "learning_rate": 6.531923657645969e-06, "loss": 0.672, "step": 25202 }, { "epoch": 1.8731326644370123, "grad_norm": 1.8841629834043851, "learning_rate": 6.531171098517238e-06, "loss": 0.4395, "step": 25203 }, { "epoch": 1.8732069862504646, "grad_norm": 2.580926319883873, "learning_rate": 6.530418561720648e-06, "loss": 0.4429, "step": 25204 }, { "epoch": 1.8732813080639168, "grad_norm": 2.114007926203297, "learning_rate": 6.529666047261044e-06, "loss": 0.6675, "step": 25205 }, { "epoch": 1.873355629877369, "grad_norm": 2.120752917281417, "learning_rate": 6.5289135551432684e-06, "loss": 0.5739, "step": 25206 }, { "epoch": 1.8734299516908213, "grad_norm": 4.737921818196388, "learning_rate": 6.5281610853721674e-06, "loss": 0.7464, "step": 25207 }, { "epoch": 1.8735042735042735, "grad_norm": 2.09494778997202, "learning_rate": 6.527408637952586e-06, "loss": 0.5587, "step": 25208 }, { "epoch": 1.8735785953177257, "grad_norm": 1.9338846187574878, "learning_rate": 6.5266562128893665e-06, "loss": 0.5792, "step": 25209 }, { "epoch": 1.873652917131178, "grad_norm": 1.6359537492031804, "learning_rate": 6.5259038101873575e-06, "loss": 0.5607, "step": 25210 }, { "epoch": 1.8737272389446302, "grad_norm": 2.253239569315217, "learning_rate": 6.525151429851397e-06, "loss": 0.5882, "step": 25211 }, { "epoch": 1.8738015607580825, "grad_norm": 2.154969301242368, "learning_rate": 6.524399071886334e-06, "loss": 0.5796, "step": 25212 }, { "epoch": 1.8738758825715347, "grad_norm": 1.9443825516890847, "learning_rate": 6.523646736297006e-06, "loss": 0.5342, "step": 25213 }, { "epoch": 1.873950204384987, "grad_norm": 2.015678015759276, "learning_rate": 6.52289442308826e-06, "loss": 0.5668, "step": 25214 }, { "epoch": 1.8740245261984394, "grad_norm": 1.5872984572735511, "learning_rate": 6.522142132264939e-06, "loss": 0.3915, "step": 25215 }, { "epoch": 1.8740988480118914, "grad_norm": 2.3014695585618403, "learning_rate": 6.521389863831888e-06, "loss": 0.6175, "step": 25216 }, { "epoch": 1.8741731698253439, "grad_norm": 2.1338607141493537, "learning_rate": 6.520637617793948e-06, "loss": 0.5739, "step": 25217 }, { "epoch": 1.874247491638796, "grad_norm": 2.3776746119833407, "learning_rate": 6.519885394155963e-06, "loss": 0.7307, "step": 25218 }, { "epoch": 1.8743218134522484, "grad_norm": 2.884097458427086, "learning_rate": 6.519133192922773e-06, "loss": 0.7406, "step": 25219 }, { "epoch": 1.8743961352657004, "grad_norm": 2.729831019714575, "learning_rate": 6.518381014099224e-06, "loss": 0.5866, "step": 25220 }, { "epoch": 1.8744704570791528, "grad_norm": 2.4795331119419664, "learning_rate": 6.51762885769016e-06, "loss": 0.6525, "step": 25221 }, { "epoch": 1.8745447788926048, "grad_norm": 2.077554899661564, "learning_rate": 6.516876723700421e-06, "loss": 0.5097, "step": 25222 }, { "epoch": 1.8746191007060573, "grad_norm": 1.9062919253427268, "learning_rate": 6.516124612134848e-06, "loss": 0.6427, "step": 25223 }, { "epoch": 1.8746934225195093, "grad_norm": 1.5134857673019095, "learning_rate": 6.515372522998282e-06, "loss": 0.4096, "step": 25224 }, { "epoch": 1.8747677443329618, "grad_norm": 2.121556054804206, "learning_rate": 6.514620456295566e-06, "loss": 0.5025, "step": 25225 }, { "epoch": 1.874842066146414, "grad_norm": 1.9101750773006925, "learning_rate": 6.513868412031544e-06, "loss": 0.589, "step": 25226 }, { "epoch": 1.8749163879598663, "grad_norm": 1.9899391579005934, "learning_rate": 6.5131163902110585e-06, "loss": 0.6698, "step": 25227 }, { "epoch": 1.8749907097733185, "grad_norm": 1.9171687399183406, "learning_rate": 6.512364390838946e-06, "loss": 0.5705, "step": 25228 }, { "epoch": 1.8750650315867707, "grad_norm": 1.834810509437676, "learning_rate": 6.511612413920051e-06, "loss": 0.6287, "step": 25229 }, { "epoch": 1.875139353400223, "grad_norm": 1.828518135012573, "learning_rate": 6.510860459459214e-06, "loss": 0.5429, "step": 25230 }, { "epoch": 1.8752136752136752, "grad_norm": 2.056312623139556, "learning_rate": 6.510108527461276e-06, "loss": 0.5455, "step": 25231 }, { "epoch": 1.8752879970271275, "grad_norm": 2.1046652570168325, "learning_rate": 6.509356617931081e-06, "loss": 0.6142, "step": 25232 }, { "epoch": 1.8753623188405797, "grad_norm": 2.0470802865009317, "learning_rate": 6.5086047308734644e-06, "loss": 0.5789, "step": 25233 }, { "epoch": 1.875436640654032, "grad_norm": 2.041236288526098, "learning_rate": 6.507852866293274e-06, "loss": 0.645, "step": 25234 }, { "epoch": 1.8755109624674842, "grad_norm": 1.7730146695764415, "learning_rate": 6.507101024195341e-06, "loss": 0.6336, "step": 25235 }, { "epoch": 1.8755852842809364, "grad_norm": 1.9988427672063207, "learning_rate": 6.50634920458451e-06, "loss": 0.5783, "step": 25236 }, { "epoch": 1.8756596060943886, "grad_norm": 1.9903545827938032, "learning_rate": 6.505597407465622e-06, "loss": 0.6448, "step": 25237 }, { "epoch": 1.875733927907841, "grad_norm": 2.1211416134202143, "learning_rate": 6.50484563284352e-06, "loss": 0.6554, "step": 25238 }, { "epoch": 1.8758082497212931, "grad_norm": 1.777759591752476, "learning_rate": 6.504093880723037e-06, "loss": 0.516, "step": 25239 }, { "epoch": 1.8758825715347456, "grad_norm": 2.1738201151613077, "learning_rate": 6.503342151109016e-06, "loss": 0.4832, "step": 25240 }, { "epoch": 1.8759568933481976, "grad_norm": 2.078681295394111, "learning_rate": 6.502590444006297e-06, "loss": 0.6519, "step": 25241 }, { "epoch": 1.87603121516165, "grad_norm": 2.088792627429418, "learning_rate": 6.50183875941972e-06, "loss": 0.5594, "step": 25242 }, { "epoch": 1.876105536975102, "grad_norm": 1.9507485613236264, "learning_rate": 6.501087097354124e-06, "loss": 0.572, "step": 25243 }, { "epoch": 1.8761798587885545, "grad_norm": 2.1521047052397875, "learning_rate": 6.500335457814347e-06, "loss": 0.5959, "step": 25244 }, { "epoch": 1.8762541806020065, "grad_norm": 1.6379412250922658, "learning_rate": 6.499583840805232e-06, "loss": 0.5887, "step": 25245 }, { "epoch": 1.876328502415459, "grad_norm": 2.0877497200937634, "learning_rate": 6.498832246331612e-06, "loss": 0.7013, "step": 25246 }, { "epoch": 1.876402824228911, "grad_norm": 2.10400787689127, "learning_rate": 6.498080674398328e-06, "loss": 0.5864, "step": 25247 }, { "epoch": 1.8764771460423635, "grad_norm": 1.8943666852566163, "learning_rate": 6.497329125010218e-06, "loss": 0.6151, "step": 25248 }, { "epoch": 1.8765514678558157, "grad_norm": 1.5155234681449103, "learning_rate": 6.496577598172124e-06, "loss": 0.4779, "step": 25249 }, { "epoch": 1.876625789669268, "grad_norm": 2.023263136327334, "learning_rate": 6.495826093888879e-06, "loss": 0.4818, "step": 25250 }, { "epoch": 1.8767001114827202, "grad_norm": 1.7790266809687023, "learning_rate": 6.495074612165324e-06, "loss": 0.4596, "step": 25251 }, { "epoch": 1.8767744332961724, "grad_norm": 1.6024896621284135, "learning_rate": 6.494323153006297e-06, "loss": 0.4847, "step": 25252 }, { "epoch": 1.8768487551096247, "grad_norm": 2.563438940749518, "learning_rate": 6.4935717164166355e-06, "loss": 0.7137, "step": 25253 }, { "epoch": 1.876923076923077, "grad_norm": 2.755793098138562, "learning_rate": 6.4928203024011794e-06, "loss": 0.5953, "step": 25254 }, { "epoch": 1.8769973987365292, "grad_norm": 1.8831496231539588, "learning_rate": 6.492068910964761e-06, "loss": 0.5753, "step": 25255 }, { "epoch": 1.8770717205499814, "grad_norm": 2.1919370451288898, "learning_rate": 6.491317542112228e-06, "loss": 0.5458, "step": 25256 }, { "epoch": 1.8771460423634336, "grad_norm": 3.0201763967601654, "learning_rate": 6.490566195848405e-06, "loss": 0.7212, "step": 25257 }, { "epoch": 1.8772203641768859, "grad_norm": 1.9697810910195124, "learning_rate": 6.4898148721781326e-06, "loss": 0.5484, "step": 25258 }, { "epoch": 1.8772946859903383, "grad_norm": 1.6380910098537638, "learning_rate": 6.489063571106253e-06, "loss": 0.5564, "step": 25259 }, { "epoch": 1.8773690078037903, "grad_norm": 1.507256984631193, "learning_rate": 6.488312292637601e-06, "loss": 0.5279, "step": 25260 }, { "epoch": 1.8774433296172428, "grad_norm": 1.904273238017755, "learning_rate": 6.4875610367770105e-06, "loss": 0.5064, "step": 25261 }, { "epoch": 1.8775176514306948, "grad_norm": 1.771581890236697, "learning_rate": 6.486809803529319e-06, "loss": 0.544, "step": 25262 }, { "epoch": 1.8775919732441473, "grad_norm": 1.6904073186569377, "learning_rate": 6.486058592899364e-06, "loss": 0.4899, "step": 25263 }, { "epoch": 1.8776662950575993, "grad_norm": 2.104518319846096, "learning_rate": 6.4853074048919805e-06, "loss": 0.712, "step": 25264 }, { "epoch": 1.8777406168710518, "grad_norm": 2.244023374204068, "learning_rate": 6.48455623951201e-06, "loss": 0.5395, "step": 25265 }, { "epoch": 1.8778149386845038, "grad_norm": 2.0817434745659815, "learning_rate": 6.48380509676428e-06, "loss": 0.6832, "step": 25266 }, { "epoch": 1.8778892604979562, "grad_norm": 1.751007539794177, "learning_rate": 6.4830539766536305e-06, "loss": 0.5162, "step": 25267 }, { "epoch": 1.8779635823114083, "grad_norm": 2.5248463913783175, "learning_rate": 6.482302879184902e-06, "loss": 0.7085, "step": 25268 }, { "epoch": 1.8780379041248607, "grad_norm": 1.5422867701949718, "learning_rate": 6.48155180436292e-06, "loss": 0.5037, "step": 25269 }, { "epoch": 1.8781122259383127, "grad_norm": 1.673000612555521, "learning_rate": 6.4808007521925255e-06, "loss": 0.5066, "step": 25270 }, { "epoch": 1.8781865477517652, "grad_norm": 2.289552636859673, "learning_rate": 6.480049722678555e-06, "loss": 0.5605, "step": 25271 }, { "epoch": 1.8782608695652174, "grad_norm": 1.7081105895343986, "learning_rate": 6.479298715825839e-06, "loss": 0.4939, "step": 25272 }, { "epoch": 1.8783351913786697, "grad_norm": 1.958165399105041, "learning_rate": 6.478547731639218e-06, "loss": 0.5951, "step": 25273 }, { "epoch": 1.878409513192122, "grad_norm": 2.3069715322484265, "learning_rate": 6.477796770123521e-06, "loss": 0.6819, "step": 25274 }, { "epoch": 1.8784838350055741, "grad_norm": 1.8078678461259856, "learning_rate": 6.477045831283588e-06, "loss": 0.4755, "step": 25275 }, { "epoch": 1.8785581568190264, "grad_norm": 1.7737419124562137, "learning_rate": 6.476294915124251e-06, "loss": 0.4375, "step": 25276 }, { "epoch": 1.8786324786324786, "grad_norm": 2.792137724054839, "learning_rate": 6.475544021650342e-06, "loss": 0.6863, "step": 25277 }, { "epoch": 1.8787068004459309, "grad_norm": 1.6305912194874472, "learning_rate": 6.4747931508666995e-06, "loss": 0.4644, "step": 25278 }, { "epoch": 1.878781122259383, "grad_norm": 2.6508929159714625, "learning_rate": 6.4740423027781595e-06, "loss": 0.7476, "step": 25279 }, { "epoch": 1.8788554440728353, "grad_norm": 2.1349760119894317, "learning_rate": 6.473291477389547e-06, "loss": 0.7108, "step": 25280 }, { "epoch": 1.8789297658862876, "grad_norm": 1.9408538813959293, "learning_rate": 6.472540674705703e-06, "loss": 0.6057, "step": 25281 }, { "epoch": 1.87900408769974, "grad_norm": 1.9917541250213222, "learning_rate": 6.471789894731459e-06, "loss": 0.51, "step": 25282 }, { "epoch": 1.879078409513192, "grad_norm": 1.6524883551161287, "learning_rate": 6.471039137471649e-06, "loss": 0.4714, "step": 25283 }, { "epoch": 1.8791527313266445, "grad_norm": 2.243377234314087, "learning_rate": 6.470288402931104e-06, "loss": 0.6536, "step": 25284 }, { "epoch": 1.8792270531400965, "grad_norm": 2.3241287945587237, "learning_rate": 6.46953769111466e-06, "loss": 0.5882, "step": 25285 }, { "epoch": 1.879301374953549, "grad_norm": 2.468264503922022, "learning_rate": 6.468787002027151e-06, "loss": 0.6587, "step": 25286 }, { "epoch": 1.879375696767001, "grad_norm": 2.2106676965859946, "learning_rate": 6.4680363356734055e-06, "loss": 0.7311, "step": 25287 }, { "epoch": 1.8794500185804535, "grad_norm": 1.8276202145129359, "learning_rate": 6.467285692058259e-06, "loss": 0.4366, "step": 25288 }, { "epoch": 1.8795243403939055, "grad_norm": 1.7921867158052702, "learning_rate": 6.466535071186545e-06, "loss": 0.512, "step": 25289 }, { "epoch": 1.879598662207358, "grad_norm": 1.9939858327301834, "learning_rate": 6.465784473063097e-06, "loss": 0.5808, "step": 25290 }, { "epoch": 1.87967298402081, "grad_norm": 1.6078991256887867, "learning_rate": 6.465033897692743e-06, "loss": 0.4439, "step": 25291 }, { "epoch": 1.8797473058342624, "grad_norm": 1.9304476873547125, "learning_rate": 6.464283345080317e-06, "loss": 0.5477, "step": 25292 }, { "epoch": 1.8798216276477147, "grad_norm": 1.701192717800989, "learning_rate": 6.463532815230653e-06, "loss": 0.4609, "step": 25293 }, { "epoch": 1.879895949461167, "grad_norm": 1.7221358499075254, "learning_rate": 6.462782308148579e-06, "loss": 0.4, "step": 25294 }, { "epoch": 1.8799702712746191, "grad_norm": 1.9268883617761907, "learning_rate": 6.4620318238389294e-06, "loss": 0.5547, "step": 25295 }, { "epoch": 1.8800445930880714, "grad_norm": 2.23417297658765, "learning_rate": 6.4612813623065355e-06, "loss": 0.5951, "step": 25296 }, { "epoch": 1.8801189149015236, "grad_norm": 1.8393751498589084, "learning_rate": 6.4605309235562295e-06, "loss": 0.4682, "step": 25297 }, { "epoch": 1.8801932367149758, "grad_norm": 1.9574896662856531, "learning_rate": 6.45978050759284e-06, "loss": 0.6084, "step": 25298 }, { "epoch": 1.880267558528428, "grad_norm": 2.255575222156297, "learning_rate": 6.459030114421201e-06, "loss": 0.6631, "step": 25299 }, { "epoch": 1.8803418803418803, "grad_norm": 1.9338870458898905, "learning_rate": 6.458279744046141e-06, "loss": 0.5225, "step": 25300 }, { "epoch": 1.8804162021553326, "grad_norm": 1.6885219481437317, "learning_rate": 6.457529396472497e-06, "loss": 0.495, "step": 25301 }, { "epoch": 1.8804905239687848, "grad_norm": 2.263147283927232, "learning_rate": 6.45677907170509e-06, "loss": 0.4816, "step": 25302 }, { "epoch": 1.880564845782237, "grad_norm": 1.6379163393404839, "learning_rate": 6.456028769748757e-06, "loss": 0.6029, "step": 25303 }, { "epoch": 1.8806391675956893, "grad_norm": 2.8055636214978885, "learning_rate": 6.4552784906083284e-06, "loss": 0.7127, "step": 25304 }, { "epoch": 1.8807134894091417, "grad_norm": 2.398304442387675, "learning_rate": 6.45452823428863e-06, "loss": 0.5173, "step": 25305 }, { "epoch": 1.8807878112225938, "grad_norm": 1.980965230133512, "learning_rate": 6.453778000794497e-06, "loss": 0.6008, "step": 25306 }, { "epoch": 1.8808621330360462, "grad_norm": 2.0972672262042353, "learning_rate": 6.453027790130756e-06, "loss": 0.4897, "step": 25307 }, { "epoch": 1.8809364548494982, "grad_norm": 2.4390390935808144, "learning_rate": 6.452277602302239e-06, "loss": 0.5616, "step": 25308 }, { "epoch": 1.8810107766629507, "grad_norm": 1.9056139921328628, "learning_rate": 6.4515274373137735e-06, "loss": 0.5156, "step": 25309 }, { "epoch": 1.8810850984764027, "grad_norm": 2.1824297947945865, "learning_rate": 6.450777295170192e-06, "loss": 0.7063, "step": 25310 }, { "epoch": 1.8811594202898552, "grad_norm": 2.1455180618673895, "learning_rate": 6.45002717587632e-06, "loss": 0.763, "step": 25311 }, { "epoch": 1.8812337421033072, "grad_norm": 1.8476030392805811, "learning_rate": 6.449277079436988e-06, "loss": 0.5767, "step": 25312 }, { "epoch": 1.8813080639167596, "grad_norm": 1.8378875471746183, "learning_rate": 6.448527005857033e-06, "loss": 0.4667, "step": 25313 }, { "epoch": 1.8813823857302117, "grad_norm": 2.32158081516953, "learning_rate": 6.447776955141271e-06, "loss": 0.4342, "step": 25314 }, { "epoch": 1.8814567075436641, "grad_norm": 1.459604565060753, "learning_rate": 6.44702692729454e-06, "loss": 0.3995, "step": 25315 }, { "epoch": 1.8815310293571164, "grad_norm": 2.3344181417160033, "learning_rate": 6.446276922321661e-06, "loss": 0.7922, "step": 25316 }, { "epoch": 1.8816053511705686, "grad_norm": 2.4145241993100224, "learning_rate": 6.445526940227468e-06, "loss": 0.7032, "step": 25317 }, { "epoch": 1.8816796729840208, "grad_norm": 1.7707786780743662, "learning_rate": 6.444776981016788e-06, "loss": 0.5686, "step": 25318 }, { "epoch": 1.881753994797473, "grad_norm": 2.903072931510247, "learning_rate": 6.444027044694452e-06, "loss": 0.5532, "step": 25319 }, { "epoch": 1.8818283166109253, "grad_norm": 1.923914405479814, "learning_rate": 6.443277131265282e-06, "loss": 0.641, "step": 25320 }, { "epoch": 1.8819026384243775, "grad_norm": 1.8164375927246486, "learning_rate": 6.44252724073411e-06, "loss": 0.5424, "step": 25321 }, { "epoch": 1.8819769602378298, "grad_norm": 2.6524335122502998, "learning_rate": 6.441777373105763e-06, "loss": 0.7153, "step": 25322 }, { "epoch": 1.882051282051282, "grad_norm": 1.86601814441631, "learning_rate": 6.4410275283850685e-06, "loss": 0.4458, "step": 25323 }, { "epoch": 1.8821256038647343, "grad_norm": 1.7977369449234393, "learning_rate": 6.4402777065768585e-06, "loss": 0.4919, "step": 25324 }, { "epoch": 1.8821999256781865, "grad_norm": 1.7090789815214935, "learning_rate": 6.4395279076859515e-06, "loss": 0.5467, "step": 25325 }, { "epoch": 1.8822742474916387, "grad_norm": 1.882630196853399, "learning_rate": 6.4387781317171824e-06, "loss": 0.555, "step": 25326 }, { "epoch": 1.882348569305091, "grad_norm": 1.8116861649744316, "learning_rate": 6.438028378675372e-06, "loss": 0.4243, "step": 25327 }, { "epoch": 1.8824228911185434, "grad_norm": 2.3174643836014557, "learning_rate": 6.437278648565349e-06, "loss": 0.6563, "step": 25328 }, { "epoch": 1.8824972129319955, "grad_norm": 1.7544167119581322, "learning_rate": 6.436528941391942e-06, "loss": 0.4897, "step": 25329 }, { "epoch": 1.882571534745448, "grad_norm": 2.1450931776785813, "learning_rate": 6.43577925715998e-06, "loss": 0.6535, "step": 25330 }, { "epoch": 1.8826458565589, "grad_norm": 1.8469832911204804, "learning_rate": 6.435029595874283e-06, "loss": 0.5151, "step": 25331 }, { "epoch": 1.8827201783723524, "grad_norm": 2.2893322462350714, "learning_rate": 6.43427995753968e-06, "loss": 0.5817, "step": 25332 }, { "epoch": 1.8827945001858044, "grad_norm": 2.5588314613119447, "learning_rate": 6.433530342160999e-06, "loss": 0.5837, "step": 25333 }, { "epoch": 1.8828688219992569, "grad_norm": 2.119156711201178, "learning_rate": 6.432780749743064e-06, "loss": 0.6711, "step": 25334 }, { "epoch": 1.8829431438127089, "grad_norm": 2.338474369066283, "learning_rate": 6.4320311802907055e-06, "loss": 0.5584, "step": 25335 }, { "epoch": 1.8830174656261613, "grad_norm": 2.0689113256199105, "learning_rate": 6.431281633808741e-06, "loss": 0.6557, "step": 25336 }, { "epoch": 1.8830917874396134, "grad_norm": 1.887474035171527, "learning_rate": 6.4305321103020014e-06, "loss": 0.4898, "step": 25337 }, { "epoch": 1.8831661092530658, "grad_norm": 2.251591037024032, "learning_rate": 6.429782609775309e-06, "loss": 0.6972, "step": 25338 }, { "epoch": 1.883240431066518, "grad_norm": 2.074222512771047, "learning_rate": 6.429033132233492e-06, "loss": 0.4543, "step": 25339 }, { "epoch": 1.8833147528799703, "grad_norm": 2.371460238808917, "learning_rate": 6.4282836776813735e-06, "loss": 0.5742, "step": 25340 }, { "epoch": 1.8833890746934225, "grad_norm": 1.9174564691718492, "learning_rate": 6.427534246123782e-06, "loss": 0.6478, "step": 25341 }, { "epoch": 1.8834633965068748, "grad_norm": 2.008690513363845, "learning_rate": 6.426784837565538e-06, "loss": 0.4985, "step": 25342 }, { "epoch": 1.883537718320327, "grad_norm": 1.873247752378496, "learning_rate": 6.4260354520114665e-06, "loss": 0.6044, "step": 25343 }, { "epoch": 1.8836120401337793, "grad_norm": 1.9540358692822983, "learning_rate": 6.425286089466394e-06, "loss": 0.5451, "step": 25344 }, { "epoch": 1.8836863619472315, "grad_norm": 2.024232253409789, "learning_rate": 6.424536749935144e-06, "loss": 0.5925, "step": 25345 }, { "epoch": 1.8837606837606837, "grad_norm": 1.5321091396201822, "learning_rate": 6.423787433422544e-06, "loss": 0.4377, "step": 25346 }, { "epoch": 1.883835005574136, "grad_norm": 2.218939364235563, "learning_rate": 6.423038139933413e-06, "loss": 0.7135, "step": 25347 }, { "epoch": 1.8839093273875882, "grad_norm": 1.9893439657511944, "learning_rate": 6.42228886947258e-06, "loss": 0.6918, "step": 25348 }, { "epoch": 1.8839836492010407, "grad_norm": 1.7378524844002063, "learning_rate": 6.42153962204486e-06, "loss": 0.445, "step": 25349 }, { "epoch": 1.8840579710144927, "grad_norm": 2.36706085670724, "learning_rate": 6.420790397655084e-06, "loss": 0.6544, "step": 25350 }, { "epoch": 1.8841322928279451, "grad_norm": 1.9511465977481246, "learning_rate": 6.420041196308073e-06, "loss": 0.4423, "step": 25351 }, { "epoch": 1.8842066146413972, "grad_norm": 1.6912707184948979, "learning_rate": 6.4192920180086534e-06, "loss": 0.5164, "step": 25352 }, { "epoch": 1.8842809364548496, "grad_norm": 1.7226939098067369, "learning_rate": 6.4185428627616456e-06, "loss": 0.5256, "step": 25353 }, { "epoch": 1.8843552582683016, "grad_norm": 2.0186872092985895, "learning_rate": 6.417793730571871e-06, "loss": 0.5135, "step": 25354 }, { "epoch": 1.884429580081754, "grad_norm": 2.568269419240544, "learning_rate": 6.417044621444155e-06, "loss": 0.5625, "step": 25355 }, { "epoch": 1.8845039018952061, "grad_norm": 1.7595766617567472, "learning_rate": 6.416295535383319e-06, "loss": 0.4545, "step": 25356 }, { "epoch": 1.8845782237086586, "grad_norm": 2.041073884274258, "learning_rate": 6.4155464723941895e-06, "loss": 0.6403, "step": 25357 }, { "epoch": 1.8846525455221106, "grad_norm": 2.0883452582820876, "learning_rate": 6.414797432481586e-06, "loss": 0.646, "step": 25358 }, { "epoch": 1.884726867335563, "grad_norm": 1.4571296902118958, "learning_rate": 6.414048415650329e-06, "loss": 0.467, "step": 25359 }, { "epoch": 1.8848011891490153, "grad_norm": 2.1075224072682843, "learning_rate": 6.413299421905242e-06, "loss": 0.6082, "step": 25360 }, { "epoch": 1.8848755109624675, "grad_norm": 1.8165165253625377, "learning_rate": 6.412550451251147e-06, "loss": 0.5676, "step": 25361 }, { "epoch": 1.8849498327759198, "grad_norm": 2.8270571343783826, "learning_rate": 6.411801503692865e-06, "loss": 0.5158, "step": 25362 }, { "epoch": 1.885024154589372, "grad_norm": 2.1191164406630314, "learning_rate": 6.411052579235222e-06, "loss": 0.5628, "step": 25363 }, { "epoch": 1.8850984764028242, "grad_norm": 3.446449146453953, "learning_rate": 6.410303677883034e-06, "loss": 0.6103, "step": 25364 }, { "epoch": 1.8851727982162765, "grad_norm": 1.7496045743717754, "learning_rate": 6.409554799641125e-06, "loss": 0.4848, "step": 25365 }, { "epoch": 1.8852471200297287, "grad_norm": 1.7666299436677824, "learning_rate": 6.408805944514313e-06, "loss": 0.6139, "step": 25366 }, { "epoch": 1.885321441843181, "grad_norm": 1.5820710267262352, "learning_rate": 6.408057112507425e-06, "loss": 0.5153, "step": 25367 }, { "epoch": 1.8853957636566332, "grad_norm": 2.101086411179833, "learning_rate": 6.407308303625281e-06, "loss": 0.57, "step": 25368 }, { "epoch": 1.8854700854700854, "grad_norm": 1.7737874168056285, "learning_rate": 6.406559517872699e-06, "loss": 0.6531, "step": 25369 }, { "epoch": 1.8855444072835377, "grad_norm": 2.0137438849313507, "learning_rate": 6.4058107552545e-06, "loss": 0.6241, "step": 25370 }, { "epoch": 1.88561872909699, "grad_norm": 2.168363381694851, "learning_rate": 6.4050620157755025e-06, "loss": 0.5314, "step": 25371 }, { "epoch": 1.8856930509104424, "grad_norm": 1.7644151476988499, "learning_rate": 6.40431329944053e-06, "loss": 0.4292, "step": 25372 }, { "epoch": 1.8857673727238944, "grad_norm": 2.445456200784186, "learning_rate": 6.403564606254402e-06, "loss": 0.6357, "step": 25373 }, { "epoch": 1.8858416945373468, "grad_norm": 1.7390204331967194, "learning_rate": 6.40281593622194e-06, "loss": 0.5547, "step": 25374 }, { "epoch": 1.8859160163507989, "grad_norm": 1.6601853218206462, "learning_rate": 6.4020672893479615e-06, "loss": 0.4809, "step": 25375 }, { "epoch": 1.8859903381642513, "grad_norm": 1.8106569961261836, "learning_rate": 6.401318665637286e-06, "loss": 0.6284, "step": 25376 }, { "epoch": 1.8860646599777033, "grad_norm": 1.7449665423316505, "learning_rate": 6.400570065094734e-06, "loss": 0.5377, "step": 25377 }, { "epoch": 1.8861389817911558, "grad_norm": 1.916292796172797, "learning_rate": 6.399821487725126e-06, "loss": 0.5799, "step": 25378 }, { "epoch": 1.8862133036046078, "grad_norm": 2.0013447710268935, "learning_rate": 6.399072933533281e-06, "loss": 0.5607, "step": 25379 }, { "epoch": 1.8862876254180603, "grad_norm": 2.2693412898654874, "learning_rate": 6.398324402524017e-06, "loss": 0.513, "step": 25380 }, { "epoch": 1.8863619472315123, "grad_norm": 1.7127642966947898, "learning_rate": 6.3975758947021585e-06, "loss": 0.6023, "step": 25381 }, { "epoch": 1.8864362690449648, "grad_norm": 1.7938128402346911, "learning_rate": 6.396827410072513e-06, "loss": 0.4893, "step": 25382 }, { "epoch": 1.886510590858417, "grad_norm": 2.2307676925335644, "learning_rate": 6.396078948639908e-06, "loss": 0.6463, "step": 25383 }, { "epoch": 1.8865849126718692, "grad_norm": 2.0725637526424148, "learning_rate": 6.395330510409159e-06, "loss": 0.4599, "step": 25384 }, { "epoch": 1.8866592344853215, "grad_norm": 2.053953027094952, "learning_rate": 6.394582095385087e-06, "loss": 0.6081, "step": 25385 }, { "epoch": 1.8867335562987737, "grad_norm": 1.9342084216872175, "learning_rate": 6.393833703572505e-06, "loss": 0.6136, "step": 25386 }, { "epoch": 1.886807878112226, "grad_norm": 1.8989089913254904, "learning_rate": 6.393085334976237e-06, "loss": 0.5973, "step": 25387 }, { "epoch": 1.8868821999256782, "grad_norm": 1.8061193399162108, "learning_rate": 6.392336989601095e-06, "loss": 0.5445, "step": 25388 }, { "epoch": 1.8869565217391304, "grad_norm": 2.0806103002964735, "learning_rate": 6.3915886674519035e-06, "loss": 0.5952, "step": 25389 }, { "epoch": 1.8870308435525827, "grad_norm": 2.0693549328330563, "learning_rate": 6.390840368533477e-06, "loss": 0.6575, "step": 25390 }, { "epoch": 1.887105165366035, "grad_norm": 2.2749369833268585, "learning_rate": 6.3900920928506326e-06, "loss": 0.5545, "step": 25391 }, { "epoch": 1.8871794871794871, "grad_norm": 2.0583381516092896, "learning_rate": 6.389343840408191e-06, "loss": 0.5066, "step": 25392 }, { "epoch": 1.8872538089929394, "grad_norm": 1.9036415130897828, "learning_rate": 6.388595611210964e-06, "loss": 0.4599, "step": 25393 }, { "epoch": 1.8873281308063916, "grad_norm": 1.658717784629952, "learning_rate": 6.387847405263769e-06, "loss": 0.5065, "step": 25394 }, { "epoch": 1.887402452619844, "grad_norm": 2.2190989222389557, "learning_rate": 6.3870992225714255e-06, "loss": 0.5524, "step": 25395 }, { "epoch": 1.887476774433296, "grad_norm": 2.2797280513987093, "learning_rate": 6.386351063138751e-06, "loss": 0.6491, "step": 25396 }, { "epoch": 1.8875510962467485, "grad_norm": 2.7232590709603564, "learning_rate": 6.38560292697056e-06, "loss": 0.5868, "step": 25397 }, { "epoch": 1.8876254180602006, "grad_norm": 2.032014823444707, "learning_rate": 6.38485481407167e-06, "loss": 0.6042, "step": 25398 }, { "epoch": 1.887699739873653, "grad_norm": 1.9584156121582668, "learning_rate": 6.384106724446897e-06, "loss": 0.4972, "step": 25399 }, { "epoch": 1.887774061687105, "grad_norm": 2.0144468414471635, "learning_rate": 6.38335865810106e-06, "loss": 0.6169, "step": 25400 }, { "epoch": 1.8878483835005575, "grad_norm": 1.6809487798835332, "learning_rate": 6.382610615038969e-06, "loss": 0.5181, "step": 25401 }, { "epoch": 1.8879227053140095, "grad_norm": 2.2397436507150337, "learning_rate": 6.381862595265442e-06, "loss": 0.6267, "step": 25402 }, { "epoch": 1.887997027127462, "grad_norm": 1.9719325208272303, "learning_rate": 6.381114598785303e-06, "loss": 0.6346, "step": 25403 }, { "epoch": 1.888071348940914, "grad_norm": 2.222005321323355, "learning_rate": 6.380366625603354e-06, "loss": 0.6522, "step": 25404 }, { "epoch": 1.8881456707543665, "grad_norm": 2.0376683424867843, "learning_rate": 6.379618675724417e-06, "loss": 0.639, "step": 25405 }, { "epoch": 1.8882199925678187, "grad_norm": 1.661202461894706, "learning_rate": 6.3788707491533084e-06, "loss": 0.5161, "step": 25406 }, { "epoch": 1.888294314381271, "grad_norm": 1.7419388774602986, "learning_rate": 6.378122845894843e-06, "loss": 0.4515, "step": 25407 }, { "epoch": 1.8883686361947232, "grad_norm": 2.379504894173751, "learning_rate": 6.3773749659538335e-06, "loss": 0.7232, "step": 25408 }, { "epoch": 1.8884429580081754, "grad_norm": 1.537582920561579, "learning_rate": 6.3766271093350964e-06, "loss": 0.3363, "step": 25409 }, { "epoch": 1.8885172798216276, "grad_norm": 2.1374679953959377, "learning_rate": 6.375879276043444e-06, "loss": 0.6588, "step": 25410 }, { "epoch": 1.8885916016350799, "grad_norm": 7.170943163176588, "learning_rate": 6.375131466083697e-06, "loss": 0.6145, "step": 25411 }, { "epoch": 1.8886659234485321, "grad_norm": 1.5790815081781806, "learning_rate": 6.374383679460663e-06, "loss": 0.5712, "step": 25412 }, { "epoch": 1.8887402452619844, "grad_norm": 2.2513871001940795, "learning_rate": 6.373635916179157e-06, "loss": 0.6189, "step": 25413 }, { "epoch": 1.8888145670754366, "grad_norm": 1.8713598128825517, "learning_rate": 6.372888176244001e-06, "loss": 0.6896, "step": 25414 }, { "epoch": 1.8888888888888888, "grad_norm": 2.0799415822935217, "learning_rate": 6.372140459659998e-06, "loss": 0.5772, "step": 25415 }, { "epoch": 1.8889632107023413, "grad_norm": 2.4776410264455424, "learning_rate": 6.371392766431966e-06, "loss": 0.5965, "step": 25416 }, { "epoch": 1.8890375325157933, "grad_norm": 1.7368391689011184, "learning_rate": 6.370645096564721e-06, "loss": 0.5457, "step": 25417 }, { "epoch": 1.8891118543292458, "grad_norm": 1.568958821672278, "learning_rate": 6.369897450063074e-06, "loss": 0.5321, "step": 25418 }, { "epoch": 1.8891861761426978, "grad_norm": 2.1046455550803227, "learning_rate": 6.369149826931838e-06, "loss": 0.5665, "step": 25419 }, { "epoch": 1.8892604979561503, "grad_norm": 3.192745071628965, "learning_rate": 6.3684022271758275e-06, "loss": 0.5996, "step": 25420 }, { "epoch": 1.8893348197696023, "grad_norm": 2.639048765019661, "learning_rate": 6.367654650799853e-06, "loss": 0.6348, "step": 25421 }, { "epoch": 1.8894091415830547, "grad_norm": 4.282703844131502, "learning_rate": 6.366907097808733e-06, "loss": 0.4726, "step": 25422 }, { "epoch": 1.8894834633965067, "grad_norm": 1.8860603880703706, "learning_rate": 6.366159568207275e-06, "loss": 0.4543, "step": 25423 }, { "epoch": 1.8895577852099592, "grad_norm": 1.3681956984461843, "learning_rate": 6.365412062000292e-06, "loss": 0.3939, "step": 25424 }, { "epoch": 1.8896321070234112, "grad_norm": 1.9900628640937623, "learning_rate": 6.364664579192599e-06, "loss": 0.5618, "step": 25425 }, { "epoch": 1.8897064288368637, "grad_norm": 1.91046497507898, "learning_rate": 6.36391711978901e-06, "loss": 0.5472, "step": 25426 }, { "epoch": 1.889780750650316, "grad_norm": 2.4778135094833367, "learning_rate": 6.3631696837943294e-06, "loss": 0.7129, "step": 25427 }, { "epoch": 1.8898550724637682, "grad_norm": 1.9899022963382667, "learning_rate": 6.362422271213373e-06, "loss": 0.7622, "step": 25428 }, { "epoch": 1.8899293942772204, "grad_norm": 2.0599580835048643, "learning_rate": 6.361674882050958e-06, "loss": 0.5711, "step": 25429 }, { "epoch": 1.8900037160906726, "grad_norm": 2.086867400388554, "learning_rate": 6.3609275163118875e-06, "loss": 0.6187, "step": 25430 }, { "epoch": 1.8900780379041249, "grad_norm": 1.568304448707849, "learning_rate": 6.360180174000976e-06, "loss": 0.394, "step": 25431 }, { "epoch": 1.8901523597175771, "grad_norm": 2.4628495775419736, "learning_rate": 6.359432855123037e-06, "loss": 0.6448, "step": 25432 }, { "epoch": 1.8902266815310294, "grad_norm": 2.194246266041238, "learning_rate": 6.358685559682883e-06, "loss": 0.5311, "step": 25433 }, { "epoch": 1.8903010033444816, "grad_norm": 1.738904475537825, "learning_rate": 6.357938287685319e-06, "loss": 0.3893, "step": 25434 }, { "epoch": 1.8903753251579338, "grad_norm": 2.260940168224754, "learning_rate": 6.35719103913516e-06, "loss": 0.6931, "step": 25435 }, { "epoch": 1.890449646971386, "grad_norm": 2.3860527098416835, "learning_rate": 6.356443814037217e-06, "loss": 0.607, "step": 25436 }, { "epoch": 1.8905239687848383, "grad_norm": 1.955091374194249, "learning_rate": 6.355696612396301e-06, "loss": 0.6157, "step": 25437 }, { "epoch": 1.8905982905982905, "grad_norm": 1.8761749887364643, "learning_rate": 6.354949434217219e-06, "loss": 0.5784, "step": 25438 }, { "epoch": 1.890672612411743, "grad_norm": 2.0025669231210648, "learning_rate": 6.354202279504785e-06, "loss": 0.7164, "step": 25439 }, { "epoch": 1.890746934225195, "grad_norm": 2.0857612032948447, "learning_rate": 6.3534551482638065e-06, "loss": 0.6295, "step": 25440 }, { "epoch": 1.8908212560386475, "grad_norm": 2.263881247497807, "learning_rate": 6.3527080404990935e-06, "loss": 0.6769, "step": 25441 }, { "epoch": 1.8908955778520995, "grad_norm": 1.8833250680046678, "learning_rate": 6.351960956215457e-06, "loss": 0.5775, "step": 25442 }, { "epoch": 1.890969899665552, "grad_norm": 2.1501579014812124, "learning_rate": 6.351213895417707e-06, "loss": 0.5864, "step": 25443 }, { "epoch": 1.891044221479004, "grad_norm": 1.7892706798019695, "learning_rate": 6.350466858110654e-06, "loss": 0.6408, "step": 25444 }, { "epoch": 1.8911185432924564, "grad_norm": 2.3713938657466866, "learning_rate": 6.349719844299105e-06, "loss": 0.6051, "step": 25445 }, { "epoch": 1.8911928651059084, "grad_norm": 2.0105448503365784, "learning_rate": 6.348972853987869e-06, "loss": 0.5937, "step": 25446 }, { "epoch": 1.891267186919361, "grad_norm": 2.059639218549967, "learning_rate": 6.348225887181758e-06, "loss": 0.6279, "step": 25447 }, { "epoch": 1.891341508732813, "grad_norm": 2.374277263567733, "learning_rate": 6.347478943885581e-06, "loss": 0.6854, "step": 25448 }, { "epoch": 1.8914158305462654, "grad_norm": 1.5640701686124807, "learning_rate": 6.346732024104143e-06, "loss": 0.375, "step": 25449 }, { "epoch": 1.8914901523597176, "grad_norm": 2.07286623879508, "learning_rate": 6.345985127842254e-06, "loss": 0.6677, "step": 25450 }, { "epoch": 1.8915644741731699, "grad_norm": 1.6501205704561794, "learning_rate": 6.345238255104726e-06, "loss": 0.4913, "step": 25451 }, { "epoch": 1.891638795986622, "grad_norm": 2.599438886835638, "learning_rate": 6.34449140589636e-06, "loss": 0.4954, "step": 25452 }, { "epoch": 1.8917131178000743, "grad_norm": 2.464997816435247, "learning_rate": 6.343744580221971e-06, "loss": 0.5948, "step": 25453 }, { "epoch": 1.8917874396135266, "grad_norm": 1.4059535046894307, "learning_rate": 6.342997778086364e-06, "loss": 0.3459, "step": 25454 }, { "epoch": 1.8918617614269788, "grad_norm": 2.004685633714736, "learning_rate": 6.34225099949435e-06, "loss": 0.5796, "step": 25455 }, { "epoch": 1.891936083240431, "grad_norm": 1.8964932178766571, "learning_rate": 6.3415042444507316e-06, "loss": 0.5572, "step": 25456 }, { "epoch": 1.8920104050538833, "grad_norm": 2.1248464421868674, "learning_rate": 6.340757512960318e-06, "loss": 0.7812, "step": 25457 }, { "epoch": 1.8920847268673355, "grad_norm": 1.8978059724890732, "learning_rate": 6.340010805027919e-06, "loss": 0.5859, "step": 25458 }, { "epoch": 1.8921590486807878, "grad_norm": 2.1175229998208236, "learning_rate": 6.33926412065834e-06, "loss": 0.6915, "step": 25459 }, { "epoch": 1.89223337049424, "grad_norm": 2.5519594051961807, "learning_rate": 6.338517459856394e-06, "loss": 0.6289, "step": 25460 }, { "epoch": 1.8923076923076922, "grad_norm": 1.593546000948756, "learning_rate": 6.337770822626877e-06, "loss": 0.4778, "step": 25461 }, { "epoch": 1.8923820141211447, "grad_norm": 2.0818570804175525, "learning_rate": 6.337024208974605e-06, "loss": 0.7069, "step": 25462 }, { "epoch": 1.8924563359345967, "grad_norm": 2.2307506411880484, "learning_rate": 6.3362776189043785e-06, "loss": 0.7227, "step": 25463 }, { "epoch": 1.8925306577480492, "grad_norm": 1.6706811183029266, "learning_rate": 6.335531052421006e-06, "loss": 0.5402, "step": 25464 }, { "epoch": 1.8926049795615012, "grad_norm": 2.323389373258542, "learning_rate": 6.334784509529295e-06, "loss": 0.5456, "step": 25465 }, { "epoch": 1.8926793013749537, "grad_norm": 1.8494834588788391, "learning_rate": 6.334037990234052e-06, "loss": 0.6466, "step": 25466 }, { "epoch": 1.8927536231884057, "grad_norm": 1.780938166773227, "learning_rate": 6.333291494540082e-06, "loss": 0.4735, "step": 25467 }, { "epoch": 1.8928279450018581, "grad_norm": 1.9558525769260902, "learning_rate": 6.3325450224521885e-06, "loss": 0.4752, "step": 25468 }, { "epoch": 1.8929022668153102, "grad_norm": 1.9273481117231348, "learning_rate": 6.331798573975182e-06, "loss": 0.677, "step": 25469 }, { "epoch": 1.8929765886287626, "grad_norm": 2.162389615842317, "learning_rate": 6.331052149113865e-06, "loss": 0.5228, "step": 25470 }, { "epoch": 1.8930509104422146, "grad_norm": 2.0626887242601817, "learning_rate": 6.330305747873049e-06, "loss": 0.6602, "step": 25471 }, { "epoch": 1.893125232255667, "grad_norm": 2.081793011059239, "learning_rate": 6.3295593702575295e-06, "loss": 0.5912, "step": 25472 }, { "epoch": 1.8931995540691193, "grad_norm": 2.5649949508443024, "learning_rate": 6.328813016272119e-06, "loss": 0.6806, "step": 25473 }, { "epoch": 1.8932738758825716, "grad_norm": 1.8238235170662118, "learning_rate": 6.3280666859216165e-06, "loss": 0.5, "step": 25474 }, { "epoch": 1.8933481976960238, "grad_norm": 2.3470565633256326, "learning_rate": 6.327320379210831e-06, "loss": 0.6833, "step": 25475 }, { "epoch": 1.893422519509476, "grad_norm": 1.8169309603107842, "learning_rate": 6.326574096144566e-06, "loss": 0.599, "step": 25476 }, { "epoch": 1.8934968413229283, "grad_norm": 2.183906093163565, "learning_rate": 6.3258278367276295e-06, "loss": 0.603, "step": 25477 }, { "epoch": 1.8935711631363805, "grad_norm": 2.19067431828308, "learning_rate": 6.32508160096482e-06, "loss": 0.5522, "step": 25478 }, { "epoch": 1.8936454849498328, "grad_norm": 2.294469168453409, "learning_rate": 6.324335388860943e-06, "loss": 0.5482, "step": 25479 }, { "epoch": 1.893719806763285, "grad_norm": 1.9842724257406377, "learning_rate": 6.323589200420806e-06, "loss": 0.3615, "step": 25480 }, { "epoch": 1.8937941285767372, "grad_norm": 2.064187918458237, "learning_rate": 6.3228430356492095e-06, "loss": 0.556, "step": 25481 }, { "epoch": 1.8938684503901895, "grad_norm": 1.616667248256747, "learning_rate": 6.322096894550964e-06, "loss": 0.5905, "step": 25482 }, { "epoch": 1.893942772203642, "grad_norm": 2.126721172817567, "learning_rate": 6.321350777130864e-06, "loss": 0.6257, "step": 25483 }, { "epoch": 1.894017094017094, "grad_norm": 1.9218363177342037, "learning_rate": 6.320604683393717e-06, "loss": 0.5999, "step": 25484 }, { "epoch": 1.8940914158305464, "grad_norm": 2.1328902052268806, "learning_rate": 6.319858613344325e-06, "loss": 0.5771, "step": 25485 }, { "epoch": 1.8941657376439984, "grad_norm": 3.201973597070366, "learning_rate": 6.319112566987493e-06, "loss": 0.5589, "step": 25486 }, { "epoch": 1.8942400594574509, "grad_norm": 2.3006507547233004, "learning_rate": 6.318366544328023e-06, "loss": 0.4843, "step": 25487 }, { "epoch": 1.894314381270903, "grad_norm": 1.9008143682645693, "learning_rate": 6.317620545370721e-06, "loss": 0.5476, "step": 25488 }, { "epoch": 1.8943887030843554, "grad_norm": 1.6383147786446404, "learning_rate": 6.316874570120384e-06, "loss": 0.4593, "step": 25489 }, { "epoch": 1.8944630248978074, "grad_norm": 2.002390564791934, "learning_rate": 6.3161286185818174e-06, "loss": 0.6127, "step": 25490 }, { "epoch": 1.8945373467112598, "grad_norm": 2.4070337218276685, "learning_rate": 6.3153826907598235e-06, "loss": 0.6954, "step": 25491 }, { "epoch": 1.8946116685247119, "grad_norm": 2.211291948270689, "learning_rate": 6.314636786659205e-06, "loss": 0.5736, "step": 25492 }, { "epoch": 1.8946859903381643, "grad_norm": 2.128375661313425, "learning_rate": 6.313890906284765e-06, "loss": 0.5077, "step": 25493 }, { "epoch": 1.8947603121516166, "grad_norm": 1.7861830092425863, "learning_rate": 6.313145049641305e-06, "loss": 0.5361, "step": 25494 }, { "epoch": 1.8948346339650688, "grad_norm": 1.9613851019774335, "learning_rate": 6.312399216733624e-06, "loss": 0.472, "step": 25495 }, { "epoch": 1.894908955778521, "grad_norm": 2.7294405967395847, "learning_rate": 6.311653407566526e-06, "loss": 0.6555, "step": 25496 }, { "epoch": 1.8949832775919733, "grad_norm": 1.5328543940058734, "learning_rate": 6.31090762214481e-06, "loss": 0.4366, "step": 25497 }, { "epoch": 1.8950575994054255, "grad_norm": 2.1098054220373883, "learning_rate": 6.310161860473281e-06, "loss": 0.6265, "step": 25498 }, { "epoch": 1.8951319212188777, "grad_norm": 1.5433545256823515, "learning_rate": 6.3094161225567394e-06, "loss": 0.4274, "step": 25499 }, { "epoch": 1.89520624303233, "grad_norm": 2.279557850339457, "learning_rate": 6.308670408399984e-06, "loss": 0.7182, "step": 25500 }, { "epoch": 1.8952805648457822, "grad_norm": 1.8691869335641271, "learning_rate": 6.307924718007816e-06, "loss": 0.6513, "step": 25501 }, { "epoch": 1.8953548866592345, "grad_norm": 1.9281482714668838, "learning_rate": 6.307179051385037e-06, "loss": 0.6685, "step": 25502 }, { "epoch": 1.8954292084726867, "grad_norm": 1.4600754278243488, "learning_rate": 6.306433408536448e-06, "loss": 0.4391, "step": 25503 }, { "epoch": 1.895503530286139, "grad_norm": 1.992136481252937, "learning_rate": 6.3056877894668525e-06, "loss": 0.5473, "step": 25504 }, { "epoch": 1.8955778520995912, "grad_norm": 2.5068092468017165, "learning_rate": 6.3049421941810465e-06, "loss": 0.6078, "step": 25505 }, { "epoch": 1.8956521739130436, "grad_norm": 1.872125293601123, "learning_rate": 6.30419662268383e-06, "loss": 0.5359, "step": 25506 }, { "epoch": 1.8957264957264957, "grad_norm": 2.552710522992534, "learning_rate": 6.303451074980003e-06, "loss": 0.5, "step": 25507 }, { "epoch": 1.8958008175399481, "grad_norm": 1.981802919370704, "learning_rate": 6.302705551074366e-06, "loss": 0.6164, "step": 25508 }, { "epoch": 1.8958751393534001, "grad_norm": 2.1892055665698655, "learning_rate": 6.301960050971718e-06, "loss": 0.6117, "step": 25509 }, { "epoch": 1.8959494611668526, "grad_norm": 2.174132957036406, "learning_rate": 6.301214574676862e-06, "loss": 0.5149, "step": 25510 }, { "epoch": 1.8960237829803046, "grad_norm": 1.8951031618617575, "learning_rate": 6.300469122194593e-06, "loss": 0.5928, "step": 25511 }, { "epoch": 1.896098104793757, "grad_norm": 1.9851447035529615, "learning_rate": 6.299723693529711e-06, "loss": 0.4727, "step": 25512 }, { "epoch": 1.896172426607209, "grad_norm": 2.431039830621703, "learning_rate": 6.298978288687017e-06, "loss": 0.5599, "step": 25513 }, { "epoch": 1.8962467484206615, "grad_norm": 1.6047537836965131, "learning_rate": 6.298232907671307e-06, "loss": 0.3844, "step": 25514 }, { "epoch": 1.8963210702341136, "grad_norm": 1.9036784840523808, "learning_rate": 6.297487550487385e-06, "loss": 0.5443, "step": 25515 }, { "epoch": 1.896395392047566, "grad_norm": 1.9857900040908691, "learning_rate": 6.296742217140046e-06, "loss": 0.7077, "step": 25516 }, { "epoch": 1.8964697138610183, "grad_norm": 2.071947987831672, "learning_rate": 6.295996907634087e-06, "loss": 0.4898, "step": 25517 }, { "epoch": 1.8965440356744705, "grad_norm": 2.0117022135409606, "learning_rate": 6.295251621974305e-06, "loss": 0.4865, "step": 25518 }, { "epoch": 1.8966183574879227, "grad_norm": 2.0923457443552036, "learning_rate": 6.294506360165504e-06, "loss": 0.5646, "step": 25519 }, { "epoch": 1.896692679301375, "grad_norm": 2.1819849779433467, "learning_rate": 6.2937611222124765e-06, "loss": 0.6062, "step": 25520 }, { "epoch": 1.8967670011148272, "grad_norm": 2.1162429251074033, "learning_rate": 6.293015908120026e-06, "loss": 0.6484, "step": 25521 }, { "epoch": 1.8968413229282794, "grad_norm": 2.056870672633125, "learning_rate": 6.292270717892943e-06, "loss": 0.4339, "step": 25522 }, { "epoch": 1.8969156447417317, "grad_norm": 2.209178342223176, "learning_rate": 6.291525551536031e-06, "loss": 0.5931, "step": 25523 }, { "epoch": 1.896989966555184, "grad_norm": 2.797088584574285, "learning_rate": 6.290780409054083e-06, "loss": 0.4876, "step": 25524 }, { "epoch": 1.8970642883686362, "grad_norm": 1.8554824845816784, "learning_rate": 6.290035290451899e-06, "loss": 0.4559, "step": 25525 }, { "epoch": 1.8971386101820884, "grad_norm": 1.6216328108414577, "learning_rate": 6.289290195734278e-06, "loss": 0.5166, "step": 25526 }, { "epoch": 1.8972129319955406, "grad_norm": 2.0623757804156826, "learning_rate": 6.288545124906016e-06, "loss": 0.4916, "step": 25527 }, { "epoch": 1.8972872538089929, "grad_norm": 1.828136205720614, "learning_rate": 6.287800077971904e-06, "loss": 0.478, "step": 25528 }, { "epoch": 1.8973615756224453, "grad_norm": 1.9559704663575528, "learning_rate": 6.2870550549367415e-06, "loss": 0.6476, "step": 25529 }, { "epoch": 1.8974358974358974, "grad_norm": 2.0231956920940823, "learning_rate": 6.286310055805327e-06, "loss": 0.5734, "step": 25530 }, { "epoch": 1.8975102192493498, "grad_norm": 1.8156531181214346, "learning_rate": 6.2855650805824546e-06, "loss": 0.5905, "step": 25531 }, { "epoch": 1.8975845410628018, "grad_norm": 1.7205395337300906, "learning_rate": 6.284820129272924e-06, "loss": 0.4798, "step": 25532 }, { "epoch": 1.8976588628762543, "grad_norm": 2.1716009158241714, "learning_rate": 6.2840752018815265e-06, "loss": 0.6526, "step": 25533 }, { "epoch": 1.8977331846897063, "grad_norm": 1.7090375806009572, "learning_rate": 6.28333029841306e-06, "loss": 0.5607, "step": 25534 }, { "epoch": 1.8978075065031588, "grad_norm": 2.0373137192218795, "learning_rate": 6.28258541887232e-06, "loss": 0.6582, "step": 25535 }, { "epoch": 1.8978818283166108, "grad_norm": 1.7192878468169268, "learning_rate": 6.281840563264105e-06, "loss": 0.5675, "step": 25536 }, { "epoch": 1.8979561501300632, "grad_norm": 1.8433104196123273, "learning_rate": 6.281095731593205e-06, "loss": 0.5387, "step": 25537 }, { "epoch": 1.8980304719435153, "grad_norm": 2.8049866634825107, "learning_rate": 6.2803509238644175e-06, "loss": 0.4981, "step": 25538 }, { "epoch": 1.8981047937569677, "grad_norm": 1.8378044906592461, "learning_rate": 6.279606140082542e-06, "loss": 0.4794, "step": 25539 }, { "epoch": 1.89817911557042, "grad_norm": 1.8352741726186448, "learning_rate": 6.278861380252366e-06, "loss": 0.6289, "step": 25540 }, { "epoch": 1.8982534373838722, "grad_norm": 1.913265138685971, "learning_rate": 6.278116644378687e-06, "loss": 0.5461, "step": 25541 }, { "epoch": 1.8983277591973244, "grad_norm": 2.1610795298741534, "learning_rate": 6.277371932466299e-06, "loss": 0.701, "step": 25542 }, { "epoch": 1.8984020810107767, "grad_norm": 1.7188608990412053, "learning_rate": 6.27662724452e-06, "loss": 0.5526, "step": 25543 }, { "epoch": 1.898476402824229, "grad_norm": 2.238201837401528, "learning_rate": 6.275882580544579e-06, "loss": 0.6956, "step": 25544 }, { "epoch": 1.8985507246376812, "grad_norm": 1.9884838756557266, "learning_rate": 6.275137940544833e-06, "loss": 0.5636, "step": 25545 }, { "epoch": 1.8986250464511334, "grad_norm": 2.0255932673507426, "learning_rate": 6.274393324525556e-06, "loss": 0.6166, "step": 25546 }, { "epoch": 1.8986993682645856, "grad_norm": 2.624627675349281, "learning_rate": 6.273648732491543e-06, "loss": 0.6006, "step": 25547 }, { "epoch": 1.8987736900780379, "grad_norm": 2.2059556244710476, "learning_rate": 6.272904164447584e-06, "loss": 0.61, "step": 25548 }, { "epoch": 1.89884801189149, "grad_norm": 2.2945158250978577, "learning_rate": 6.272159620398476e-06, "loss": 0.493, "step": 25549 }, { "epoch": 1.8989223337049426, "grad_norm": 1.6883469703043672, "learning_rate": 6.271415100349014e-06, "loss": 0.5329, "step": 25550 }, { "epoch": 1.8989966555183946, "grad_norm": 2.295976771528847, "learning_rate": 6.270670604303984e-06, "loss": 0.6704, "step": 25551 }, { "epoch": 1.899070977331847, "grad_norm": 3.21924441064734, "learning_rate": 6.269926132268183e-06, "loss": 0.3049, "step": 25552 }, { "epoch": 1.899145299145299, "grad_norm": 1.989780600832543, "learning_rate": 6.269181684246404e-06, "loss": 0.5835, "step": 25553 }, { "epoch": 1.8992196209587515, "grad_norm": 1.9367472375369612, "learning_rate": 6.2684372602434416e-06, "loss": 0.4971, "step": 25554 }, { "epoch": 1.8992939427722035, "grad_norm": 2.199442929134509, "learning_rate": 6.2676928602640855e-06, "loss": 0.5065, "step": 25555 }, { "epoch": 1.899368264585656, "grad_norm": 2.3782393006056854, "learning_rate": 6.266948484313129e-06, "loss": 0.5695, "step": 25556 }, { "epoch": 1.899442586399108, "grad_norm": 2.03225293353209, "learning_rate": 6.266204132395364e-06, "loss": 0.5865, "step": 25557 }, { "epoch": 1.8995169082125605, "grad_norm": 2.002229461380217, "learning_rate": 6.265459804515584e-06, "loss": 0.5808, "step": 25558 }, { "epoch": 1.8995912300260125, "grad_norm": 2.001450406765331, "learning_rate": 6.2647155006785795e-06, "loss": 0.522, "step": 25559 }, { "epoch": 1.899665551839465, "grad_norm": 2.0291105990733924, "learning_rate": 6.263971220889142e-06, "loss": 0.7062, "step": 25560 }, { "epoch": 1.899739873652917, "grad_norm": 1.9611171927297772, "learning_rate": 6.2632269651520686e-06, "loss": 0.5118, "step": 25561 }, { "epoch": 1.8998141954663694, "grad_norm": 1.986505520425675, "learning_rate": 6.262482733472142e-06, "loss": 0.5784, "step": 25562 }, { "epoch": 1.8998885172798217, "grad_norm": 1.8102468835275827, "learning_rate": 6.261738525854158e-06, "loss": 0.5667, "step": 25563 }, { "epoch": 1.899962839093274, "grad_norm": 2.2653688391481146, "learning_rate": 6.2609943423029065e-06, "loss": 0.7934, "step": 25564 }, { "epoch": 1.9000371609067261, "grad_norm": 1.6010254686312986, "learning_rate": 6.260250182823181e-06, "loss": 0.4358, "step": 25565 }, { "epoch": 1.9001114827201784, "grad_norm": 2.8951093490979716, "learning_rate": 6.25950604741977e-06, "loss": 0.7156, "step": 25566 }, { "epoch": 1.9001858045336306, "grad_norm": 4.771660665124585, "learning_rate": 6.258761936097464e-06, "loss": 0.5128, "step": 25567 }, { "epoch": 1.9002601263470829, "grad_norm": 2.31853028515432, "learning_rate": 6.258017848861055e-06, "loss": 0.5384, "step": 25568 }, { "epoch": 1.900334448160535, "grad_norm": 1.836239330567554, "learning_rate": 6.257273785715334e-06, "loss": 0.5199, "step": 25569 }, { "epoch": 1.9004087699739873, "grad_norm": 1.8974561966042254, "learning_rate": 6.25652974666509e-06, "loss": 0.4438, "step": 25570 }, { "epoch": 1.9004830917874396, "grad_norm": 1.7429490555110778, "learning_rate": 6.255785731715111e-06, "loss": 0.5382, "step": 25571 }, { "epoch": 1.9005574136008918, "grad_norm": 1.7000069265582305, "learning_rate": 6.255041740870191e-06, "loss": 0.5456, "step": 25572 }, { "epoch": 1.9006317354143443, "grad_norm": 2.0795646697709653, "learning_rate": 6.254297774135123e-06, "loss": 0.6026, "step": 25573 }, { "epoch": 1.9007060572277963, "grad_norm": 2.03708167638874, "learning_rate": 6.253553831514685e-06, "loss": 0.6243, "step": 25574 }, { "epoch": 1.9007803790412487, "grad_norm": 2.3151209034431672, "learning_rate": 6.252809913013674e-06, "loss": 0.5671, "step": 25575 }, { "epoch": 1.9008547008547008, "grad_norm": 2.0968604012778345, "learning_rate": 6.252066018636881e-06, "loss": 0.506, "step": 25576 }, { "epoch": 1.9009290226681532, "grad_norm": 1.6909625720202837, "learning_rate": 6.25132214838909e-06, "loss": 0.5282, "step": 25577 }, { "epoch": 1.9010033444816052, "grad_norm": 2.2508945654573784, "learning_rate": 6.250578302275092e-06, "loss": 0.6097, "step": 25578 }, { "epoch": 1.9010776662950577, "grad_norm": 3.106827502343087, "learning_rate": 6.249834480299677e-06, "loss": 0.699, "step": 25579 }, { "epoch": 1.9011519881085097, "grad_norm": 2.151285411645202, "learning_rate": 6.249090682467636e-06, "loss": 0.5989, "step": 25580 }, { "epoch": 1.9012263099219622, "grad_norm": 1.723797421403391, "learning_rate": 6.248346908783751e-06, "loss": 0.368, "step": 25581 }, { "epoch": 1.9013006317354142, "grad_norm": 1.7864509105127977, "learning_rate": 6.247603159252814e-06, "loss": 0.4018, "step": 25582 }, { "epoch": 1.9013749535488667, "grad_norm": 2.054929241973192, "learning_rate": 6.246859433879613e-06, "loss": 0.5462, "step": 25583 }, { "epoch": 1.901449275362319, "grad_norm": 2.352401389476084, "learning_rate": 6.246115732668942e-06, "loss": 0.563, "step": 25584 }, { "epoch": 1.9015235971757711, "grad_norm": 1.9604582301267266, "learning_rate": 6.245372055625578e-06, "loss": 0.4996, "step": 25585 }, { "epoch": 1.9015979189892234, "grad_norm": 1.6532421755401725, "learning_rate": 6.244628402754314e-06, "loss": 0.431, "step": 25586 }, { "epoch": 1.9016722408026756, "grad_norm": 2.5126842245216205, "learning_rate": 6.243884774059938e-06, "loss": 0.6137, "step": 25587 }, { "epoch": 1.9017465626161278, "grad_norm": 1.99254221150541, "learning_rate": 6.243141169547237e-06, "loss": 0.6139, "step": 25588 }, { "epoch": 1.90182088442958, "grad_norm": 1.838838504972754, "learning_rate": 6.242397589220996e-06, "loss": 0.5479, "step": 25589 }, { "epoch": 1.9018952062430323, "grad_norm": 2.041601226987686, "learning_rate": 6.241654033086006e-06, "loss": 0.621, "step": 25590 }, { "epoch": 1.9019695280564846, "grad_norm": 1.7711289614141597, "learning_rate": 6.240910501147055e-06, "loss": 0.5775, "step": 25591 }, { "epoch": 1.9020438498699368, "grad_norm": 1.9907993726899342, "learning_rate": 6.240166993408925e-06, "loss": 0.6086, "step": 25592 }, { "epoch": 1.902118171683389, "grad_norm": 2.1121446851275443, "learning_rate": 6.239423509876403e-06, "loss": 0.6659, "step": 25593 }, { "epoch": 1.9021924934968413, "grad_norm": 2.2115031370783815, "learning_rate": 6.238680050554279e-06, "loss": 0.6764, "step": 25594 }, { "epoch": 1.9022668153102935, "grad_norm": 1.8737256445810375, "learning_rate": 6.2379366154473405e-06, "loss": 0.6403, "step": 25595 }, { "epoch": 1.902341137123746, "grad_norm": 1.8422254855222122, "learning_rate": 6.237193204560368e-06, "loss": 0.4767, "step": 25596 }, { "epoch": 1.902415458937198, "grad_norm": 2.5978299689920585, "learning_rate": 6.23644981789815e-06, "loss": 0.4875, "step": 25597 }, { "epoch": 1.9024897807506504, "grad_norm": 2.276808096750277, "learning_rate": 6.235706455465475e-06, "loss": 0.6096, "step": 25598 }, { "epoch": 1.9025641025641025, "grad_norm": 2.1572337961229087, "learning_rate": 6.234963117267123e-06, "loss": 0.7047, "step": 25599 }, { "epoch": 1.902638424377555, "grad_norm": 2.276765591417122, "learning_rate": 6.234219803307885e-06, "loss": 0.6199, "step": 25600 }, { "epoch": 1.902712746191007, "grad_norm": 2.377932240601978, "learning_rate": 6.233476513592543e-06, "loss": 0.5991, "step": 25601 }, { "epoch": 1.9027870680044594, "grad_norm": 1.80935196423409, "learning_rate": 6.232733248125888e-06, "loss": 0.591, "step": 25602 }, { "epoch": 1.9028613898179114, "grad_norm": 1.972967850166835, "learning_rate": 6.231990006912696e-06, "loss": 0.6579, "step": 25603 }, { "epoch": 1.9029357116313639, "grad_norm": 2.264873339266069, "learning_rate": 6.231246789957759e-06, "loss": 0.506, "step": 25604 }, { "epoch": 1.903010033444816, "grad_norm": 2.296141084173564, "learning_rate": 6.2305035972658575e-06, "loss": 0.6736, "step": 25605 }, { "epoch": 1.9030843552582684, "grad_norm": 1.758599093994049, "learning_rate": 6.229760428841785e-06, "loss": 0.4925, "step": 25606 }, { "epoch": 1.9031586770717206, "grad_norm": 2.238371081738494, "learning_rate": 6.229017284690313e-06, "loss": 0.616, "step": 25607 }, { "epoch": 1.9032329988851728, "grad_norm": 1.9129266463927685, "learning_rate": 6.228274164816234e-06, "loss": 0.58, "step": 25608 }, { "epoch": 1.903307320698625, "grad_norm": 2.037606022169262, "learning_rate": 6.227531069224331e-06, "loss": 0.7235, "step": 25609 }, { "epoch": 1.9033816425120773, "grad_norm": 2.2268041503290816, "learning_rate": 6.2267879979193855e-06, "loss": 0.5913, "step": 25610 }, { "epoch": 1.9034559643255295, "grad_norm": 2.187706553754796, "learning_rate": 6.226044950906183e-06, "loss": 0.8133, "step": 25611 }, { "epoch": 1.9035302861389818, "grad_norm": 1.5573571172565457, "learning_rate": 6.225301928189506e-06, "loss": 0.5202, "step": 25612 }, { "epoch": 1.903604607952434, "grad_norm": 1.9737320183075473, "learning_rate": 6.224558929774144e-06, "loss": 0.6629, "step": 25613 }, { "epoch": 1.9036789297658863, "grad_norm": 2.522426294808361, "learning_rate": 6.223815955664872e-06, "loss": 0.5068, "step": 25614 }, { "epoch": 1.9037532515793385, "grad_norm": 2.030567655093121, "learning_rate": 6.2230730058664766e-06, "loss": 0.5677, "step": 25615 }, { "epoch": 1.9038275733927907, "grad_norm": 1.3549517500339698, "learning_rate": 6.222330080383743e-06, "loss": 0.367, "step": 25616 }, { "epoch": 1.903901895206243, "grad_norm": 1.918973375417107, "learning_rate": 6.221587179221451e-06, "loss": 0.7141, "step": 25617 }, { "epoch": 1.9039762170196952, "grad_norm": 1.6074340367748914, "learning_rate": 6.2208443023843876e-06, "loss": 0.3984, "step": 25618 }, { "epoch": 1.9040505388331477, "grad_norm": 1.5962529911146621, "learning_rate": 6.22010144987733e-06, "loss": 0.5124, "step": 25619 }, { "epoch": 1.9041248606465997, "grad_norm": 1.7354411936350824, "learning_rate": 6.219358621705065e-06, "loss": 0.4664, "step": 25620 }, { "epoch": 1.9041991824600522, "grad_norm": 1.9560674480201612, "learning_rate": 6.218615817872371e-06, "loss": 0.5898, "step": 25621 }, { "epoch": 1.9042735042735042, "grad_norm": 1.8423131246481705, "learning_rate": 6.217873038384032e-06, "loss": 0.5755, "step": 25622 }, { "epoch": 1.9043478260869566, "grad_norm": 2.1451375364490195, "learning_rate": 6.217130283244831e-06, "loss": 0.5975, "step": 25623 }, { "epoch": 1.9044221479004086, "grad_norm": 1.866619937152117, "learning_rate": 6.216387552459551e-06, "loss": 0.5681, "step": 25624 }, { "epoch": 1.904496469713861, "grad_norm": 1.85041269254925, "learning_rate": 6.215644846032969e-06, "loss": 0.6537, "step": 25625 }, { "epoch": 1.9045707915273131, "grad_norm": 2.261727065001439, "learning_rate": 6.214902163969868e-06, "loss": 0.6265, "step": 25626 }, { "epoch": 1.9046451133407656, "grad_norm": 2.220051660686227, "learning_rate": 6.214159506275032e-06, "loss": 0.6394, "step": 25627 }, { "epoch": 1.9047194351542176, "grad_norm": 2.2666424671693957, "learning_rate": 6.21341687295324e-06, "loss": 0.6423, "step": 25628 }, { "epoch": 1.90479375696767, "grad_norm": 2.7356138549336837, "learning_rate": 6.212674264009277e-06, "loss": 0.7494, "step": 25629 }, { "epoch": 1.9048680787811223, "grad_norm": 1.522185542441913, "learning_rate": 6.211931679447918e-06, "loss": 0.483, "step": 25630 }, { "epoch": 1.9049424005945745, "grad_norm": 2.0084390942784793, "learning_rate": 6.211189119273948e-06, "loss": 0.631, "step": 25631 }, { "epoch": 1.9050167224080268, "grad_norm": 1.96559563922082, "learning_rate": 6.210446583492142e-06, "loss": 0.4614, "step": 25632 }, { "epoch": 1.905091044221479, "grad_norm": 2.309044240311555, "learning_rate": 6.2097040721072864e-06, "loss": 0.6268, "step": 25633 }, { "epoch": 1.9051653660349313, "grad_norm": 1.9123541699122375, "learning_rate": 6.208961585124159e-06, "loss": 0.5721, "step": 25634 }, { "epoch": 1.9052396878483835, "grad_norm": 2.3092616498072434, "learning_rate": 6.208219122547542e-06, "loss": 0.7462, "step": 25635 }, { "epoch": 1.9053140096618357, "grad_norm": 2.176469130012564, "learning_rate": 6.207476684382212e-06, "loss": 0.6357, "step": 25636 }, { "epoch": 1.905388331475288, "grad_norm": 1.6625067471472135, "learning_rate": 6.206734270632949e-06, "loss": 0.5737, "step": 25637 }, { "epoch": 1.9054626532887402, "grad_norm": 2.3658823092056824, "learning_rate": 6.205991881304535e-06, "loss": 0.6593, "step": 25638 }, { "epoch": 1.9055369751021924, "grad_norm": 2.3476574081339217, "learning_rate": 6.205249516401748e-06, "loss": 0.6552, "step": 25639 }, { "epoch": 1.905611296915645, "grad_norm": 1.9762757119663767, "learning_rate": 6.204507175929373e-06, "loss": 0.5806, "step": 25640 }, { "epoch": 1.905685618729097, "grad_norm": 1.6988859659846416, "learning_rate": 6.2037648598921784e-06, "loss": 0.2485, "step": 25641 }, { "epoch": 1.9057599405425494, "grad_norm": 1.6775282154949454, "learning_rate": 6.203022568294951e-06, "loss": 0.5179, "step": 25642 }, { "epoch": 1.9058342623560014, "grad_norm": 1.9982920539724256, "learning_rate": 6.2022803011424645e-06, "loss": 0.491, "step": 25643 }, { "epoch": 1.9059085841694539, "grad_norm": 2.133827401315701, "learning_rate": 6.201538058439501e-06, "loss": 0.7189, "step": 25644 }, { "epoch": 1.9059829059829059, "grad_norm": 1.6743981243600552, "learning_rate": 6.200795840190838e-06, "loss": 0.396, "step": 25645 }, { "epoch": 1.9060572277963583, "grad_norm": 2.165326317335758, "learning_rate": 6.200053646401258e-06, "loss": 0.5746, "step": 25646 }, { "epoch": 1.9061315496098103, "grad_norm": 1.6519655082160716, "learning_rate": 6.199311477075532e-06, "loss": 0.4578, "step": 25647 }, { "epoch": 1.9062058714232628, "grad_norm": 2.2719535991545805, "learning_rate": 6.1985693322184426e-06, "loss": 0.6475, "step": 25648 }, { "epoch": 1.9062801932367148, "grad_norm": 1.748238020415082, "learning_rate": 6.197827211834765e-06, "loss": 0.4911, "step": 25649 }, { "epoch": 1.9063545150501673, "grad_norm": 1.4977051280789515, "learning_rate": 6.19708511592928e-06, "loss": 0.4488, "step": 25650 }, { "epoch": 1.9064288368636195, "grad_norm": 1.589680215374249, "learning_rate": 6.1963430445067656e-06, "loss": 0.4547, "step": 25651 }, { "epoch": 1.9065031586770718, "grad_norm": 1.9141654464707114, "learning_rate": 6.195600997571998e-06, "loss": 0.4435, "step": 25652 }, { "epoch": 1.906577480490524, "grad_norm": 1.79484354058415, "learning_rate": 6.194858975129749e-06, "loss": 0.5631, "step": 25653 }, { "epoch": 1.9066518023039762, "grad_norm": 1.9087441495946047, "learning_rate": 6.194116977184803e-06, "loss": 0.6199, "step": 25654 }, { "epoch": 1.9067261241174285, "grad_norm": 1.5459809250886034, "learning_rate": 6.193375003741934e-06, "loss": 0.437, "step": 25655 }, { "epoch": 1.9068004459308807, "grad_norm": 1.737810670602721, "learning_rate": 6.1926330548059185e-06, "loss": 0.4425, "step": 25656 }, { "epoch": 1.906874767744333, "grad_norm": 1.8683349204579738, "learning_rate": 6.191891130381536e-06, "loss": 0.6104, "step": 25657 }, { "epoch": 1.9069490895577852, "grad_norm": 2.180057109372343, "learning_rate": 6.191149230473559e-06, "loss": 0.5699, "step": 25658 }, { "epoch": 1.9070234113712374, "grad_norm": 1.7043218829371278, "learning_rate": 6.190407355086765e-06, "loss": 0.6612, "step": 25659 }, { "epoch": 1.9070977331846897, "grad_norm": 2.111066040148038, "learning_rate": 6.189665504225931e-06, "loss": 0.5079, "step": 25660 }, { "epoch": 1.907172054998142, "grad_norm": 2.0749272465627207, "learning_rate": 6.188923677895835e-06, "loss": 0.6356, "step": 25661 }, { "epoch": 1.9072463768115941, "grad_norm": 2.463632129647646, "learning_rate": 6.1881818761012495e-06, "loss": 0.3055, "step": 25662 }, { "epoch": 1.9073206986250466, "grad_norm": 2.3046092398855125, "learning_rate": 6.187440098846953e-06, "loss": 0.5559, "step": 25663 }, { "epoch": 1.9073950204384986, "grad_norm": 1.9742635513494575, "learning_rate": 6.1866983461377175e-06, "loss": 0.5449, "step": 25664 }, { "epoch": 1.907469342251951, "grad_norm": 2.367297022615477, "learning_rate": 6.185956617978319e-06, "loss": 0.5978, "step": 25665 }, { "epoch": 1.907543664065403, "grad_norm": 1.6841826486763598, "learning_rate": 6.185214914373534e-06, "loss": 0.5616, "step": 25666 }, { "epoch": 1.9076179858788556, "grad_norm": 2.064701148610528, "learning_rate": 6.184473235328139e-06, "loss": 0.7212, "step": 25667 }, { "epoch": 1.9076923076923076, "grad_norm": 2.1947073084466586, "learning_rate": 6.183731580846908e-06, "loss": 0.6585, "step": 25668 }, { "epoch": 1.90776662950576, "grad_norm": 1.8207700492875636, "learning_rate": 6.182989950934613e-06, "loss": 0.5161, "step": 25669 }, { "epoch": 1.907840951319212, "grad_norm": 2.0104452694467008, "learning_rate": 6.18224834559603e-06, "loss": 0.4263, "step": 25670 }, { "epoch": 1.9079152731326645, "grad_norm": 3.264048398980931, "learning_rate": 6.181506764835935e-06, "loss": 0.7124, "step": 25671 }, { "epoch": 1.9079895949461165, "grad_norm": 2.25980621754963, "learning_rate": 6.180765208659103e-06, "loss": 0.5316, "step": 25672 }, { "epoch": 1.908063916759569, "grad_norm": 1.6575040055661645, "learning_rate": 6.180023677070305e-06, "loss": 0.4904, "step": 25673 }, { "epoch": 1.9081382385730212, "grad_norm": 1.7994603623242285, "learning_rate": 6.17928217007432e-06, "loss": 0.4974, "step": 25674 }, { "epoch": 1.9082125603864735, "grad_norm": 2.05559223267158, "learning_rate": 6.178540687675914e-06, "loss": 0.6251, "step": 25675 }, { "epoch": 1.9082868821999257, "grad_norm": 1.9317881629895213, "learning_rate": 6.177799229879865e-06, "loss": 0.5221, "step": 25676 }, { "epoch": 1.908361204013378, "grad_norm": 1.9853433119671244, "learning_rate": 6.177057796690946e-06, "loss": 0.5274, "step": 25677 }, { "epoch": 1.9084355258268302, "grad_norm": 2.0042722883231634, "learning_rate": 6.176316388113931e-06, "loss": 0.6485, "step": 25678 }, { "epoch": 1.9085098476402824, "grad_norm": 1.5437822587431307, "learning_rate": 6.1755750041535935e-06, "loss": 0.5881, "step": 25679 }, { "epoch": 1.9085841694537347, "grad_norm": 1.7710809409141886, "learning_rate": 6.174833644814705e-06, "loss": 0.4556, "step": 25680 }, { "epoch": 1.908658491267187, "grad_norm": 2.163964714612292, "learning_rate": 6.174092310102039e-06, "loss": 0.6669, "step": 25681 }, { "epoch": 1.9087328130806391, "grad_norm": 4.84833377550887, "learning_rate": 6.173351000020367e-06, "loss": 0.7334, "step": 25682 }, { "epoch": 1.9088071348940914, "grad_norm": 1.9842694222263524, "learning_rate": 6.1726097145744665e-06, "loss": 0.6095, "step": 25683 }, { "epoch": 1.9088814567075436, "grad_norm": 2.277571789996627, "learning_rate": 6.171868453769103e-06, "loss": 0.6884, "step": 25684 }, { "epoch": 1.9089557785209958, "grad_norm": 2.0592442248612457, "learning_rate": 6.171127217609052e-06, "loss": 0.5773, "step": 25685 }, { "epoch": 1.9090301003344483, "grad_norm": 2.084971996811255, "learning_rate": 6.170386006099088e-06, "loss": 0.5054, "step": 25686 }, { "epoch": 1.9091044221479003, "grad_norm": 1.9187628037411277, "learning_rate": 6.169644819243979e-06, "loss": 0.6008, "step": 25687 }, { "epoch": 1.9091787439613528, "grad_norm": 2.8193062374715523, "learning_rate": 6.168903657048495e-06, "loss": 0.6461, "step": 25688 }, { "epoch": 1.9092530657748048, "grad_norm": 2.3608904731190195, "learning_rate": 6.168162519517411e-06, "loss": 0.6662, "step": 25689 }, { "epoch": 1.9093273875882573, "grad_norm": 2.6672125263022948, "learning_rate": 6.1674214066555005e-06, "loss": 0.5025, "step": 25690 }, { "epoch": 1.9094017094017093, "grad_norm": 2.007177114206057, "learning_rate": 6.1666803184675306e-06, "loss": 0.456, "step": 25691 }, { "epoch": 1.9094760312151617, "grad_norm": 1.895293048193302, "learning_rate": 6.165939254958273e-06, "loss": 0.529, "step": 25692 }, { "epoch": 1.9095503530286138, "grad_norm": 1.7398240301783112, "learning_rate": 6.165198216132501e-06, "loss": 0.4909, "step": 25693 }, { "epoch": 1.9096246748420662, "grad_norm": 1.9394427527394527, "learning_rate": 6.164457201994984e-06, "loss": 0.4729, "step": 25694 }, { "epoch": 1.9096989966555182, "grad_norm": 2.090145104942369, "learning_rate": 6.1637162125504925e-06, "loss": 0.4763, "step": 25695 }, { "epoch": 1.9097733184689707, "grad_norm": 1.9018127130121532, "learning_rate": 6.162975247803795e-06, "loss": 0.5235, "step": 25696 }, { "epoch": 1.909847640282423, "grad_norm": 1.6710124855384094, "learning_rate": 6.162234307759669e-06, "loss": 0.4604, "step": 25697 }, { "epoch": 1.9099219620958752, "grad_norm": 2.0431678273187925, "learning_rate": 6.161493392422876e-06, "loss": 0.4888, "step": 25698 }, { "epoch": 1.9099962839093274, "grad_norm": 2.042718573952602, "learning_rate": 6.160752501798189e-06, "loss": 0.698, "step": 25699 }, { "epoch": 1.9100706057227796, "grad_norm": 1.576331596747541, "learning_rate": 6.160011635890378e-06, "loss": 0.4633, "step": 25700 }, { "epoch": 1.9101449275362319, "grad_norm": 3.167064014581758, "learning_rate": 6.159270794704213e-06, "loss": 0.5401, "step": 25701 }, { "epoch": 1.9102192493496841, "grad_norm": 2.2037227548710825, "learning_rate": 6.158529978244463e-06, "loss": 0.6554, "step": 25702 }, { "epoch": 1.9102935711631364, "grad_norm": 1.969514559238864, "learning_rate": 6.157789186515898e-06, "loss": 0.5644, "step": 25703 }, { "epoch": 1.9103678929765886, "grad_norm": 1.9809021430262113, "learning_rate": 6.157048419523286e-06, "loss": 0.5061, "step": 25704 }, { "epoch": 1.9104422147900408, "grad_norm": 2.1632119685664204, "learning_rate": 6.156307677271399e-06, "loss": 0.5991, "step": 25705 }, { "epoch": 1.910516536603493, "grad_norm": 2.3926326214706957, "learning_rate": 6.155566959765002e-06, "loss": 0.6889, "step": 25706 }, { "epoch": 1.9105908584169455, "grad_norm": 2.2103983600977926, "learning_rate": 6.154826267008866e-06, "loss": 0.6157, "step": 25707 }, { "epoch": 1.9106651802303976, "grad_norm": 1.8646666852449474, "learning_rate": 6.154085599007762e-06, "loss": 0.5031, "step": 25708 }, { "epoch": 1.91073950204385, "grad_norm": 1.7306039133510522, "learning_rate": 6.153344955766452e-06, "loss": 0.4904, "step": 25709 }, { "epoch": 1.910813823857302, "grad_norm": 2.0246737456849546, "learning_rate": 6.1526043372897074e-06, "loss": 0.5088, "step": 25710 }, { "epoch": 1.9108881456707545, "grad_norm": 1.8166984873019985, "learning_rate": 6.151863743582296e-06, "loss": 0.5982, "step": 25711 }, { "epoch": 1.9109624674842065, "grad_norm": 1.8711960127597569, "learning_rate": 6.15112317464899e-06, "loss": 0.6838, "step": 25712 }, { "epoch": 1.911036789297659, "grad_norm": 1.4418225611066533, "learning_rate": 6.150382630494549e-06, "loss": 0.4213, "step": 25713 }, { "epoch": 1.911111111111111, "grad_norm": 2.050538590688724, "learning_rate": 6.149642111123746e-06, "loss": 0.596, "step": 25714 }, { "epoch": 1.9111854329245634, "grad_norm": 1.876100417125313, "learning_rate": 6.148901616541348e-06, "loss": 0.5289, "step": 25715 }, { "epoch": 1.9112597547380155, "grad_norm": 2.2723031932065694, "learning_rate": 6.1481611467521205e-06, "loss": 0.7342, "step": 25716 }, { "epoch": 1.911334076551468, "grad_norm": 1.9052289099695048, "learning_rate": 6.147420701760833e-06, "loss": 0.6051, "step": 25717 }, { "epoch": 1.9114083983649202, "grad_norm": 1.8939104598606185, "learning_rate": 6.14668028157225e-06, "loss": 0.5464, "step": 25718 }, { "epoch": 1.9114827201783724, "grad_norm": 1.8558287242868625, "learning_rate": 6.145939886191143e-06, "loss": 0.5622, "step": 25719 }, { "epoch": 1.9115570419918246, "grad_norm": 2.4237921117006214, "learning_rate": 6.145199515622271e-06, "loss": 0.6429, "step": 25720 }, { "epoch": 1.9116313638052769, "grad_norm": 1.9350951051997156, "learning_rate": 6.144459169870405e-06, "loss": 0.5675, "step": 25721 }, { "epoch": 1.9117056856187291, "grad_norm": 2.198698826142345, "learning_rate": 6.143718848940311e-06, "loss": 0.5711, "step": 25722 }, { "epoch": 1.9117800074321813, "grad_norm": 2.014980939781798, "learning_rate": 6.142978552836757e-06, "loss": 0.5959, "step": 25723 }, { "epoch": 1.9118543292456336, "grad_norm": 1.5293533086222975, "learning_rate": 6.142238281564504e-06, "loss": 0.4965, "step": 25724 }, { "epoch": 1.9119286510590858, "grad_norm": 2.1988673197799, "learning_rate": 6.141498035128321e-06, "loss": 0.5578, "step": 25725 }, { "epoch": 1.912002972872538, "grad_norm": 1.7135093566953825, "learning_rate": 6.140757813532975e-06, "loss": 0.5543, "step": 25726 }, { "epoch": 1.9120772946859903, "grad_norm": 2.3648014759209453, "learning_rate": 6.14001761678323e-06, "loss": 0.6657, "step": 25727 }, { "epoch": 1.9121516164994425, "grad_norm": 2.1246525663643836, "learning_rate": 6.139277444883851e-06, "loss": 0.5168, "step": 25728 }, { "epoch": 1.9122259383128948, "grad_norm": 1.7827590489967078, "learning_rate": 6.138537297839602e-06, "loss": 0.5989, "step": 25729 }, { "epoch": 1.9123002601263472, "grad_norm": 2.2374909695128866, "learning_rate": 6.13779717565525e-06, "loss": 0.6753, "step": 25730 }, { "epoch": 1.9123745819397993, "grad_norm": 2.6500959903089023, "learning_rate": 6.137057078335564e-06, "loss": 0.7216, "step": 25731 }, { "epoch": 1.9124489037532517, "grad_norm": 1.9219861433736793, "learning_rate": 6.136317005885299e-06, "loss": 0.635, "step": 25732 }, { "epoch": 1.9125232255667037, "grad_norm": 1.8309140696053214, "learning_rate": 6.135576958309226e-06, "loss": 0.4649, "step": 25733 }, { "epoch": 1.9125975473801562, "grad_norm": 2.1153688893250524, "learning_rate": 6.13483693561211e-06, "loss": 0.5928, "step": 25734 }, { "epoch": 1.9126718691936082, "grad_norm": 2.0762016660692315, "learning_rate": 6.13409693779871e-06, "loss": 0.6478, "step": 25735 }, { "epoch": 1.9127461910070607, "grad_norm": 1.6037050165916922, "learning_rate": 6.133356964873793e-06, "loss": 0.4507, "step": 25736 }, { "epoch": 1.9128205128205127, "grad_norm": 1.6670376246513257, "learning_rate": 6.132617016842124e-06, "loss": 0.3613, "step": 25737 }, { "epoch": 1.9128948346339651, "grad_norm": 2.136645033562676, "learning_rate": 6.13187709370847e-06, "loss": 0.5525, "step": 25738 }, { "epoch": 1.9129691564474172, "grad_norm": 2.1915104468874214, "learning_rate": 6.131137195477586e-06, "loss": 0.806, "step": 25739 }, { "epoch": 1.9130434782608696, "grad_norm": 2.2307194567458573, "learning_rate": 6.1303973221542405e-06, "loss": 0.6385, "step": 25740 }, { "epoch": 1.9131178000743219, "grad_norm": 1.6101464217520531, "learning_rate": 6.129657473743195e-06, "loss": 0.4636, "step": 25741 }, { "epoch": 1.913192121887774, "grad_norm": 2.103822808022712, "learning_rate": 6.12891765024922e-06, "loss": 0.5876, "step": 25742 }, { "epoch": 1.9132664437012263, "grad_norm": 2.4062169221548535, "learning_rate": 6.128177851677068e-06, "loss": 0.6405, "step": 25743 }, { "epoch": 1.9133407655146786, "grad_norm": 2.2774071954961097, "learning_rate": 6.127438078031505e-06, "loss": 0.4659, "step": 25744 }, { "epoch": 1.9134150873281308, "grad_norm": 1.9360663679785002, "learning_rate": 6.126698329317298e-06, "loss": 0.5478, "step": 25745 }, { "epoch": 1.913489409141583, "grad_norm": 1.6861011213281396, "learning_rate": 6.125958605539204e-06, "loss": 0.4721, "step": 25746 }, { "epoch": 1.9135637309550353, "grad_norm": 1.9000249788955896, "learning_rate": 6.125218906701986e-06, "loss": 0.5894, "step": 25747 }, { "epoch": 1.9136380527684875, "grad_norm": 2.162327695222545, "learning_rate": 6.124479232810409e-06, "loss": 0.5787, "step": 25748 }, { "epoch": 1.9137123745819398, "grad_norm": 1.8736039466070802, "learning_rate": 6.123739583869235e-06, "loss": 0.5331, "step": 25749 }, { "epoch": 1.913786696395392, "grad_norm": 1.7085594783824931, "learning_rate": 6.1229999598832225e-06, "loss": 0.5197, "step": 25750 }, { "epoch": 1.9138610182088442, "grad_norm": 1.8839195512323792, "learning_rate": 6.1222603608571355e-06, "loss": 0.5213, "step": 25751 }, { "epoch": 1.9139353400222965, "grad_norm": 2.223666352058603, "learning_rate": 6.1215207867957345e-06, "loss": 0.5736, "step": 25752 }, { "epoch": 1.914009661835749, "grad_norm": 2.044725575418283, "learning_rate": 6.1207812377037865e-06, "loss": 0.5157, "step": 25753 }, { "epoch": 1.914083983649201, "grad_norm": 1.643543128097204, "learning_rate": 6.1200417135860425e-06, "loss": 0.5489, "step": 25754 }, { "epoch": 1.9141583054626534, "grad_norm": 2.326729873338799, "learning_rate": 6.119302214447269e-06, "loss": 0.5056, "step": 25755 }, { "epoch": 1.9142326272761054, "grad_norm": 1.964809488946029, "learning_rate": 6.118562740292229e-06, "loss": 0.5679, "step": 25756 }, { "epoch": 1.914306949089558, "grad_norm": 2.006339141681465, "learning_rate": 6.1178232911256785e-06, "loss": 0.5947, "step": 25757 }, { "epoch": 1.91438127090301, "grad_norm": 1.9544017053072578, "learning_rate": 6.11708386695238e-06, "loss": 0.6515, "step": 25758 }, { "epoch": 1.9144555927164624, "grad_norm": 2.07252606486488, "learning_rate": 6.116344467777093e-06, "loss": 0.5943, "step": 25759 }, { "epoch": 1.9145299145299144, "grad_norm": 1.9491378662702787, "learning_rate": 6.1156050936045826e-06, "loss": 0.6107, "step": 25760 }, { "epoch": 1.9146042363433668, "grad_norm": 1.950329377898911, "learning_rate": 6.114865744439602e-06, "loss": 0.6707, "step": 25761 }, { "epoch": 1.9146785581568189, "grad_norm": 1.5846717571846973, "learning_rate": 6.114126420286916e-06, "loss": 0.4446, "step": 25762 }, { "epoch": 1.9147528799702713, "grad_norm": 1.6537967896027168, "learning_rate": 6.11338712115128e-06, "loss": 0.4667, "step": 25763 }, { "epoch": 1.9148272017837236, "grad_norm": 2.0580848833748115, "learning_rate": 6.112647847037458e-06, "loss": 0.6313, "step": 25764 }, { "epoch": 1.9149015235971758, "grad_norm": 2.1315752023118866, "learning_rate": 6.111908597950209e-06, "loss": 0.5196, "step": 25765 }, { "epoch": 1.914975845410628, "grad_norm": 2.2039165934300198, "learning_rate": 6.111169373894289e-06, "loss": 0.4429, "step": 25766 }, { "epoch": 1.9150501672240803, "grad_norm": 1.9327936601084843, "learning_rate": 6.11043017487446e-06, "loss": 0.5421, "step": 25767 }, { "epoch": 1.9151244890375325, "grad_norm": 1.8074902136668565, "learning_rate": 6.109691000895478e-06, "loss": 0.6104, "step": 25768 }, { "epoch": 1.9151988108509848, "grad_norm": 1.746277747020872, "learning_rate": 6.108951851962103e-06, "loss": 0.5431, "step": 25769 }, { "epoch": 1.915273132664437, "grad_norm": 2.096682596131115, "learning_rate": 6.108212728079095e-06, "loss": 0.5382, "step": 25770 }, { "epoch": 1.9153474544778892, "grad_norm": 2.2702194498285593, "learning_rate": 6.1074736292512125e-06, "loss": 0.5785, "step": 25771 }, { "epoch": 1.9154217762913415, "grad_norm": 1.7955931485787011, "learning_rate": 6.1067345554832115e-06, "loss": 0.4704, "step": 25772 }, { "epoch": 1.9154960981047937, "grad_norm": 2.0631971619071523, "learning_rate": 6.105995506779849e-06, "loss": 0.5954, "step": 25773 }, { "epoch": 1.9155704199182462, "grad_norm": 2.148988940244607, "learning_rate": 6.105256483145888e-06, "loss": 0.6371, "step": 25774 }, { "epoch": 1.9156447417316982, "grad_norm": 2.0322854924616647, "learning_rate": 6.104517484586083e-06, "loss": 0.619, "step": 25775 }, { "epoch": 1.9157190635451506, "grad_norm": 2.0152346787169, "learning_rate": 6.103778511105198e-06, "loss": 0.5259, "step": 25776 }, { "epoch": 1.9157933853586027, "grad_norm": 2.441051958103849, "learning_rate": 6.10303956270798e-06, "loss": 0.6157, "step": 25777 }, { "epoch": 1.9158677071720551, "grad_norm": 2.3811836735212775, "learning_rate": 6.10230063939919e-06, "loss": 0.6046, "step": 25778 }, { "epoch": 1.9159420289855071, "grad_norm": 1.7406153339093962, "learning_rate": 6.101561741183587e-06, "loss": 0.6107, "step": 25779 }, { "epoch": 1.9160163507989596, "grad_norm": 1.9309459652802377, "learning_rate": 6.100822868065928e-06, "loss": 0.5788, "step": 25780 }, { "epoch": 1.9160906726124116, "grad_norm": 2.3542920521883035, "learning_rate": 6.100084020050967e-06, "loss": 0.6015, "step": 25781 }, { "epoch": 1.916164994425864, "grad_norm": 2.17952323687913, "learning_rate": 6.099345197143465e-06, "loss": 0.5733, "step": 25782 }, { "epoch": 1.916239316239316, "grad_norm": 2.2229263962186856, "learning_rate": 6.098606399348175e-06, "loss": 0.579, "step": 25783 }, { "epoch": 1.9163136380527686, "grad_norm": 2.086591325039695, "learning_rate": 6.097867626669855e-06, "loss": 0.5713, "step": 25784 }, { "epoch": 1.9163879598662206, "grad_norm": 2.362913468790859, "learning_rate": 6.097128879113259e-06, "loss": 0.5773, "step": 25785 }, { "epoch": 1.916462281679673, "grad_norm": 1.8373403935744081, "learning_rate": 6.0963901566831465e-06, "loss": 0.5329, "step": 25786 }, { "epoch": 1.9165366034931253, "grad_norm": 2.09605865244758, "learning_rate": 6.095651459384275e-06, "loss": 0.52, "step": 25787 }, { "epoch": 1.9166109253065775, "grad_norm": 2.2128889190375114, "learning_rate": 6.094912787221394e-06, "loss": 0.6346, "step": 25788 }, { "epoch": 1.9166852471200297, "grad_norm": 1.645460090372751, "learning_rate": 6.094174140199262e-06, "loss": 0.3881, "step": 25789 }, { "epoch": 1.916759568933482, "grad_norm": 1.9003329210102446, "learning_rate": 6.093435518322633e-06, "loss": 0.5387, "step": 25790 }, { "epoch": 1.9168338907469342, "grad_norm": 5.8810471260412465, "learning_rate": 6.092696921596263e-06, "loss": 0.7154, "step": 25791 }, { "epoch": 1.9169082125603865, "grad_norm": 2.5455306690374964, "learning_rate": 6.091958350024907e-06, "loss": 0.6976, "step": 25792 }, { "epoch": 1.9169825343738387, "grad_norm": 1.846792305497794, "learning_rate": 6.091219803613324e-06, "loss": 0.5679, "step": 25793 }, { "epoch": 1.917056856187291, "grad_norm": 1.9996811389515714, "learning_rate": 6.090481282366262e-06, "loss": 0.6305, "step": 25794 }, { "epoch": 1.9171311780007432, "grad_norm": 2.4734362548516033, "learning_rate": 6.089742786288479e-06, "loss": 0.6649, "step": 25795 }, { "epoch": 1.9172054998141954, "grad_norm": 1.9838528714005674, "learning_rate": 6.089004315384729e-06, "loss": 0.5334, "step": 25796 }, { "epoch": 1.9172798216276479, "grad_norm": 2.005384732533669, "learning_rate": 6.088265869659769e-06, "loss": 0.6284, "step": 25797 }, { "epoch": 1.9173541434411, "grad_norm": 1.8298252704851257, "learning_rate": 6.087527449118347e-06, "loss": 0.5201, "step": 25798 }, { "epoch": 1.9174284652545523, "grad_norm": 1.6010844089011826, "learning_rate": 6.0867890537652255e-06, "loss": 0.5013, "step": 25799 }, { "epoch": 1.9175027870680044, "grad_norm": 2.0205589698315842, "learning_rate": 6.086050683605147e-06, "loss": 0.4942, "step": 25800 }, { "epoch": 1.9175771088814568, "grad_norm": 1.8421543184060796, "learning_rate": 6.085312338642872e-06, "loss": 0.503, "step": 25801 }, { "epoch": 1.9176514306949088, "grad_norm": 1.7204514081972995, "learning_rate": 6.084574018883154e-06, "loss": 0.5318, "step": 25802 }, { "epoch": 1.9177257525083613, "grad_norm": 1.9566976436762762, "learning_rate": 6.083835724330746e-06, "loss": 0.6245, "step": 25803 }, { "epoch": 1.9178000743218133, "grad_norm": 1.9219903498648578, "learning_rate": 6.083097454990401e-06, "loss": 0.4764, "step": 25804 }, { "epoch": 1.9178743961352658, "grad_norm": 1.5870721773601173, "learning_rate": 6.082359210866871e-06, "loss": 0.4577, "step": 25805 }, { "epoch": 1.9179487179487178, "grad_norm": 2.0221240686590005, "learning_rate": 6.081620991964907e-06, "loss": 0.7316, "step": 25806 }, { "epoch": 1.9180230397621703, "grad_norm": 1.9503225878816892, "learning_rate": 6.0808827982892645e-06, "loss": 0.5541, "step": 25807 }, { "epoch": 1.9180973615756225, "grad_norm": 2.37725554900601, "learning_rate": 6.080144629844697e-06, "loss": 0.6209, "step": 25808 }, { "epoch": 1.9181716833890747, "grad_norm": 1.7389269496506992, "learning_rate": 6.079406486635953e-06, "loss": 0.5623, "step": 25809 }, { "epoch": 1.918246005202527, "grad_norm": 1.9420508511086862, "learning_rate": 6.07866836866779e-06, "loss": 0.5283, "step": 25810 }, { "epoch": 1.9183203270159792, "grad_norm": 1.8086398718479648, "learning_rate": 6.077930275944955e-06, "loss": 0.585, "step": 25811 }, { "epoch": 1.9183946488294314, "grad_norm": 2.800785428777635, "learning_rate": 6.077192208472199e-06, "loss": 0.6809, "step": 25812 }, { "epoch": 1.9184689706428837, "grad_norm": 1.8756258938229156, "learning_rate": 6.076454166254278e-06, "loss": 0.5655, "step": 25813 }, { "epoch": 1.918543292456336, "grad_norm": 1.9821476408534526, "learning_rate": 6.075716149295939e-06, "loss": 0.4926, "step": 25814 }, { "epoch": 1.9186176142697882, "grad_norm": 2.3399197793515176, "learning_rate": 6.0749781576019394e-06, "loss": 0.5767, "step": 25815 }, { "epoch": 1.9186919360832404, "grad_norm": 2.3412782426605054, "learning_rate": 6.074240191177024e-06, "loss": 0.6073, "step": 25816 }, { "epoch": 1.9187662578966926, "grad_norm": 1.8996416589719816, "learning_rate": 6.073502250025948e-06, "loss": 0.6077, "step": 25817 }, { "epoch": 1.9188405797101449, "grad_norm": 1.8044003342865529, "learning_rate": 6.07276433415346e-06, "loss": 0.4976, "step": 25818 }, { "epoch": 1.9189149015235971, "grad_norm": 1.7670718484554244, "learning_rate": 6.072026443564313e-06, "loss": 0.5463, "step": 25819 }, { "epoch": 1.9189892233370496, "grad_norm": 2.163433491054059, "learning_rate": 6.071288578263255e-06, "loss": 0.5302, "step": 25820 }, { "epoch": 1.9190635451505016, "grad_norm": 1.757398470701997, "learning_rate": 6.070550738255039e-06, "loss": 0.4182, "step": 25821 }, { "epoch": 1.919137866963954, "grad_norm": 2.0655521914543864, "learning_rate": 6.069812923544411e-06, "loss": 0.5836, "step": 25822 }, { "epoch": 1.919212188777406, "grad_norm": 2.145274950480173, "learning_rate": 6.069075134136124e-06, "loss": 0.6314, "step": 25823 }, { "epoch": 1.9192865105908585, "grad_norm": 1.9503424406361995, "learning_rate": 6.068337370034927e-06, "loss": 0.4957, "step": 25824 }, { "epoch": 1.9193608324043105, "grad_norm": 1.6928351617932926, "learning_rate": 6.0675996312455694e-06, "loss": 0.4775, "step": 25825 }, { "epoch": 1.919435154217763, "grad_norm": 1.814294316250247, "learning_rate": 6.066861917772804e-06, "loss": 0.5019, "step": 25826 }, { "epoch": 1.919509476031215, "grad_norm": 1.84761247307181, "learning_rate": 6.066124229621375e-06, "loss": 0.548, "step": 25827 }, { "epoch": 1.9195837978446675, "grad_norm": 1.8966482352536198, "learning_rate": 6.065386566796034e-06, "loss": 0.5572, "step": 25828 }, { "epoch": 1.9196581196581195, "grad_norm": 1.6668341902413755, "learning_rate": 6.064648929301531e-06, "loss": 0.5461, "step": 25829 }, { "epoch": 1.919732441471572, "grad_norm": 1.8090198317756458, "learning_rate": 6.063911317142616e-06, "loss": 0.5596, "step": 25830 }, { "epoch": 1.9198067632850242, "grad_norm": 1.9433314720290273, "learning_rate": 6.0631737303240325e-06, "loss": 0.5875, "step": 25831 }, { "epoch": 1.9198810850984764, "grad_norm": 2.3200073824545226, "learning_rate": 6.062436168850538e-06, "loss": 0.5699, "step": 25832 }, { "epoch": 1.9199554069119287, "grad_norm": 2.612188389299249, "learning_rate": 6.061698632726869e-06, "loss": 0.8291, "step": 25833 }, { "epoch": 1.920029728725381, "grad_norm": 1.7839139717058146, "learning_rate": 6.06096112195778e-06, "loss": 0.4411, "step": 25834 }, { "epoch": 1.9201040505388332, "grad_norm": 2.6814703418613757, "learning_rate": 6.060223636548022e-06, "loss": 0.6388, "step": 25835 }, { "epoch": 1.9201783723522854, "grad_norm": 1.6033742516696352, "learning_rate": 6.059486176502336e-06, "loss": 0.4515, "step": 25836 }, { "epoch": 1.9202526941657376, "grad_norm": 1.8720937717999209, "learning_rate": 6.0587487418254774e-06, "loss": 0.5326, "step": 25837 }, { "epoch": 1.9203270159791899, "grad_norm": 1.6873649801168482, "learning_rate": 6.058011332522188e-06, "loss": 0.4956, "step": 25838 }, { "epoch": 1.920401337792642, "grad_norm": 1.7609627761237376, "learning_rate": 6.057273948597216e-06, "loss": 0.4967, "step": 25839 }, { "epoch": 1.9204756596060943, "grad_norm": 1.5691264440531583, "learning_rate": 6.056536590055311e-06, "loss": 0.3932, "step": 25840 }, { "epoch": 1.9205499814195468, "grad_norm": 2.315437027379049, "learning_rate": 6.0557992569012206e-06, "loss": 0.7736, "step": 25841 }, { "epoch": 1.9206243032329988, "grad_norm": 2.00400128684837, "learning_rate": 6.055061949139688e-06, "loss": 0.6439, "step": 25842 }, { "epoch": 1.9206986250464513, "grad_norm": 1.9141591603341552, "learning_rate": 6.054324666775462e-06, "loss": 0.544, "step": 25843 }, { "epoch": 1.9207729468599033, "grad_norm": 2.1180523031844802, "learning_rate": 6.053587409813292e-06, "loss": 0.6337, "step": 25844 }, { "epoch": 1.9208472686733558, "grad_norm": 2.228817218721647, "learning_rate": 6.0528501782579186e-06, "loss": 0.4306, "step": 25845 }, { "epoch": 1.9209215904868078, "grad_norm": 2.1622663604733523, "learning_rate": 6.05211297211409e-06, "loss": 0.5311, "step": 25846 }, { "epoch": 1.9209959123002602, "grad_norm": 2.0228520949873654, "learning_rate": 6.051375791386554e-06, "loss": 0.5473, "step": 25847 }, { "epoch": 1.9210702341137122, "grad_norm": 2.5650317590315512, "learning_rate": 6.0506386360800574e-06, "loss": 0.6705, "step": 25848 }, { "epoch": 1.9211445559271647, "grad_norm": 2.3497422331104523, "learning_rate": 6.049901506199343e-06, "loss": 0.6495, "step": 25849 }, { "epoch": 1.9212188777406167, "grad_norm": 2.3023638908704527, "learning_rate": 6.049164401749157e-06, "loss": 0.6847, "step": 25850 }, { "epoch": 1.9212931995540692, "grad_norm": 2.0028194620573574, "learning_rate": 6.048427322734245e-06, "loss": 0.6272, "step": 25851 }, { "epoch": 1.9213675213675212, "grad_norm": 2.5536096163191764, "learning_rate": 6.0476902691593564e-06, "loss": 0.739, "step": 25852 }, { "epoch": 1.9214418431809737, "grad_norm": 2.362878056647213, "learning_rate": 6.046953241029229e-06, "loss": 0.6973, "step": 25853 }, { "epoch": 1.921516164994426, "grad_norm": 1.7545451844880084, "learning_rate": 6.046216238348612e-06, "loss": 0.53, "step": 25854 }, { "epoch": 1.9215904868078781, "grad_norm": 2.303422439587489, "learning_rate": 6.045479261122256e-06, "loss": 0.6601, "step": 25855 }, { "epoch": 1.9216648086213304, "grad_norm": 1.9575737409602219, "learning_rate": 6.044742309354893e-06, "loss": 0.5407, "step": 25856 }, { "epoch": 1.9217391304347826, "grad_norm": 1.7713543489612584, "learning_rate": 6.044005383051275e-06, "loss": 0.6542, "step": 25857 }, { "epoch": 1.9218134522482349, "grad_norm": 2.054831300705078, "learning_rate": 6.043268482216143e-06, "loss": 0.6411, "step": 25858 }, { "epoch": 1.921887774061687, "grad_norm": 1.8476383096191842, "learning_rate": 6.042531606854247e-06, "loss": 0.4765, "step": 25859 }, { "epoch": 1.9219620958751393, "grad_norm": 2.025207561615713, "learning_rate": 6.041794756970325e-06, "loss": 0.5069, "step": 25860 }, { "epoch": 1.9220364176885916, "grad_norm": 2.1159205739713713, "learning_rate": 6.041057932569123e-06, "loss": 0.6934, "step": 25861 }, { "epoch": 1.9221107395020438, "grad_norm": 1.9858331175251507, "learning_rate": 6.040321133655385e-06, "loss": 0.585, "step": 25862 }, { "epoch": 1.922185061315496, "grad_norm": 1.9827348433687666, "learning_rate": 6.0395843602338545e-06, "loss": 0.5271, "step": 25863 }, { "epoch": 1.9222593831289485, "grad_norm": 3.391145921829505, "learning_rate": 6.038847612309273e-06, "loss": 0.526, "step": 25864 }, { "epoch": 1.9223337049424005, "grad_norm": 2.644175153100278, "learning_rate": 6.038110889886386e-06, "loss": 0.6512, "step": 25865 }, { "epoch": 1.922408026755853, "grad_norm": 1.8748165275936008, "learning_rate": 6.0373741929699395e-06, "loss": 0.6016, "step": 25866 }, { "epoch": 1.922482348569305, "grad_norm": 2.1503654433435733, "learning_rate": 6.036637521564668e-06, "loss": 0.5544, "step": 25867 }, { "epoch": 1.9225566703827575, "grad_norm": 2.4210298297802137, "learning_rate": 6.035900875675318e-06, "loss": 0.6173, "step": 25868 }, { "epoch": 1.9226309921962095, "grad_norm": 1.7624892444672107, "learning_rate": 6.0351642553066326e-06, "loss": 0.3779, "step": 25869 }, { "epoch": 1.922705314009662, "grad_norm": 1.9630695850379085, "learning_rate": 6.034427660463356e-06, "loss": 0.6275, "step": 25870 }, { "epoch": 1.922779635823114, "grad_norm": 1.842904268788123, "learning_rate": 6.033691091150227e-06, "loss": 0.5223, "step": 25871 }, { "epoch": 1.9228539576365664, "grad_norm": 2.0985642916601446, "learning_rate": 6.032954547371987e-06, "loss": 0.6187, "step": 25872 }, { "epoch": 1.9229282794500184, "grad_norm": 2.407198651089888, "learning_rate": 6.032218029133382e-06, "loss": 0.424, "step": 25873 }, { "epoch": 1.923002601263471, "grad_norm": 1.7974523753556018, "learning_rate": 6.031481536439153e-06, "loss": 0.4001, "step": 25874 }, { "epoch": 1.9230769230769231, "grad_norm": 2.2837309965196826, "learning_rate": 6.030745069294037e-06, "loss": 0.6726, "step": 25875 }, { "epoch": 1.9231512448903754, "grad_norm": 2.2677212589647593, "learning_rate": 6.030008627702779e-06, "loss": 0.5206, "step": 25876 }, { "epoch": 1.9232255667038276, "grad_norm": 2.0138838877793668, "learning_rate": 6.029272211670121e-06, "loss": 0.6018, "step": 25877 }, { "epoch": 1.9232998885172798, "grad_norm": 1.8384394001677267, "learning_rate": 6.028535821200804e-06, "loss": 0.523, "step": 25878 }, { "epoch": 1.923374210330732, "grad_norm": 1.9404181794839317, "learning_rate": 6.027799456299565e-06, "loss": 0.4576, "step": 25879 }, { "epoch": 1.9234485321441843, "grad_norm": 2.2128431500785823, "learning_rate": 6.027063116971146e-06, "loss": 0.749, "step": 25880 }, { "epoch": 1.9235228539576366, "grad_norm": 1.7567013701959069, "learning_rate": 6.0263268032202905e-06, "loss": 0.5681, "step": 25881 }, { "epoch": 1.9235971757710888, "grad_norm": 2.20889883213262, "learning_rate": 6.025590515051736e-06, "loss": 0.5128, "step": 25882 }, { "epoch": 1.923671497584541, "grad_norm": 1.8348227125450556, "learning_rate": 6.024854252470221e-06, "loss": 0.4478, "step": 25883 }, { "epoch": 1.9237458193979933, "grad_norm": 2.0018770498864558, "learning_rate": 6.02411801548049e-06, "loss": 0.5606, "step": 25884 }, { "epoch": 1.9238201412114455, "grad_norm": 2.024582714881393, "learning_rate": 6.023381804087283e-06, "loss": 0.6748, "step": 25885 }, { "epoch": 1.9238944630248977, "grad_norm": 2.23711074280664, "learning_rate": 6.022645618295335e-06, "loss": 0.524, "step": 25886 }, { "epoch": 1.9239687848383502, "grad_norm": 1.9646705647310057, "learning_rate": 6.0219094581093885e-06, "loss": 0.6577, "step": 25887 }, { "epoch": 1.9240431066518022, "grad_norm": 2.393425825121496, "learning_rate": 6.021173323534182e-06, "loss": 0.4644, "step": 25888 }, { "epoch": 1.9241174284652547, "grad_norm": 2.148113296027341, "learning_rate": 6.020437214574459e-06, "loss": 0.6421, "step": 25889 }, { "epoch": 1.9241917502787067, "grad_norm": 1.968936782254675, "learning_rate": 6.019701131234952e-06, "loss": 0.487, "step": 25890 }, { "epoch": 1.9242660720921592, "grad_norm": 2.260489061567322, "learning_rate": 6.018965073520402e-06, "loss": 0.7235, "step": 25891 }, { "epoch": 1.9243403939056112, "grad_norm": 2.0564155919329927, "learning_rate": 6.01822904143555e-06, "loss": 0.729, "step": 25892 }, { "epoch": 1.9244147157190636, "grad_norm": 1.9670843782058443, "learning_rate": 6.017493034985131e-06, "loss": 0.6484, "step": 25893 }, { "epoch": 1.9244890375325157, "grad_norm": 2.34166554140718, "learning_rate": 6.016757054173885e-06, "loss": 0.6076, "step": 25894 }, { "epoch": 1.9245633593459681, "grad_norm": 1.8541881934592788, "learning_rate": 6.016021099006552e-06, "loss": 0.4518, "step": 25895 }, { "epoch": 1.9246376811594201, "grad_norm": 2.0242370770558034, "learning_rate": 6.015285169487869e-06, "loss": 0.6597, "step": 25896 }, { "epoch": 1.9247120029728726, "grad_norm": 1.5922617609047083, "learning_rate": 6.014549265622571e-06, "loss": 0.5056, "step": 25897 }, { "epoch": 1.9247863247863248, "grad_norm": 1.6026407855034042, "learning_rate": 6.013813387415399e-06, "loss": 0.4536, "step": 25898 }, { "epoch": 1.924860646599777, "grad_norm": 2.2655568178895296, "learning_rate": 6.013077534871088e-06, "loss": 0.6509, "step": 25899 }, { "epoch": 1.9249349684132293, "grad_norm": 1.9472368932732855, "learning_rate": 6.012341707994383e-06, "loss": 0.5713, "step": 25900 }, { "epoch": 1.9250092902266815, "grad_norm": 1.7724043385043828, "learning_rate": 6.01160590679001e-06, "loss": 0.4837, "step": 25901 }, { "epoch": 1.9250836120401338, "grad_norm": 1.8918780118717022, "learning_rate": 6.010870131262711e-06, "loss": 0.6139, "step": 25902 }, { "epoch": 1.925157933853586, "grad_norm": 2.052781934176639, "learning_rate": 6.010134381417225e-06, "loss": 0.6198, "step": 25903 }, { "epoch": 1.9252322556670383, "grad_norm": 2.147070876167724, "learning_rate": 6.009398657258287e-06, "loss": 0.5364, "step": 25904 }, { "epoch": 1.9253065774804905, "grad_norm": 4.789034603330148, "learning_rate": 6.008662958790631e-06, "loss": 0.6187, "step": 25905 }, { "epoch": 1.9253808992939427, "grad_norm": 2.1787712199441063, "learning_rate": 6.007927286018996e-06, "loss": 0.7037, "step": 25906 }, { "epoch": 1.925455221107395, "grad_norm": 1.8394684115551974, "learning_rate": 6.0071916389481215e-06, "loss": 0.5885, "step": 25907 }, { "epoch": 1.9255295429208472, "grad_norm": 1.8385604676225837, "learning_rate": 6.006456017582736e-06, "loss": 0.5309, "step": 25908 }, { "epoch": 1.9256038647342995, "grad_norm": 2.1241110120167233, "learning_rate": 6.005720421927581e-06, "loss": 0.7032, "step": 25909 }, { "epoch": 1.925678186547752, "grad_norm": 2.7230010569672123, "learning_rate": 6.00498485198739e-06, "loss": 0.6706, "step": 25910 }, { "epoch": 1.925752508361204, "grad_norm": 2.064562454755294, "learning_rate": 6.0042493077669e-06, "loss": 0.6937, "step": 25911 }, { "epoch": 1.9258268301746564, "grad_norm": 2.219745471158653, "learning_rate": 6.003513789270849e-06, "loss": 0.7277, "step": 25912 }, { "epoch": 1.9259011519881084, "grad_norm": 2.2480403463362992, "learning_rate": 6.002778296503966e-06, "loss": 0.4329, "step": 25913 }, { "epoch": 1.9259754738015609, "grad_norm": 2.0580171164116954, "learning_rate": 6.002042829470989e-06, "loss": 0.5836, "step": 25914 }, { "epoch": 1.9260497956150129, "grad_norm": 1.9822849729988015, "learning_rate": 6.001307388176652e-06, "loss": 0.557, "step": 25915 }, { "epoch": 1.9261241174284653, "grad_norm": 1.9810548105444985, "learning_rate": 6.00057197262569e-06, "loss": 0.519, "step": 25916 }, { "epoch": 1.9261984392419174, "grad_norm": 2.404007305426173, "learning_rate": 5.999836582822838e-06, "loss": 0.484, "step": 25917 }, { "epoch": 1.9262727610553698, "grad_norm": 1.7570045924446442, "learning_rate": 5.999101218772833e-06, "loss": 0.6398, "step": 25918 }, { "epoch": 1.9263470828688218, "grad_norm": 2.395219270159567, "learning_rate": 5.998365880480405e-06, "loss": 0.767, "step": 25919 }, { "epoch": 1.9264214046822743, "grad_norm": 1.9865343878501638, "learning_rate": 5.997630567950289e-06, "loss": 0.6438, "step": 25920 }, { "epoch": 1.9264957264957265, "grad_norm": 1.7482230459647967, "learning_rate": 5.996895281187221e-06, "loss": 0.5081, "step": 25921 }, { "epoch": 1.9265700483091788, "grad_norm": 2.142740359305907, "learning_rate": 5.996160020195934e-06, "loss": 0.6657, "step": 25922 }, { "epoch": 1.926644370122631, "grad_norm": 1.560839305861736, "learning_rate": 5.995424784981161e-06, "loss": 0.3865, "step": 25923 }, { "epoch": 1.9267186919360832, "grad_norm": 2.0498858990566537, "learning_rate": 5.994689575547637e-06, "loss": 0.542, "step": 25924 }, { "epoch": 1.9267930137495355, "grad_norm": 2.4943787555034356, "learning_rate": 5.993954391900091e-06, "loss": 0.5448, "step": 25925 }, { "epoch": 1.9268673355629877, "grad_norm": 2.1164171607044744, "learning_rate": 5.9932192340432586e-06, "loss": 0.5899, "step": 25926 }, { "epoch": 1.92694165737644, "grad_norm": 2.1229250602453287, "learning_rate": 5.992484101981873e-06, "loss": 0.5809, "step": 25927 }, { "epoch": 1.9270159791898922, "grad_norm": 1.9794687973598455, "learning_rate": 5.991748995720666e-06, "loss": 0.7026, "step": 25928 }, { "epoch": 1.9270903010033444, "grad_norm": 2.2281921397688627, "learning_rate": 5.991013915264374e-06, "loss": 0.6442, "step": 25929 }, { "epoch": 1.9271646228167967, "grad_norm": 1.6762785286356012, "learning_rate": 5.990278860617724e-06, "loss": 0.5373, "step": 25930 }, { "epoch": 1.9272389446302491, "grad_norm": 2.278980314361245, "learning_rate": 5.989543831785451e-06, "loss": 0.6654, "step": 25931 }, { "epoch": 1.9273132664437012, "grad_norm": 2.16545741170546, "learning_rate": 5.988808828772285e-06, "loss": 0.6428, "step": 25932 }, { "epoch": 1.9273875882571536, "grad_norm": 1.8177000449676435, "learning_rate": 5.988073851582965e-06, "loss": 0.5192, "step": 25933 }, { "epoch": 1.9274619100706056, "grad_norm": 3.6997888210773433, "learning_rate": 5.987338900222215e-06, "loss": 0.6484, "step": 25934 }, { "epoch": 1.927536231884058, "grad_norm": 2.4525490855060177, "learning_rate": 5.986603974694769e-06, "loss": 0.6039, "step": 25935 }, { "epoch": 1.92761055369751, "grad_norm": 1.9327833207881524, "learning_rate": 5.985869075005357e-06, "loss": 0.5295, "step": 25936 }, { "epoch": 1.9276848755109626, "grad_norm": 1.7342064809450868, "learning_rate": 5.985134201158711e-06, "loss": 0.5401, "step": 25937 }, { "epoch": 1.9277591973244146, "grad_norm": 1.5852713125711517, "learning_rate": 5.984399353159564e-06, "loss": 0.4765, "step": 25938 }, { "epoch": 1.927833519137867, "grad_norm": 6.019849990817553, "learning_rate": 5.983664531012645e-06, "loss": 0.642, "step": 25939 }, { "epoch": 1.927907840951319, "grad_norm": 2.3048725384841298, "learning_rate": 5.982929734722686e-06, "loss": 0.5337, "step": 25940 }, { "epoch": 1.9279821627647715, "grad_norm": 2.1847007930256104, "learning_rate": 5.982194964294417e-06, "loss": 0.5648, "step": 25941 }, { "epoch": 1.9280564845782238, "grad_norm": 2.3640797591189084, "learning_rate": 5.981460219732567e-06, "loss": 0.5591, "step": 25942 }, { "epoch": 1.928130806391676, "grad_norm": 1.76483250486086, "learning_rate": 5.980725501041868e-06, "loss": 0.4448, "step": 25943 }, { "epoch": 1.9282051282051282, "grad_norm": 2.151989947005781, "learning_rate": 5.979990808227052e-06, "loss": 0.7256, "step": 25944 }, { "epoch": 1.9282794500185805, "grad_norm": 2.0901600500303696, "learning_rate": 5.979256141292846e-06, "loss": 0.599, "step": 25945 }, { "epoch": 1.9283537718320327, "grad_norm": 2.404971766734125, "learning_rate": 5.978521500243981e-06, "loss": 0.6474, "step": 25946 }, { "epoch": 1.928428093645485, "grad_norm": 1.5513787071341358, "learning_rate": 5.977786885085184e-06, "loss": 0.5437, "step": 25947 }, { "epoch": 1.9285024154589372, "grad_norm": 2.395271571393188, "learning_rate": 5.977052295821185e-06, "loss": 0.4712, "step": 25948 }, { "epoch": 1.9285767372723894, "grad_norm": 1.8090569720613592, "learning_rate": 5.976317732456715e-06, "loss": 0.4558, "step": 25949 }, { "epoch": 1.9286510590858417, "grad_norm": 1.7821892657550114, "learning_rate": 5.975583194996504e-06, "loss": 0.5687, "step": 25950 }, { "epoch": 1.928725380899294, "grad_norm": 1.8679202938687132, "learning_rate": 5.97484868344528e-06, "loss": 0.57, "step": 25951 }, { "epoch": 1.9287997027127461, "grad_norm": 1.9599333219478337, "learning_rate": 5.97411419780777e-06, "loss": 0.6477, "step": 25952 }, { "epoch": 1.9288740245261984, "grad_norm": 2.532440839253945, "learning_rate": 5.973379738088705e-06, "loss": 0.6785, "step": 25953 }, { "epoch": 1.9289483463396508, "grad_norm": 1.8880210722889421, "learning_rate": 5.97264530429281e-06, "loss": 0.6655, "step": 25954 }, { "epoch": 1.9290226681531029, "grad_norm": 1.8513886784858269, "learning_rate": 5.971910896424819e-06, "loss": 0.4049, "step": 25955 }, { "epoch": 1.9290969899665553, "grad_norm": 1.7329794238660008, "learning_rate": 5.9711765144894544e-06, "loss": 0.4503, "step": 25956 }, { "epoch": 1.9291713117800073, "grad_norm": 2.2903725663742938, "learning_rate": 5.9704421584914505e-06, "loss": 0.6857, "step": 25957 }, { "epoch": 1.9292456335934598, "grad_norm": 1.99579231522132, "learning_rate": 5.969707828435527e-06, "loss": 0.5641, "step": 25958 }, { "epoch": 1.9293199554069118, "grad_norm": 1.6113186325625082, "learning_rate": 5.968973524326416e-06, "loss": 0.4391, "step": 25959 }, { "epoch": 1.9293942772203643, "grad_norm": 1.7070529541066228, "learning_rate": 5.9682392461688435e-06, "loss": 0.4271, "step": 25960 }, { "epoch": 1.9294685990338163, "grad_norm": 1.9647260776806394, "learning_rate": 5.967504993967537e-06, "loss": 0.5872, "step": 25961 }, { "epoch": 1.9295429208472687, "grad_norm": 2.1148028982043, "learning_rate": 5.966770767727226e-06, "loss": 0.5731, "step": 25962 }, { "epoch": 1.9296172426607208, "grad_norm": 2.180898286059065, "learning_rate": 5.966036567452634e-06, "loss": 0.5951, "step": 25963 }, { "epoch": 1.9296915644741732, "grad_norm": 2.0413006023462543, "learning_rate": 5.96530239314849e-06, "loss": 0.5317, "step": 25964 }, { "epoch": 1.9297658862876255, "grad_norm": 2.508637282455097, "learning_rate": 5.96456824481952e-06, "loss": 0.7632, "step": 25965 }, { "epoch": 1.9298402081010777, "grad_norm": 2.010568489189071, "learning_rate": 5.963834122470451e-06, "loss": 0.6175, "step": 25966 }, { "epoch": 1.92991452991453, "grad_norm": 9.036470846268408, "learning_rate": 5.963100026106007e-06, "loss": 0.7274, "step": 25967 }, { "epoch": 1.9299888517279822, "grad_norm": 2.2530596106804515, "learning_rate": 5.96236595573092e-06, "loss": 0.5786, "step": 25968 }, { "epoch": 1.9300631735414344, "grad_norm": 2.1764975913182165, "learning_rate": 5.961631911349907e-06, "loss": 0.6243, "step": 25969 }, { "epoch": 1.9301374953548867, "grad_norm": 1.9427505379698224, "learning_rate": 5.960897892967698e-06, "loss": 0.4723, "step": 25970 }, { "epoch": 1.930211817168339, "grad_norm": 2.052199935862615, "learning_rate": 5.960163900589019e-06, "loss": 0.7205, "step": 25971 }, { "epoch": 1.9302861389817911, "grad_norm": 1.9096856244432256, "learning_rate": 5.959429934218594e-06, "loss": 0.6033, "step": 25972 }, { "epoch": 1.9303604607952434, "grad_norm": 2.1029232727013945, "learning_rate": 5.958695993861154e-06, "loss": 0.6514, "step": 25973 }, { "epoch": 1.9304347826086956, "grad_norm": 1.89339114310851, "learning_rate": 5.957962079521415e-06, "loss": 0.5367, "step": 25974 }, { "epoch": 1.9305091044221478, "grad_norm": 2.5889949338533036, "learning_rate": 5.957228191204109e-06, "loss": 0.5489, "step": 25975 }, { "epoch": 1.9305834262356, "grad_norm": 1.9287853158773116, "learning_rate": 5.956494328913956e-06, "loss": 0.6042, "step": 25976 }, { "epoch": 1.9306577480490525, "grad_norm": 1.7780634868998104, "learning_rate": 5.9557604926556866e-06, "loss": 0.5572, "step": 25977 }, { "epoch": 1.9307320698625046, "grad_norm": 2.2178561627713265, "learning_rate": 5.955026682434018e-06, "loss": 0.6214, "step": 25978 }, { "epoch": 1.930806391675957, "grad_norm": 1.710436693475792, "learning_rate": 5.9542928982536815e-06, "loss": 0.3897, "step": 25979 }, { "epoch": 1.930880713489409, "grad_norm": 2.4727486757236252, "learning_rate": 5.953559140119394e-06, "loss": 0.6129, "step": 25980 }, { "epoch": 1.9309550353028615, "grad_norm": 1.9793326815287766, "learning_rate": 5.952825408035883e-06, "loss": 0.5819, "step": 25981 }, { "epoch": 1.9310293571163135, "grad_norm": 2.287283481078508, "learning_rate": 5.952091702007871e-06, "loss": 0.5418, "step": 25982 }, { "epoch": 1.931103678929766, "grad_norm": 2.1281116689521227, "learning_rate": 5.951358022040083e-06, "loss": 0.6587, "step": 25983 }, { "epoch": 1.931178000743218, "grad_norm": 1.6968970799992231, "learning_rate": 5.950624368137245e-06, "loss": 0.5338, "step": 25984 }, { "epoch": 1.9312523225566705, "grad_norm": 1.9234949863769801, "learning_rate": 5.949890740304074e-06, "loss": 0.4934, "step": 25985 }, { "epoch": 1.9313266443701225, "grad_norm": 1.7519761850696733, "learning_rate": 5.949157138545296e-06, "loss": 0.5178, "step": 25986 }, { "epoch": 1.931400966183575, "grad_norm": 1.797478994457493, "learning_rate": 5.948423562865635e-06, "loss": 0.3919, "step": 25987 }, { "epoch": 1.9314752879970272, "grad_norm": 1.6534975925611668, "learning_rate": 5.9476900132698155e-06, "loss": 0.5086, "step": 25988 }, { "epoch": 1.9315496098104794, "grad_norm": 2.1088286074316307, "learning_rate": 5.946956489762554e-06, "loss": 0.5217, "step": 25989 }, { "epoch": 1.9316239316239316, "grad_norm": 1.8843685744512704, "learning_rate": 5.946222992348578e-06, "loss": 0.5899, "step": 25990 }, { "epoch": 1.9316982534373839, "grad_norm": 2.0225153426511464, "learning_rate": 5.9454895210326104e-06, "loss": 0.5897, "step": 25991 }, { "epoch": 1.9317725752508361, "grad_norm": 1.9898966509339964, "learning_rate": 5.944756075819368e-06, "loss": 0.5865, "step": 25992 }, { "epoch": 1.9318468970642884, "grad_norm": 2.446231627094022, "learning_rate": 5.944022656713575e-06, "loss": 0.6635, "step": 25993 }, { "epoch": 1.9319212188777406, "grad_norm": 1.6847115842672797, "learning_rate": 5.9432892637199535e-06, "loss": 0.5097, "step": 25994 }, { "epoch": 1.9319955406911928, "grad_norm": 1.7362157344861568, "learning_rate": 5.942555896843227e-06, "loss": 0.5222, "step": 25995 }, { "epoch": 1.932069862504645, "grad_norm": 2.1046023006172767, "learning_rate": 5.941822556088115e-06, "loss": 0.5054, "step": 25996 }, { "epoch": 1.9321441843180973, "grad_norm": 1.859595468635443, "learning_rate": 5.941089241459337e-06, "loss": 0.5315, "step": 25997 }, { "epoch": 1.9322185061315498, "grad_norm": 2.2868164964828357, "learning_rate": 5.940355952961616e-06, "loss": 0.7038, "step": 25998 }, { "epoch": 1.9322928279450018, "grad_norm": 1.9614495669970995, "learning_rate": 5.939622690599675e-06, "loss": 0.5932, "step": 25999 }, { "epoch": 1.9323671497584543, "grad_norm": 2.299459080761697, "learning_rate": 5.938889454378231e-06, "loss": 0.5975, "step": 26000 }, { "epoch": 1.9324414715719063, "grad_norm": 1.8804621633129543, "learning_rate": 5.938156244302005e-06, "loss": 0.4857, "step": 26001 }, { "epoch": 1.9325157933853587, "grad_norm": 1.9467943659974403, "learning_rate": 5.937423060375723e-06, "loss": 0.5787, "step": 26002 }, { "epoch": 1.9325901151988107, "grad_norm": 1.9320614674930536, "learning_rate": 5.936689902604096e-06, "loss": 0.5363, "step": 26003 }, { "epoch": 1.9326644370122632, "grad_norm": 2.469027035191457, "learning_rate": 5.9359567709918486e-06, "loss": 0.5614, "step": 26004 }, { "epoch": 1.9327387588257152, "grad_norm": 1.9900998354808932, "learning_rate": 5.935223665543701e-06, "loss": 0.6137, "step": 26005 }, { "epoch": 1.9328130806391677, "grad_norm": 2.3858383073590663, "learning_rate": 5.934490586264375e-06, "loss": 0.5419, "step": 26006 }, { "epoch": 1.9328874024526197, "grad_norm": 1.8363339259728084, "learning_rate": 5.9337575331585864e-06, "loss": 0.4302, "step": 26007 }, { "epoch": 1.9329617242660722, "grad_norm": 2.1481954580255143, "learning_rate": 5.933024506231053e-06, "loss": 0.5243, "step": 26008 }, { "epoch": 1.9330360460795244, "grad_norm": 1.9809185449581412, "learning_rate": 5.932291505486498e-06, "loss": 0.5044, "step": 26009 }, { "epoch": 1.9331103678929766, "grad_norm": 2.0814423963863344, "learning_rate": 5.931558530929642e-06, "loss": 0.4257, "step": 26010 }, { "epoch": 1.9331846897064289, "grad_norm": 2.096291053181889, "learning_rate": 5.9308255825651995e-06, "loss": 0.5701, "step": 26011 }, { "epoch": 1.933259011519881, "grad_norm": 1.7157568629230395, "learning_rate": 5.930092660397891e-06, "loss": 0.5038, "step": 26012 }, { "epoch": 1.9333333333333333, "grad_norm": 2.6793176109322316, "learning_rate": 5.929359764432436e-06, "loss": 0.5697, "step": 26013 }, { "epoch": 1.9334076551467856, "grad_norm": 1.684936133039042, "learning_rate": 5.92862689467355e-06, "loss": 0.4006, "step": 26014 }, { "epoch": 1.9334819769602378, "grad_norm": 1.9443940337796832, "learning_rate": 5.927894051125952e-06, "loss": 0.5664, "step": 26015 }, { "epoch": 1.93355629877369, "grad_norm": 2.3980469491727736, "learning_rate": 5.927161233794361e-06, "loss": 0.5741, "step": 26016 }, { "epoch": 1.9336306205871423, "grad_norm": 1.9271555343552913, "learning_rate": 5.9264284426834965e-06, "loss": 0.5502, "step": 26017 }, { "epoch": 1.9337049424005945, "grad_norm": 1.9540119637270918, "learning_rate": 5.925695677798072e-06, "loss": 0.6066, "step": 26018 }, { "epoch": 1.9337792642140468, "grad_norm": 2.1883579396204054, "learning_rate": 5.9249629391428084e-06, "loss": 0.6104, "step": 26019 }, { "epoch": 1.933853586027499, "grad_norm": 1.621575686593253, "learning_rate": 5.92423022672242e-06, "loss": 0.3733, "step": 26020 }, { "epoch": 1.9339279078409515, "grad_norm": 2.0200307933514283, "learning_rate": 5.92349754054163e-06, "loss": 0.5693, "step": 26021 }, { "epoch": 1.9340022296544035, "grad_norm": 2.110563961353852, "learning_rate": 5.92276488060515e-06, "loss": 0.6718, "step": 26022 }, { "epoch": 1.934076551467856, "grad_norm": 2.178424469666364, "learning_rate": 5.922032246917696e-06, "loss": 0.5882, "step": 26023 }, { "epoch": 1.934150873281308, "grad_norm": 1.5779410720366243, "learning_rate": 5.921299639483991e-06, "loss": 0.4642, "step": 26024 }, { "epoch": 1.9342251950947604, "grad_norm": 2.439809778297859, "learning_rate": 5.920567058308744e-06, "loss": 0.5296, "step": 26025 }, { "epoch": 1.9342995169082124, "grad_norm": 1.9249301937587264, "learning_rate": 5.919834503396676e-06, "loss": 0.5103, "step": 26026 }, { "epoch": 1.934373838721665, "grad_norm": 2.4963093448592897, "learning_rate": 5.919101974752499e-06, "loss": 0.7153, "step": 26027 }, { "epoch": 1.934448160535117, "grad_norm": 1.8718378885256475, "learning_rate": 5.918369472380935e-06, "loss": 0.6209, "step": 26028 }, { "epoch": 1.9345224823485694, "grad_norm": 2.208480062942838, "learning_rate": 5.917636996286694e-06, "loss": 0.7252, "step": 26029 }, { "epoch": 1.9345968041620214, "grad_norm": 2.062911588192678, "learning_rate": 5.916904546474495e-06, "loss": 0.6308, "step": 26030 }, { "epoch": 1.9346711259754739, "grad_norm": 2.676605698268085, "learning_rate": 5.916172122949052e-06, "loss": 0.5729, "step": 26031 }, { "epoch": 1.934745447788926, "grad_norm": 2.7315380488561836, "learning_rate": 5.915439725715084e-06, "loss": 0.4786, "step": 26032 }, { "epoch": 1.9348197696023783, "grad_norm": 1.7130814399182945, "learning_rate": 5.914707354777301e-06, "loss": 0.4393, "step": 26033 }, { "epoch": 1.9348940914158306, "grad_norm": 2.3868338634318085, "learning_rate": 5.913975010140418e-06, "loss": 0.6061, "step": 26034 }, { "epoch": 1.9349684132292828, "grad_norm": 2.406623066312892, "learning_rate": 5.913242691809153e-06, "loss": 0.6241, "step": 26035 }, { "epoch": 1.935042735042735, "grad_norm": 2.247367442313972, "learning_rate": 5.912510399788225e-06, "loss": 0.5325, "step": 26036 }, { "epoch": 1.9351170568561873, "grad_norm": 2.3732012805812075, "learning_rate": 5.911778134082338e-06, "loss": 0.744, "step": 26037 }, { "epoch": 1.9351913786696395, "grad_norm": 2.358648284915904, "learning_rate": 5.9110458946962124e-06, "loss": 0.4966, "step": 26038 }, { "epoch": 1.9352657004830918, "grad_norm": 1.856607132629779, "learning_rate": 5.910313681634561e-06, "loss": 0.4134, "step": 26039 }, { "epoch": 1.935340022296544, "grad_norm": 1.681137006859019, "learning_rate": 5.909581494902097e-06, "loss": 0.4169, "step": 26040 }, { "epoch": 1.9354143441099962, "grad_norm": 1.6315839140051833, "learning_rate": 5.9088493345035345e-06, "loss": 0.5078, "step": 26041 }, { "epoch": 1.9354886659234485, "grad_norm": 1.8330869918105983, "learning_rate": 5.9081172004435886e-06, "loss": 0.5916, "step": 26042 }, { "epoch": 1.9355629877369007, "grad_norm": 1.6593436115985885, "learning_rate": 5.907385092726973e-06, "loss": 0.5762, "step": 26043 }, { "epoch": 1.9356373095503532, "grad_norm": 2.095006070288264, "learning_rate": 5.906653011358398e-06, "loss": 0.6359, "step": 26044 }, { "epoch": 1.9357116313638052, "grad_norm": 1.6338189252114903, "learning_rate": 5.905920956342579e-06, "loss": 0.5138, "step": 26045 }, { "epoch": 1.9357859531772577, "grad_norm": 1.797305117129372, "learning_rate": 5.905188927684228e-06, "loss": 0.4575, "step": 26046 }, { "epoch": 1.9358602749907097, "grad_norm": 1.9244518986901156, "learning_rate": 5.9044569253880625e-06, "loss": 0.5446, "step": 26047 }, { "epoch": 1.9359345968041621, "grad_norm": 2.588647624592095, "learning_rate": 5.903724949458787e-06, "loss": 0.4338, "step": 26048 }, { "epoch": 1.9360089186176141, "grad_norm": 2.261862944714528, "learning_rate": 5.902992999901118e-06, "loss": 0.521, "step": 26049 }, { "epoch": 1.9360832404310666, "grad_norm": 2.0958284961592666, "learning_rate": 5.902261076719767e-06, "loss": 0.4832, "step": 26050 }, { "epoch": 1.9361575622445186, "grad_norm": 1.9946875411169158, "learning_rate": 5.901529179919448e-06, "loss": 0.6085, "step": 26051 }, { "epoch": 1.936231884057971, "grad_norm": 1.9110835367759031, "learning_rate": 5.900797309504869e-06, "loss": 0.5279, "step": 26052 }, { "epoch": 1.936306205871423, "grad_norm": 1.8596576627435994, "learning_rate": 5.900065465480745e-06, "loss": 0.534, "step": 26053 }, { "epoch": 1.9363805276848756, "grad_norm": 1.8685112538748472, "learning_rate": 5.899333647851789e-06, "loss": 0.5179, "step": 26054 }, { "epoch": 1.9364548494983278, "grad_norm": 1.8006698998726967, "learning_rate": 5.898601856622708e-06, "loss": 0.478, "step": 26055 }, { "epoch": 1.93652917131178, "grad_norm": 2.1026160800606766, "learning_rate": 5.897870091798217e-06, "loss": 0.6332, "step": 26056 }, { "epoch": 1.9366034931252323, "grad_norm": 2.7187005394449337, "learning_rate": 5.897138353383023e-06, "loss": 0.6448, "step": 26057 }, { "epoch": 1.9366778149386845, "grad_norm": 1.6884908577945406, "learning_rate": 5.896406641381844e-06, "loss": 0.3805, "step": 26058 }, { "epoch": 1.9367521367521368, "grad_norm": 1.876032339549395, "learning_rate": 5.895674955799383e-06, "loss": 0.5703, "step": 26059 }, { "epoch": 1.936826458565589, "grad_norm": 2.136537387941751, "learning_rate": 5.8949432966403565e-06, "loss": 0.6258, "step": 26060 }, { "epoch": 1.9369007803790412, "grad_norm": 2.83320773737508, "learning_rate": 5.8942116639094685e-06, "loss": 0.5231, "step": 26061 }, { "epoch": 1.9369751021924935, "grad_norm": 1.9055091894406668, "learning_rate": 5.893480057611433e-06, "loss": 0.5658, "step": 26062 }, { "epoch": 1.9370494240059457, "grad_norm": 2.0821622466584153, "learning_rate": 5.89274847775096e-06, "loss": 0.7568, "step": 26063 }, { "epoch": 1.937123745819398, "grad_norm": 1.9707215671102694, "learning_rate": 5.89201692433276e-06, "loss": 0.6118, "step": 26064 }, { "epoch": 1.9371980676328504, "grad_norm": 2.000186152534679, "learning_rate": 5.891285397361542e-06, "loss": 0.441, "step": 26065 }, { "epoch": 1.9372723894463024, "grad_norm": 1.967104302307667, "learning_rate": 5.890553896842015e-06, "loss": 0.6177, "step": 26066 }, { "epoch": 1.9373467112597549, "grad_norm": 1.8208502950534113, "learning_rate": 5.8898224227788885e-06, "loss": 0.6343, "step": 26067 }, { "epoch": 1.937421033073207, "grad_norm": 1.5753566576839004, "learning_rate": 5.889090975176871e-06, "loss": 0.512, "step": 26068 }, { "epoch": 1.9374953548866594, "grad_norm": 1.9380184134316556, "learning_rate": 5.888359554040674e-06, "loss": 0.5742, "step": 26069 }, { "epoch": 1.9375696767001114, "grad_norm": 1.9375553627077546, "learning_rate": 5.887628159375007e-06, "loss": 0.5265, "step": 26070 }, { "epoch": 1.9376439985135638, "grad_norm": 2.230337833791962, "learning_rate": 5.8868967911845755e-06, "loss": 0.5466, "step": 26071 }, { "epoch": 1.9377183203270159, "grad_norm": 5.923495720102256, "learning_rate": 5.8861654494740886e-06, "loss": 0.6568, "step": 26072 }, { "epoch": 1.9377926421404683, "grad_norm": 1.9766398635929845, "learning_rate": 5.8854341342482535e-06, "loss": 0.5903, "step": 26073 }, { "epoch": 1.9378669639539203, "grad_norm": 2.4006335215939907, "learning_rate": 5.88470284551178e-06, "loss": 0.8116, "step": 26074 }, { "epoch": 1.9379412857673728, "grad_norm": 1.7257181509429238, "learning_rate": 5.883971583269376e-06, "loss": 0.568, "step": 26075 }, { "epoch": 1.9380156075808248, "grad_norm": 1.8292095059565792, "learning_rate": 5.8832403475257514e-06, "loss": 0.5423, "step": 26076 }, { "epoch": 1.9380899293942773, "grad_norm": 1.7416897781204295, "learning_rate": 5.88250913828561e-06, "loss": 0.3832, "step": 26077 }, { "epoch": 1.9381642512077295, "grad_norm": 1.7867766729436323, "learning_rate": 5.8817779555536605e-06, "loss": 0.5224, "step": 26078 }, { "epoch": 1.9382385730211817, "grad_norm": 1.7835274895712516, "learning_rate": 5.881046799334612e-06, "loss": 0.5684, "step": 26079 }, { "epoch": 1.938312894834634, "grad_norm": 2.300818323642927, "learning_rate": 5.880315669633172e-06, "loss": 0.6617, "step": 26080 }, { "epoch": 1.9383872166480862, "grad_norm": 1.904344942711727, "learning_rate": 5.879584566454047e-06, "loss": 0.63, "step": 26081 }, { "epoch": 1.9384615384615385, "grad_norm": 2.253354119625125, "learning_rate": 5.878853489801941e-06, "loss": 0.6185, "step": 26082 }, { "epoch": 1.9385358602749907, "grad_norm": 1.9350844804595881, "learning_rate": 5.87812243968156e-06, "loss": 0.5568, "step": 26083 }, { "epoch": 1.938610182088443, "grad_norm": 2.5689268023533343, "learning_rate": 5.8773914160976155e-06, "loss": 0.4774, "step": 26084 }, { "epoch": 1.9386845039018952, "grad_norm": 1.8436510498620704, "learning_rate": 5.8766604190548095e-06, "loss": 0.5486, "step": 26085 }, { "epoch": 1.9387588257153474, "grad_norm": 2.022726665396046, "learning_rate": 5.875929448557849e-06, "loss": 0.6078, "step": 26086 }, { "epoch": 1.9388331475287996, "grad_norm": 2.167641537857073, "learning_rate": 5.875198504611443e-06, "loss": 0.5176, "step": 26087 }, { "epoch": 1.938907469342252, "grad_norm": 1.909376047837362, "learning_rate": 5.8744675872202935e-06, "loss": 0.6177, "step": 26088 }, { "epoch": 1.9389817911557041, "grad_norm": 2.4190688307443486, "learning_rate": 5.873736696389109e-06, "loss": 0.5915, "step": 26089 }, { "epoch": 1.9390561129691566, "grad_norm": 2.0693856963748196, "learning_rate": 5.873005832122591e-06, "loss": 0.5979, "step": 26090 }, { "epoch": 1.9391304347826086, "grad_norm": 1.510686475836124, "learning_rate": 5.872274994425451e-06, "loss": 0.4243, "step": 26091 }, { "epoch": 1.939204756596061, "grad_norm": 1.624043178999655, "learning_rate": 5.871544183302391e-06, "loss": 0.3406, "step": 26092 }, { "epoch": 1.939279078409513, "grad_norm": 2.5428584419488134, "learning_rate": 5.870813398758114e-06, "loss": 0.6841, "step": 26093 }, { "epoch": 1.9393534002229655, "grad_norm": 2.271896777274889, "learning_rate": 5.8700826407973244e-06, "loss": 0.505, "step": 26094 }, { "epoch": 1.9394277220364176, "grad_norm": 1.9799115520830404, "learning_rate": 5.869351909424729e-06, "loss": 0.6154, "step": 26095 }, { "epoch": 1.93950204384987, "grad_norm": 2.352531340066709, "learning_rate": 5.868621204645031e-06, "loss": 0.5711, "step": 26096 }, { "epoch": 1.939576365663322, "grad_norm": 2.65099599025041, "learning_rate": 5.867890526462936e-06, "loss": 0.6261, "step": 26097 }, { "epoch": 1.9396506874767745, "grad_norm": 2.0429806544978524, "learning_rate": 5.867159874883148e-06, "loss": 0.4895, "step": 26098 }, { "epoch": 1.9397250092902267, "grad_norm": 2.461196928234585, "learning_rate": 5.866429249910369e-06, "loss": 0.7224, "step": 26099 }, { "epoch": 1.939799331103679, "grad_norm": 2.1914740646617856, "learning_rate": 5.865698651549305e-06, "loss": 0.5391, "step": 26100 }, { "epoch": 1.9398736529171312, "grad_norm": 1.9156903143881179, "learning_rate": 5.864968079804657e-06, "loss": 0.6005, "step": 26101 }, { "epoch": 1.9399479747305834, "grad_norm": 2.17909718697753, "learning_rate": 5.864237534681133e-06, "loss": 0.7633, "step": 26102 }, { "epoch": 1.9400222965440357, "grad_norm": 1.6309960077621442, "learning_rate": 5.863507016183432e-06, "loss": 0.4676, "step": 26103 }, { "epoch": 1.940096618357488, "grad_norm": 1.8965489364822707, "learning_rate": 5.86277652431626e-06, "loss": 0.5748, "step": 26104 }, { "epoch": 1.9401709401709402, "grad_norm": 1.7094739466819366, "learning_rate": 5.862046059084316e-06, "loss": 0.5337, "step": 26105 }, { "epoch": 1.9402452619843924, "grad_norm": 1.9184820052005274, "learning_rate": 5.861315620492303e-06, "loss": 0.4975, "step": 26106 }, { "epoch": 1.9403195837978446, "grad_norm": 2.674964763284435, "learning_rate": 5.860585208544927e-06, "loss": 0.4904, "step": 26107 }, { "epoch": 1.9403939056112969, "grad_norm": 1.8254179256091831, "learning_rate": 5.859854823246887e-06, "loss": 0.4324, "step": 26108 }, { "epoch": 1.9404682274247491, "grad_norm": 1.8308705716438243, "learning_rate": 5.85912446460289e-06, "loss": 0.5369, "step": 26109 }, { "epoch": 1.9405425492382014, "grad_norm": 1.8460113138132794, "learning_rate": 5.858394132617633e-06, "loss": 0.5449, "step": 26110 }, { "epoch": 1.9406168710516538, "grad_norm": 1.648346809693358, "learning_rate": 5.857663827295819e-06, "loss": 0.5515, "step": 26111 }, { "epoch": 1.9406911928651058, "grad_norm": 1.9844029477154264, "learning_rate": 5.85693354864215e-06, "loss": 0.5336, "step": 26112 }, { "epoch": 1.9407655146785583, "grad_norm": 2.3559720555187154, "learning_rate": 5.856203296661331e-06, "loss": 0.5066, "step": 26113 }, { "epoch": 1.9408398364920103, "grad_norm": 1.9546596912950267, "learning_rate": 5.855473071358057e-06, "loss": 0.7087, "step": 26114 }, { "epoch": 1.9409141583054628, "grad_norm": 2.27718371987081, "learning_rate": 5.854742872737037e-06, "loss": 0.5824, "step": 26115 }, { "epoch": 1.9409884801189148, "grad_norm": 1.7956548774818772, "learning_rate": 5.854012700802964e-06, "loss": 0.5704, "step": 26116 }, { "epoch": 1.9410628019323672, "grad_norm": 2.6580664574843538, "learning_rate": 5.853282555560541e-06, "loss": 0.5827, "step": 26117 }, { "epoch": 1.9411371237458193, "grad_norm": 1.825147205269962, "learning_rate": 5.85255243701447e-06, "loss": 0.571, "step": 26118 }, { "epoch": 1.9412114455592717, "grad_norm": 4.038030937505274, "learning_rate": 5.851822345169451e-06, "loss": 0.7612, "step": 26119 }, { "epoch": 1.9412857673727237, "grad_norm": 1.5794032738614754, "learning_rate": 5.851092280030186e-06, "loss": 0.5327, "step": 26120 }, { "epoch": 1.9413600891861762, "grad_norm": 1.7149404058300495, "learning_rate": 5.850362241601373e-06, "loss": 0.485, "step": 26121 }, { "epoch": 1.9414344109996284, "grad_norm": 1.9737326817321994, "learning_rate": 5.8496322298877115e-06, "loss": 0.5175, "step": 26122 }, { "epoch": 1.9415087328130807, "grad_norm": 2.4404225723952786, "learning_rate": 5.848902244893904e-06, "loss": 0.5649, "step": 26123 }, { "epoch": 1.941583054626533, "grad_norm": 2.0100996914225333, "learning_rate": 5.848172286624649e-06, "loss": 0.6833, "step": 26124 }, { "epoch": 1.9416573764399851, "grad_norm": 2.0084970777710605, "learning_rate": 5.847442355084644e-06, "loss": 0.6175, "step": 26125 }, { "epoch": 1.9417316982534374, "grad_norm": 1.967661374319899, "learning_rate": 5.846712450278593e-06, "loss": 0.4879, "step": 26126 }, { "epoch": 1.9418060200668896, "grad_norm": 1.7292467478764637, "learning_rate": 5.8459825722111885e-06, "loss": 0.4905, "step": 26127 }, { "epoch": 1.9418803418803419, "grad_norm": 2.3400881650862484, "learning_rate": 5.845252720887131e-06, "loss": 0.6988, "step": 26128 }, { "epoch": 1.941954663693794, "grad_norm": 1.603078737159531, "learning_rate": 5.844522896311123e-06, "loss": 0.4879, "step": 26129 }, { "epoch": 1.9420289855072463, "grad_norm": 1.8667596285254089, "learning_rate": 5.84379309848786e-06, "loss": 0.5678, "step": 26130 }, { "epoch": 1.9421033073206986, "grad_norm": 1.5844153430874586, "learning_rate": 5.843063327422044e-06, "loss": 0.4427, "step": 26131 }, { "epoch": 1.9421776291341508, "grad_norm": 2.0206251421932913, "learning_rate": 5.842333583118368e-06, "loss": 0.6294, "step": 26132 }, { "epoch": 1.942251950947603, "grad_norm": 2.3739678766446537, "learning_rate": 5.841603865581533e-06, "loss": 0.5726, "step": 26133 }, { "epoch": 1.9423262727610555, "grad_norm": 1.8516348634181938, "learning_rate": 5.840874174816236e-06, "loss": 0.6704, "step": 26134 }, { "epoch": 1.9424005945745075, "grad_norm": 2.6991355282735, "learning_rate": 5.840144510827178e-06, "loss": 0.6913, "step": 26135 }, { "epoch": 1.94247491638796, "grad_norm": 2.142704225977309, "learning_rate": 5.839414873619051e-06, "loss": 0.4894, "step": 26136 }, { "epoch": 1.942549238201412, "grad_norm": 1.9108696788325554, "learning_rate": 5.838685263196559e-06, "loss": 0.5211, "step": 26137 }, { "epoch": 1.9426235600148645, "grad_norm": 2.6655236899805796, "learning_rate": 5.837955679564391e-06, "loss": 0.6039, "step": 26138 }, { "epoch": 1.9426978818283165, "grad_norm": 1.8853834924578943, "learning_rate": 5.837226122727251e-06, "loss": 0.5152, "step": 26139 }, { "epoch": 1.942772203641769, "grad_norm": 2.0063732395641973, "learning_rate": 5.836496592689833e-06, "loss": 0.716, "step": 26140 }, { "epoch": 1.942846525455221, "grad_norm": 3.284982752844872, "learning_rate": 5.835767089456832e-06, "loss": 0.4858, "step": 26141 }, { "epoch": 1.9429208472686734, "grad_norm": 2.2608024949482375, "learning_rate": 5.835037613032948e-06, "loss": 0.6101, "step": 26142 }, { "epoch": 1.9429951690821254, "grad_norm": 2.7777211399108754, "learning_rate": 5.834308163422873e-06, "loss": 0.6434, "step": 26143 }, { "epoch": 1.943069490895578, "grad_norm": 2.042579689198997, "learning_rate": 5.83357874063131e-06, "loss": 0.5484, "step": 26144 }, { "epoch": 1.9431438127090301, "grad_norm": 2.512945179969827, "learning_rate": 5.8328493446629495e-06, "loss": 0.5986, "step": 26145 }, { "epoch": 1.9432181345224824, "grad_norm": 2.1187999948028926, "learning_rate": 5.832119975522485e-06, "loss": 0.727, "step": 26146 }, { "epoch": 1.9432924563359346, "grad_norm": 2.57590245690392, "learning_rate": 5.831390633214622e-06, "loss": 0.6175, "step": 26147 }, { "epoch": 1.9433667781493869, "grad_norm": 1.9126318671313605, "learning_rate": 5.830661317744047e-06, "loss": 0.5589, "step": 26148 }, { "epoch": 1.943441099962839, "grad_norm": 1.7836103333331317, "learning_rate": 5.829932029115459e-06, "loss": 0.5085, "step": 26149 }, { "epoch": 1.9435154217762913, "grad_norm": 1.8461671074227837, "learning_rate": 5.8292027673335535e-06, "loss": 0.5911, "step": 26150 }, { "epoch": 1.9435897435897436, "grad_norm": 2.1326994590691895, "learning_rate": 5.8284735324030194e-06, "loss": 0.6318, "step": 26151 }, { "epoch": 1.9436640654031958, "grad_norm": 2.2203185601002433, "learning_rate": 5.82774432432856e-06, "loss": 0.5973, "step": 26152 }, { "epoch": 1.943738387216648, "grad_norm": 2.3109249652695594, "learning_rate": 5.827015143114863e-06, "loss": 0.5233, "step": 26153 }, { "epoch": 1.9438127090301003, "grad_norm": 1.5851206252994945, "learning_rate": 5.826285988766629e-06, "loss": 0.5713, "step": 26154 }, { "epoch": 1.9438870308435527, "grad_norm": 1.6796417928740495, "learning_rate": 5.825556861288549e-06, "loss": 0.5175, "step": 26155 }, { "epoch": 1.9439613526570048, "grad_norm": 1.7761001263817693, "learning_rate": 5.8248277606853145e-06, "loss": 0.5773, "step": 26156 }, { "epoch": 1.9440356744704572, "grad_norm": 3.407935097283663, "learning_rate": 5.824098686961626e-06, "loss": 0.4689, "step": 26157 }, { "epoch": 1.9441099962839092, "grad_norm": 1.8283420506350438, "learning_rate": 5.8233696401221676e-06, "loss": 0.4101, "step": 26158 }, { "epoch": 1.9441843180973617, "grad_norm": 2.0588634292223027, "learning_rate": 5.822640620171645e-06, "loss": 0.5251, "step": 26159 }, { "epoch": 1.9442586399108137, "grad_norm": 1.7970868852925557, "learning_rate": 5.821911627114745e-06, "loss": 0.5199, "step": 26160 }, { "epoch": 1.9443329617242662, "grad_norm": 1.9592543945130376, "learning_rate": 5.821182660956159e-06, "loss": 0.6795, "step": 26161 }, { "epoch": 1.9444072835377182, "grad_norm": 2.754243143521703, "learning_rate": 5.820453721700585e-06, "loss": 0.4573, "step": 26162 }, { "epoch": 1.9444816053511706, "grad_norm": 2.058703917879484, "learning_rate": 5.819724809352708e-06, "loss": 0.5078, "step": 26163 }, { "epoch": 1.9445559271646227, "grad_norm": 2.181242725090084, "learning_rate": 5.81899592391723e-06, "loss": 0.5684, "step": 26164 }, { "epoch": 1.9446302489780751, "grad_norm": 1.8881394889374497, "learning_rate": 5.818267065398835e-06, "loss": 0.5023, "step": 26165 }, { "epoch": 1.9447045707915274, "grad_norm": 2.691252246742278, "learning_rate": 5.817538233802223e-06, "loss": 0.6018, "step": 26166 }, { "epoch": 1.9447788926049796, "grad_norm": 2.0103291201736586, "learning_rate": 5.816809429132084e-06, "loss": 0.5135, "step": 26167 }, { "epoch": 1.9448532144184318, "grad_norm": 1.9978652217643655, "learning_rate": 5.816080651393105e-06, "loss": 0.691, "step": 26168 }, { "epoch": 1.944927536231884, "grad_norm": 2.1165499665722916, "learning_rate": 5.815351900589985e-06, "loss": 0.5734, "step": 26169 }, { "epoch": 1.9450018580453363, "grad_norm": 1.7886193129599004, "learning_rate": 5.814623176727412e-06, "loss": 0.3878, "step": 26170 }, { "epoch": 1.9450761798587886, "grad_norm": 2.1870038387726654, "learning_rate": 5.813894479810078e-06, "loss": 0.5154, "step": 26171 }, { "epoch": 1.9451505016722408, "grad_norm": 1.7115386326636337, "learning_rate": 5.813165809842675e-06, "loss": 0.4834, "step": 26172 }, { "epoch": 1.945224823485693, "grad_norm": 2.117882364058846, "learning_rate": 5.812437166829888e-06, "loss": 0.6169, "step": 26173 }, { "epoch": 1.9452991452991453, "grad_norm": 1.8254124673879093, "learning_rate": 5.811708550776418e-06, "loss": 0.503, "step": 26174 }, { "epoch": 1.9453734671125975, "grad_norm": 2.1776739413910993, "learning_rate": 5.810979961686947e-06, "loss": 0.6327, "step": 26175 }, { "epoch": 1.9454477889260497, "grad_norm": 1.838338784514059, "learning_rate": 5.810251399566173e-06, "loss": 0.441, "step": 26176 }, { "epoch": 1.945522110739502, "grad_norm": 4.366100271097233, "learning_rate": 5.809522864418784e-06, "loss": 0.5013, "step": 26177 }, { "epoch": 1.9455964325529544, "grad_norm": 1.9683582230262213, "learning_rate": 5.808794356249465e-06, "loss": 0.7056, "step": 26178 }, { "epoch": 1.9456707543664065, "grad_norm": 2.081956595096467, "learning_rate": 5.8080658750629145e-06, "loss": 0.6404, "step": 26179 }, { "epoch": 1.945745076179859, "grad_norm": 2.120042801785616, "learning_rate": 5.8073374208638144e-06, "loss": 0.7248, "step": 26180 }, { "epoch": 1.945819397993311, "grad_norm": 1.6976548116203884, "learning_rate": 5.806608993656863e-06, "loss": 0.4457, "step": 26181 }, { "epoch": 1.9458937198067634, "grad_norm": 2.437340771053655, "learning_rate": 5.805880593446745e-06, "loss": 0.5884, "step": 26182 }, { "epoch": 1.9459680416202154, "grad_norm": 1.7500302753437833, "learning_rate": 5.805152220238151e-06, "loss": 0.5262, "step": 26183 }, { "epoch": 1.9460423634336679, "grad_norm": 1.8238501510654035, "learning_rate": 5.8044238740357675e-06, "loss": 0.5981, "step": 26184 }, { "epoch": 1.94611668524712, "grad_norm": 1.9073287683087718, "learning_rate": 5.8036955548442816e-06, "loss": 0.6266, "step": 26185 }, { "epoch": 1.9461910070605724, "grad_norm": 1.805739845329822, "learning_rate": 5.8029672626683905e-06, "loss": 0.4261, "step": 26186 }, { "epoch": 1.9462653288740244, "grad_norm": 1.9738664344370407, "learning_rate": 5.802238997512774e-06, "loss": 0.634, "step": 26187 }, { "epoch": 1.9463396506874768, "grad_norm": 1.8394915019813962, "learning_rate": 5.80151075938213e-06, "loss": 0.5268, "step": 26188 }, { "epoch": 1.946413972500929, "grad_norm": 2.0747582967104314, "learning_rate": 5.800782548281141e-06, "loss": 0.5649, "step": 26189 }, { "epoch": 1.9464882943143813, "grad_norm": 1.8791854503928282, "learning_rate": 5.800054364214494e-06, "loss": 0.4819, "step": 26190 }, { "epoch": 1.9465626161278335, "grad_norm": 1.9953584287536648, "learning_rate": 5.79932620718688e-06, "loss": 0.4907, "step": 26191 }, { "epoch": 1.9466369379412858, "grad_norm": 1.828288391564822, "learning_rate": 5.7985980772029884e-06, "loss": 0.4893, "step": 26192 }, { "epoch": 1.946711259754738, "grad_norm": 3.0360101978173955, "learning_rate": 5.797869974267498e-06, "loss": 0.5582, "step": 26193 }, { "epoch": 1.9467855815681903, "grad_norm": 2.063894132310221, "learning_rate": 5.797141898385112e-06, "loss": 0.5266, "step": 26194 }, { "epoch": 1.9468599033816425, "grad_norm": 2.1429659175628375, "learning_rate": 5.7964138495605e-06, "loss": 0.5699, "step": 26195 }, { "epoch": 1.9469342251950947, "grad_norm": 2.009038257271188, "learning_rate": 5.795685827798363e-06, "loss": 0.5606, "step": 26196 }, { "epoch": 1.947008547008547, "grad_norm": 2.696056691400147, "learning_rate": 5.794957833103378e-06, "loss": 0.4981, "step": 26197 }, { "epoch": 1.9470828688219992, "grad_norm": 2.0844949189896567, "learning_rate": 5.794229865480239e-06, "loss": 0.4905, "step": 26198 }, { "epoch": 1.9471571906354515, "grad_norm": 2.2269443420956336, "learning_rate": 5.793501924933631e-06, "loss": 0.5323, "step": 26199 }, { "epoch": 1.9472315124489037, "grad_norm": 2.206160393014715, "learning_rate": 5.792774011468235e-06, "loss": 0.5889, "step": 26200 }, { "epoch": 1.9473058342623562, "grad_norm": 2.7773129167292785, "learning_rate": 5.792046125088746e-06, "loss": 0.5051, "step": 26201 }, { "epoch": 1.9473801560758082, "grad_norm": 2.555644227149251, "learning_rate": 5.791318265799841e-06, "loss": 0.5984, "step": 26202 }, { "epoch": 1.9474544778892606, "grad_norm": 2.1213524472512795, "learning_rate": 5.790590433606216e-06, "loss": 0.4678, "step": 26203 }, { "epoch": 1.9475287997027126, "grad_norm": 1.8660095218028918, "learning_rate": 5.7898626285125504e-06, "loss": 0.5485, "step": 26204 }, { "epoch": 1.947603121516165, "grad_norm": 2.0049705466387295, "learning_rate": 5.78913485052353e-06, "loss": 0.5847, "step": 26205 }, { "epoch": 1.9476774433296171, "grad_norm": 1.776768180432579, "learning_rate": 5.7884070996438415e-06, "loss": 0.5438, "step": 26206 }, { "epoch": 1.9477517651430696, "grad_norm": 2.103539726911214, "learning_rate": 5.7876793758781656e-06, "loss": 0.5053, "step": 26207 }, { "epoch": 1.9478260869565216, "grad_norm": 1.8637914346605946, "learning_rate": 5.7869516792311955e-06, "loss": 0.606, "step": 26208 }, { "epoch": 1.947900408769974, "grad_norm": 1.9114407259313124, "learning_rate": 5.786224009707609e-06, "loss": 0.4829, "step": 26209 }, { "epoch": 1.947974730583426, "grad_norm": 2.940066934988073, "learning_rate": 5.785496367312096e-06, "loss": 0.6061, "step": 26210 }, { "epoch": 1.9480490523968785, "grad_norm": 1.6164339982431515, "learning_rate": 5.784768752049339e-06, "loss": 0.5396, "step": 26211 }, { "epoch": 1.9481233742103308, "grad_norm": 1.9786237567707596, "learning_rate": 5.784041163924018e-06, "loss": 0.7688, "step": 26212 }, { "epoch": 1.948197696023783, "grad_norm": 2.2727849996381355, "learning_rate": 5.783313602940825e-06, "loss": 0.5055, "step": 26213 }, { "epoch": 1.9482720178372352, "grad_norm": 2.360569081613648, "learning_rate": 5.782586069104441e-06, "loss": 0.4969, "step": 26214 }, { "epoch": 1.9483463396506875, "grad_norm": 1.7586354955511196, "learning_rate": 5.781858562419545e-06, "loss": 0.5316, "step": 26215 }, { "epoch": 1.9484206614641397, "grad_norm": 2.099669565873992, "learning_rate": 5.781131082890828e-06, "loss": 0.5882, "step": 26216 }, { "epoch": 1.948494983277592, "grad_norm": 1.8286126051991891, "learning_rate": 5.78040363052297e-06, "loss": 0.457, "step": 26217 }, { "epoch": 1.9485693050910442, "grad_norm": 2.1717964286733604, "learning_rate": 5.779676205320657e-06, "loss": 0.7326, "step": 26218 }, { "epoch": 1.9486436269044964, "grad_norm": 2.089661245124395, "learning_rate": 5.7789488072885625e-06, "loss": 0.4872, "step": 26219 }, { "epoch": 1.9487179487179487, "grad_norm": 2.313341224735609, "learning_rate": 5.778221436431382e-06, "loss": 0.58, "step": 26220 }, { "epoch": 1.948792270531401, "grad_norm": 2.0132996233386873, "learning_rate": 5.777494092753793e-06, "loss": 0.5989, "step": 26221 }, { "epoch": 1.9488665923448534, "grad_norm": 1.797085489454685, "learning_rate": 5.776766776260474e-06, "loss": 0.4595, "step": 26222 }, { "epoch": 1.9489409141583054, "grad_norm": 1.745324569437682, "learning_rate": 5.776039486956115e-06, "loss": 0.5053, "step": 26223 }, { "epoch": 1.9490152359717579, "grad_norm": 1.9334841920846726, "learning_rate": 5.77531222484539e-06, "loss": 0.4985, "step": 26224 }, { "epoch": 1.9490895577852099, "grad_norm": 1.775458900997112, "learning_rate": 5.774584989932991e-06, "loss": 0.4768, "step": 26225 }, { "epoch": 1.9491638795986623, "grad_norm": 1.869462923767567, "learning_rate": 5.773857782223594e-06, "loss": 0.6482, "step": 26226 }, { "epoch": 1.9492382014121143, "grad_norm": 1.9136105692575525, "learning_rate": 5.773130601721877e-06, "loss": 0.5473, "step": 26227 }, { "epoch": 1.9493125232255668, "grad_norm": 2.1686534981875982, "learning_rate": 5.772403448432534e-06, "loss": 0.6771, "step": 26228 }, { "epoch": 1.9493868450390188, "grad_norm": 2.277929333920171, "learning_rate": 5.771676322360231e-06, "loss": 0.6547, "step": 26229 }, { "epoch": 1.9494611668524713, "grad_norm": 1.6502094059725267, "learning_rate": 5.7709492235096585e-06, "loss": 0.445, "step": 26230 }, { "epoch": 1.9495354886659233, "grad_norm": 1.6967084274425717, "learning_rate": 5.770222151885498e-06, "loss": 0.5599, "step": 26231 }, { "epoch": 1.9496098104793758, "grad_norm": 2.470016978616409, "learning_rate": 5.769495107492422e-06, "loss": 0.5582, "step": 26232 }, { "epoch": 1.949684132292828, "grad_norm": 2.404122974604656, "learning_rate": 5.768768090335122e-06, "loss": 0.5293, "step": 26233 }, { "epoch": 1.9497584541062802, "grad_norm": 1.7005112446586264, "learning_rate": 5.768041100418269e-06, "loss": 0.5044, "step": 26234 }, { "epoch": 1.9498327759197325, "grad_norm": 2.171763401116857, "learning_rate": 5.767314137746552e-06, "loss": 0.5068, "step": 26235 }, { "epoch": 1.9499070977331847, "grad_norm": 2.912070537471909, "learning_rate": 5.766587202324647e-06, "loss": 0.5382, "step": 26236 }, { "epoch": 1.949981419546637, "grad_norm": 2.265063727347144, "learning_rate": 5.765860294157229e-06, "loss": 0.5335, "step": 26237 }, { "epoch": 1.9500557413600892, "grad_norm": 2.4072686165688446, "learning_rate": 5.765133413248988e-06, "loss": 0.8285, "step": 26238 }, { "epoch": 1.9501300631735414, "grad_norm": 2.364204895734374, "learning_rate": 5.7644065596045986e-06, "loss": 0.6328, "step": 26239 }, { "epoch": 1.9502043849869937, "grad_norm": 2.180646889976212, "learning_rate": 5.763679733228738e-06, "loss": 0.6807, "step": 26240 }, { "epoch": 1.950278706800446, "grad_norm": 2.359963227241891, "learning_rate": 5.7629529341260845e-06, "loss": 0.6759, "step": 26241 }, { "epoch": 1.9503530286138981, "grad_norm": 1.7532165204539212, "learning_rate": 5.7622261623013255e-06, "loss": 0.4572, "step": 26242 }, { "epoch": 1.9504273504273504, "grad_norm": 1.887435372678289, "learning_rate": 5.7614994177591335e-06, "loss": 0.5803, "step": 26243 }, { "epoch": 1.9505016722408026, "grad_norm": 1.9898604462573017, "learning_rate": 5.760772700504185e-06, "loss": 0.5567, "step": 26244 }, { "epoch": 1.950575994054255, "grad_norm": 1.5256694472222518, "learning_rate": 5.760046010541165e-06, "loss": 0.4291, "step": 26245 }, { "epoch": 1.950650315867707, "grad_norm": 2.022542899685293, "learning_rate": 5.759319347874744e-06, "loss": 0.5699, "step": 26246 }, { "epoch": 1.9507246376811596, "grad_norm": 2.1414332894510433, "learning_rate": 5.75859271250961e-06, "loss": 0.6283, "step": 26247 }, { "epoch": 1.9507989594946116, "grad_norm": 2.100333296325878, "learning_rate": 5.7578661044504354e-06, "loss": 0.6064, "step": 26248 }, { "epoch": 1.950873281308064, "grad_norm": 2.665140348939478, "learning_rate": 5.757139523701895e-06, "loss": 0.6027, "step": 26249 }, { "epoch": 1.950947603121516, "grad_norm": 1.8510646775960233, "learning_rate": 5.756412970268679e-06, "loss": 0.6048, "step": 26250 }, { "epoch": 1.9510219249349685, "grad_norm": 1.9237736580831934, "learning_rate": 5.7556864441554485e-06, "loss": 0.4806, "step": 26251 }, { "epoch": 1.9510962467484205, "grad_norm": 2.2025121571870896, "learning_rate": 5.754959945366891e-06, "loss": 0.7086, "step": 26252 }, { "epoch": 1.951170568561873, "grad_norm": 2.0150136451622007, "learning_rate": 5.754233473907682e-06, "loss": 0.6368, "step": 26253 }, { "epoch": 1.951244890375325, "grad_norm": 2.3805880431279944, "learning_rate": 5.753507029782493e-06, "loss": 0.5435, "step": 26254 }, { "epoch": 1.9513192121887775, "grad_norm": 2.3578335377673243, "learning_rate": 5.75278061299601e-06, "loss": 0.7026, "step": 26255 }, { "epoch": 1.9513935340022297, "grad_norm": 1.8094789457229055, "learning_rate": 5.752054223552901e-06, "loss": 0.5305, "step": 26256 }, { "epoch": 1.951467855815682, "grad_norm": 1.4825506504298147, "learning_rate": 5.751327861457851e-06, "loss": 0.5386, "step": 26257 }, { "epoch": 1.9515421776291342, "grad_norm": 1.4636333016899892, "learning_rate": 5.7506015267155305e-06, "loss": 0.3195, "step": 26258 }, { "epoch": 1.9516164994425864, "grad_norm": 2.5157330843863144, "learning_rate": 5.749875219330614e-06, "loss": 0.675, "step": 26259 }, { "epoch": 1.9516908212560387, "grad_norm": 1.864265582358249, "learning_rate": 5.7491489393077825e-06, "loss": 0.5471, "step": 26260 }, { "epoch": 1.951765143069491, "grad_norm": 1.740627835050247, "learning_rate": 5.7484226866517065e-06, "loss": 0.5492, "step": 26261 }, { "epoch": 1.9518394648829431, "grad_norm": 2.056142167094174, "learning_rate": 5.747696461367074e-06, "loss": 0.5707, "step": 26262 }, { "epoch": 1.9519137866963954, "grad_norm": 2.0680977316980838, "learning_rate": 5.7469702634585426e-06, "loss": 0.641, "step": 26263 }, { "epoch": 1.9519881085098476, "grad_norm": 2.5778807163582966, "learning_rate": 5.746244092930798e-06, "loss": 0.6291, "step": 26264 }, { "epoch": 1.9520624303232998, "grad_norm": 1.94122450215346, "learning_rate": 5.745517949788515e-06, "loss": 0.6363, "step": 26265 }, { "epoch": 1.952136752136752, "grad_norm": 1.674563039251402, "learning_rate": 5.7447918340363626e-06, "loss": 0.4922, "step": 26266 }, { "epoch": 1.9522110739502043, "grad_norm": 1.923074812358888, "learning_rate": 5.744065745679023e-06, "loss": 0.7219, "step": 26267 }, { "epoch": 1.9522853957636568, "grad_norm": 1.9820688803211703, "learning_rate": 5.743339684721163e-06, "loss": 0.48, "step": 26268 }, { "epoch": 1.9523597175771088, "grad_norm": 2.456840482447406, "learning_rate": 5.7426136511674656e-06, "loss": 0.6624, "step": 26269 }, { "epoch": 1.9524340393905613, "grad_norm": 2.0762576182833996, "learning_rate": 5.741887645022601e-06, "loss": 0.618, "step": 26270 }, { "epoch": 1.9525083612040133, "grad_norm": 2.362624422656489, "learning_rate": 5.741161666291239e-06, "loss": 0.4421, "step": 26271 }, { "epoch": 1.9525826830174657, "grad_norm": 1.8298432240663132, "learning_rate": 5.740435714978061e-06, "loss": 0.667, "step": 26272 }, { "epoch": 1.9526570048309178, "grad_norm": 1.6459873747279412, "learning_rate": 5.739709791087737e-06, "loss": 0.5314, "step": 26273 }, { "epoch": 1.9527313266443702, "grad_norm": 2.10561248995121, "learning_rate": 5.73898389462494e-06, "loss": 0.5831, "step": 26274 }, { "epoch": 1.9528056484578222, "grad_norm": 2.3097751197678638, "learning_rate": 5.738258025594343e-06, "loss": 0.5495, "step": 26275 }, { "epoch": 1.9528799702712747, "grad_norm": 2.3685516077950353, "learning_rate": 5.737532184000616e-06, "loss": 0.6891, "step": 26276 }, { "epoch": 1.9529542920847267, "grad_norm": 2.343100706515034, "learning_rate": 5.73680636984844e-06, "loss": 0.5697, "step": 26277 }, { "epoch": 1.9530286138981792, "grad_norm": 2.1413810988880737, "learning_rate": 5.736080583142481e-06, "loss": 0.7097, "step": 26278 }, { "epoch": 1.9531029357116314, "grad_norm": 1.9593655163693207, "learning_rate": 5.7353548238874155e-06, "loss": 0.4832, "step": 26279 }, { "epoch": 1.9531772575250836, "grad_norm": 2.0756915577613224, "learning_rate": 5.7346290920879154e-06, "loss": 0.66, "step": 26280 }, { "epoch": 1.9532515793385359, "grad_norm": 2.1149050640757725, "learning_rate": 5.7339033877486475e-06, "loss": 0.4877, "step": 26281 }, { "epoch": 1.9533259011519881, "grad_norm": 2.067979644443581, "learning_rate": 5.7331777108742916e-06, "loss": 0.6261, "step": 26282 }, { "epoch": 1.9534002229654404, "grad_norm": 2.044056144075393, "learning_rate": 5.732452061469513e-06, "loss": 0.624, "step": 26283 }, { "epoch": 1.9534745447788926, "grad_norm": 1.9230447170770681, "learning_rate": 5.731726439538996e-06, "loss": 0.5013, "step": 26284 }, { "epoch": 1.9535488665923448, "grad_norm": 1.7432499872022256, "learning_rate": 5.731000845087393e-06, "loss": 0.3924, "step": 26285 }, { "epoch": 1.953623188405797, "grad_norm": 1.9905671911597116, "learning_rate": 5.730275278119388e-06, "loss": 0.6388, "step": 26286 }, { "epoch": 1.9536975102192493, "grad_norm": 1.5721373252293505, "learning_rate": 5.729549738639651e-06, "loss": 0.4005, "step": 26287 }, { "epoch": 1.9537718320327015, "grad_norm": 1.8953151227659268, "learning_rate": 5.728824226652847e-06, "loss": 0.4636, "step": 26288 }, { "epoch": 1.953846153846154, "grad_norm": 1.9613836267267093, "learning_rate": 5.7280987421636545e-06, "loss": 0.5434, "step": 26289 }, { "epoch": 1.953920475659606, "grad_norm": 2.400779508948598, "learning_rate": 5.7273732851767385e-06, "loss": 0.6163, "step": 26290 }, { "epoch": 1.9539947974730585, "grad_norm": 1.7974860118209042, "learning_rate": 5.726647855696773e-06, "loss": 0.4216, "step": 26291 }, { "epoch": 1.9540691192865105, "grad_norm": 2.630471224892925, "learning_rate": 5.725922453728428e-06, "loss": 0.5007, "step": 26292 }, { "epoch": 1.954143441099963, "grad_norm": 2.0523779108746227, "learning_rate": 5.725197079276368e-06, "loss": 0.6131, "step": 26293 }, { "epoch": 1.954217762913415, "grad_norm": 2.454226472453235, "learning_rate": 5.724471732345271e-06, "loss": 0.6088, "step": 26294 }, { "epoch": 1.9542920847268674, "grad_norm": 2.0996871359364864, "learning_rate": 5.723746412939806e-06, "loss": 0.7126, "step": 26295 }, { "epoch": 1.9543664065403195, "grad_norm": 1.7256280500641303, "learning_rate": 5.723021121064637e-06, "loss": 0.5006, "step": 26296 }, { "epoch": 1.954440728353772, "grad_norm": 3.3884519180675854, "learning_rate": 5.722295856724437e-06, "loss": 0.4714, "step": 26297 }, { "epoch": 1.954515050167224, "grad_norm": 3.4420064704248134, "learning_rate": 5.72157061992387e-06, "loss": 0.6237, "step": 26298 }, { "epoch": 1.9545893719806764, "grad_norm": 1.9138502055233497, "learning_rate": 5.7208454106676124e-06, "loss": 0.6384, "step": 26299 }, { "epoch": 1.9546636937941286, "grad_norm": 2.15385231744699, "learning_rate": 5.7201202289603285e-06, "loss": 0.6043, "step": 26300 }, { "epoch": 1.9547380156075809, "grad_norm": 2.164314033813096, "learning_rate": 5.719395074806691e-06, "loss": 0.6056, "step": 26301 }, { "epoch": 1.954812337421033, "grad_norm": 1.8856333209118072, "learning_rate": 5.718669948211366e-06, "loss": 0.593, "step": 26302 }, { "epoch": 1.9548866592344853, "grad_norm": 2.2371736872817163, "learning_rate": 5.7179448491790165e-06, "loss": 0.6796, "step": 26303 }, { "epoch": 1.9549609810479376, "grad_norm": 1.9022238491949122, "learning_rate": 5.7172197777143225e-06, "loss": 0.5394, "step": 26304 }, { "epoch": 1.9550353028613898, "grad_norm": 1.9762142160173561, "learning_rate": 5.716494733821939e-06, "loss": 0.5163, "step": 26305 }, { "epoch": 1.955109624674842, "grad_norm": 2.0374390648971317, "learning_rate": 5.715769717506545e-06, "loss": 0.5463, "step": 26306 }, { "epoch": 1.9551839464882943, "grad_norm": 1.7249141037315012, "learning_rate": 5.715044728772803e-06, "loss": 0.4252, "step": 26307 }, { "epoch": 1.9552582683017465, "grad_norm": 2.0754104060765464, "learning_rate": 5.714319767625381e-06, "loss": 0.6128, "step": 26308 }, { "epoch": 1.9553325901151988, "grad_norm": 1.5739056726430374, "learning_rate": 5.713594834068946e-06, "loss": 0.4564, "step": 26309 }, { "epoch": 1.955406911928651, "grad_norm": 1.9838618278944613, "learning_rate": 5.712869928108162e-06, "loss": 0.5341, "step": 26310 }, { "epoch": 1.9554812337421033, "grad_norm": 2.4213364285053443, "learning_rate": 5.7121450497477016e-06, "loss": 0.5066, "step": 26311 }, { "epoch": 1.9555555555555557, "grad_norm": 1.9519864454873834, "learning_rate": 5.711420198992226e-06, "loss": 0.6374, "step": 26312 }, { "epoch": 1.9556298773690077, "grad_norm": 1.6716110888568498, "learning_rate": 5.710695375846407e-06, "loss": 0.4765, "step": 26313 }, { "epoch": 1.9557041991824602, "grad_norm": 2.1574181123626195, "learning_rate": 5.70997058031491e-06, "loss": 0.6539, "step": 26314 }, { "epoch": 1.9557785209959122, "grad_norm": 1.7098064467387304, "learning_rate": 5.709245812402395e-06, "loss": 0.4422, "step": 26315 }, { "epoch": 1.9558528428093647, "grad_norm": 2.3643920389532176, "learning_rate": 5.708521072113538e-06, "loss": 0.597, "step": 26316 }, { "epoch": 1.9559271646228167, "grad_norm": 2.3980708005352365, "learning_rate": 5.707796359452998e-06, "loss": 0.5101, "step": 26317 }, { "epoch": 1.9560014864362691, "grad_norm": 1.5751334166449975, "learning_rate": 5.707071674425444e-06, "loss": 0.5292, "step": 26318 }, { "epoch": 1.9560758082497212, "grad_norm": 2.059949343953613, "learning_rate": 5.706347017035539e-06, "loss": 0.5908, "step": 26319 }, { "epoch": 1.9561501300631736, "grad_norm": 1.9024922725150573, "learning_rate": 5.705622387287945e-06, "loss": 0.6111, "step": 26320 }, { "epoch": 1.9562244518766256, "grad_norm": 2.0103459337751515, "learning_rate": 5.704897785187336e-06, "loss": 0.7069, "step": 26321 }, { "epoch": 1.956298773690078, "grad_norm": 2.241996798316996, "learning_rate": 5.704173210738367e-06, "loss": 0.5717, "step": 26322 }, { "epoch": 1.9563730955035303, "grad_norm": 2.396075246151118, "learning_rate": 5.703448663945712e-06, "loss": 0.5544, "step": 26323 }, { "epoch": 1.9564474173169826, "grad_norm": 1.696499923119769, "learning_rate": 5.7027241448140325e-06, "loss": 0.5074, "step": 26324 }, { "epoch": 1.9565217391304348, "grad_norm": 2.007205262994088, "learning_rate": 5.7019996533479875e-06, "loss": 0.5868, "step": 26325 }, { "epoch": 1.956596060943887, "grad_norm": 2.601190913925193, "learning_rate": 5.701275189552249e-06, "loss": 0.6015, "step": 26326 }, { "epoch": 1.9566703827573393, "grad_norm": 2.0924470862356435, "learning_rate": 5.700550753431475e-06, "loss": 0.5335, "step": 26327 }, { "epoch": 1.9567447045707915, "grad_norm": 1.7936329689955977, "learning_rate": 5.6998263449903356e-06, "loss": 0.4161, "step": 26328 }, { "epoch": 1.9568190263842438, "grad_norm": 2.06534037930986, "learning_rate": 5.699101964233491e-06, "loss": 0.5462, "step": 26329 }, { "epoch": 1.956893348197696, "grad_norm": 1.952969037958364, "learning_rate": 5.6983776111656065e-06, "loss": 0.6171, "step": 26330 }, { "epoch": 1.9569676700111482, "grad_norm": 2.058328875057173, "learning_rate": 5.697653285791342e-06, "loss": 0.6455, "step": 26331 }, { "epoch": 1.9570419918246005, "grad_norm": 1.5549478244472612, "learning_rate": 5.696928988115358e-06, "loss": 0.4191, "step": 26332 }, { "epoch": 1.9571163136380527, "grad_norm": 1.7733311498142281, "learning_rate": 5.696204718142326e-06, "loss": 0.5849, "step": 26333 }, { "epoch": 1.957190635451505, "grad_norm": 1.9277949782644475, "learning_rate": 5.695480475876902e-06, "loss": 0.4955, "step": 26334 }, { "epoch": 1.9572649572649574, "grad_norm": 2.5123585431991677, "learning_rate": 5.694756261323755e-06, "loss": 0.6194, "step": 26335 }, { "epoch": 1.9573392790784094, "grad_norm": 1.9766507490834064, "learning_rate": 5.694032074487543e-06, "loss": 0.524, "step": 26336 }, { "epoch": 1.957413600891862, "grad_norm": 1.8413923533065653, "learning_rate": 5.693307915372925e-06, "loss": 0.5461, "step": 26337 }, { "epoch": 1.957487922705314, "grad_norm": 1.7224775166582493, "learning_rate": 5.692583783984573e-06, "loss": 0.4779, "step": 26338 }, { "epoch": 1.9575622445187664, "grad_norm": 2.193005872340466, "learning_rate": 5.691859680327141e-06, "loss": 0.6009, "step": 26339 }, { "epoch": 1.9576365663322184, "grad_norm": 2.288103102745901, "learning_rate": 5.6911356044052904e-06, "loss": 0.7013, "step": 26340 }, { "epoch": 1.9577108881456708, "grad_norm": 1.7807661850472334, "learning_rate": 5.690411556223692e-06, "loss": 0.636, "step": 26341 }, { "epoch": 1.9577852099591229, "grad_norm": 1.5950974823928723, "learning_rate": 5.689687535786994e-06, "loss": 0.5138, "step": 26342 }, { "epoch": 1.9578595317725753, "grad_norm": 2.0202185262937604, "learning_rate": 5.688963543099866e-06, "loss": 0.6743, "step": 26343 }, { "epoch": 1.9579338535860273, "grad_norm": 1.7185336883402793, "learning_rate": 5.688239578166964e-06, "loss": 0.588, "step": 26344 }, { "epoch": 1.9580081753994798, "grad_norm": 1.7478672948931844, "learning_rate": 5.687515640992956e-06, "loss": 0.5296, "step": 26345 }, { "epoch": 1.958082497212932, "grad_norm": 1.8963783093024609, "learning_rate": 5.686791731582499e-06, "loss": 0.5349, "step": 26346 }, { "epoch": 1.9581568190263843, "grad_norm": 1.808747356378528, "learning_rate": 5.6860678499402475e-06, "loss": 0.4432, "step": 26347 }, { "epoch": 1.9582311408398365, "grad_norm": 2.1285743049828256, "learning_rate": 5.6853439960708725e-06, "loss": 0.5565, "step": 26348 }, { "epoch": 1.9583054626532888, "grad_norm": 2.206571524336187, "learning_rate": 5.684620169979025e-06, "loss": 0.6675, "step": 26349 }, { "epoch": 1.958379784466741, "grad_norm": 1.9906967437313863, "learning_rate": 5.683896371669373e-06, "loss": 0.5561, "step": 26350 }, { "epoch": 1.9584541062801932, "grad_norm": 1.8070702213882324, "learning_rate": 5.6831726011465715e-06, "loss": 0.5662, "step": 26351 }, { "epoch": 1.9585284280936455, "grad_norm": 1.987737549682053, "learning_rate": 5.682448858415281e-06, "loss": 0.4562, "step": 26352 }, { "epoch": 1.9586027499070977, "grad_norm": 1.8404483287153448, "learning_rate": 5.681725143480161e-06, "loss": 0.5009, "step": 26353 }, { "epoch": 1.95867707172055, "grad_norm": 2.0072962659856426, "learning_rate": 5.681001456345866e-06, "loss": 0.5434, "step": 26354 }, { "epoch": 1.9587513935340022, "grad_norm": 2.3401967626177074, "learning_rate": 5.680277797017064e-06, "loss": 0.5528, "step": 26355 }, { "epoch": 1.9588257153474546, "grad_norm": 2.002491623830689, "learning_rate": 5.679554165498405e-06, "loss": 0.6007, "step": 26356 }, { "epoch": 1.9589000371609067, "grad_norm": 2.002301189059041, "learning_rate": 5.6788305617945575e-06, "loss": 0.6176, "step": 26357 }, { "epoch": 1.9589743589743591, "grad_norm": 1.9121508449396283, "learning_rate": 5.678106985910173e-06, "loss": 0.5207, "step": 26358 }, { "epoch": 1.9590486807878111, "grad_norm": 2.165860913765083, "learning_rate": 5.677383437849908e-06, "loss": 0.656, "step": 26359 }, { "epoch": 1.9591230026012636, "grad_norm": 2.186554358437343, "learning_rate": 5.676659917618428e-06, "loss": 0.6334, "step": 26360 }, { "epoch": 1.9591973244147156, "grad_norm": 1.8562915258383883, "learning_rate": 5.6759364252203874e-06, "loss": 0.5423, "step": 26361 }, { "epoch": 1.959271646228168, "grad_norm": 1.6054694314424645, "learning_rate": 5.675212960660439e-06, "loss": 0.4918, "step": 26362 }, { "epoch": 1.95934596804162, "grad_norm": 1.9379268570312227, "learning_rate": 5.674489523943254e-06, "loss": 0.5252, "step": 26363 }, { "epoch": 1.9594202898550726, "grad_norm": 2.377090557197545, "learning_rate": 5.673766115073472e-06, "loss": 0.7108, "step": 26364 }, { "epoch": 1.9594946116685246, "grad_norm": 2.0143426746442263, "learning_rate": 5.673042734055764e-06, "loss": 0.6298, "step": 26365 }, { "epoch": 1.959568933481977, "grad_norm": 1.9503430334948444, "learning_rate": 5.67231938089478e-06, "loss": 0.5982, "step": 26366 }, { "epoch": 1.959643255295429, "grad_norm": 2.058161743701623, "learning_rate": 5.671596055595181e-06, "loss": 0.4742, "step": 26367 }, { "epoch": 1.9597175771088815, "grad_norm": 2.2261682488971695, "learning_rate": 5.670872758161623e-06, "loss": 0.6379, "step": 26368 }, { "epoch": 1.9597918989223337, "grad_norm": 1.906392414184251, "learning_rate": 5.6701494885987575e-06, "loss": 0.5859, "step": 26369 }, { "epoch": 1.959866220735786, "grad_norm": 1.5837259736054556, "learning_rate": 5.669426246911248e-06, "loss": 0.3596, "step": 26370 }, { "epoch": 1.9599405425492382, "grad_norm": 1.9289008568752999, "learning_rate": 5.668703033103744e-06, "loss": 0.61, "step": 26371 }, { "epoch": 1.9600148643626905, "grad_norm": 2.1429089228801668, "learning_rate": 5.66797984718091e-06, "loss": 0.5385, "step": 26372 }, { "epoch": 1.9600891861761427, "grad_norm": 1.7794683604914798, "learning_rate": 5.6672566891473954e-06, "loss": 0.429, "step": 26373 }, { "epoch": 1.960163507989595, "grad_norm": 2.644721729166253, "learning_rate": 5.6665335590078555e-06, "loss": 0.7088, "step": 26374 }, { "epoch": 1.9602378298030472, "grad_norm": 2.1601346781983533, "learning_rate": 5.6658104567669534e-06, "loss": 0.5853, "step": 26375 }, { "epoch": 1.9603121516164994, "grad_norm": 1.9834109144045458, "learning_rate": 5.6650873824293326e-06, "loss": 0.5826, "step": 26376 }, { "epoch": 1.9603864734299516, "grad_norm": 2.2900343991553833, "learning_rate": 5.664364335999658e-06, "loss": 0.6007, "step": 26377 }, { "epoch": 1.9604607952434039, "grad_norm": 2.132527554871808, "learning_rate": 5.66364131748258e-06, "loss": 0.4998, "step": 26378 }, { "epoch": 1.9605351170568563, "grad_norm": 2.3583194338062334, "learning_rate": 5.662918326882752e-06, "loss": 0.7548, "step": 26379 }, { "epoch": 1.9606094388703084, "grad_norm": 2.0546000704686262, "learning_rate": 5.662195364204832e-06, "loss": 0.5488, "step": 26380 }, { "epoch": 1.9606837606837608, "grad_norm": 1.7675443092908194, "learning_rate": 5.6614724294534715e-06, "loss": 0.4869, "step": 26381 }, { "epoch": 1.9607580824972128, "grad_norm": 1.8870081125977811, "learning_rate": 5.660749522633329e-06, "loss": 0.5795, "step": 26382 }, { "epoch": 1.9608324043106653, "grad_norm": 1.8664620726778272, "learning_rate": 5.660026643749057e-06, "loss": 0.4998, "step": 26383 }, { "epoch": 1.9609067261241173, "grad_norm": 1.7543957930757375, "learning_rate": 5.659303792805304e-06, "loss": 0.4597, "step": 26384 }, { "epoch": 1.9609810479375698, "grad_norm": 1.7262740782395747, "learning_rate": 5.6585809698067305e-06, "loss": 0.5364, "step": 26385 }, { "epoch": 1.9610553697510218, "grad_norm": 2.2080037070261245, "learning_rate": 5.657858174757988e-06, "loss": 0.654, "step": 26386 }, { "epoch": 1.9611296915644743, "grad_norm": 2.0856394214094243, "learning_rate": 5.657135407663732e-06, "loss": 0.658, "step": 26387 }, { "epoch": 1.9612040133779263, "grad_norm": 1.8384739912304362, "learning_rate": 5.656412668528606e-06, "loss": 0.5054, "step": 26388 }, { "epoch": 1.9612783351913787, "grad_norm": 1.6781010858283463, "learning_rate": 5.655689957357276e-06, "loss": 0.4693, "step": 26389 }, { "epoch": 1.961352657004831, "grad_norm": 4.446704084256268, "learning_rate": 5.654967274154386e-06, "loss": 0.402, "step": 26390 }, { "epoch": 1.9614269788182832, "grad_norm": 1.8910986916643198, "learning_rate": 5.654244618924589e-06, "loss": 0.5641, "step": 26391 }, { "epoch": 1.9615013006317354, "grad_norm": 2.2410678305888614, "learning_rate": 5.653521991672543e-06, "loss": 0.6448, "step": 26392 }, { "epoch": 1.9615756224451877, "grad_norm": 2.625751389397318, "learning_rate": 5.6527993924028936e-06, "loss": 0.6528, "step": 26393 }, { "epoch": 1.96164994425864, "grad_norm": 4.545740658014972, "learning_rate": 5.652076821120299e-06, "loss": 0.7796, "step": 26394 }, { "epoch": 1.9617242660720922, "grad_norm": 1.7991766377283, "learning_rate": 5.651354277829409e-06, "loss": 0.5931, "step": 26395 }, { "epoch": 1.9617985878855444, "grad_norm": 2.4281841587156925, "learning_rate": 5.65063176253487e-06, "loss": 0.6666, "step": 26396 }, { "epoch": 1.9618729096989966, "grad_norm": 1.5787295709134843, "learning_rate": 5.649909275241347e-06, "loss": 0.4019, "step": 26397 }, { "epoch": 1.9619472315124489, "grad_norm": 1.544623040799515, "learning_rate": 5.649186815953474e-06, "loss": 0.3823, "step": 26398 }, { "epoch": 1.9620215533259011, "grad_norm": 1.9201844814372535, "learning_rate": 5.648464384675914e-06, "loss": 0.4441, "step": 26399 }, { "epoch": 1.9620958751393534, "grad_norm": 2.304783900896199, "learning_rate": 5.647741981413315e-06, "loss": 0.476, "step": 26400 }, { "epoch": 1.9621701969528056, "grad_norm": 1.7697293150510462, "learning_rate": 5.647019606170323e-06, "loss": 0.5572, "step": 26401 }, { "epoch": 1.962244518766258, "grad_norm": 1.9664463003883779, "learning_rate": 5.646297258951597e-06, "loss": 0.5327, "step": 26402 }, { "epoch": 1.96231884057971, "grad_norm": 2.0214133823571525, "learning_rate": 5.6455749397617795e-06, "loss": 0.6202, "step": 26403 }, { "epoch": 1.9623931623931625, "grad_norm": 2.0016222212923367, "learning_rate": 5.644852648605529e-06, "loss": 0.6276, "step": 26404 }, { "epoch": 1.9624674842066145, "grad_norm": 2.2945554722226684, "learning_rate": 5.644130385487491e-06, "loss": 0.658, "step": 26405 }, { "epoch": 1.962541806020067, "grad_norm": 1.8689938833453896, "learning_rate": 5.64340815041231e-06, "loss": 0.6008, "step": 26406 }, { "epoch": 1.962616127833519, "grad_norm": 1.9823955957187371, "learning_rate": 5.642685943384647e-06, "loss": 0.644, "step": 26407 }, { "epoch": 1.9626904496469715, "grad_norm": 1.765876071404624, "learning_rate": 5.641963764409142e-06, "loss": 0.4797, "step": 26408 }, { "epoch": 1.9627647714604235, "grad_norm": 1.8527422691120488, "learning_rate": 5.641241613490455e-06, "loss": 0.5311, "step": 26409 }, { "epoch": 1.962839093273876, "grad_norm": 2.328491561040615, "learning_rate": 5.640519490633221e-06, "loss": 0.6274, "step": 26410 }, { "epoch": 1.962913415087328, "grad_norm": 2.3363869849768277, "learning_rate": 5.639797395842101e-06, "loss": 0.6585, "step": 26411 }, { "epoch": 1.9629877369007804, "grad_norm": 1.861208634292381, "learning_rate": 5.639075329121738e-06, "loss": 0.5018, "step": 26412 }, { "epoch": 1.9630620587142327, "grad_norm": 2.1002751564447624, "learning_rate": 5.638353290476778e-06, "loss": 0.6007, "step": 26413 }, { "epoch": 1.963136380527685, "grad_norm": 2.2955451193705447, "learning_rate": 5.637631279911877e-06, "loss": 0.8514, "step": 26414 }, { "epoch": 1.9632107023411371, "grad_norm": 1.7944225815088306, "learning_rate": 5.636909297431676e-06, "loss": 0.5453, "step": 26415 }, { "epoch": 1.9632850241545894, "grad_norm": 2.0821659028219, "learning_rate": 5.636187343040831e-06, "loss": 0.5477, "step": 26416 }, { "epoch": 1.9633593459680416, "grad_norm": 1.7353315743915296, "learning_rate": 5.635465416743983e-06, "loss": 0.4991, "step": 26417 }, { "epoch": 1.9634336677814939, "grad_norm": 2.033436725685486, "learning_rate": 5.634743518545782e-06, "loss": 0.6122, "step": 26418 }, { "epoch": 1.963507989594946, "grad_norm": 1.9132312777822038, "learning_rate": 5.634021648450877e-06, "loss": 0.6231, "step": 26419 }, { "epoch": 1.9635823114083983, "grad_norm": 7.526834056103766, "learning_rate": 5.633299806463914e-06, "loss": 0.7767, "step": 26420 }, { "epoch": 1.9636566332218506, "grad_norm": 2.1023867937262173, "learning_rate": 5.6325779925895416e-06, "loss": 0.6622, "step": 26421 }, { "epoch": 1.9637309550353028, "grad_norm": 1.6494355970309282, "learning_rate": 5.631856206832405e-06, "loss": 0.505, "step": 26422 }, { "epoch": 1.963805276848755, "grad_norm": 1.7259487228637482, "learning_rate": 5.631134449197148e-06, "loss": 0.4213, "step": 26423 }, { "epoch": 1.9638795986622073, "grad_norm": 2.1584832036284203, "learning_rate": 5.630412719688424e-06, "loss": 0.6961, "step": 26424 }, { "epoch": 1.9639539204756598, "grad_norm": 2.1827057353834376, "learning_rate": 5.629691018310872e-06, "loss": 0.6542, "step": 26425 }, { "epoch": 1.9640282422891118, "grad_norm": 1.6945303841831556, "learning_rate": 5.6289693450691466e-06, "loss": 0.4939, "step": 26426 }, { "epoch": 1.9641025641025642, "grad_norm": 2.1333445158110225, "learning_rate": 5.62824769996789e-06, "loss": 0.6482, "step": 26427 }, { "epoch": 1.9641768859160162, "grad_norm": 1.9920482498030754, "learning_rate": 5.627526083011745e-06, "loss": 0.5607, "step": 26428 }, { "epoch": 1.9642512077294687, "grad_norm": 2.415903766113248, "learning_rate": 5.626804494205362e-06, "loss": 0.492, "step": 26429 }, { "epoch": 1.9643255295429207, "grad_norm": 1.9105783991280227, "learning_rate": 5.626082933553383e-06, "loss": 0.5489, "step": 26430 }, { "epoch": 1.9643998513563732, "grad_norm": 1.6729407665934577, "learning_rate": 5.625361401060461e-06, "loss": 0.4162, "step": 26431 }, { "epoch": 1.9644741731698252, "grad_norm": 1.6190066589822796, "learning_rate": 5.624639896731228e-06, "loss": 0.4633, "step": 26432 }, { "epoch": 1.9645484949832777, "grad_norm": 1.7670654041084528, "learning_rate": 5.623918420570341e-06, "loss": 0.5394, "step": 26433 }, { "epoch": 1.9646228167967297, "grad_norm": 1.9187001284685017, "learning_rate": 5.62319697258244e-06, "loss": 0.6824, "step": 26434 }, { "epoch": 1.9646971386101821, "grad_norm": 2.1076451038768407, "learning_rate": 5.622475552772166e-06, "loss": 0.4894, "step": 26435 }, { "epoch": 1.9647714604236344, "grad_norm": 2.1185068502284943, "learning_rate": 5.62175416114417e-06, "loss": 0.6491, "step": 26436 }, { "epoch": 1.9648457822370866, "grad_norm": 1.9491325649297395, "learning_rate": 5.62103279770309e-06, "loss": 0.4882, "step": 26437 }, { "epoch": 1.9649201040505389, "grad_norm": 2.0799456899626354, "learning_rate": 5.620311462453578e-06, "loss": 0.6366, "step": 26438 }, { "epoch": 1.964994425863991, "grad_norm": 2.4864447381686587, "learning_rate": 5.619590155400274e-06, "loss": 0.5348, "step": 26439 }, { "epoch": 1.9650687476774433, "grad_norm": 1.8677230758577559, "learning_rate": 5.618868876547816e-06, "loss": 0.5981, "step": 26440 }, { "epoch": 1.9651430694908956, "grad_norm": 1.8336373575985354, "learning_rate": 5.618147625900858e-06, "loss": 0.6085, "step": 26441 }, { "epoch": 1.9652173913043478, "grad_norm": 1.876338171695871, "learning_rate": 5.617426403464038e-06, "loss": 0.4915, "step": 26442 }, { "epoch": 1.9652917131178, "grad_norm": 2.2537446884806878, "learning_rate": 5.6167052092419995e-06, "loss": 0.6863, "step": 26443 }, { "epoch": 1.9653660349312523, "grad_norm": 1.6123900685940564, "learning_rate": 5.615984043239385e-06, "loss": 0.4675, "step": 26444 }, { "epoch": 1.9654403567447045, "grad_norm": 1.9357594392305622, "learning_rate": 5.615262905460834e-06, "loss": 0.4359, "step": 26445 }, { "epoch": 1.965514678558157, "grad_norm": 4.072710270701297, "learning_rate": 5.614541795910997e-06, "loss": 0.6614, "step": 26446 }, { "epoch": 1.965589000371609, "grad_norm": 2.3990117870227827, "learning_rate": 5.613820714594509e-06, "loss": 0.8001, "step": 26447 }, { "epoch": 1.9656633221850615, "grad_norm": 1.7395802286549238, "learning_rate": 5.6130996615160184e-06, "loss": 0.5379, "step": 26448 }, { "epoch": 1.9657376439985135, "grad_norm": 2.2939105595392952, "learning_rate": 5.612378636680165e-06, "loss": 0.4968, "step": 26449 }, { "epoch": 1.965811965811966, "grad_norm": 2.0839131108363644, "learning_rate": 5.611657640091586e-06, "loss": 0.4008, "step": 26450 }, { "epoch": 1.965886287625418, "grad_norm": 2.15274516649506, "learning_rate": 5.610936671754932e-06, "loss": 0.5065, "step": 26451 }, { "epoch": 1.9659606094388704, "grad_norm": 1.8782056487095455, "learning_rate": 5.610215731674835e-06, "loss": 0.5065, "step": 26452 }, { "epoch": 1.9660349312523224, "grad_norm": 1.9116207928606144, "learning_rate": 5.609494819855947e-06, "loss": 0.4848, "step": 26453 }, { "epoch": 1.9661092530657749, "grad_norm": 1.9471541785652855, "learning_rate": 5.608773936302902e-06, "loss": 0.6667, "step": 26454 }, { "epoch": 1.966183574879227, "grad_norm": 2.0762177481934887, "learning_rate": 5.608053081020344e-06, "loss": 0.5246, "step": 26455 }, { "epoch": 1.9662578966926794, "grad_norm": 1.7436689245397499, "learning_rate": 5.607332254012911e-06, "loss": 0.3804, "step": 26456 }, { "epoch": 1.9663322185061316, "grad_norm": 2.04715148375511, "learning_rate": 5.606611455285242e-06, "loss": 0.6227, "step": 26457 }, { "epoch": 1.9664065403195838, "grad_norm": 1.9810670330306563, "learning_rate": 5.6058906848419834e-06, "loss": 0.6237, "step": 26458 }, { "epoch": 1.966480862133036, "grad_norm": 1.8073642799079388, "learning_rate": 5.60516994268777e-06, "loss": 0.4816, "step": 26459 }, { "epoch": 1.9665551839464883, "grad_norm": 2.1384739244715045, "learning_rate": 5.6044492288272475e-06, "loss": 0.4955, "step": 26460 }, { "epoch": 1.9666295057599406, "grad_norm": 2.5359266003252263, "learning_rate": 5.603728543265054e-06, "loss": 0.7575, "step": 26461 }, { "epoch": 1.9667038275733928, "grad_norm": 1.900986439694902, "learning_rate": 5.603007886005822e-06, "loss": 0.5351, "step": 26462 }, { "epoch": 1.966778149386845, "grad_norm": 1.894797065161284, "learning_rate": 5.602287257054202e-06, "loss": 0.6649, "step": 26463 }, { "epoch": 1.9668524712002973, "grad_norm": 1.847344873978245, "learning_rate": 5.601566656414829e-06, "loss": 0.5263, "step": 26464 }, { "epoch": 1.9669267930137495, "grad_norm": 2.0225588401456194, "learning_rate": 5.6008460840923415e-06, "loss": 0.6477, "step": 26465 }, { "epoch": 1.9670011148272017, "grad_norm": 2.3661716877718857, "learning_rate": 5.600125540091379e-06, "loss": 0.6325, "step": 26466 }, { "epoch": 1.967075436640654, "grad_norm": 2.359215519223434, "learning_rate": 5.599405024416576e-06, "loss": 0.5198, "step": 26467 }, { "epoch": 1.9671497584541062, "grad_norm": 3.601341266980875, "learning_rate": 5.598684537072578e-06, "loss": 0.5872, "step": 26468 }, { "epoch": 1.9672240802675587, "grad_norm": 1.8789682312537868, "learning_rate": 5.597964078064016e-06, "loss": 0.5721, "step": 26469 }, { "epoch": 1.9672984020810107, "grad_norm": 1.7536877504915593, "learning_rate": 5.597243647395538e-06, "loss": 0.5588, "step": 26470 }, { "epoch": 1.9673727238944632, "grad_norm": 2.328734033076803, "learning_rate": 5.596523245071778e-06, "loss": 0.5655, "step": 26471 }, { "epoch": 1.9674470457079152, "grad_norm": 2.5609030992906363, "learning_rate": 5.595802871097366e-06, "loss": 0.7002, "step": 26472 }, { "epoch": 1.9675213675213676, "grad_norm": 2.2842653055034767, "learning_rate": 5.595082525476953e-06, "loss": 0.578, "step": 26473 }, { "epoch": 1.9675956893348197, "grad_norm": 1.7746632883139342, "learning_rate": 5.594362208215166e-06, "loss": 0.5366, "step": 26474 }, { "epoch": 1.9676700111482721, "grad_norm": 1.6844136978540614, "learning_rate": 5.59364191931665e-06, "loss": 0.5109, "step": 26475 }, { "epoch": 1.9677443329617241, "grad_norm": 1.9907626000942542, "learning_rate": 5.592921658786039e-06, "loss": 0.6081, "step": 26476 }, { "epoch": 1.9678186547751766, "grad_norm": 2.217271434627794, "learning_rate": 5.59220142662797e-06, "loss": 0.7152, "step": 26477 }, { "epoch": 1.9678929765886286, "grad_norm": 2.3281179022772958, "learning_rate": 5.591481222847078e-06, "loss": 0.6032, "step": 26478 }, { "epoch": 1.967967298402081, "grad_norm": 1.6146280004073936, "learning_rate": 5.590761047447999e-06, "loss": 0.4891, "step": 26479 }, { "epoch": 1.9680416202155333, "grad_norm": 1.6468057258481579, "learning_rate": 5.590040900435374e-06, "loss": 0.4739, "step": 26480 }, { "epoch": 1.9681159420289855, "grad_norm": 2.2765184764831545, "learning_rate": 5.589320781813834e-06, "loss": 0.6051, "step": 26481 }, { "epoch": 1.9681902638424378, "grad_norm": 2.1868199389228264, "learning_rate": 5.588600691588022e-06, "loss": 0.6724, "step": 26482 }, { "epoch": 1.96826458565589, "grad_norm": 1.970465712778946, "learning_rate": 5.587880629762569e-06, "loss": 0.7488, "step": 26483 }, { "epoch": 1.9683389074693423, "grad_norm": 1.9076614783555779, "learning_rate": 5.5871605963421076e-06, "loss": 0.6089, "step": 26484 }, { "epoch": 1.9684132292827945, "grad_norm": 2.232358416559857, "learning_rate": 5.586440591331281e-06, "loss": 0.614, "step": 26485 }, { "epoch": 1.9684875510962467, "grad_norm": 1.8244153191449566, "learning_rate": 5.585720614734722e-06, "loss": 0.5391, "step": 26486 }, { "epoch": 1.968561872909699, "grad_norm": 2.094314716479738, "learning_rate": 5.58500066655706e-06, "loss": 0.6441, "step": 26487 }, { "epoch": 1.9686361947231512, "grad_norm": 1.7792205957666904, "learning_rate": 5.5842807468029435e-06, "loss": 0.4966, "step": 26488 }, { "epoch": 1.9687105165366034, "grad_norm": 1.7176646402293592, "learning_rate": 5.583560855476989e-06, "loss": 0.4683, "step": 26489 }, { "epoch": 1.9687848383500557, "grad_norm": 2.0889069254118673, "learning_rate": 5.582840992583845e-06, "loss": 0.599, "step": 26490 }, { "epoch": 1.968859160163508, "grad_norm": 1.945958129623761, "learning_rate": 5.582121158128138e-06, "loss": 0.4949, "step": 26491 }, { "epoch": 1.9689334819769604, "grad_norm": 1.8309864624758811, "learning_rate": 5.581401352114511e-06, "loss": 0.4673, "step": 26492 }, { "epoch": 1.9690078037904124, "grad_norm": 1.9350648251827451, "learning_rate": 5.580681574547591e-06, "loss": 0.5227, "step": 26493 }, { "epoch": 1.9690821256038649, "grad_norm": 2.253003677884444, "learning_rate": 5.5799618254320095e-06, "loss": 0.5562, "step": 26494 }, { "epoch": 1.9691564474173169, "grad_norm": 1.6532956260022662, "learning_rate": 5.579242104772409e-06, "loss": 0.5391, "step": 26495 }, { "epoch": 1.9692307692307693, "grad_norm": 2.12958566394691, "learning_rate": 5.578522412573415e-06, "loss": 0.6803, "step": 26496 }, { "epoch": 1.9693050910442214, "grad_norm": 2.467656507529769, "learning_rate": 5.577802748839667e-06, "loss": 0.6035, "step": 26497 }, { "epoch": 1.9693794128576738, "grad_norm": 1.9899999182747268, "learning_rate": 5.5770831135757965e-06, "loss": 0.4609, "step": 26498 }, { "epoch": 1.9694537346711258, "grad_norm": 2.4656866877357655, "learning_rate": 5.576363506786435e-06, "loss": 0.7923, "step": 26499 }, { "epoch": 1.9695280564845783, "grad_norm": 2.1677957990869356, "learning_rate": 5.575643928476215e-06, "loss": 0.5623, "step": 26500 }, { "epoch": 1.9696023782980303, "grad_norm": 6.000739996451214, "learning_rate": 5.574924378649767e-06, "loss": 0.6494, "step": 26501 }, { "epoch": 1.9696767001114828, "grad_norm": 2.1965924926171727, "learning_rate": 5.57420485731173e-06, "loss": 0.6337, "step": 26502 }, { "epoch": 1.969751021924935, "grad_norm": 1.8841366507029922, "learning_rate": 5.573485364466733e-06, "loss": 0.5359, "step": 26503 }, { "epoch": 1.9698253437383872, "grad_norm": 2.0834519788833674, "learning_rate": 5.5727659001194035e-06, "loss": 0.6696, "step": 26504 }, { "epoch": 1.9698996655518395, "grad_norm": 1.8573548562172038, "learning_rate": 5.5720464642743796e-06, "loss": 0.466, "step": 26505 }, { "epoch": 1.9699739873652917, "grad_norm": 2.0991554342988743, "learning_rate": 5.5713270569362884e-06, "loss": 0.6421, "step": 26506 }, { "epoch": 1.970048309178744, "grad_norm": 1.9650022130159128, "learning_rate": 5.570607678109768e-06, "loss": 0.5644, "step": 26507 }, { "epoch": 1.9701226309921962, "grad_norm": 2.182630344487598, "learning_rate": 5.5698883277994444e-06, "loss": 0.6702, "step": 26508 }, { "epoch": 1.9701969528056484, "grad_norm": 1.8279961024060771, "learning_rate": 5.569169006009947e-06, "loss": 0.4565, "step": 26509 }, { "epoch": 1.9702712746191007, "grad_norm": 6.081334123910013, "learning_rate": 5.568449712745917e-06, "loss": 0.6164, "step": 26510 }, { "epoch": 1.970345596432553, "grad_norm": 1.8536274448593544, "learning_rate": 5.56773044801197e-06, "loss": 0.5351, "step": 26511 }, { "epoch": 1.9704199182460052, "grad_norm": 2.0237995298811353, "learning_rate": 5.567011211812749e-06, "loss": 0.626, "step": 26512 }, { "epoch": 1.9704942400594576, "grad_norm": 1.753740550773292, "learning_rate": 5.566292004152875e-06, "loss": 0.5642, "step": 26513 }, { "epoch": 1.9705685618729096, "grad_norm": 1.8525208976576062, "learning_rate": 5.565572825036987e-06, "loss": 0.5456, "step": 26514 }, { "epoch": 1.970642883686362, "grad_norm": 1.9782231349011272, "learning_rate": 5.564853674469711e-06, "loss": 0.4925, "step": 26515 }, { "epoch": 1.970717205499814, "grad_norm": 2.4052207972927175, "learning_rate": 5.564134552455673e-06, "loss": 0.7214, "step": 26516 }, { "epoch": 1.9707915273132666, "grad_norm": 1.7726402635873761, "learning_rate": 5.5634154589995115e-06, "loss": 0.4123, "step": 26517 }, { "epoch": 1.9708658491267186, "grad_norm": 2.049086827062925, "learning_rate": 5.5626963941058465e-06, "loss": 0.4912, "step": 26518 }, { "epoch": 1.970940170940171, "grad_norm": 2.2277855745081054, "learning_rate": 5.561977357779315e-06, "loss": 0.5463, "step": 26519 }, { "epoch": 1.971014492753623, "grad_norm": 2.241335670809941, "learning_rate": 5.561258350024544e-06, "loss": 0.6849, "step": 26520 }, { "epoch": 1.9710888145670755, "grad_norm": 1.8116933413797758, "learning_rate": 5.560539370846158e-06, "loss": 0.5453, "step": 26521 }, { "epoch": 1.9711631363805275, "grad_norm": 2.3162023082825027, "learning_rate": 5.559820420248796e-06, "loss": 0.7185, "step": 26522 }, { "epoch": 1.97123745819398, "grad_norm": 2.23289134138216, "learning_rate": 5.559101498237072e-06, "loss": 0.6641, "step": 26523 }, { "epoch": 1.9713117800074322, "grad_norm": 3.185060197390595, "learning_rate": 5.558382604815627e-06, "loss": 0.6422, "step": 26524 }, { "epoch": 1.9713861018208845, "grad_norm": 2.1118272708120562, "learning_rate": 5.557663739989083e-06, "loss": 0.5914, "step": 26525 }, { "epoch": 1.9714604236343367, "grad_norm": 2.1252515808907373, "learning_rate": 5.556944903762065e-06, "loss": 0.6087, "step": 26526 }, { "epoch": 1.971534745447789, "grad_norm": 2.2486492105985003, "learning_rate": 5.55622609613921e-06, "loss": 0.5068, "step": 26527 }, { "epoch": 1.9716090672612412, "grad_norm": 2.1999882527729753, "learning_rate": 5.555507317125137e-06, "loss": 0.5094, "step": 26528 }, { "epoch": 1.9716833890746934, "grad_norm": 2.6680418516761515, "learning_rate": 5.554788566724482e-06, "loss": 0.4937, "step": 26529 }, { "epoch": 1.9717577108881457, "grad_norm": 1.4781930503807374, "learning_rate": 5.554069844941867e-06, "loss": 0.4078, "step": 26530 }, { "epoch": 1.971832032701598, "grad_norm": 2.5075560644741803, "learning_rate": 5.553351151781915e-06, "loss": 0.5173, "step": 26531 }, { "epoch": 1.9719063545150501, "grad_norm": 1.9098137156934887, "learning_rate": 5.552632487249262e-06, "loss": 0.6416, "step": 26532 }, { "epoch": 1.9719806763285024, "grad_norm": 2.1043976920868213, "learning_rate": 5.55191385134853e-06, "loss": 0.6273, "step": 26533 }, { "epoch": 1.9720549981419546, "grad_norm": 2.1841130407051166, "learning_rate": 5.551195244084346e-06, "loss": 0.7676, "step": 26534 }, { "epoch": 1.9721293199554069, "grad_norm": 2.0209246729654233, "learning_rate": 5.550476665461333e-06, "loss": 0.5603, "step": 26535 }, { "epoch": 1.9722036417688593, "grad_norm": 1.7833726428677974, "learning_rate": 5.549758115484123e-06, "loss": 0.5232, "step": 26536 }, { "epoch": 1.9722779635823113, "grad_norm": 2.114601400964966, "learning_rate": 5.549039594157341e-06, "loss": 0.6829, "step": 26537 }, { "epoch": 1.9723522853957638, "grad_norm": 2.354657274964433, "learning_rate": 5.548321101485605e-06, "loss": 0.622, "step": 26538 }, { "epoch": 1.9724266072092158, "grad_norm": 2.283914723911563, "learning_rate": 5.547602637473553e-06, "loss": 0.5742, "step": 26539 }, { "epoch": 1.9725009290226683, "grad_norm": 1.8269260190446264, "learning_rate": 5.546884202125798e-06, "loss": 0.5808, "step": 26540 }, { "epoch": 1.9725752508361203, "grad_norm": 2.0044437217911804, "learning_rate": 5.546165795446977e-06, "loss": 0.6659, "step": 26541 }, { "epoch": 1.9726495726495727, "grad_norm": 2.1421530987666797, "learning_rate": 5.54544741744171e-06, "loss": 0.5387, "step": 26542 }, { "epoch": 1.9727238944630248, "grad_norm": 2.1539676928187226, "learning_rate": 5.544729068114616e-06, "loss": 0.5677, "step": 26543 }, { "epoch": 1.9727982162764772, "grad_norm": 2.338016149803078, "learning_rate": 5.544010747470333e-06, "loss": 0.4685, "step": 26544 }, { "epoch": 1.9728725380899292, "grad_norm": 2.0988634979703615, "learning_rate": 5.543292455513471e-06, "loss": 0.5601, "step": 26545 }, { "epoch": 1.9729468599033817, "grad_norm": 2.278049480455163, "learning_rate": 5.542574192248663e-06, "loss": 0.5458, "step": 26546 }, { "epoch": 1.973021181716834, "grad_norm": 2.0196773168582327, "learning_rate": 5.541855957680533e-06, "loss": 0.5081, "step": 26547 }, { "epoch": 1.9730955035302862, "grad_norm": 1.9990881201660216, "learning_rate": 5.5411377518136976e-06, "loss": 0.5162, "step": 26548 }, { "epoch": 1.9731698253437384, "grad_norm": 1.8526375807160225, "learning_rate": 5.540419574652791e-06, "loss": 0.4981, "step": 26549 }, { "epoch": 1.9732441471571907, "grad_norm": 1.8381861713901384, "learning_rate": 5.539701426202428e-06, "loss": 0.5641, "step": 26550 }, { "epoch": 1.973318468970643, "grad_norm": 2.111709693468292, "learning_rate": 5.538983306467239e-06, "loss": 0.5649, "step": 26551 }, { "epoch": 1.9733927907840951, "grad_norm": 2.446049820258987, "learning_rate": 5.5382652154518435e-06, "loss": 0.4657, "step": 26552 }, { "epoch": 1.9734671125975474, "grad_norm": 2.3051341935822522, "learning_rate": 5.537547153160862e-06, "loss": 0.6292, "step": 26553 }, { "epoch": 1.9735414344109996, "grad_norm": 2.33672714383897, "learning_rate": 5.536829119598923e-06, "loss": 0.6943, "step": 26554 }, { "epoch": 1.9736157562244518, "grad_norm": 1.7310516510724991, "learning_rate": 5.536111114770648e-06, "loss": 0.4137, "step": 26555 }, { "epoch": 1.973690078037904, "grad_norm": 1.9994772374137761, "learning_rate": 5.535393138680658e-06, "loss": 0.5204, "step": 26556 }, { "epoch": 1.9737643998513563, "grad_norm": 2.064955457376758, "learning_rate": 5.534675191333572e-06, "loss": 0.5599, "step": 26557 }, { "epoch": 1.9738387216648086, "grad_norm": 3.202536312495973, "learning_rate": 5.5339572727340175e-06, "loss": 0.6639, "step": 26558 }, { "epoch": 1.973913043478261, "grad_norm": 1.8971864599740489, "learning_rate": 5.5332393828866145e-06, "loss": 0.481, "step": 26559 }, { "epoch": 1.973987365291713, "grad_norm": 1.688096858109298, "learning_rate": 5.532521521795982e-06, "loss": 0.4439, "step": 26560 }, { "epoch": 1.9740616871051655, "grad_norm": 2.1228688325397664, "learning_rate": 5.5318036894667485e-06, "loss": 0.6723, "step": 26561 }, { "epoch": 1.9741360089186175, "grad_norm": 1.3496163830877028, "learning_rate": 5.531085885903525e-06, "loss": 0.3598, "step": 26562 }, { "epoch": 1.97421033073207, "grad_norm": 2.030809253376891, "learning_rate": 5.530368111110944e-06, "loss": 0.3995, "step": 26563 }, { "epoch": 1.974284652545522, "grad_norm": 1.9429603907141284, "learning_rate": 5.529650365093621e-06, "loss": 0.5412, "step": 26564 }, { "epoch": 1.9743589743589745, "grad_norm": 2.160557615111772, "learning_rate": 5.528932647856173e-06, "loss": 0.6506, "step": 26565 }, { "epoch": 1.9744332961724265, "grad_norm": 1.56082271563642, "learning_rate": 5.528214959403229e-06, "loss": 0.5006, "step": 26566 }, { "epoch": 1.974507617985879, "grad_norm": 1.9397280151149698, "learning_rate": 5.5274972997394035e-06, "loss": 0.6681, "step": 26567 }, { "epoch": 1.974581939799331, "grad_norm": 1.9828628611463548, "learning_rate": 5.52677966886932e-06, "loss": 0.6516, "step": 26568 }, { "epoch": 1.9746562616127834, "grad_norm": 2.1718605804718845, "learning_rate": 5.526062066797595e-06, "loss": 0.7522, "step": 26569 }, { "epoch": 1.9747305834262356, "grad_norm": 1.5731322830565266, "learning_rate": 5.525344493528848e-06, "loss": 0.5189, "step": 26570 }, { "epoch": 1.9748049052396879, "grad_norm": 2.57508279853629, "learning_rate": 5.524626949067705e-06, "loss": 0.7457, "step": 26571 }, { "epoch": 1.9748792270531401, "grad_norm": 1.5714477586219426, "learning_rate": 5.523909433418777e-06, "loss": 0.4261, "step": 26572 }, { "epoch": 1.9749535488665924, "grad_norm": 1.8703938231418056, "learning_rate": 5.523191946586692e-06, "loss": 0.582, "step": 26573 }, { "epoch": 1.9750278706800446, "grad_norm": 2.0286758448172, "learning_rate": 5.5224744885760636e-06, "loss": 0.464, "step": 26574 }, { "epoch": 1.9751021924934968, "grad_norm": 1.6556132274874793, "learning_rate": 5.52175705939151e-06, "loss": 0.4567, "step": 26575 }, { "epoch": 1.975176514306949, "grad_norm": 2.372498070545616, "learning_rate": 5.521039659037656e-06, "loss": 0.5383, "step": 26576 }, { "epoch": 1.9752508361204013, "grad_norm": 1.831039800570718, "learning_rate": 5.520322287519111e-06, "loss": 0.5808, "step": 26577 }, { "epoch": 1.9753251579338535, "grad_norm": 2.2900908351405453, "learning_rate": 5.519604944840509e-06, "loss": 0.5746, "step": 26578 }, { "epoch": 1.9753994797473058, "grad_norm": 1.7520561348918067, "learning_rate": 5.518887631006446e-06, "loss": 0.4312, "step": 26579 }, { "epoch": 1.9754738015607582, "grad_norm": 1.78201465856319, "learning_rate": 5.518170346021559e-06, "loss": 0.5771, "step": 26580 }, { "epoch": 1.9755481233742103, "grad_norm": 1.72389396533642, "learning_rate": 5.517453089890459e-06, "loss": 0.4479, "step": 26581 }, { "epoch": 1.9756224451876627, "grad_norm": 1.8778595069268842, "learning_rate": 5.516735862617758e-06, "loss": 0.4727, "step": 26582 }, { "epoch": 1.9756967670011147, "grad_norm": 2.4579245398347345, "learning_rate": 5.516018664208082e-06, "loss": 0.5038, "step": 26583 }, { "epoch": 1.9757710888145672, "grad_norm": 1.9974379016127894, "learning_rate": 5.515301494666043e-06, "loss": 0.5996, "step": 26584 }, { "epoch": 1.9758454106280192, "grad_norm": 3.4989085433812206, "learning_rate": 5.514584353996264e-06, "loss": 0.6543, "step": 26585 }, { "epoch": 1.9759197324414717, "grad_norm": 1.6366591961917845, "learning_rate": 5.513867242203359e-06, "loss": 0.3944, "step": 26586 }, { "epoch": 1.9759940542549237, "grad_norm": 2.121086139930584, "learning_rate": 5.513150159291938e-06, "loss": 0.559, "step": 26587 }, { "epoch": 1.9760683760683762, "grad_norm": 2.1412053202158874, "learning_rate": 5.512433105266628e-06, "loss": 0.5312, "step": 26588 }, { "epoch": 1.9761426978818282, "grad_norm": 2.0181084503590836, "learning_rate": 5.511716080132041e-06, "loss": 0.5965, "step": 26589 }, { "epoch": 1.9762170196952806, "grad_norm": 1.9642307705228321, "learning_rate": 5.510999083892794e-06, "loss": 0.6934, "step": 26590 }, { "epoch": 1.9762913415087329, "grad_norm": 1.933471922637058, "learning_rate": 5.5102821165535e-06, "loss": 0.5561, "step": 26591 }, { "epoch": 1.976365663322185, "grad_norm": 1.9086092872150422, "learning_rate": 5.509565178118775e-06, "loss": 0.5287, "step": 26592 }, { "epoch": 1.9764399851356373, "grad_norm": 1.4560635780089635, "learning_rate": 5.508848268593239e-06, "loss": 0.3841, "step": 26593 }, { "epoch": 1.9765143069490896, "grad_norm": 2.0024349428849826, "learning_rate": 5.508131387981501e-06, "loss": 0.5457, "step": 26594 }, { "epoch": 1.9765886287625418, "grad_norm": 1.7551871271023332, "learning_rate": 5.5074145362881845e-06, "loss": 0.5757, "step": 26595 }, { "epoch": 1.976662950575994, "grad_norm": 2.22364715464884, "learning_rate": 5.506697713517899e-06, "loss": 0.7564, "step": 26596 }, { "epoch": 1.9767372723894463, "grad_norm": 1.6619244509873108, "learning_rate": 5.5059809196752574e-06, "loss": 0.4458, "step": 26597 }, { "epoch": 1.9768115942028985, "grad_norm": 2.0106775023929706, "learning_rate": 5.5052641547648824e-06, "loss": 0.4953, "step": 26598 }, { "epoch": 1.9768859160163508, "grad_norm": 1.8565176120448534, "learning_rate": 5.504547418791378e-06, "loss": 0.5006, "step": 26599 }, { "epoch": 1.976960237829803, "grad_norm": 1.8896599398220155, "learning_rate": 5.503830711759368e-06, "loss": 0.6045, "step": 26600 }, { "epoch": 1.9770345596432553, "grad_norm": 1.8589862612199342, "learning_rate": 5.503114033673464e-06, "loss": 0.5323, "step": 26601 }, { "epoch": 1.9771088814567075, "grad_norm": 1.8688228043499764, "learning_rate": 5.5023973845382774e-06, "loss": 0.5337, "step": 26602 }, { "epoch": 1.97718320327016, "grad_norm": 2.0737947143829967, "learning_rate": 5.501680764358424e-06, "loss": 0.5786, "step": 26603 }, { "epoch": 1.977257525083612, "grad_norm": 4.535982127265872, "learning_rate": 5.500964173138511e-06, "loss": 0.5441, "step": 26604 }, { "epoch": 1.9773318468970644, "grad_norm": 1.9229733823240336, "learning_rate": 5.500247610883162e-06, "loss": 0.594, "step": 26605 }, { "epoch": 1.9774061687105164, "grad_norm": 2.088580427757965, "learning_rate": 5.499531077596982e-06, "loss": 0.6703, "step": 26606 }, { "epoch": 1.977480490523969, "grad_norm": 2.084497342332139, "learning_rate": 5.4988145732845895e-06, "loss": 0.5629, "step": 26607 }, { "epoch": 1.977554812337421, "grad_norm": 1.7503272857863204, "learning_rate": 5.498098097950597e-06, "loss": 0.6048, "step": 26608 }, { "epoch": 1.9776291341508734, "grad_norm": 2.5820096650102298, "learning_rate": 5.497381651599612e-06, "loss": 0.5734, "step": 26609 }, { "epoch": 1.9777034559643254, "grad_norm": 1.8555905186074746, "learning_rate": 5.496665234236253e-06, "loss": 0.4712, "step": 26610 }, { "epoch": 1.9777777777777779, "grad_norm": 2.31288112405565, "learning_rate": 5.495948845865129e-06, "loss": 0.6452, "step": 26611 }, { "epoch": 1.9778520995912299, "grad_norm": 1.8418055633286923, "learning_rate": 5.495232486490854e-06, "loss": 0.5998, "step": 26612 }, { "epoch": 1.9779264214046823, "grad_norm": 1.491697326993598, "learning_rate": 5.494516156118037e-06, "loss": 0.4586, "step": 26613 }, { "epoch": 1.9780007432181346, "grad_norm": 1.7912894162629365, "learning_rate": 5.493799854751288e-06, "loss": 0.537, "step": 26614 }, { "epoch": 1.9780750650315868, "grad_norm": 4.176872768714001, "learning_rate": 5.493083582395226e-06, "loss": 0.6562, "step": 26615 }, { "epoch": 1.978149386845039, "grad_norm": 1.8651451493451368, "learning_rate": 5.492367339054454e-06, "loss": 0.4608, "step": 26616 }, { "epoch": 1.9782237086584913, "grad_norm": 1.7920696469088524, "learning_rate": 5.491651124733591e-06, "loss": 0.4548, "step": 26617 }, { "epoch": 1.9782980304719435, "grad_norm": 1.7065170221810453, "learning_rate": 5.490934939437244e-06, "loss": 0.5489, "step": 26618 }, { "epoch": 1.9783723522853958, "grad_norm": 1.7507926313813422, "learning_rate": 5.49021878317002e-06, "loss": 0.6445, "step": 26619 }, { "epoch": 1.978446674098848, "grad_norm": 1.9447424590578557, "learning_rate": 5.489502655936539e-06, "loss": 0.6547, "step": 26620 }, { "epoch": 1.9785209959123002, "grad_norm": 2.1932122397959173, "learning_rate": 5.488786557741399e-06, "loss": 0.6152, "step": 26621 }, { "epoch": 1.9785953177257525, "grad_norm": 2.473857843148603, "learning_rate": 5.488070488589223e-06, "loss": 0.5601, "step": 26622 }, { "epoch": 1.9786696395392047, "grad_norm": 1.9042478946267773, "learning_rate": 5.487354448484614e-06, "loss": 0.6439, "step": 26623 }, { "epoch": 1.978743961352657, "grad_norm": 2.3596819774416593, "learning_rate": 5.486638437432184e-06, "loss": 0.5777, "step": 26624 }, { "epoch": 1.9788182831661092, "grad_norm": 1.9429211592689446, "learning_rate": 5.4859224554365406e-06, "loss": 0.6099, "step": 26625 }, { "epoch": 1.9788926049795617, "grad_norm": 1.7240555684741248, "learning_rate": 5.485206502502292e-06, "loss": 0.4514, "step": 26626 }, { "epoch": 1.9789669267930137, "grad_norm": 2.6548033986978923, "learning_rate": 5.484490578634052e-06, "loss": 0.6006, "step": 26627 }, { "epoch": 1.9790412486064661, "grad_norm": 1.9781319375480622, "learning_rate": 5.483774683836427e-06, "loss": 0.4653, "step": 26628 }, { "epoch": 1.9791155704199181, "grad_norm": 2.0718444776379554, "learning_rate": 5.483058818114025e-06, "loss": 0.5262, "step": 26629 }, { "epoch": 1.9791898922333706, "grad_norm": 1.6998267796779627, "learning_rate": 5.482342981471456e-06, "loss": 0.516, "step": 26630 }, { "epoch": 1.9792642140468226, "grad_norm": 1.7618913633607254, "learning_rate": 5.481627173913326e-06, "loss": 0.5201, "step": 26631 }, { "epoch": 1.979338535860275, "grad_norm": 2.0702074528995107, "learning_rate": 5.48091139544425e-06, "loss": 0.4733, "step": 26632 }, { "epoch": 1.979412857673727, "grad_norm": 1.8627986199866797, "learning_rate": 5.4801956460688334e-06, "loss": 0.4821, "step": 26633 }, { "epoch": 1.9794871794871796, "grad_norm": 1.9500739959340139, "learning_rate": 5.479479925791682e-06, "loss": 0.5145, "step": 26634 }, { "epoch": 1.9795615013006316, "grad_norm": 2.1524023272451482, "learning_rate": 5.478764234617403e-06, "loss": 0.5283, "step": 26635 }, { "epoch": 1.979635823114084, "grad_norm": 2.2495586479274587, "learning_rate": 5.478048572550603e-06, "loss": 0.7986, "step": 26636 }, { "epoch": 1.9797101449275363, "grad_norm": 2.397815513145006, "learning_rate": 5.477332939595894e-06, "loss": 0.6872, "step": 26637 }, { "epoch": 1.9797844667409885, "grad_norm": 1.8392032633650757, "learning_rate": 5.476617335757878e-06, "loss": 0.6116, "step": 26638 }, { "epoch": 1.9798587885544408, "grad_norm": 1.7112505440584445, "learning_rate": 5.475901761041168e-06, "loss": 0.4416, "step": 26639 }, { "epoch": 1.979933110367893, "grad_norm": 1.934591948271459, "learning_rate": 5.475186215450368e-06, "loss": 0.5595, "step": 26640 }, { "epoch": 1.9800074321813452, "grad_norm": 2.5424953405322643, "learning_rate": 5.474470698990081e-06, "loss": 0.5713, "step": 26641 }, { "epoch": 1.9800817539947975, "grad_norm": 1.9159590239335296, "learning_rate": 5.4737552116649194e-06, "loss": 0.5987, "step": 26642 }, { "epoch": 1.9801560758082497, "grad_norm": 1.4204642347490786, "learning_rate": 5.4730397534794855e-06, "loss": 0.3145, "step": 26643 }, { "epoch": 1.980230397621702, "grad_norm": 1.7798538426990291, "learning_rate": 5.4723243244383885e-06, "loss": 0.4696, "step": 26644 }, { "epoch": 1.9803047194351542, "grad_norm": 1.9180492074562399, "learning_rate": 5.471608924546234e-06, "loss": 0.513, "step": 26645 }, { "epoch": 1.9803790412486064, "grad_norm": 1.8782096851627683, "learning_rate": 5.470893553807625e-06, "loss": 0.5455, "step": 26646 }, { "epoch": 1.9804533630620589, "grad_norm": 1.7142952714289754, "learning_rate": 5.470178212227168e-06, "loss": 0.471, "step": 26647 }, { "epoch": 1.980527684875511, "grad_norm": 1.7582940080392817, "learning_rate": 5.4694628998094654e-06, "loss": 0.462, "step": 26648 }, { "epoch": 1.9806020066889634, "grad_norm": 1.9794567794243596, "learning_rate": 5.4687476165591305e-06, "loss": 0.5097, "step": 26649 }, { "epoch": 1.9806763285024154, "grad_norm": 1.9276456480169075, "learning_rate": 5.468032362480762e-06, "loss": 0.4551, "step": 26650 }, { "epoch": 1.9807506503158678, "grad_norm": 2.1078527151205715, "learning_rate": 5.467317137578962e-06, "loss": 0.6847, "step": 26651 }, { "epoch": 1.9808249721293198, "grad_norm": 1.9835750132862693, "learning_rate": 5.466601941858345e-06, "loss": 0.5724, "step": 26652 }, { "epoch": 1.9808992939427723, "grad_norm": 1.7655850892647216, "learning_rate": 5.4658867753235035e-06, "loss": 0.5587, "step": 26653 }, { "epoch": 1.9809736157562243, "grad_norm": 1.8123096409686152, "learning_rate": 5.465171637979052e-06, "loss": 0.3784, "step": 26654 }, { "epoch": 1.9810479375696768, "grad_norm": 2.588531679131047, "learning_rate": 5.46445652982959e-06, "loss": 0.6935, "step": 26655 }, { "epoch": 1.9811222593831288, "grad_norm": 1.7241163265961308, "learning_rate": 5.463741450879718e-06, "loss": 0.5249, "step": 26656 }, { "epoch": 1.9811965811965813, "grad_norm": 2.19796732114032, "learning_rate": 5.463026401134052e-06, "loss": 0.4964, "step": 26657 }, { "epoch": 1.9812709030100333, "grad_norm": 1.9518660638276288, "learning_rate": 5.462311380597177e-06, "loss": 0.5053, "step": 26658 }, { "epoch": 1.9813452248234857, "grad_norm": 1.7424278269828066, "learning_rate": 5.461596389273711e-06, "loss": 0.5453, "step": 26659 }, { "epoch": 1.981419546636938, "grad_norm": 2.0966234825557213, "learning_rate": 5.460881427168247e-06, "loss": 0.6438, "step": 26660 }, { "epoch": 1.9814938684503902, "grad_norm": 2.368252670085563, "learning_rate": 5.460166494285397e-06, "loss": 0.6431, "step": 26661 }, { "epoch": 1.9815681902638425, "grad_norm": 1.8570315915318887, "learning_rate": 5.4594515906297605e-06, "loss": 0.5549, "step": 26662 }, { "epoch": 1.9816425120772947, "grad_norm": 1.712872656887264, "learning_rate": 5.458736716205935e-06, "loss": 0.4403, "step": 26663 }, { "epoch": 1.981716833890747, "grad_norm": 1.937752342266742, "learning_rate": 5.458021871018531e-06, "loss": 0.5485, "step": 26664 }, { "epoch": 1.9817911557041992, "grad_norm": 4.451748195509542, "learning_rate": 5.457307055072141e-06, "loss": 0.6376, "step": 26665 }, { "epoch": 1.9818654775176514, "grad_norm": 2.3380364607428934, "learning_rate": 5.456592268371378e-06, "loss": 0.5477, "step": 26666 }, { "epoch": 1.9819397993311036, "grad_norm": 1.9351801529212032, "learning_rate": 5.455877510920837e-06, "loss": 0.6092, "step": 26667 }, { "epoch": 1.9820141211445559, "grad_norm": 1.9010932158010587, "learning_rate": 5.455162782725122e-06, "loss": 0.4901, "step": 26668 }, { "epoch": 1.9820884429580081, "grad_norm": 1.7031753129415022, "learning_rate": 5.454448083788832e-06, "loss": 0.6763, "step": 26669 }, { "epoch": 1.9821627647714606, "grad_norm": 1.7393987672719908, "learning_rate": 5.453733414116566e-06, "loss": 0.5477, "step": 26670 }, { "epoch": 1.9822370865849126, "grad_norm": 1.6186976588563435, "learning_rate": 5.453018773712933e-06, "loss": 0.5125, "step": 26671 }, { "epoch": 1.982311408398365, "grad_norm": 2.102198477566109, "learning_rate": 5.452304162582529e-06, "loss": 0.5558, "step": 26672 }, { "epoch": 1.982385730211817, "grad_norm": 2.3863808972693206, "learning_rate": 5.4515895807299505e-06, "loss": 0.5074, "step": 26673 }, { "epoch": 1.9824600520252695, "grad_norm": 2.395646345239545, "learning_rate": 5.450875028159806e-06, "loss": 0.664, "step": 26674 }, { "epoch": 1.9825343738387216, "grad_norm": 2.0998967865628906, "learning_rate": 5.450160504876689e-06, "loss": 0.5817, "step": 26675 }, { "epoch": 1.982608695652174, "grad_norm": 1.4803534807225218, "learning_rate": 5.449446010885205e-06, "loss": 0.3717, "step": 26676 }, { "epoch": 1.982683017465626, "grad_norm": 2.2868690693445943, "learning_rate": 5.448731546189953e-06, "loss": 0.5929, "step": 26677 }, { "epoch": 1.9827573392790785, "grad_norm": 2.220567706719414, "learning_rate": 5.448017110795527e-06, "loss": 0.5131, "step": 26678 }, { "epoch": 1.9828316610925305, "grad_norm": 1.9021434808061324, "learning_rate": 5.447302704706534e-06, "loss": 0.565, "step": 26679 }, { "epoch": 1.982905982905983, "grad_norm": 1.7432231802312228, "learning_rate": 5.446588327927569e-06, "loss": 0.4263, "step": 26680 }, { "epoch": 1.9829803047194352, "grad_norm": 2.11987254077205, "learning_rate": 5.4458739804632346e-06, "loss": 0.564, "step": 26681 }, { "epoch": 1.9830546265328874, "grad_norm": 2.079840419799854, "learning_rate": 5.445159662318121e-06, "loss": 0.5301, "step": 26682 }, { "epoch": 1.9831289483463397, "grad_norm": 1.9052932971184586, "learning_rate": 5.444445373496838e-06, "loss": 0.5624, "step": 26683 }, { "epoch": 1.983203270159792, "grad_norm": 1.722166957346695, "learning_rate": 5.4437311140039794e-06, "loss": 0.5361, "step": 26684 }, { "epoch": 1.9832775919732442, "grad_norm": 1.9618897571059697, "learning_rate": 5.443016883844139e-06, "loss": 0.4036, "step": 26685 }, { "epoch": 1.9833519137866964, "grad_norm": 2.2084868292522044, "learning_rate": 5.442302683021925e-06, "loss": 0.6572, "step": 26686 }, { "epoch": 1.9834262356001486, "grad_norm": 1.9944383027862207, "learning_rate": 5.441588511541924e-06, "loss": 0.6889, "step": 26687 }, { "epoch": 1.9835005574136009, "grad_norm": 1.7028471031525632, "learning_rate": 5.440874369408744e-06, "loss": 0.5271, "step": 26688 }, { "epoch": 1.9835748792270531, "grad_norm": 2.2981613349421095, "learning_rate": 5.440160256626979e-06, "loss": 0.6746, "step": 26689 }, { "epoch": 1.9836492010405053, "grad_norm": 1.9755497636660067, "learning_rate": 5.439446173201222e-06, "loss": 0.5777, "step": 26690 }, { "epoch": 1.9837235228539576, "grad_norm": 2.2434833948974875, "learning_rate": 5.438732119136081e-06, "loss": 0.5211, "step": 26691 }, { "epoch": 1.9837978446674098, "grad_norm": 2.347191671912999, "learning_rate": 5.438018094436137e-06, "loss": 0.5987, "step": 26692 }, { "epoch": 1.9838721664808623, "grad_norm": 2.142135649453391, "learning_rate": 5.437304099106002e-06, "loss": 0.6022, "step": 26693 }, { "epoch": 1.9839464882943143, "grad_norm": 2.164662846260813, "learning_rate": 5.436590133150265e-06, "loss": 0.4856, "step": 26694 }, { "epoch": 1.9840208101077668, "grad_norm": 2.0339083278840984, "learning_rate": 5.43587619657352e-06, "loss": 0.5442, "step": 26695 }, { "epoch": 1.9840951319212188, "grad_norm": 2.0444729897615037, "learning_rate": 5.435162289380371e-06, "loss": 0.5656, "step": 26696 }, { "epoch": 1.9841694537346712, "grad_norm": 2.1131274494541796, "learning_rate": 5.4344484115754085e-06, "loss": 0.4727, "step": 26697 }, { "epoch": 1.9842437755481233, "grad_norm": 1.8710779066179228, "learning_rate": 5.433734563163231e-06, "loss": 0.6053, "step": 26698 }, { "epoch": 1.9843180973615757, "grad_norm": 2.316217907921451, "learning_rate": 5.433020744148436e-06, "loss": 0.6694, "step": 26699 }, { "epoch": 1.9843924191750277, "grad_norm": 1.933263070163586, "learning_rate": 5.432306954535611e-06, "loss": 0.5773, "step": 26700 }, { "epoch": 1.9844667409884802, "grad_norm": 1.8122934161854125, "learning_rate": 5.431593194329362e-06, "loss": 0.4843, "step": 26701 }, { "epoch": 1.9845410628019322, "grad_norm": 2.190222695011618, "learning_rate": 5.4308794635342776e-06, "loss": 0.5247, "step": 26702 }, { "epoch": 1.9846153846153847, "grad_norm": 1.6746788066392195, "learning_rate": 5.430165762154956e-06, "loss": 0.5497, "step": 26703 }, { "epoch": 1.984689706428837, "grad_norm": 2.167385684753583, "learning_rate": 5.429452090195985e-06, "loss": 0.6158, "step": 26704 }, { "epoch": 1.9847640282422891, "grad_norm": 2.355689485539463, "learning_rate": 5.428738447661968e-06, "loss": 0.608, "step": 26705 }, { "epoch": 1.9848383500557414, "grad_norm": 2.176267900238446, "learning_rate": 5.428024834557496e-06, "loss": 0.6349, "step": 26706 }, { "epoch": 1.9849126718691936, "grad_norm": 1.982465184598509, "learning_rate": 5.427311250887159e-06, "loss": 0.5647, "step": 26707 }, { "epoch": 1.9849869936826459, "grad_norm": 2.2239507593939596, "learning_rate": 5.426597696655558e-06, "loss": 0.5417, "step": 26708 }, { "epoch": 1.985061315496098, "grad_norm": 1.5412023976114795, "learning_rate": 5.4258841718672816e-06, "loss": 0.4832, "step": 26709 }, { "epoch": 1.9851356373095503, "grad_norm": 2.023560873064973, "learning_rate": 5.4251706765269274e-06, "loss": 0.6522, "step": 26710 }, { "epoch": 1.9852099591230026, "grad_norm": 1.8824783574648973, "learning_rate": 5.424457210639089e-06, "loss": 0.539, "step": 26711 }, { "epoch": 1.9852842809364548, "grad_norm": 1.7630688145316382, "learning_rate": 5.423743774208352e-06, "loss": 0.5624, "step": 26712 }, { "epoch": 1.985358602749907, "grad_norm": 1.7948083978086327, "learning_rate": 5.42303036723932e-06, "loss": 0.3807, "step": 26713 }, { "epoch": 1.9854329245633593, "grad_norm": 2.739327109210679, "learning_rate": 5.422316989736581e-06, "loss": 0.6231, "step": 26714 }, { "epoch": 1.9855072463768115, "grad_norm": 1.9704433287018601, "learning_rate": 5.421603641704728e-06, "loss": 0.438, "step": 26715 }, { "epoch": 1.985581568190264, "grad_norm": 2.286042967355465, "learning_rate": 5.420890323148353e-06, "loss": 0.6747, "step": 26716 }, { "epoch": 1.985655890003716, "grad_norm": 2.115431125855083, "learning_rate": 5.420177034072045e-06, "loss": 0.4899, "step": 26717 }, { "epoch": 1.9857302118171685, "grad_norm": 2.1426021015615846, "learning_rate": 5.419463774480402e-06, "loss": 0.5391, "step": 26718 }, { "epoch": 1.9858045336306205, "grad_norm": 1.8822821939579921, "learning_rate": 5.418750544378011e-06, "loss": 0.5384, "step": 26719 }, { "epoch": 1.985878855444073, "grad_norm": 2.1023580017261314, "learning_rate": 5.4180373437694685e-06, "loss": 0.7044, "step": 26720 }, { "epoch": 1.985953177257525, "grad_norm": 1.9720712606559938, "learning_rate": 5.417324172659366e-06, "loss": 0.5632, "step": 26721 }, { "epoch": 1.9860274990709774, "grad_norm": 1.8055351966139128, "learning_rate": 5.416611031052288e-06, "loss": 0.5185, "step": 26722 }, { "epoch": 1.9861018208844294, "grad_norm": 1.8030562981624423, "learning_rate": 5.4158979189528345e-06, "loss": 0.5841, "step": 26723 }, { "epoch": 1.986176142697882, "grad_norm": 1.993998785178226, "learning_rate": 5.415184836365588e-06, "loss": 0.5437, "step": 26724 }, { "epoch": 1.986250464511334, "grad_norm": 2.4301053537691875, "learning_rate": 5.414471783295152e-06, "loss": 0.6344, "step": 26725 }, { "epoch": 1.9863247863247864, "grad_norm": 1.754724885820505, "learning_rate": 5.413758759746099e-06, "loss": 0.5176, "step": 26726 }, { "epoch": 1.9863991081382386, "grad_norm": 2.110475320428631, "learning_rate": 5.413045765723035e-06, "loss": 0.555, "step": 26727 }, { "epoch": 1.9864734299516909, "grad_norm": 2.1852206126934335, "learning_rate": 5.412332801230544e-06, "loss": 0.6377, "step": 26728 }, { "epoch": 1.986547751765143, "grad_norm": 2.2632613709353646, "learning_rate": 5.411619866273212e-06, "loss": 0.7739, "step": 26729 }, { "epoch": 1.9866220735785953, "grad_norm": 1.7316212306585355, "learning_rate": 5.410906960855637e-06, "loss": 0.4623, "step": 26730 }, { "epoch": 1.9866963953920476, "grad_norm": 1.6674139742999095, "learning_rate": 5.410194084982403e-06, "loss": 0.4192, "step": 26731 }, { "epoch": 1.9867707172054998, "grad_norm": 1.713158497483157, "learning_rate": 5.409481238658103e-06, "loss": 0.4618, "step": 26732 }, { "epoch": 1.986845039018952, "grad_norm": 2.2182169377427057, "learning_rate": 5.408768421887325e-06, "loss": 0.6872, "step": 26733 }, { "epoch": 1.9869193608324043, "grad_norm": 1.8633471878416978, "learning_rate": 5.408055634674654e-06, "loss": 0.5055, "step": 26734 }, { "epoch": 1.9869936826458565, "grad_norm": 1.856450233099109, "learning_rate": 5.407342877024687e-06, "loss": 0.3269, "step": 26735 }, { "epoch": 1.9870680044593088, "grad_norm": 1.6392027934658737, "learning_rate": 5.406630148942008e-06, "loss": 0.5288, "step": 26736 }, { "epoch": 1.9871423262727612, "grad_norm": 1.9202789187125764, "learning_rate": 5.4059174504312064e-06, "loss": 0.5647, "step": 26737 }, { "epoch": 1.9872166480862132, "grad_norm": 1.982053238172273, "learning_rate": 5.4052047814968685e-06, "loss": 0.5718, "step": 26738 }, { "epoch": 1.9872909698996657, "grad_norm": 1.8797318604870787, "learning_rate": 5.404492142143581e-06, "loss": 0.5257, "step": 26739 }, { "epoch": 1.9873652917131177, "grad_norm": 2.262947835257865, "learning_rate": 5.403779532375937e-06, "loss": 0.6437, "step": 26740 }, { "epoch": 1.9874396135265702, "grad_norm": 1.8157347957107035, "learning_rate": 5.403066952198519e-06, "loss": 0.5892, "step": 26741 }, { "epoch": 1.9875139353400222, "grad_norm": 5.1194581005730635, "learning_rate": 5.402354401615921e-06, "loss": 0.576, "step": 26742 }, { "epoch": 1.9875882571534746, "grad_norm": 1.9095173797064595, "learning_rate": 5.401641880632727e-06, "loss": 0.4879, "step": 26743 }, { "epoch": 1.9876625789669267, "grad_norm": 2.380892668314151, "learning_rate": 5.400929389253521e-06, "loss": 0.6859, "step": 26744 }, { "epoch": 1.9877369007803791, "grad_norm": 1.791446705523924, "learning_rate": 5.400216927482896e-06, "loss": 0.4816, "step": 26745 }, { "epoch": 1.9878112225938311, "grad_norm": 2.2245388025450383, "learning_rate": 5.399504495325431e-06, "loss": 0.6022, "step": 26746 }, { "epoch": 1.9878855444072836, "grad_norm": 2.1427100143555564, "learning_rate": 5.398792092785728e-06, "loss": 0.5205, "step": 26747 }, { "epoch": 1.9879598662207358, "grad_norm": 2.7737540341448224, "learning_rate": 5.398079719868352e-06, "loss": 0.6998, "step": 26748 }, { "epoch": 1.988034188034188, "grad_norm": 2.250590665213573, "learning_rate": 5.397367376577905e-06, "loss": 0.6254, "step": 26749 }, { "epoch": 1.9881085098476403, "grad_norm": 2.31310417134273, "learning_rate": 5.396655062918967e-06, "loss": 0.7345, "step": 26750 }, { "epoch": 1.9881828316610926, "grad_norm": 1.7524234570331858, "learning_rate": 5.395942778896123e-06, "loss": 0.3989, "step": 26751 }, { "epoch": 1.9882571534745448, "grad_norm": 2.0126702864808306, "learning_rate": 5.395230524513963e-06, "loss": 0.7108, "step": 26752 }, { "epoch": 1.988331475287997, "grad_norm": 2.313778183354853, "learning_rate": 5.394518299777071e-06, "loss": 0.3767, "step": 26753 }, { "epoch": 1.9884057971014493, "grad_norm": 1.949661833266914, "learning_rate": 5.393806104690027e-06, "loss": 0.5016, "step": 26754 }, { "epoch": 1.9884801189149015, "grad_norm": 2.2631453815774987, "learning_rate": 5.393093939257424e-06, "loss": 0.5995, "step": 26755 }, { "epoch": 1.9885544407283537, "grad_norm": 1.6477129948265072, "learning_rate": 5.392381803483838e-06, "loss": 0.5695, "step": 26756 }, { "epoch": 1.988628762541806, "grad_norm": 1.8579314797904551, "learning_rate": 5.391669697373864e-06, "loss": 0.4584, "step": 26757 }, { "epoch": 1.9887030843552582, "grad_norm": 1.9133924715778932, "learning_rate": 5.3909576209320815e-06, "loss": 0.5702, "step": 26758 }, { "epoch": 1.9887774061687105, "grad_norm": 1.4940751322564796, "learning_rate": 5.390245574163074e-06, "loss": 0.4131, "step": 26759 }, { "epoch": 1.988851727982163, "grad_norm": 2.2114938349148923, "learning_rate": 5.389533557071427e-06, "loss": 0.5157, "step": 26760 }, { "epoch": 1.988926049795615, "grad_norm": 2.18687506511459, "learning_rate": 5.38882156966172e-06, "loss": 0.4351, "step": 26761 }, { "epoch": 1.9890003716090674, "grad_norm": 1.9950582560704149, "learning_rate": 5.388109611938543e-06, "loss": 0.4859, "step": 26762 }, { "epoch": 1.9890746934225194, "grad_norm": 2.028412149833452, "learning_rate": 5.387397683906475e-06, "loss": 0.6241, "step": 26763 }, { "epoch": 1.9891490152359719, "grad_norm": 2.196309349608614, "learning_rate": 5.386685785570104e-06, "loss": 0.5236, "step": 26764 }, { "epoch": 1.989223337049424, "grad_norm": 1.9029649542118052, "learning_rate": 5.385973916934011e-06, "loss": 0.5677, "step": 26765 }, { "epoch": 1.9892976588628764, "grad_norm": 1.8642486672342866, "learning_rate": 5.385262078002774e-06, "loss": 0.5238, "step": 26766 }, { "epoch": 1.9893719806763284, "grad_norm": 2.0248065421215706, "learning_rate": 5.384550268780984e-06, "loss": 0.5537, "step": 26767 }, { "epoch": 1.9894463024897808, "grad_norm": 1.8764887983483178, "learning_rate": 5.3838384892732175e-06, "loss": 0.5994, "step": 26768 }, { "epoch": 1.9895206243032328, "grad_norm": 2.1726737870000243, "learning_rate": 5.383126739484062e-06, "loss": 0.5988, "step": 26769 }, { "epoch": 1.9895949461166853, "grad_norm": 1.9710607708027148, "learning_rate": 5.382415019418098e-06, "loss": 0.5494, "step": 26770 }, { "epoch": 1.9896692679301375, "grad_norm": 2.025262396536089, "learning_rate": 5.381703329079906e-06, "loss": 0.5574, "step": 26771 }, { "epoch": 1.9897435897435898, "grad_norm": 1.8531422525591874, "learning_rate": 5.380991668474068e-06, "loss": 0.5325, "step": 26772 }, { "epoch": 1.989817911557042, "grad_norm": 1.9502514374050521, "learning_rate": 5.380280037605162e-06, "loss": 0.4771, "step": 26773 }, { "epoch": 1.9898922333704943, "grad_norm": 1.898815570150309, "learning_rate": 5.379568436477778e-06, "loss": 0.5511, "step": 26774 }, { "epoch": 1.9899665551839465, "grad_norm": 1.7669002738441075, "learning_rate": 5.378856865096492e-06, "loss": 0.4209, "step": 26775 }, { "epoch": 1.9900408769973987, "grad_norm": 2.1857566712579355, "learning_rate": 5.378145323465882e-06, "loss": 0.7556, "step": 26776 }, { "epoch": 1.990115198810851, "grad_norm": 1.833947686699566, "learning_rate": 5.377433811590538e-06, "loss": 0.5173, "step": 26777 }, { "epoch": 1.9901895206243032, "grad_norm": 2.255920803537723, "learning_rate": 5.376722329475029e-06, "loss": 0.5351, "step": 26778 }, { "epoch": 1.9902638424377554, "grad_norm": 1.9998789962702987, "learning_rate": 5.376010877123947e-06, "loss": 0.6944, "step": 26779 }, { "epoch": 1.9903381642512077, "grad_norm": 1.7025052627511903, "learning_rate": 5.375299454541867e-06, "loss": 0.4829, "step": 26780 }, { "epoch": 1.99041248606466, "grad_norm": 1.8619076272209327, "learning_rate": 5.374588061733368e-06, "loss": 0.5459, "step": 26781 }, { "epoch": 1.9904868078781122, "grad_norm": 1.6200066526939016, "learning_rate": 5.3738766987030326e-06, "loss": 0.5005, "step": 26782 }, { "epoch": 1.9905611296915646, "grad_norm": 1.9514633386480376, "learning_rate": 5.373165365455435e-06, "loss": 0.516, "step": 26783 }, { "epoch": 1.9906354515050166, "grad_norm": 1.8979420077669027, "learning_rate": 5.372454061995161e-06, "loss": 0.5966, "step": 26784 }, { "epoch": 1.990709773318469, "grad_norm": 1.65470257727316, "learning_rate": 5.371742788326785e-06, "loss": 0.5216, "step": 26785 }, { "epoch": 1.9907840951319211, "grad_norm": 1.758832055083729, "learning_rate": 5.371031544454892e-06, "loss": 0.5718, "step": 26786 }, { "epoch": 1.9908584169453736, "grad_norm": 1.9482715738775163, "learning_rate": 5.370320330384058e-06, "loss": 0.6529, "step": 26787 }, { "epoch": 1.9909327387588256, "grad_norm": 2.6022817936158518, "learning_rate": 5.369609146118858e-06, "loss": 0.4153, "step": 26788 }, { "epoch": 1.991007060572278, "grad_norm": 1.6831078692947254, "learning_rate": 5.3688979916638786e-06, "loss": 0.4772, "step": 26789 }, { "epoch": 1.99108138238573, "grad_norm": 1.9645199981991308, "learning_rate": 5.368186867023688e-06, "loss": 0.663, "step": 26790 }, { "epoch": 1.9911557041991825, "grad_norm": 1.9768452713863343, "learning_rate": 5.367475772202875e-06, "loss": 0.5358, "step": 26791 }, { "epoch": 1.9912300260126345, "grad_norm": 2.296839120013541, "learning_rate": 5.366764707206013e-06, "loss": 0.5258, "step": 26792 }, { "epoch": 1.991304347826087, "grad_norm": 2.2265045887328823, "learning_rate": 5.366053672037678e-06, "loss": 0.5941, "step": 26793 }, { "epoch": 1.9913786696395392, "grad_norm": 2.2719968631238117, "learning_rate": 5.365342666702451e-06, "loss": 0.6797, "step": 26794 }, { "epoch": 1.9914529914529915, "grad_norm": 2.5789370996772085, "learning_rate": 5.3646316912049025e-06, "loss": 0.7149, "step": 26795 }, { "epoch": 1.9915273132664437, "grad_norm": 1.9004174030916183, "learning_rate": 5.3639207455496174e-06, "loss": 0.5284, "step": 26796 }, { "epoch": 1.991601635079896, "grad_norm": 2.043848869457292, "learning_rate": 5.363209829741171e-06, "loss": 0.6702, "step": 26797 }, { "epoch": 1.9916759568933482, "grad_norm": 2.444933455721427, "learning_rate": 5.362498943784136e-06, "loss": 0.6126, "step": 26798 }, { "epoch": 1.9917502787068004, "grad_norm": 2.073802201454819, "learning_rate": 5.361788087683095e-06, "loss": 0.5735, "step": 26799 }, { "epoch": 1.9918246005202527, "grad_norm": 18.28555547267695, "learning_rate": 5.361077261442618e-06, "loss": 0.5719, "step": 26800 }, { "epoch": 1.991898922333705, "grad_norm": 1.7408361880240268, "learning_rate": 5.3603664650672885e-06, "loss": 0.6008, "step": 26801 }, { "epoch": 1.9919732441471572, "grad_norm": 2.1234897232159806, "learning_rate": 5.359655698561679e-06, "loss": 0.5021, "step": 26802 }, { "epoch": 1.9920475659606094, "grad_norm": 2.0794586254856626, "learning_rate": 5.358944961930363e-06, "loss": 0.5515, "step": 26803 }, { "epoch": 1.9921218877740619, "grad_norm": 2.020207679721369, "learning_rate": 5.358234255177924e-06, "loss": 0.6536, "step": 26804 }, { "epoch": 1.9921962095875139, "grad_norm": 1.8295489261621576, "learning_rate": 5.357523578308925e-06, "loss": 0.5943, "step": 26805 }, { "epoch": 1.9922705314009663, "grad_norm": 1.729055225803426, "learning_rate": 5.356812931327953e-06, "loss": 0.4466, "step": 26806 }, { "epoch": 1.9923448532144183, "grad_norm": 2.36250657371367, "learning_rate": 5.356102314239575e-06, "loss": 0.7105, "step": 26807 }, { "epoch": 1.9924191750278708, "grad_norm": 2.220257006817347, "learning_rate": 5.355391727048371e-06, "loss": 0.5796, "step": 26808 }, { "epoch": 1.9924934968413228, "grad_norm": 2.1513592439580065, "learning_rate": 5.3546811697589165e-06, "loss": 0.7099, "step": 26809 }, { "epoch": 1.9925678186547753, "grad_norm": 1.8567539412738552, "learning_rate": 5.3539706423757785e-06, "loss": 0.5064, "step": 26810 }, { "epoch": 1.9926421404682273, "grad_norm": 1.745870764124625, "learning_rate": 5.3532601449035405e-06, "loss": 0.5783, "step": 26811 }, { "epoch": 1.9927164622816798, "grad_norm": 1.8578186851922984, "learning_rate": 5.352549677346768e-06, "loss": 0.459, "step": 26812 }, { "epoch": 1.9927907840951318, "grad_norm": 2.0086404886645677, "learning_rate": 5.351839239710044e-06, "loss": 0.615, "step": 26813 }, { "epoch": 1.9928651059085842, "grad_norm": 2.0723435074324996, "learning_rate": 5.351128831997938e-06, "loss": 0.5795, "step": 26814 }, { "epoch": 1.9929394277220365, "grad_norm": 1.8881172316563657, "learning_rate": 5.3504184542150226e-06, "loss": 0.633, "step": 26815 }, { "epoch": 1.9930137495354887, "grad_norm": 2.0129917079002118, "learning_rate": 5.349708106365873e-06, "loss": 0.5278, "step": 26816 }, { "epoch": 1.993088071348941, "grad_norm": 2.240036803654647, "learning_rate": 5.348997788455057e-06, "loss": 0.5607, "step": 26817 }, { "epoch": 1.9931623931623932, "grad_norm": 3.48489865711588, "learning_rate": 5.348287500487156e-06, "loss": 0.6054, "step": 26818 }, { "epoch": 1.9932367149758454, "grad_norm": 2.1882689119316114, "learning_rate": 5.3475772424667385e-06, "loss": 0.6122, "step": 26819 }, { "epoch": 1.9933110367892977, "grad_norm": 1.7343515627305617, "learning_rate": 5.3468670143983736e-06, "loss": 0.4114, "step": 26820 }, { "epoch": 1.99338535860275, "grad_norm": 1.7444964974400352, "learning_rate": 5.346156816286641e-06, "loss": 0.5629, "step": 26821 }, { "epoch": 1.9934596804162021, "grad_norm": 2.0522801696808015, "learning_rate": 5.345446648136106e-06, "loss": 0.5922, "step": 26822 }, { "epoch": 1.9935340022296544, "grad_norm": 1.9579060646138062, "learning_rate": 5.344736509951347e-06, "loss": 0.4282, "step": 26823 }, { "epoch": 1.9936083240431066, "grad_norm": 1.765740759612708, "learning_rate": 5.344026401736933e-06, "loss": 0.6254, "step": 26824 }, { "epoch": 1.9936826458565589, "grad_norm": 2.1471038580973474, "learning_rate": 5.343316323497431e-06, "loss": 0.6043, "step": 26825 }, { "epoch": 1.993756967670011, "grad_norm": 2.2130253082965035, "learning_rate": 5.342606275237423e-06, "loss": 0.4023, "step": 26826 }, { "epoch": 1.9938312894834636, "grad_norm": 2.347057356844008, "learning_rate": 5.341896256961472e-06, "loss": 0.596, "step": 26827 }, { "epoch": 1.9939056112969156, "grad_norm": 1.8732722062988505, "learning_rate": 5.341186268674152e-06, "loss": 0.5585, "step": 26828 }, { "epoch": 1.993979933110368, "grad_norm": 1.9673172448907927, "learning_rate": 5.3404763103800296e-06, "loss": 0.5558, "step": 26829 }, { "epoch": 1.99405425492382, "grad_norm": 2.476912859719615, "learning_rate": 5.339766382083682e-06, "loss": 0.6377, "step": 26830 }, { "epoch": 1.9941285767372725, "grad_norm": 2.1373748937794748, "learning_rate": 5.339056483789678e-06, "loss": 0.6491, "step": 26831 }, { "epoch": 1.9942028985507245, "grad_norm": 2.009339673556637, "learning_rate": 5.338346615502581e-06, "loss": 0.5849, "step": 26832 }, { "epoch": 1.994277220364177, "grad_norm": 1.971233046342617, "learning_rate": 5.337636777226971e-06, "loss": 0.4952, "step": 26833 }, { "epoch": 1.994351542177629, "grad_norm": 1.571973812820714, "learning_rate": 5.33692696896741e-06, "loss": 0.4508, "step": 26834 }, { "epoch": 1.9944258639910815, "grad_norm": 2.205653643611178, "learning_rate": 5.336217190728474e-06, "loss": 0.5711, "step": 26835 }, { "epoch": 1.9945001858045335, "grad_norm": 2.064008663751119, "learning_rate": 5.3355074425147314e-06, "loss": 0.5171, "step": 26836 }, { "epoch": 1.994574507617986, "grad_norm": 1.8753681056538416, "learning_rate": 5.334797724330745e-06, "loss": 0.5157, "step": 26837 }, { "epoch": 1.9946488294314382, "grad_norm": 1.9996167415993606, "learning_rate": 5.334088036181096e-06, "loss": 0.5303, "step": 26838 }, { "epoch": 1.9947231512448904, "grad_norm": 1.8808193073296666, "learning_rate": 5.33337837807034e-06, "loss": 0.4451, "step": 26839 }, { "epoch": 1.9947974730583427, "grad_norm": 1.8876326578610751, "learning_rate": 5.332668750003054e-06, "loss": 0.5831, "step": 26840 }, { "epoch": 1.994871794871795, "grad_norm": 2.150920402190045, "learning_rate": 5.3319591519838056e-06, "loss": 0.5256, "step": 26841 }, { "epoch": 1.9949461166852471, "grad_norm": 1.9137097833612182, "learning_rate": 5.331249584017158e-06, "loss": 0.6456, "step": 26842 }, { "epoch": 1.9950204384986994, "grad_norm": 1.6717667155008837, "learning_rate": 5.3305400461076865e-06, "loss": 0.4801, "step": 26843 }, { "epoch": 1.9950947603121516, "grad_norm": 2.2996662590924406, "learning_rate": 5.3298305382599525e-06, "loss": 0.6159, "step": 26844 }, { "epoch": 1.9951690821256038, "grad_norm": 1.8937594227891676, "learning_rate": 5.3291210604785305e-06, "loss": 0.6077, "step": 26845 }, { "epoch": 1.995243403939056, "grad_norm": 1.9031146232365734, "learning_rate": 5.328411612767985e-06, "loss": 0.563, "step": 26846 }, { "epoch": 1.9953177257525083, "grad_norm": 1.705993150051083, "learning_rate": 5.32770219513288e-06, "loss": 0.4826, "step": 26847 }, { "epoch": 1.9953920475659606, "grad_norm": 2.0095556826912406, "learning_rate": 5.32699280757779e-06, "loss": 0.6424, "step": 26848 }, { "epoch": 1.9954663693794128, "grad_norm": 2.1070999541988242, "learning_rate": 5.326283450107277e-06, "loss": 0.6883, "step": 26849 }, { "epoch": 1.9955406911928653, "grad_norm": 2.0211312163552955, "learning_rate": 5.325574122725909e-06, "loss": 0.6523, "step": 26850 }, { "epoch": 1.9956150130063173, "grad_norm": 2.1909206834984323, "learning_rate": 5.3248648254382475e-06, "loss": 0.6874, "step": 26851 }, { "epoch": 1.9956893348197697, "grad_norm": 2.0213504212867592, "learning_rate": 5.324155558248869e-06, "loss": 0.6173, "step": 26852 }, { "epoch": 1.9957636566332217, "grad_norm": 1.933452965989852, "learning_rate": 5.323446321162333e-06, "loss": 0.5919, "step": 26853 }, { "epoch": 1.9958379784466742, "grad_norm": 2.3572374626093744, "learning_rate": 5.322737114183203e-06, "loss": 0.6174, "step": 26854 }, { "epoch": 1.9959123002601262, "grad_norm": 1.9194440085014164, "learning_rate": 5.322027937316054e-06, "loss": 0.4239, "step": 26855 }, { "epoch": 1.9959866220735787, "grad_norm": 1.6447745141713654, "learning_rate": 5.321318790565442e-06, "loss": 0.5877, "step": 26856 }, { "epoch": 1.9960609438870307, "grad_norm": 2.2247522328502467, "learning_rate": 5.32060967393594e-06, "loss": 0.6486, "step": 26857 }, { "epoch": 1.9961352657004832, "grad_norm": 1.821744741807431, "learning_rate": 5.3199005874321115e-06, "loss": 0.5868, "step": 26858 }, { "epoch": 1.9962095875139352, "grad_norm": 1.7668197602041074, "learning_rate": 5.319191531058515e-06, "loss": 0.5179, "step": 26859 }, { "epoch": 1.9962839093273876, "grad_norm": 2.0542739296478367, "learning_rate": 5.318482504819729e-06, "loss": 0.6304, "step": 26860 }, { "epoch": 1.9963582311408399, "grad_norm": 1.7189466022759838, "learning_rate": 5.317773508720302e-06, "loss": 0.4095, "step": 26861 }, { "epoch": 1.9964325529542921, "grad_norm": 1.929549788003971, "learning_rate": 5.3170645427648096e-06, "loss": 0.4834, "step": 26862 }, { "epoch": 1.9965068747677444, "grad_norm": 1.9907867426835384, "learning_rate": 5.316355606957813e-06, "loss": 0.5723, "step": 26863 }, { "epoch": 1.9965811965811966, "grad_norm": 1.6055538944774073, "learning_rate": 5.315646701303872e-06, "loss": 0.4379, "step": 26864 }, { "epoch": 1.9966555183946488, "grad_norm": 1.8527810209663897, "learning_rate": 5.314937825807558e-06, "loss": 0.5065, "step": 26865 }, { "epoch": 1.996729840208101, "grad_norm": 2.192313046169824, "learning_rate": 5.314228980473428e-06, "loss": 0.5086, "step": 26866 }, { "epoch": 1.9968041620215533, "grad_norm": 2.070729306095315, "learning_rate": 5.313520165306052e-06, "loss": 0.5668, "step": 26867 }, { "epoch": 1.9968784838350055, "grad_norm": 2.0041804225894566, "learning_rate": 5.31281138030999e-06, "loss": 0.6558, "step": 26868 }, { "epoch": 1.9969528056484578, "grad_norm": 2.0423489852887178, "learning_rate": 5.312102625489802e-06, "loss": 0.6383, "step": 26869 }, { "epoch": 1.99702712746191, "grad_norm": 1.895759459970712, "learning_rate": 5.3113939008500585e-06, "loss": 0.6538, "step": 26870 }, { "epoch": 1.9971014492753625, "grad_norm": 1.7886332691660305, "learning_rate": 5.310685206395312e-06, "loss": 0.5554, "step": 26871 }, { "epoch": 1.9971757710888145, "grad_norm": 2.291874909847153, "learning_rate": 5.30997654213014e-06, "loss": 0.7561, "step": 26872 }, { "epoch": 1.997250092902267, "grad_norm": 2.1745144106785346, "learning_rate": 5.309267908059088e-06, "loss": 0.5049, "step": 26873 }, { "epoch": 1.997324414715719, "grad_norm": 2.1207025853121695, "learning_rate": 5.308559304186729e-06, "loss": 0.661, "step": 26874 }, { "epoch": 1.9973987365291714, "grad_norm": 1.9586324457381754, "learning_rate": 5.30785073051762e-06, "loss": 0.5682, "step": 26875 }, { "epoch": 1.9974730583426235, "grad_norm": 2.103577253369206, "learning_rate": 5.3071421870563245e-06, "loss": 0.5324, "step": 26876 }, { "epoch": 1.997547380156076, "grad_norm": 2.044887639976335, "learning_rate": 5.3064336738074056e-06, "loss": 0.598, "step": 26877 }, { "epoch": 1.997621701969528, "grad_norm": 1.8108634598193636, "learning_rate": 5.305725190775419e-06, "loss": 0.5313, "step": 26878 }, { "epoch": 1.9976960237829804, "grad_norm": 2.167621124175363, "learning_rate": 5.305016737964935e-06, "loss": 0.5661, "step": 26879 }, { "epoch": 1.9977703455964324, "grad_norm": 2.0891961742642073, "learning_rate": 5.30430831538051e-06, "loss": 0.6281, "step": 26880 }, { "epoch": 1.9978446674098849, "grad_norm": 2.6879383872077316, "learning_rate": 5.303599923026699e-06, "loss": 0.5683, "step": 26881 }, { "epoch": 1.9979189892233369, "grad_norm": 1.835318181288952, "learning_rate": 5.302891560908073e-06, "loss": 0.5855, "step": 26882 }, { "epoch": 1.9979933110367893, "grad_norm": 2.1052742933256634, "learning_rate": 5.302183229029188e-06, "loss": 0.5386, "step": 26883 }, { "epoch": 1.9980676328502416, "grad_norm": 1.9093673965832139, "learning_rate": 5.301474927394602e-06, "loss": 0.4957, "step": 26884 }, { "epoch": 1.9981419546636938, "grad_norm": 1.896177505470706, "learning_rate": 5.300766656008878e-06, "loss": 0.453, "step": 26885 }, { "epoch": 1.998216276477146, "grad_norm": 2.0986168271149515, "learning_rate": 5.30005841487657e-06, "loss": 0.618, "step": 26886 }, { "epoch": 1.9982905982905983, "grad_norm": 2.832575663747123, "learning_rate": 5.2993502040022445e-06, "loss": 0.4532, "step": 26887 }, { "epoch": 1.9983649201040505, "grad_norm": 2.061145892855384, "learning_rate": 5.298642023390457e-06, "loss": 0.5634, "step": 26888 }, { "epoch": 1.9984392419175028, "grad_norm": 1.839538965590467, "learning_rate": 5.297933873045771e-06, "loss": 0.5179, "step": 26889 }, { "epoch": 1.998513563730955, "grad_norm": 2.0690200125501135, "learning_rate": 5.297225752972743e-06, "loss": 0.4135, "step": 26890 }, { "epoch": 1.9985878855444072, "grad_norm": 1.9499415077338702, "learning_rate": 5.296517663175927e-06, "loss": 0.4952, "step": 26891 }, { "epoch": 1.9986622073578595, "grad_norm": 2.5514253652170145, "learning_rate": 5.295809603659892e-06, "loss": 0.5582, "step": 26892 }, { "epoch": 1.9987365291713117, "grad_norm": 1.8905007577682074, "learning_rate": 5.295101574429184e-06, "loss": 0.438, "step": 26893 }, { "epoch": 1.9988108509847642, "grad_norm": 2.216710858868272, "learning_rate": 5.294393575488378e-06, "loss": 0.5356, "step": 26894 }, { "epoch": 1.9988851727982162, "grad_norm": 2.130611074572875, "learning_rate": 5.293685606842014e-06, "loss": 0.6477, "step": 26895 }, { "epoch": 1.9989594946116687, "grad_norm": 2.048657484204592, "learning_rate": 5.292977668494661e-06, "loss": 0.6283, "step": 26896 }, { "epoch": 1.9990338164251207, "grad_norm": 1.7368456050913608, "learning_rate": 5.292269760450874e-06, "loss": 0.3791, "step": 26897 }, { "epoch": 1.9991081382385731, "grad_norm": 2.4823181584239955, "learning_rate": 5.291561882715206e-06, "loss": 0.6605, "step": 26898 }, { "epoch": 1.9991824600520252, "grad_norm": 2.0242461127687186, "learning_rate": 5.290854035292221e-06, "loss": 0.4608, "step": 26899 }, { "epoch": 1.9992567818654776, "grad_norm": 1.6087802970849803, "learning_rate": 5.290146218186475e-06, "loss": 0.5268, "step": 26900 }, { "epoch": 1.9993311036789296, "grad_norm": 1.999237563832499, "learning_rate": 5.289438431402518e-06, "loss": 0.548, "step": 26901 }, { "epoch": 1.999405425492382, "grad_norm": 2.038171512077731, "learning_rate": 5.288730674944918e-06, "loss": 0.5848, "step": 26902 }, { "epoch": 1.999479747305834, "grad_norm": 2.0873140131222896, "learning_rate": 5.288022948818219e-06, "loss": 0.5234, "step": 26903 }, { "epoch": 1.9995540691192866, "grad_norm": 2.0462845001248082, "learning_rate": 5.28731525302699e-06, "loss": 0.4484, "step": 26904 }, { "epoch": 1.9996283909327388, "grad_norm": 2.362171692991387, "learning_rate": 5.286607587575779e-06, "loss": 0.6485, "step": 26905 }, { "epoch": 1.999702712746191, "grad_norm": 1.748553256827234, "learning_rate": 5.285899952469145e-06, "loss": 0.4848, "step": 26906 }, { "epoch": 1.9997770345596433, "grad_norm": 4.78480701607897, "learning_rate": 5.285192347711642e-06, "loss": 0.5017, "step": 26907 }, { "epoch": 1.9998513563730955, "grad_norm": 1.9040078917646732, "learning_rate": 5.284484773307822e-06, "loss": 0.4217, "step": 26908 }, { "epoch": 1.9999256781865478, "grad_norm": 1.8476580958314601, "learning_rate": 5.283777229262248e-06, "loss": 0.5247, "step": 26909 }, { "epoch": 2.0, "grad_norm": 1.6250703425366129, "learning_rate": 5.283069715579469e-06, "loss": 0.4208, "step": 26910 }, { "epoch": 2.0000743218134525, "grad_norm": 1.6156582951458345, "learning_rate": 5.2823622322640446e-06, "loss": 0.3295, "step": 26911 }, { "epoch": 2.0001486436269045, "grad_norm": 1.960765402126161, "learning_rate": 5.2816547793205284e-06, "loss": 0.3781, "step": 26912 }, { "epoch": 2.000222965440357, "grad_norm": 1.7229369152076788, "learning_rate": 5.28094735675347e-06, "loss": 0.3096, "step": 26913 }, { "epoch": 2.000297287253809, "grad_norm": 1.9588797234731359, "learning_rate": 5.28023996456743e-06, "loss": 0.3641, "step": 26914 }, { "epoch": 2.0003716090672614, "grad_norm": 2.102483748640431, "learning_rate": 5.279532602766959e-06, "loss": 0.4282, "step": 26915 }, { "epoch": 2.0004459308807134, "grad_norm": 1.9631819063359672, "learning_rate": 5.2788252713566145e-06, "loss": 0.3888, "step": 26916 }, { "epoch": 2.000520252694166, "grad_norm": 1.814851712602985, "learning_rate": 5.278117970340948e-06, "loss": 0.3072, "step": 26917 }, { "epoch": 2.000594574507618, "grad_norm": 1.5150436955378077, "learning_rate": 5.2774106997245135e-06, "loss": 0.3117, "step": 26918 }, { "epoch": 2.0006688963210704, "grad_norm": 1.5332857853430102, "learning_rate": 5.276703459511863e-06, "loss": 0.3208, "step": 26919 }, { "epoch": 2.0007432181345224, "grad_norm": 1.6172905219060472, "learning_rate": 5.275996249707548e-06, "loss": 0.297, "step": 26920 }, { "epoch": 2.000817539947975, "grad_norm": 1.3878591028484455, "learning_rate": 5.275289070316128e-06, "loss": 0.279, "step": 26921 }, { "epoch": 2.000891861761427, "grad_norm": 3.5948305108881797, "learning_rate": 5.27458192134215e-06, "loss": 0.3169, "step": 26922 }, { "epoch": 2.0009661835748793, "grad_norm": 1.637351361980788, "learning_rate": 5.273874802790165e-06, "loss": 0.2878, "step": 26923 }, { "epoch": 2.0010405053883313, "grad_norm": 2.065238227330141, "learning_rate": 5.273167714664733e-06, "loss": 0.3634, "step": 26924 }, { "epoch": 2.001114827201784, "grad_norm": 2.036676028211514, "learning_rate": 5.2724606569703986e-06, "loss": 0.2873, "step": 26925 }, { "epoch": 2.001189149015236, "grad_norm": 2.436403047281392, "learning_rate": 5.27175362971172e-06, "loss": 0.3763, "step": 26926 }, { "epoch": 2.0012634708286883, "grad_norm": 2.045419713987856, "learning_rate": 5.271046632893248e-06, "loss": 0.2912, "step": 26927 }, { "epoch": 2.0013377926421403, "grad_norm": 2.44738163708121, "learning_rate": 5.270339666519531e-06, "loss": 0.3383, "step": 26928 }, { "epoch": 2.0014121144555928, "grad_norm": 2.202690732290089, "learning_rate": 5.269632730595122e-06, "loss": 0.2593, "step": 26929 }, { "epoch": 2.0014864362690448, "grad_norm": 2.2508104942628866, "learning_rate": 5.268925825124569e-06, "loss": 0.2974, "step": 26930 }, { "epoch": 2.0015607580824972, "grad_norm": 2.2012376224812007, "learning_rate": 5.268218950112429e-06, "loss": 0.2954, "step": 26931 }, { "epoch": 2.0016350798959492, "grad_norm": 2.010939119711379, "learning_rate": 5.267512105563246e-06, "loss": 0.3306, "step": 26932 }, { "epoch": 2.0017094017094017, "grad_norm": 2.178830103580522, "learning_rate": 5.266805291481578e-06, "loss": 0.3315, "step": 26933 }, { "epoch": 2.001783723522854, "grad_norm": 1.8105273811214295, "learning_rate": 5.266098507871973e-06, "loss": 0.29, "step": 26934 }, { "epoch": 2.001858045336306, "grad_norm": 2.085755557708916, "learning_rate": 5.265391754738975e-06, "loss": 0.3571, "step": 26935 }, { "epoch": 2.0019323671497586, "grad_norm": 1.5828228336647858, "learning_rate": 5.264685032087145e-06, "loss": 0.2324, "step": 26936 }, { "epoch": 2.0020066889632107, "grad_norm": 2.16418715566083, "learning_rate": 5.263978339921022e-06, "loss": 0.3312, "step": 26937 }, { "epoch": 2.002081010776663, "grad_norm": 2.0857313555544255, "learning_rate": 5.263271678245165e-06, "loss": 0.2191, "step": 26938 }, { "epoch": 2.002155332590115, "grad_norm": 2.3954260261818563, "learning_rate": 5.262565047064119e-06, "loss": 0.3387, "step": 26939 }, { "epoch": 2.0022296544035676, "grad_norm": 1.9036447487468622, "learning_rate": 5.261858446382433e-06, "loss": 0.2582, "step": 26940 }, { "epoch": 2.0023039762170196, "grad_norm": 1.7692720971327613, "learning_rate": 5.261151876204658e-06, "loss": 0.2762, "step": 26941 }, { "epoch": 2.002378298030472, "grad_norm": 1.9783974985190897, "learning_rate": 5.260445336535336e-06, "loss": 0.2827, "step": 26942 }, { "epoch": 2.002452619843924, "grad_norm": 1.8332252044689992, "learning_rate": 5.259738827379024e-06, "loss": 0.2998, "step": 26943 }, { "epoch": 2.0025269416573765, "grad_norm": 1.735824266355903, "learning_rate": 5.2590323487402695e-06, "loss": 0.2798, "step": 26944 }, { "epoch": 2.0026012634708286, "grad_norm": 2.500178380645686, "learning_rate": 5.258325900623613e-06, "loss": 0.4623, "step": 26945 }, { "epoch": 2.002675585284281, "grad_norm": 1.6858894211877307, "learning_rate": 5.2576194830336136e-06, "loss": 0.2561, "step": 26946 }, { "epoch": 2.002749907097733, "grad_norm": 2.217083090512688, "learning_rate": 5.25691309597481e-06, "loss": 0.3284, "step": 26947 }, { "epoch": 2.0028242289111855, "grad_norm": 2.2438075754332063, "learning_rate": 5.256206739451757e-06, "loss": 0.3943, "step": 26948 }, { "epoch": 2.0028985507246375, "grad_norm": 2.084001075409973, "learning_rate": 5.255500413468999e-06, "loss": 0.3964, "step": 26949 }, { "epoch": 2.00297287253809, "grad_norm": 2.093137567241107, "learning_rate": 5.25479411803108e-06, "loss": 0.3157, "step": 26950 }, { "epoch": 2.003047194351542, "grad_norm": 1.9380717075759402, "learning_rate": 5.2540878531425574e-06, "loss": 0.2592, "step": 26951 }, { "epoch": 2.0031215161649945, "grad_norm": 1.7139935141122546, "learning_rate": 5.253381618807963e-06, "loss": 0.2867, "step": 26952 }, { "epoch": 2.0031958379784465, "grad_norm": 1.7102595236499698, "learning_rate": 5.252675415031856e-06, "loss": 0.2277, "step": 26953 }, { "epoch": 2.003270159791899, "grad_norm": 1.8254489661228552, "learning_rate": 5.251969241818774e-06, "loss": 0.2775, "step": 26954 }, { "epoch": 2.0033444816053514, "grad_norm": 2.0086694119600543, "learning_rate": 5.251263099173272e-06, "loss": 0.3448, "step": 26955 }, { "epoch": 2.0034188034188034, "grad_norm": 1.654667231631463, "learning_rate": 5.250556987099891e-06, "loss": 0.2629, "step": 26956 }, { "epoch": 2.003493125232256, "grad_norm": 1.9909055518526522, "learning_rate": 5.249850905603174e-06, "loss": 0.3223, "step": 26957 }, { "epoch": 2.003567447045708, "grad_norm": 2.172270361630294, "learning_rate": 5.2491448546876744e-06, "loss": 0.3823, "step": 26958 }, { "epoch": 2.0036417688591603, "grad_norm": 1.9748646475602882, "learning_rate": 5.2484388343579295e-06, "loss": 0.3063, "step": 26959 }, { "epoch": 2.0037160906726124, "grad_norm": 1.9925048264668794, "learning_rate": 5.247732844618494e-06, "loss": 0.2928, "step": 26960 }, { "epoch": 2.003790412486065, "grad_norm": 2.1755772222576093, "learning_rate": 5.247026885473907e-06, "loss": 0.3351, "step": 26961 }, { "epoch": 2.003864734299517, "grad_norm": 1.9477978302571752, "learning_rate": 5.246320956928714e-06, "loss": 0.2649, "step": 26962 }, { "epoch": 2.0039390561129693, "grad_norm": 2.3778240022680586, "learning_rate": 5.245615058987461e-06, "loss": 0.2763, "step": 26963 }, { "epoch": 2.0040133779264213, "grad_norm": 2.401289266321766, "learning_rate": 5.244909191654686e-06, "loss": 0.3635, "step": 26964 }, { "epoch": 2.0040876997398738, "grad_norm": 2.944711037789899, "learning_rate": 5.244203354934945e-06, "loss": 0.3286, "step": 26965 }, { "epoch": 2.004162021553326, "grad_norm": 2.0410165122297053, "learning_rate": 5.243497548832775e-06, "loss": 0.3079, "step": 26966 }, { "epoch": 2.0042363433667783, "grad_norm": 2.0913438224576817, "learning_rate": 5.242791773352715e-06, "loss": 0.3733, "step": 26967 }, { "epoch": 2.0043106651802303, "grad_norm": 1.804281929655403, "learning_rate": 5.2420860284993224e-06, "loss": 0.2959, "step": 26968 }, { "epoch": 2.0043849869936827, "grad_norm": 1.7518186924808474, "learning_rate": 5.241380314277127e-06, "loss": 0.2178, "step": 26969 }, { "epoch": 2.0044593088071347, "grad_norm": 2.6227851359266694, "learning_rate": 5.240674630690682e-06, "loss": 0.3222, "step": 26970 }, { "epoch": 2.004533630620587, "grad_norm": 1.8505797948796154, "learning_rate": 5.239968977744527e-06, "loss": 0.3377, "step": 26971 }, { "epoch": 2.004607952434039, "grad_norm": 1.8962559896402154, "learning_rate": 5.239263355443202e-06, "loss": 0.3238, "step": 26972 }, { "epoch": 2.0046822742474917, "grad_norm": 2.963401551819269, "learning_rate": 5.238557763791259e-06, "loss": 0.3252, "step": 26973 }, { "epoch": 2.0047565960609437, "grad_norm": 1.818036489726181, "learning_rate": 5.237852202793226e-06, "loss": 0.2892, "step": 26974 }, { "epoch": 2.004830917874396, "grad_norm": 2.1413879375670803, "learning_rate": 5.237146672453659e-06, "loss": 0.2377, "step": 26975 }, { "epoch": 2.004905239687848, "grad_norm": 2.1160943404785226, "learning_rate": 5.2364411727770894e-06, "loss": 0.2943, "step": 26976 }, { "epoch": 2.0049795615013006, "grad_norm": 2.528128921934507, "learning_rate": 5.235735703768068e-06, "loss": 0.3573, "step": 26977 }, { "epoch": 2.005053883314753, "grad_norm": 2.6224751106961217, "learning_rate": 5.235030265431134e-06, "loss": 0.3097, "step": 26978 }, { "epoch": 2.005128205128205, "grad_norm": 1.819845667364956, "learning_rate": 5.234324857770823e-06, "loss": 0.2675, "step": 26979 }, { "epoch": 2.0052025269416576, "grad_norm": 2.0479399805648986, "learning_rate": 5.233619480791685e-06, "loss": 0.3332, "step": 26980 }, { "epoch": 2.0052768487551096, "grad_norm": 1.7957092205221759, "learning_rate": 5.2329141344982546e-06, "loss": 0.2384, "step": 26981 }, { "epoch": 2.005351170568562, "grad_norm": 3.3781161090031953, "learning_rate": 5.232208818895079e-06, "loss": 0.27, "step": 26982 }, { "epoch": 2.005425492382014, "grad_norm": 2.149914975448351, "learning_rate": 5.2315035339866975e-06, "loss": 0.2862, "step": 26983 }, { "epoch": 2.0054998141954665, "grad_norm": 1.6375529579197832, "learning_rate": 5.2307982797776425e-06, "loss": 0.1877, "step": 26984 }, { "epoch": 2.0055741360089185, "grad_norm": 3.8477885216163794, "learning_rate": 5.23009305627247e-06, "loss": 0.3313, "step": 26985 }, { "epoch": 2.005648457822371, "grad_norm": 2.2509723103918287, "learning_rate": 5.229387863475701e-06, "loss": 0.3589, "step": 26986 }, { "epoch": 2.005722779635823, "grad_norm": 2.687315752018299, "learning_rate": 5.228682701391892e-06, "loss": 0.403, "step": 26987 }, { "epoch": 2.0057971014492755, "grad_norm": 2.154613067651155, "learning_rate": 5.227977570025574e-06, "loss": 0.3242, "step": 26988 }, { "epoch": 2.0058714232627275, "grad_norm": 2.2593594878631342, "learning_rate": 5.227272469381286e-06, "loss": 0.3208, "step": 26989 }, { "epoch": 2.00594574507618, "grad_norm": 1.9941125371112072, "learning_rate": 5.226567399463574e-06, "loss": 0.3167, "step": 26990 }, { "epoch": 2.006020066889632, "grad_norm": 2.491464102031585, "learning_rate": 5.22586236027697e-06, "loss": 0.3829, "step": 26991 }, { "epoch": 2.0060943887030844, "grad_norm": 2.1781024361496697, "learning_rate": 5.225157351826019e-06, "loss": 0.2893, "step": 26992 }, { "epoch": 2.0061687105165364, "grad_norm": 1.6759315714801615, "learning_rate": 5.224452374115258e-06, "loss": 0.2236, "step": 26993 }, { "epoch": 2.006243032329989, "grad_norm": 2.155868947427481, "learning_rate": 5.223747427149221e-06, "loss": 0.2934, "step": 26994 }, { "epoch": 2.006317354143441, "grad_norm": 2.5053907240376523, "learning_rate": 5.223042510932455e-06, "loss": 0.3546, "step": 26995 }, { "epoch": 2.0063916759568934, "grad_norm": 1.973670866958189, "learning_rate": 5.222337625469491e-06, "loss": 0.2673, "step": 26996 }, { "epoch": 2.0064659977703454, "grad_norm": 2.697716367086444, "learning_rate": 5.221632770764871e-06, "loss": 0.4007, "step": 26997 }, { "epoch": 2.006540319583798, "grad_norm": 2.0781969058211684, "learning_rate": 5.220927946823128e-06, "loss": 0.3214, "step": 26998 }, { "epoch": 2.00661464139725, "grad_norm": 1.9377143689419347, "learning_rate": 5.220223153648806e-06, "loss": 0.284, "step": 26999 }, { "epoch": 2.0066889632107023, "grad_norm": 1.7480000080001088, "learning_rate": 5.219518391246438e-06, "loss": 0.261, "step": 27000 }, { "epoch": 2.006763285024155, "grad_norm": 2.2524726355867863, "learning_rate": 5.2188136596205605e-06, "loss": 0.3654, "step": 27001 }, { "epoch": 2.006837606837607, "grad_norm": 1.6471777368758305, "learning_rate": 5.218108958775716e-06, "loss": 0.2407, "step": 27002 }, { "epoch": 2.0069119286510593, "grad_norm": 2.3463062820747647, "learning_rate": 5.2174042887164346e-06, "loss": 0.2933, "step": 27003 }, { "epoch": 2.0069862504645113, "grad_norm": 1.663252706693311, "learning_rate": 5.21669964944726e-06, "loss": 0.2456, "step": 27004 }, { "epoch": 2.0070605722779638, "grad_norm": 1.9296451795413017, "learning_rate": 5.2159950409727245e-06, "loss": 0.2849, "step": 27005 }, { "epoch": 2.0071348940914158, "grad_norm": 2.148353496043716, "learning_rate": 5.215290463297361e-06, "loss": 0.2742, "step": 27006 }, { "epoch": 2.0072092159048682, "grad_norm": 2.6006091373111353, "learning_rate": 5.214585916425716e-06, "loss": 0.4258, "step": 27007 }, { "epoch": 2.0072835377183202, "grad_norm": 2.469415710460114, "learning_rate": 5.213881400362313e-06, "loss": 0.3595, "step": 27008 }, { "epoch": 2.0073578595317727, "grad_norm": 2.22643674615805, "learning_rate": 5.213176915111695e-06, "loss": 0.3117, "step": 27009 }, { "epoch": 2.0074321813452247, "grad_norm": 2.6657974979434482, "learning_rate": 5.212472460678397e-06, "loss": 0.4083, "step": 27010 }, { "epoch": 2.007506503158677, "grad_norm": 1.965065490223732, "learning_rate": 5.211768037066948e-06, "loss": 0.2756, "step": 27011 }, { "epoch": 2.007580824972129, "grad_norm": 2.7067813842437927, "learning_rate": 5.2110636442818915e-06, "loss": 0.3645, "step": 27012 }, { "epoch": 2.0076551467855817, "grad_norm": 2.596552452593286, "learning_rate": 5.210359282327756e-06, "loss": 0.4764, "step": 27013 }, { "epoch": 2.0077294685990337, "grad_norm": 2.2411705662062302, "learning_rate": 5.2096549512090825e-06, "loss": 0.3339, "step": 27014 }, { "epoch": 2.007803790412486, "grad_norm": 2.065944289557725, "learning_rate": 5.208950650930402e-06, "loss": 0.2634, "step": 27015 }, { "epoch": 2.007878112225938, "grad_norm": 1.9358544878526773, "learning_rate": 5.208246381496244e-06, "loss": 0.2706, "step": 27016 }, { "epoch": 2.0079524340393906, "grad_norm": 1.6408664903621082, "learning_rate": 5.207542142911152e-06, "loss": 0.1514, "step": 27017 }, { "epoch": 2.0080267558528426, "grad_norm": 1.8718209106516013, "learning_rate": 5.2068379351796505e-06, "loss": 0.2495, "step": 27018 }, { "epoch": 2.008101077666295, "grad_norm": 2.6520928403462003, "learning_rate": 5.206133758306286e-06, "loss": 0.3196, "step": 27019 }, { "epoch": 2.008175399479747, "grad_norm": 2.0359392344681844, "learning_rate": 5.205429612295575e-06, "loss": 0.3176, "step": 27020 }, { "epoch": 2.0082497212931996, "grad_norm": 2.0320098934898168, "learning_rate": 5.2047254971520636e-06, "loss": 0.2703, "step": 27021 }, { "epoch": 2.0083240431066516, "grad_norm": 2.6495940437076357, "learning_rate": 5.204021412880281e-06, "loss": 0.2357, "step": 27022 }, { "epoch": 2.008398364920104, "grad_norm": 2.307418210624366, "learning_rate": 5.203317359484757e-06, "loss": 0.2628, "step": 27023 }, { "epoch": 2.0084726867335565, "grad_norm": 2.276550850088913, "learning_rate": 5.202613336970028e-06, "loss": 0.3137, "step": 27024 }, { "epoch": 2.0085470085470085, "grad_norm": 2.199737470010624, "learning_rate": 5.201909345340628e-06, "loss": 0.3471, "step": 27025 }, { "epoch": 2.008621330360461, "grad_norm": 2.4536748303101725, "learning_rate": 5.20120538460108e-06, "loss": 0.3095, "step": 27026 }, { "epoch": 2.008695652173913, "grad_norm": 2.061865921547656, "learning_rate": 5.200501454755929e-06, "loss": 0.3003, "step": 27027 }, { "epoch": 2.0087699739873655, "grad_norm": 1.914949824530002, "learning_rate": 5.199797555809696e-06, "loss": 0.2313, "step": 27028 }, { "epoch": 2.0088442958008175, "grad_norm": 2.1594125860466087, "learning_rate": 5.199093687766921e-06, "loss": 0.3171, "step": 27029 }, { "epoch": 2.00891861761427, "grad_norm": 2.032939560826188, "learning_rate": 5.1983898506321325e-06, "loss": 0.3289, "step": 27030 }, { "epoch": 2.008992939427722, "grad_norm": 2.0616248955094325, "learning_rate": 5.19768604440986e-06, "loss": 0.3443, "step": 27031 }, { "epoch": 2.0090672612411744, "grad_norm": 1.9121443031242658, "learning_rate": 5.196982269104636e-06, "loss": 0.2069, "step": 27032 }, { "epoch": 2.0091415830546264, "grad_norm": 2.6256919293160794, "learning_rate": 5.196278524720988e-06, "loss": 0.3598, "step": 27033 }, { "epoch": 2.009215904868079, "grad_norm": 1.637896462237606, "learning_rate": 5.195574811263452e-06, "loss": 0.2294, "step": 27034 }, { "epoch": 2.009290226681531, "grad_norm": 2.5028650244526074, "learning_rate": 5.194871128736551e-06, "loss": 0.2323, "step": 27035 }, { "epoch": 2.0093645484949834, "grad_norm": 2.404999281747412, "learning_rate": 5.194167477144827e-06, "loss": 0.3543, "step": 27036 }, { "epoch": 2.0094388703084354, "grad_norm": 2.3622156784445467, "learning_rate": 5.193463856492802e-06, "loss": 0.2996, "step": 27037 }, { "epoch": 2.009513192121888, "grad_norm": 1.9092630778889992, "learning_rate": 5.192760266785004e-06, "loss": 0.318, "step": 27038 }, { "epoch": 2.00958751393534, "grad_norm": 2.097104452211519, "learning_rate": 5.19205670802597e-06, "loss": 0.2152, "step": 27039 }, { "epoch": 2.0096618357487923, "grad_norm": 1.9913272958975927, "learning_rate": 5.191353180220222e-06, "loss": 0.2985, "step": 27040 }, { "epoch": 2.0097361575622443, "grad_norm": 1.5620773600115285, "learning_rate": 5.1906496833723e-06, "loss": 0.2377, "step": 27041 }, { "epoch": 2.009810479375697, "grad_norm": 2.4166784053436303, "learning_rate": 5.1899462174867165e-06, "loss": 0.3693, "step": 27042 }, { "epoch": 2.009884801189149, "grad_norm": 2.000725568336886, "learning_rate": 5.189242782568016e-06, "loss": 0.3723, "step": 27043 }, { "epoch": 2.0099591230026013, "grad_norm": 2.3645553520246034, "learning_rate": 5.188539378620718e-06, "loss": 0.3352, "step": 27044 }, { "epoch": 2.0100334448160537, "grad_norm": 2.129316714822647, "learning_rate": 5.187836005649353e-06, "loss": 0.277, "step": 27045 }, { "epoch": 2.0101077666295057, "grad_norm": 2.281732804030263, "learning_rate": 5.1871326636584515e-06, "loss": 0.326, "step": 27046 }, { "epoch": 2.010182088442958, "grad_norm": 2.135967166473686, "learning_rate": 5.186429352652542e-06, "loss": 0.2761, "step": 27047 }, { "epoch": 2.01025641025641, "grad_norm": 2.0458021650464593, "learning_rate": 5.185726072636146e-06, "loss": 0.263, "step": 27048 }, { "epoch": 2.0103307320698627, "grad_norm": 2.2441821708988967, "learning_rate": 5.185022823613799e-06, "loss": 0.3453, "step": 27049 }, { "epoch": 2.0104050538833147, "grad_norm": 1.9927539075832603, "learning_rate": 5.184319605590022e-06, "loss": 0.2678, "step": 27050 }, { "epoch": 2.010479375696767, "grad_norm": 1.8096297264307133, "learning_rate": 5.183616418569349e-06, "loss": 0.2325, "step": 27051 }, { "epoch": 2.010553697510219, "grad_norm": 1.5535317764498602, "learning_rate": 5.182913262556304e-06, "loss": 0.2201, "step": 27052 }, { "epoch": 2.0106280193236716, "grad_norm": 2.2129486823555853, "learning_rate": 5.182210137555415e-06, "loss": 0.3121, "step": 27053 }, { "epoch": 2.0107023411371236, "grad_norm": 1.616577641463218, "learning_rate": 5.181507043571205e-06, "loss": 0.2508, "step": 27054 }, { "epoch": 2.010776662950576, "grad_norm": 2.027442034634817, "learning_rate": 5.1808039806081985e-06, "loss": 0.3033, "step": 27055 }, { "epoch": 2.010850984764028, "grad_norm": 2.7047945725836215, "learning_rate": 5.180100948670931e-06, "loss": 0.3376, "step": 27056 }, { "epoch": 2.0109253065774806, "grad_norm": 1.5218163728738807, "learning_rate": 5.17939794776392e-06, "loss": 0.1964, "step": 27057 }, { "epoch": 2.0109996283909326, "grad_norm": 1.8097969631224815, "learning_rate": 5.178694977891697e-06, "loss": 0.2515, "step": 27058 }, { "epoch": 2.011073950204385, "grad_norm": 1.891534929861526, "learning_rate": 5.1779920390587865e-06, "loss": 0.2377, "step": 27059 }, { "epoch": 2.011148272017837, "grad_norm": 1.6516150505022342, "learning_rate": 5.17728913126971e-06, "loss": 0.2142, "step": 27060 }, { "epoch": 2.0112225938312895, "grad_norm": 1.7707340980989634, "learning_rate": 5.176586254528999e-06, "loss": 0.2911, "step": 27061 }, { "epoch": 2.0112969156447416, "grad_norm": 1.9428034180229101, "learning_rate": 5.175883408841171e-06, "loss": 0.331, "step": 27062 }, { "epoch": 2.011371237458194, "grad_norm": 2.117558605312914, "learning_rate": 5.17518059421076e-06, "loss": 0.335, "step": 27063 }, { "epoch": 2.011445559271646, "grad_norm": 1.5064360561635959, "learning_rate": 5.1744778106422855e-06, "loss": 0.2347, "step": 27064 }, { "epoch": 2.0115198810850985, "grad_norm": 2.0664848788425774, "learning_rate": 5.173775058140273e-06, "loss": 0.3265, "step": 27065 }, { "epoch": 2.0115942028985505, "grad_norm": 2.1662936638678936, "learning_rate": 5.173072336709246e-06, "loss": 0.2762, "step": 27066 }, { "epoch": 2.011668524712003, "grad_norm": 2.095232940041469, "learning_rate": 5.172369646353724e-06, "loss": 0.3004, "step": 27067 }, { "epoch": 2.0117428465254554, "grad_norm": 2.7977190711253397, "learning_rate": 5.171666987078241e-06, "loss": 0.248, "step": 27068 }, { "epoch": 2.0118171683389074, "grad_norm": 2.115917800139434, "learning_rate": 5.170964358887315e-06, "loss": 0.3399, "step": 27069 }, { "epoch": 2.01189149015236, "grad_norm": 2.3397505286322775, "learning_rate": 5.170261761785466e-06, "loss": 0.3349, "step": 27070 }, { "epoch": 2.011965811965812, "grad_norm": 2.3323138909720176, "learning_rate": 5.1695591957772255e-06, "loss": 0.3775, "step": 27071 }, { "epoch": 2.0120401337792644, "grad_norm": 3.0808883338695603, "learning_rate": 5.168856660867107e-06, "loss": 0.2832, "step": 27072 }, { "epoch": 2.0121144555927164, "grad_norm": 2.2450334543797674, "learning_rate": 5.168154157059644e-06, "loss": 0.3171, "step": 27073 }, { "epoch": 2.012188777406169, "grad_norm": 1.4490280213656253, "learning_rate": 5.167451684359353e-06, "loss": 0.2255, "step": 27074 }, { "epoch": 2.012263099219621, "grad_norm": 2.366029896210112, "learning_rate": 5.1667492427707565e-06, "loss": 0.2454, "step": 27075 }, { "epoch": 2.0123374210330733, "grad_norm": 2.301760747262598, "learning_rate": 5.166046832298379e-06, "loss": 0.3174, "step": 27076 }, { "epoch": 2.0124117428465254, "grad_norm": 1.9669803710472085, "learning_rate": 5.1653444529467366e-06, "loss": 0.2634, "step": 27077 }, { "epoch": 2.012486064659978, "grad_norm": 2.233185489409557, "learning_rate": 5.164642104720361e-06, "loss": 0.3113, "step": 27078 }, { "epoch": 2.01256038647343, "grad_norm": 2.139283510484326, "learning_rate": 5.163939787623764e-06, "loss": 0.2952, "step": 27079 }, { "epoch": 2.0126347082868823, "grad_norm": 1.9098537804240838, "learning_rate": 5.1632375016614755e-06, "loss": 0.2608, "step": 27080 }, { "epoch": 2.0127090301003343, "grad_norm": 2.3988123582119187, "learning_rate": 5.1625352468380115e-06, "loss": 0.3423, "step": 27081 }, { "epoch": 2.0127833519137868, "grad_norm": 2.078977132822771, "learning_rate": 5.161833023157893e-06, "loss": 0.2683, "step": 27082 }, { "epoch": 2.012857673727239, "grad_norm": 2.310802775390003, "learning_rate": 5.161130830625645e-06, "loss": 0.3682, "step": 27083 }, { "epoch": 2.0129319955406912, "grad_norm": 2.5110567887258353, "learning_rate": 5.160428669245783e-06, "loss": 0.2711, "step": 27084 }, { "epoch": 2.0130063173541433, "grad_norm": 2.0715130742750314, "learning_rate": 5.159726539022832e-06, "loss": 0.2432, "step": 27085 }, { "epoch": 2.0130806391675957, "grad_norm": 2.290967772399802, "learning_rate": 5.159024439961312e-06, "loss": 0.3005, "step": 27086 }, { "epoch": 2.0131549609810477, "grad_norm": 2.5246487114279224, "learning_rate": 5.15832237206574e-06, "loss": 0.2205, "step": 27087 }, { "epoch": 2.0132292827945, "grad_norm": 2.070466194192891, "learning_rate": 5.1576203353406395e-06, "loss": 0.2017, "step": 27088 }, { "epoch": 2.013303604607952, "grad_norm": 2.3628171950978567, "learning_rate": 5.156918329790524e-06, "loss": 0.2285, "step": 27089 }, { "epoch": 2.0133779264214047, "grad_norm": 2.2699088362499533, "learning_rate": 5.156216355419918e-06, "loss": 0.2876, "step": 27090 }, { "epoch": 2.013452248234857, "grad_norm": 2.4857654840653223, "learning_rate": 5.1555144122333425e-06, "loss": 0.2681, "step": 27091 }, { "epoch": 2.013526570048309, "grad_norm": 2.13130997801387, "learning_rate": 5.1548125002353085e-06, "loss": 0.3053, "step": 27092 }, { "epoch": 2.0136008918617616, "grad_norm": 2.380867129996213, "learning_rate": 5.154110619430346e-06, "loss": 0.3201, "step": 27093 }, { "epoch": 2.0136752136752136, "grad_norm": 2.068617508497246, "learning_rate": 5.153408769822963e-06, "loss": 0.2872, "step": 27094 }, { "epoch": 2.013749535488666, "grad_norm": 2.0151870136708743, "learning_rate": 5.152706951417686e-06, "loss": 0.2896, "step": 27095 }, { "epoch": 2.013823857302118, "grad_norm": 1.8230830251318917, "learning_rate": 5.1520051642190305e-06, "loss": 0.2475, "step": 27096 }, { "epoch": 2.0138981791155706, "grad_norm": 2.3589656229498903, "learning_rate": 5.151303408231511e-06, "loss": 0.3852, "step": 27097 }, { "epoch": 2.0139725009290226, "grad_norm": 2.182283790750782, "learning_rate": 5.150601683459655e-06, "loss": 0.2988, "step": 27098 }, { "epoch": 2.014046822742475, "grad_norm": 2.4477406445305507, "learning_rate": 5.149899989907967e-06, "loss": 0.2849, "step": 27099 }, { "epoch": 2.014121144555927, "grad_norm": 2.3158974884229626, "learning_rate": 5.149198327580975e-06, "loss": 0.2114, "step": 27100 }, { "epoch": 2.0141954663693795, "grad_norm": 2.698711637904365, "learning_rate": 5.148496696483188e-06, "loss": 0.3966, "step": 27101 }, { "epoch": 2.0142697881828315, "grad_norm": 2.4755972243711626, "learning_rate": 5.147795096619133e-06, "loss": 0.3308, "step": 27102 }, { "epoch": 2.014344109996284, "grad_norm": 2.171938999979127, "learning_rate": 5.147093527993319e-06, "loss": 0.2522, "step": 27103 }, { "epoch": 2.014418431809736, "grad_norm": 2.2431577055909298, "learning_rate": 5.146391990610263e-06, "loss": 0.3031, "step": 27104 }, { "epoch": 2.0144927536231885, "grad_norm": 2.365363356611837, "learning_rate": 5.145690484474487e-06, "loss": 0.3414, "step": 27105 }, { "epoch": 2.0145670754366405, "grad_norm": 2.1750143862956044, "learning_rate": 5.1449890095905e-06, "loss": 0.3242, "step": 27106 }, { "epoch": 2.014641397250093, "grad_norm": 2.0688843746270122, "learning_rate": 5.1442875659628254e-06, "loss": 0.3038, "step": 27107 }, { "epoch": 2.014715719063545, "grad_norm": 2.015177164576839, "learning_rate": 5.143586153595976e-06, "loss": 0.2579, "step": 27108 }, { "epoch": 2.0147900408769974, "grad_norm": 2.261065549024806, "learning_rate": 5.142884772494466e-06, "loss": 0.3908, "step": 27109 }, { "epoch": 2.0148643626904494, "grad_norm": 2.2298139172226676, "learning_rate": 5.142183422662812e-06, "loss": 0.367, "step": 27110 }, { "epoch": 2.014938684503902, "grad_norm": 2.861184837682498, "learning_rate": 5.141482104105526e-06, "loss": 0.3331, "step": 27111 }, { "epoch": 2.0150130063173544, "grad_norm": 2.2881429709525394, "learning_rate": 5.14078081682713e-06, "loss": 0.3355, "step": 27112 }, { "epoch": 2.0150873281308064, "grad_norm": 1.6584107574150493, "learning_rate": 5.140079560832134e-06, "loss": 0.2601, "step": 27113 }, { "epoch": 2.015161649944259, "grad_norm": 2.004257379390045, "learning_rate": 5.1393783361250505e-06, "loss": 0.3486, "step": 27114 }, { "epoch": 2.015235971757711, "grad_norm": 2.3632707762767886, "learning_rate": 5.138677142710401e-06, "loss": 0.312, "step": 27115 }, { "epoch": 2.0153102935711633, "grad_norm": 2.3551956319141616, "learning_rate": 5.137975980592691e-06, "loss": 0.3254, "step": 27116 }, { "epoch": 2.0153846153846153, "grad_norm": 2.2403079322943977, "learning_rate": 5.137274849776444e-06, "loss": 0.2811, "step": 27117 }, { "epoch": 2.015458937198068, "grad_norm": 2.0814454926745007, "learning_rate": 5.136573750266169e-06, "loss": 0.2744, "step": 27118 }, { "epoch": 2.01553325901152, "grad_norm": 2.761313979176237, "learning_rate": 5.135872682066376e-06, "loss": 0.4283, "step": 27119 }, { "epoch": 2.0156075808249723, "grad_norm": 1.8035537455217365, "learning_rate": 5.135171645181591e-06, "loss": 0.2425, "step": 27120 }, { "epoch": 2.0156819026384243, "grad_norm": 2.311306493346103, "learning_rate": 5.13447063961631e-06, "loss": 0.361, "step": 27121 }, { "epoch": 2.0157562244518767, "grad_norm": 2.024962607534714, "learning_rate": 5.133769665375059e-06, "loss": 0.2837, "step": 27122 }, { "epoch": 2.0158305462653288, "grad_norm": 1.8942655232177075, "learning_rate": 5.133068722462342e-06, "loss": 0.2584, "step": 27123 }, { "epoch": 2.015904868078781, "grad_norm": 1.8124323232772688, "learning_rate": 5.132367810882679e-06, "loss": 0.2866, "step": 27124 }, { "epoch": 2.0159791898922332, "grad_norm": 2.01473802009239, "learning_rate": 5.131666930640581e-06, "loss": 0.2554, "step": 27125 }, { "epoch": 2.0160535117056857, "grad_norm": 2.561475363680217, "learning_rate": 5.130966081740554e-06, "loss": 0.402, "step": 27126 }, { "epoch": 2.0161278335191377, "grad_norm": 1.894345237881372, "learning_rate": 5.130265264187119e-06, "loss": 0.2518, "step": 27127 }, { "epoch": 2.01620215533259, "grad_norm": 2.322673846414165, "learning_rate": 5.129564477984779e-06, "loss": 0.299, "step": 27128 }, { "epoch": 2.016276477146042, "grad_norm": 2.2889037344361163, "learning_rate": 5.1288637231380545e-06, "loss": 0.3374, "step": 27129 }, { "epoch": 2.0163507989594947, "grad_norm": 1.6276099258974408, "learning_rate": 5.128162999651453e-06, "loss": 0.2206, "step": 27130 }, { "epoch": 2.0164251207729467, "grad_norm": 2.286656094834639, "learning_rate": 5.1274623075294805e-06, "loss": 0.3198, "step": 27131 }, { "epoch": 2.016499442586399, "grad_norm": 2.4240956236395084, "learning_rate": 5.126761646776661e-06, "loss": 0.2142, "step": 27132 }, { "epoch": 2.016573764399851, "grad_norm": 1.9125346264454526, "learning_rate": 5.12606101739749e-06, "loss": 0.2117, "step": 27133 }, { "epoch": 2.0166480862133036, "grad_norm": 2.4262199712624892, "learning_rate": 5.125360419396488e-06, "loss": 0.3101, "step": 27134 }, { "epoch": 2.016722408026756, "grad_norm": 2.204553715810098, "learning_rate": 5.124659852778164e-06, "loss": 0.3333, "step": 27135 }, { "epoch": 2.016796729840208, "grad_norm": 2.122360753127936, "learning_rate": 5.1239593175470225e-06, "loss": 0.2543, "step": 27136 }, { "epoch": 2.0168710516536605, "grad_norm": 3.2111148402705547, "learning_rate": 5.123258813707581e-06, "loss": 0.4321, "step": 27137 }, { "epoch": 2.0169453734671126, "grad_norm": 1.9890336782616296, "learning_rate": 5.122558341264343e-06, "loss": 0.2467, "step": 27138 }, { "epoch": 2.017019695280565, "grad_norm": 2.827717585614063, "learning_rate": 5.121857900221825e-06, "loss": 0.45, "step": 27139 }, { "epoch": 2.017094017094017, "grad_norm": 2.3709361821830948, "learning_rate": 5.121157490584534e-06, "loss": 0.3596, "step": 27140 }, { "epoch": 2.0171683389074695, "grad_norm": 2.1435923406667174, "learning_rate": 5.1204571123569725e-06, "loss": 0.2935, "step": 27141 }, { "epoch": 2.0172426607209215, "grad_norm": 2.0125355006537617, "learning_rate": 5.11975676554366e-06, "loss": 0.3132, "step": 27142 }, { "epoch": 2.017316982534374, "grad_norm": 2.589935824630525, "learning_rate": 5.119056450149099e-06, "loss": 0.3176, "step": 27143 }, { "epoch": 2.017391304347826, "grad_norm": 2.4967522478544053, "learning_rate": 5.1183561661778e-06, "loss": 0.3571, "step": 27144 }, { "epoch": 2.0174656261612784, "grad_norm": 2.1020373429569084, "learning_rate": 5.117655913634267e-06, "loss": 0.3766, "step": 27145 }, { "epoch": 2.0175399479747305, "grad_norm": 2.259440340890659, "learning_rate": 5.116955692523016e-06, "loss": 0.291, "step": 27146 }, { "epoch": 2.017614269788183, "grad_norm": 2.5060464251372387, "learning_rate": 5.116255502848549e-06, "loss": 0.356, "step": 27147 }, { "epoch": 2.017688591601635, "grad_norm": 1.855268825644474, "learning_rate": 5.115555344615374e-06, "loss": 0.2865, "step": 27148 }, { "epoch": 2.0177629134150874, "grad_norm": 1.738208095523405, "learning_rate": 5.114855217828003e-06, "loss": 0.2692, "step": 27149 }, { "epoch": 2.0178372352285394, "grad_norm": 2.063855009639852, "learning_rate": 5.11415512249094e-06, "loss": 0.3453, "step": 27150 }, { "epoch": 2.017911557041992, "grad_norm": 1.7517059459989728, "learning_rate": 5.11345505860869e-06, "loss": 0.278, "step": 27151 }, { "epoch": 2.017985878855444, "grad_norm": 1.9012083499824315, "learning_rate": 5.1127550261857676e-06, "loss": 0.2971, "step": 27152 }, { "epoch": 2.0180602006688964, "grad_norm": 2.087701719620942, "learning_rate": 5.11205502522667e-06, "loss": 0.2887, "step": 27153 }, { "epoch": 2.0181345224823484, "grad_norm": 1.96858682513426, "learning_rate": 5.111355055735916e-06, "loss": 0.3091, "step": 27154 }, { "epoch": 2.018208844295801, "grad_norm": 2.3590282142393764, "learning_rate": 5.1106551177179966e-06, "loss": 0.2947, "step": 27155 }, { "epoch": 2.018283166109253, "grad_norm": 2.1181420598960066, "learning_rate": 5.10995521117743e-06, "loss": 0.2858, "step": 27156 }, { "epoch": 2.0183574879227053, "grad_norm": 1.8847860830778203, "learning_rate": 5.109255336118718e-06, "loss": 0.299, "step": 27157 }, { "epoch": 2.0184318097361578, "grad_norm": 2.2565189377327335, "learning_rate": 5.108555492546363e-06, "loss": 0.2476, "step": 27158 }, { "epoch": 2.01850613154961, "grad_norm": 1.9049618041731151, "learning_rate": 5.107855680464877e-06, "loss": 0.3467, "step": 27159 }, { "epoch": 2.0185804533630622, "grad_norm": 5.736785738664002, "learning_rate": 5.107155899878759e-06, "loss": 0.3279, "step": 27160 }, { "epoch": 2.0186547751765143, "grad_norm": 2.2825172578518744, "learning_rate": 5.10645615079252e-06, "loss": 0.3246, "step": 27161 }, { "epoch": 2.0187290969899667, "grad_norm": 2.736220037710411, "learning_rate": 5.105756433210665e-06, "loss": 0.3504, "step": 27162 }, { "epoch": 2.0188034188034187, "grad_norm": 2.4124355049176307, "learning_rate": 5.10505674713769e-06, "loss": 0.361, "step": 27163 }, { "epoch": 2.018877740616871, "grad_norm": 2.039661110787351, "learning_rate": 5.10435709257811e-06, "loss": 0.2762, "step": 27164 }, { "epoch": 2.018952062430323, "grad_norm": 2.560425354370545, "learning_rate": 5.103657469536425e-06, "loss": 0.2841, "step": 27165 }, { "epoch": 2.0190263842437757, "grad_norm": 2.191985899884867, "learning_rate": 5.1029578780171405e-06, "loss": 0.3139, "step": 27166 }, { "epoch": 2.0191007060572277, "grad_norm": 1.9868595783683312, "learning_rate": 5.102258318024753e-06, "loss": 0.294, "step": 27167 }, { "epoch": 2.01917502787068, "grad_norm": 1.8290647559682898, "learning_rate": 5.101558789563778e-06, "loss": 0.2281, "step": 27168 }, { "epoch": 2.019249349684132, "grad_norm": 2.286700499390257, "learning_rate": 5.100859292638713e-06, "loss": 0.3118, "step": 27169 }, { "epoch": 2.0193236714975846, "grad_norm": 2.266616614157382, "learning_rate": 5.1001598272540565e-06, "loss": 0.316, "step": 27170 }, { "epoch": 2.0193979933110366, "grad_norm": 2.229989697588375, "learning_rate": 5.099460393414322e-06, "loss": 0.3179, "step": 27171 }, { "epoch": 2.019472315124489, "grad_norm": 1.9755425535710869, "learning_rate": 5.098760991124008e-06, "loss": 0.2981, "step": 27172 }, { "epoch": 2.019546636937941, "grad_norm": 2.0915231682577016, "learning_rate": 5.098061620387611e-06, "loss": 0.2617, "step": 27173 }, { "epoch": 2.0196209587513936, "grad_norm": 2.2232443592253888, "learning_rate": 5.097362281209644e-06, "loss": 0.2294, "step": 27174 }, { "epoch": 2.0196952805648456, "grad_norm": 1.794721736069518, "learning_rate": 5.0966629735945994e-06, "loss": 0.2937, "step": 27175 }, { "epoch": 2.019769602378298, "grad_norm": 2.442906686502824, "learning_rate": 5.095963697546988e-06, "loss": 0.345, "step": 27176 }, { "epoch": 2.01984392419175, "grad_norm": 2.343494386655265, "learning_rate": 5.095264453071308e-06, "loss": 0.3016, "step": 27177 }, { "epoch": 2.0199182460052025, "grad_norm": 2.109125675743224, "learning_rate": 5.09456524017206e-06, "loss": 0.2657, "step": 27178 }, { "epoch": 2.019992567818655, "grad_norm": 2.224893533882429, "learning_rate": 5.093866058853749e-06, "loss": 0.3276, "step": 27179 }, { "epoch": 2.020066889632107, "grad_norm": 1.8608377733177761, "learning_rate": 5.093166909120868e-06, "loss": 0.2258, "step": 27180 }, { "epoch": 2.0201412114455595, "grad_norm": 2.1926474054423224, "learning_rate": 5.0924677909779285e-06, "loss": 0.3458, "step": 27181 }, { "epoch": 2.0202155332590115, "grad_norm": 1.9802708049630557, "learning_rate": 5.091768704429423e-06, "loss": 0.2619, "step": 27182 }, { "epoch": 2.020289855072464, "grad_norm": 1.9680034557142476, "learning_rate": 5.091069649479858e-06, "loss": 0.3252, "step": 27183 }, { "epoch": 2.020364176885916, "grad_norm": 2.067840753835877, "learning_rate": 5.090370626133734e-06, "loss": 0.2695, "step": 27184 }, { "epoch": 2.0204384986993684, "grad_norm": 2.3202830674321424, "learning_rate": 5.089671634395544e-06, "loss": 0.2688, "step": 27185 }, { "epoch": 2.0205128205128204, "grad_norm": 2.2838088030336063, "learning_rate": 5.0889726742697974e-06, "loss": 0.354, "step": 27186 }, { "epoch": 2.020587142326273, "grad_norm": 1.9531290331639763, "learning_rate": 5.088273745760986e-06, "loss": 0.2377, "step": 27187 }, { "epoch": 2.020661464139725, "grad_norm": 1.870822817737352, "learning_rate": 5.087574848873622e-06, "loss": 0.3078, "step": 27188 }, { "epoch": 2.0207357859531774, "grad_norm": 1.9446860413968854, "learning_rate": 5.086875983612187e-06, "loss": 0.2499, "step": 27189 }, { "epoch": 2.0208101077666294, "grad_norm": 1.7782532500188177, "learning_rate": 5.086177149981195e-06, "loss": 0.2702, "step": 27190 }, { "epoch": 2.020884429580082, "grad_norm": 1.5709047519528225, "learning_rate": 5.0854783479851385e-06, "loss": 0.2223, "step": 27191 }, { "epoch": 2.020958751393534, "grad_norm": 1.955984165230596, "learning_rate": 5.084779577628514e-06, "loss": 0.299, "step": 27192 }, { "epoch": 2.0210330732069863, "grad_norm": 2.2297422448009234, "learning_rate": 5.084080838915827e-06, "loss": 0.282, "step": 27193 }, { "epoch": 2.0211073950204383, "grad_norm": 2.28996239849445, "learning_rate": 5.083382131851573e-06, "loss": 0.2647, "step": 27194 }, { "epoch": 2.021181716833891, "grad_norm": 2.2897683346701827, "learning_rate": 5.082683456440245e-06, "loss": 0.3377, "step": 27195 }, { "epoch": 2.021256038647343, "grad_norm": 2.348461411220816, "learning_rate": 5.081984812686351e-06, "loss": 0.382, "step": 27196 }, { "epoch": 2.0213303604607953, "grad_norm": 2.0817905759457327, "learning_rate": 5.081286200594379e-06, "loss": 0.2868, "step": 27197 }, { "epoch": 2.0214046822742473, "grad_norm": 2.2284689415194943, "learning_rate": 5.0805876201688345e-06, "loss": 0.2261, "step": 27198 }, { "epoch": 2.0214790040876998, "grad_norm": 1.9119006063083475, "learning_rate": 5.079889071414213e-06, "loss": 0.2399, "step": 27199 }, { "epoch": 2.0215533259011518, "grad_norm": 2.2750879063959477, "learning_rate": 5.079190554335008e-06, "loss": 0.2957, "step": 27200 }, { "epoch": 2.0216276477146042, "grad_norm": 1.8732031984411275, "learning_rate": 5.078492068935721e-06, "loss": 0.2897, "step": 27201 }, { "epoch": 2.0217019695280567, "grad_norm": 2.2734737143500725, "learning_rate": 5.077793615220843e-06, "loss": 0.2856, "step": 27202 }, { "epoch": 2.0217762913415087, "grad_norm": 2.248438074417807, "learning_rate": 5.077095193194877e-06, "loss": 0.2914, "step": 27203 }, { "epoch": 2.021850613154961, "grad_norm": 2.1441394918463366, "learning_rate": 5.076396802862314e-06, "loss": 0.3352, "step": 27204 }, { "epoch": 2.021924934968413, "grad_norm": 5.692432892232715, "learning_rate": 5.075698444227655e-06, "loss": 0.3041, "step": 27205 }, { "epoch": 2.0219992567818657, "grad_norm": 2.052815228351096, "learning_rate": 5.075000117295394e-06, "loss": 0.2503, "step": 27206 }, { "epoch": 2.0220735785953177, "grad_norm": 2.615467330185608, "learning_rate": 5.074301822070025e-06, "loss": 0.2929, "step": 27207 }, { "epoch": 2.02214790040877, "grad_norm": 2.492388310579034, "learning_rate": 5.073603558556049e-06, "loss": 0.3921, "step": 27208 }, { "epoch": 2.022222222222222, "grad_norm": 1.9120317213609872, "learning_rate": 5.072905326757951e-06, "loss": 0.2507, "step": 27209 }, { "epoch": 2.0222965440356746, "grad_norm": 1.7078894423912772, "learning_rate": 5.07220712668024e-06, "loss": 0.2546, "step": 27210 }, { "epoch": 2.0223708658491266, "grad_norm": 2.3475730628028812, "learning_rate": 5.071508958327401e-06, "loss": 0.3434, "step": 27211 }, { "epoch": 2.022445187662579, "grad_norm": 2.3866545132794794, "learning_rate": 5.0708108217039335e-06, "loss": 0.385, "step": 27212 }, { "epoch": 2.022519509476031, "grad_norm": 2.3605034401839187, "learning_rate": 5.07011271681433e-06, "loss": 0.3699, "step": 27213 }, { "epoch": 2.0225938312894836, "grad_norm": 2.4406072374425545, "learning_rate": 5.069414643663079e-06, "loss": 0.3118, "step": 27214 }, { "epoch": 2.0226681531029356, "grad_norm": 2.445843551554167, "learning_rate": 5.068716602254688e-06, "loss": 0.3169, "step": 27215 }, { "epoch": 2.022742474916388, "grad_norm": 2.0455567986805376, "learning_rate": 5.068018592593642e-06, "loss": 0.2884, "step": 27216 }, { "epoch": 2.02281679672984, "grad_norm": 2.1638916537056754, "learning_rate": 5.0673206146844325e-06, "loss": 0.2919, "step": 27217 }, { "epoch": 2.0228911185432925, "grad_norm": 2.05825013892932, "learning_rate": 5.06662266853156e-06, "loss": 0.2476, "step": 27218 }, { "epoch": 2.0229654403567445, "grad_norm": 2.037228581297357, "learning_rate": 5.065924754139512e-06, "loss": 0.3044, "step": 27219 }, { "epoch": 2.023039762170197, "grad_norm": 2.35935930996584, "learning_rate": 5.065226871512787e-06, "loss": 0.35, "step": 27220 }, { "epoch": 2.023114083983649, "grad_norm": 1.8654695520117153, "learning_rate": 5.064529020655877e-06, "loss": 0.2919, "step": 27221 }, { "epoch": 2.0231884057971015, "grad_norm": 1.2918668013737722, "learning_rate": 5.0638312015732724e-06, "loss": 0.1538, "step": 27222 }, { "epoch": 2.0232627276105535, "grad_norm": 1.9088084190795727, "learning_rate": 5.063133414269468e-06, "loss": 0.2451, "step": 27223 }, { "epoch": 2.023337049424006, "grad_norm": 2.1464908057728502, "learning_rate": 5.062435658748948e-06, "loss": 0.2885, "step": 27224 }, { "epoch": 2.0234113712374584, "grad_norm": 2.163484926950678, "learning_rate": 5.061737935016216e-06, "loss": 0.2271, "step": 27225 }, { "epoch": 2.0234856930509104, "grad_norm": 3.832384124369869, "learning_rate": 5.061040243075755e-06, "loss": 0.3026, "step": 27226 }, { "epoch": 2.023560014864363, "grad_norm": 2.547671920743233, "learning_rate": 5.060342582932065e-06, "loss": 0.3855, "step": 27227 }, { "epoch": 2.023634336677815, "grad_norm": 1.9873208121731645, "learning_rate": 5.059644954589633e-06, "loss": 0.23, "step": 27228 }, { "epoch": 2.0237086584912674, "grad_norm": 1.6954531632623744, "learning_rate": 5.0589473580529455e-06, "loss": 0.2489, "step": 27229 }, { "epoch": 2.0237829803047194, "grad_norm": 1.8755565308401978, "learning_rate": 5.058249793326504e-06, "loss": 0.2464, "step": 27230 }, { "epoch": 2.023857302118172, "grad_norm": 1.8219670743419933, "learning_rate": 5.057552260414788e-06, "loss": 0.2743, "step": 27231 }, { "epoch": 2.023931623931624, "grad_norm": 2.7674103234677476, "learning_rate": 5.056854759322301e-06, "loss": 0.3947, "step": 27232 }, { "epoch": 2.0240059457450763, "grad_norm": 1.932354404119794, "learning_rate": 5.056157290053524e-06, "loss": 0.2648, "step": 27233 }, { "epoch": 2.0240802675585283, "grad_norm": 2.117984430670409, "learning_rate": 5.055459852612951e-06, "loss": 0.2595, "step": 27234 }, { "epoch": 2.024154589371981, "grad_norm": 2.3566370625863353, "learning_rate": 5.0547624470050705e-06, "loss": 0.2589, "step": 27235 }, { "epoch": 2.024228911185433, "grad_norm": 1.9213270645738147, "learning_rate": 5.054065073234369e-06, "loss": 0.2868, "step": 27236 }, { "epoch": 2.0243032329988853, "grad_norm": 2.2444170043607152, "learning_rate": 5.053367731305343e-06, "loss": 0.3624, "step": 27237 }, { "epoch": 2.0243775548123373, "grad_norm": 1.815129494759272, "learning_rate": 5.052670421222481e-06, "loss": 0.2447, "step": 27238 }, { "epoch": 2.0244518766257897, "grad_norm": 2.5733997010003717, "learning_rate": 5.051973142990265e-06, "loss": 0.4036, "step": 27239 }, { "epoch": 2.0245261984392418, "grad_norm": 2.389342939675055, "learning_rate": 5.051275896613193e-06, "loss": 0.3842, "step": 27240 }, { "epoch": 2.024600520252694, "grad_norm": 1.9962231956994514, "learning_rate": 5.050578682095746e-06, "loss": 0.2845, "step": 27241 }, { "epoch": 2.0246748420661462, "grad_norm": 2.3257110255758753, "learning_rate": 5.049881499442419e-06, "loss": 0.3005, "step": 27242 }, { "epoch": 2.0247491638795987, "grad_norm": 2.4275937166099255, "learning_rate": 5.049184348657701e-06, "loss": 0.2471, "step": 27243 }, { "epoch": 2.0248234856930507, "grad_norm": 2.3253783333471367, "learning_rate": 5.048487229746072e-06, "loss": 0.3967, "step": 27244 }, { "epoch": 2.024897807506503, "grad_norm": 2.481417132072175, "learning_rate": 5.047790142712032e-06, "loss": 0.2908, "step": 27245 }, { "epoch": 2.0249721293199556, "grad_norm": 1.824717314462891, "learning_rate": 5.047093087560057e-06, "loss": 0.2727, "step": 27246 }, { "epoch": 2.0250464511334076, "grad_norm": 2.1688340075700445, "learning_rate": 5.046396064294642e-06, "loss": 0.3486, "step": 27247 }, { "epoch": 2.02512077294686, "grad_norm": 2.1107250748802597, "learning_rate": 5.045699072920268e-06, "loss": 0.2319, "step": 27248 }, { "epoch": 2.025195094760312, "grad_norm": 2.5319334135858753, "learning_rate": 5.045002113441431e-06, "loss": 0.3882, "step": 27249 }, { "epoch": 2.0252694165737646, "grad_norm": 2.4683345565844217, "learning_rate": 5.044305185862612e-06, "loss": 0.3913, "step": 27250 }, { "epoch": 2.0253437383872166, "grad_norm": 2.159981524422797, "learning_rate": 5.043608290188297e-06, "loss": 0.2716, "step": 27251 }, { "epoch": 2.025418060200669, "grad_norm": 2.2040586843337473, "learning_rate": 5.042911426422977e-06, "loss": 0.306, "step": 27252 }, { "epoch": 2.025492382014121, "grad_norm": 2.5682932977949076, "learning_rate": 5.042214594571133e-06, "loss": 0.3477, "step": 27253 }, { "epoch": 2.0255667038275735, "grad_norm": 2.54627636331086, "learning_rate": 5.0415177946372575e-06, "loss": 0.2441, "step": 27254 }, { "epoch": 2.0256410256410255, "grad_norm": 2.3813642720050234, "learning_rate": 5.040821026625833e-06, "loss": 0.275, "step": 27255 }, { "epoch": 2.025715347454478, "grad_norm": 2.3489182748013073, "learning_rate": 5.0401242905413464e-06, "loss": 0.3614, "step": 27256 }, { "epoch": 2.02578966926793, "grad_norm": 2.4858024190566486, "learning_rate": 5.039427586388281e-06, "loss": 0.35, "step": 27257 }, { "epoch": 2.0258639910813825, "grad_norm": 1.7679467643484883, "learning_rate": 5.038730914171122e-06, "loss": 0.2552, "step": 27258 }, { "epoch": 2.0259383128948345, "grad_norm": 2.143583531806019, "learning_rate": 5.038034273894357e-06, "loss": 0.2954, "step": 27259 }, { "epoch": 2.026012634708287, "grad_norm": 2.337982403421887, "learning_rate": 5.037337665562472e-06, "loss": 0.2735, "step": 27260 }, { "epoch": 2.026086956521739, "grad_norm": 2.4432646860187837, "learning_rate": 5.036641089179943e-06, "loss": 0.3055, "step": 27261 }, { "epoch": 2.0261612783351914, "grad_norm": 1.8549967456737366, "learning_rate": 5.0359445447512676e-06, "loss": 0.2533, "step": 27262 }, { "epoch": 2.0262356001486435, "grad_norm": 2.3438154850459543, "learning_rate": 5.03524803228092e-06, "loss": 0.3109, "step": 27263 }, { "epoch": 2.026309921962096, "grad_norm": 1.5691362370642787, "learning_rate": 5.03455155177339e-06, "loss": 0.18, "step": 27264 }, { "epoch": 2.026384243775548, "grad_norm": 2.184038492343206, "learning_rate": 5.03385510323316e-06, "loss": 0.2273, "step": 27265 }, { "epoch": 2.0264585655890004, "grad_norm": 2.1327566874649015, "learning_rate": 5.03315868666471e-06, "loss": 0.2809, "step": 27266 }, { "epoch": 2.0265328874024524, "grad_norm": 2.808568547900964, "learning_rate": 5.0324623020725335e-06, "loss": 0.3694, "step": 27267 }, { "epoch": 2.026607209215905, "grad_norm": 2.5818024495718643, "learning_rate": 5.031765949461099e-06, "loss": 0.2925, "step": 27268 }, { "epoch": 2.0266815310293573, "grad_norm": 2.545819337837184, "learning_rate": 5.031069628834903e-06, "loss": 0.3453, "step": 27269 }, { "epoch": 2.0267558528428093, "grad_norm": 3.003801314159112, "learning_rate": 5.030373340198418e-06, "loss": 0.3191, "step": 27270 }, { "epoch": 2.026830174656262, "grad_norm": 2.146652907827958, "learning_rate": 5.029677083556135e-06, "loss": 0.3011, "step": 27271 }, { "epoch": 2.026904496469714, "grad_norm": 2.484670482738143, "learning_rate": 5.0289808589125334e-06, "loss": 0.3035, "step": 27272 }, { "epoch": 2.0269788182831663, "grad_norm": 2.437843761412504, "learning_rate": 5.028284666272092e-06, "loss": 0.2392, "step": 27273 }, { "epoch": 2.0270531400966183, "grad_norm": 1.8830069323894185, "learning_rate": 5.027588505639298e-06, "loss": 0.2978, "step": 27274 }, { "epoch": 2.0271274619100708, "grad_norm": 2.2447508370326648, "learning_rate": 5.026892377018633e-06, "loss": 0.2317, "step": 27275 }, { "epoch": 2.0272017837235228, "grad_norm": 2.2972656117739554, "learning_rate": 5.026196280414572e-06, "loss": 0.3029, "step": 27276 }, { "epoch": 2.0272761055369752, "grad_norm": 1.8802662265289343, "learning_rate": 5.025500215831607e-06, "loss": 0.2366, "step": 27277 }, { "epoch": 2.0273504273504273, "grad_norm": 2.917558939906027, "learning_rate": 5.024804183274211e-06, "loss": 0.3849, "step": 27278 }, { "epoch": 2.0274247491638797, "grad_norm": 1.7683023137502256, "learning_rate": 5.024108182746869e-06, "loss": 0.2139, "step": 27279 }, { "epoch": 2.0274990709773317, "grad_norm": 3.3767650288674083, "learning_rate": 5.023412214254055e-06, "loss": 0.3456, "step": 27280 }, { "epoch": 2.027573392790784, "grad_norm": 1.8565040892921116, "learning_rate": 5.022716277800261e-06, "loss": 0.2394, "step": 27281 }, { "epoch": 2.027647714604236, "grad_norm": 1.929376599363793, "learning_rate": 5.02202037338996e-06, "loss": 0.2547, "step": 27282 }, { "epoch": 2.0277220364176887, "grad_norm": 2.0377915825764346, "learning_rate": 5.021324501027629e-06, "loss": 0.3015, "step": 27283 }, { "epoch": 2.0277963582311407, "grad_norm": 2.1342827907389754, "learning_rate": 5.020628660717758e-06, "loss": 0.3203, "step": 27284 }, { "epoch": 2.027870680044593, "grad_norm": 3.19380955198953, "learning_rate": 5.019932852464817e-06, "loss": 0.384, "step": 27285 }, { "epoch": 2.027945001858045, "grad_norm": 2.153454805149632, "learning_rate": 5.019237076273293e-06, "loss": 0.326, "step": 27286 }, { "epoch": 2.0280193236714976, "grad_norm": 2.0810904598414712, "learning_rate": 5.018541332147663e-06, "loss": 0.3103, "step": 27287 }, { "epoch": 2.0280936454849496, "grad_norm": 1.892884700687251, "learning_rate": 5.017845620092401e-06, "loss": 0.2652, "step": 27288 }, { "epoch": 2.028167967298402, "grad_norm": 2.3585952760595745, "learning_rate": 5.017149940111995e-06, "loss": 0.3707, "step": 27289 }, { "epoch": 2.028242289111854, "grad_norm": 2.1911969987911544, "learning_rate": 5.01645429221092e-06, "loss": 0.2779, "step": 27290 }, { "epoch": 2.0283166109253066, "grad_norm": 2.0116824686987798, "learning_rate": 5.015758676393652e-06, "loss": 0.2026, "step": 27291 }, { "epoch": 2.028390932738759, "grad_norm": 2.530360517405437, "learning_rate": 5.015063092664667e-06, "loss": 0.339, "step": 27292 }, { "epoch": 2.028465254552211, "grad_norm": 1.7475383424765618, "learning_rate": 5.014367541028451e-06, "loss": 0.1922, "step": 27293 }, { "epoch": 2.0285395763656635, "grad_norm": 1.8971678442610898, "learning_rate": 5.01367202148948e-06, "loss": 0.2034, "step": 27294 }, { "epoch": 2.0286138981791155, "grad_norm": 1.7969335594297857, "learning_rate": 5.012976534052223e-06, "loss": 0.3058, "step": 27295 }, { "epoch": 2.028688219992568, "grad_norm": 2.77154007708484, "learning_rate": 5.01228107872117e-06, "loss": 0.2434, "step": 27296 }, { "epoch": 2.02876254180602, "grad_norm": 2.1237287306922457, "learning_rate": 5.011585655500793e-06, "loss": 0.2281, "step": 27297 }, { "epoch": 2.0288368636194725, "grad_norm": 1.7466166916237746, "learning_rate": 5.010890264395564e-06, "loss": 0.2608, "step": 27298 }, { "epoch": 2.0289111854329245, "grad_norm": 2.5813635992819934, "learning_rate": 5.0101949054099686e-06, "loss": 0.355, "step": 27299 }, { "epoch": 2.028985507246377, "grad_norm": 2.655396708088879, "learning_rate": 5.009499578548476e-06, "loss": 0.353, "step": 27300 }, { "epoch": 2.029059829059829, "grad_norm": 2.4922634827216315, "learning_rate": 5.008804283815574e-06, "loss": 0.3037, "step": 27301 }, { "epoch": 2.0291341508732814, "grad_norm": 2.5109827510177287, "learning_rate": 5.008109021215722e-06, "loss": 0.3096, "step": 27302 }, { "epoch": 2.0292084726867334, "grad_norm": 2.516877044354304, "learning_rate": 5.00741379075341e-06, "loss": 0.2068, "step": 27303 }, { "epoch": 2.029282794500186, "grad_norm": 2.6708391194116503, "learning_rate": 5.0067185924331095e-06, "loss": 0.2761, "step": 27304 }, { "epoch": 2.029357116313638, "grad_norm": 2.5930026292066306, "learning_rate": 5.006023426259291e-06, "loss": 0.3239, "step": 27305 }, { "epoch": 2.0294314381270904, "grad_norm": 2.255392123222176, "learning_rate": 5.005328292236439e-06, "loss": 0.3204, "step": 27306 }, { "epoch": 2.0295057599405424, "grad_norm": 2.0258247535291356, "learning_rate": 5.004633190369019e-06, "loss": 0.262, "step": 27307 }, { "epoch": 2.029580081753995, "grad_norm": 1.9411306873651768, "learning_rate": 5.003938120661518e-06, "loss": 0.2313, "step": 27308 }, { "epoch": 2.029654403567447, "grad_norm": 2.4190282149436872, "learning_rate": 5.003243083118401e-06, "loss": 0.3704, "step": 27309 }, { "epoch": 2.0297287253808993, "grad_norm": 2.374622276737249, "learning_rate": 5.002548077744144e-06, "loss": 0.4242, "step": 27310 }, { "epoch": 2.0298030471943513, "grad_norm": 2.1199739808014697, "learning_rate": 5.0018531045432264e-06, "loss": 0.3305, "step": 27311 }, { "epoch": 2.029877369007804, "grad_norm": 1.6404787106060765, "learning_rate": 5.001158163520119e-06, "loss": 0.2645, "step": 27312 }, { "epoch": 2.029951690821256, "grad_norm": 2.317970659419141, "learning_rate": 5.000463254679296e-06, "loss": 0.3114, "step": 27313 }, { "epoch": 2.0300260126347083, "grad_norm": 1.9132728847863554, "learning_rate": 4.9997683780252274e-06, "loss": 0.2872, "step": 27314 }, { "epoch": 2.0301003344481607, "grad_norm": 2.285118594150591, "learning_rate": 4.999073533562394e-06, "loss": 0.257, "step": 27315 }, { "epoch": 2.0301746562616128, "grad_norm": 2.321436672991372, "learning_rate": 4.998378721295266e-06, "loss": 0.3865, "step": 27316 }, { "epoch": 2.030248978075065, "grad_norm": 2.0173696708131974, "learning_rate": 4.997683941228313e-06, "loss": 0.2269, "step": 27317 }, { "epoch": 2.0303232998885172, "grad_norm": 1.659378288348159, "learning_rate": 4.9969891933660155e-06, "loss": 0.2126, "step": 27318 }, { "epoch": 2.0303976217019697, "grad_norm": 2.55467893385666, "learning_rate": 4.996294477712841e-06, "loss": 0.3076, "step": 27319 }, { "epoch": 2.0304719435154217, "grad_norm": 2.275163531291334, "learning_rate": 4.995599794273258e-06, "loss": 0.2763, "step": 27320 }, { "epoch": 2.030546265328874, "grad_norm": 1.8098450999175097, "learning_rate": 4.994905143051749e-06, "loss": 0.2347, "step": 27321 }, { "epoch": 2.030620587142326, "grad_norm": 1.9302146574951098, "learning_rate": 4.994210524052777e-06, "loss": 0.2874, "step": 27322 }, { "epoch": 2.0306949089557786, "grad_norm": 1.9502538354087833, "learning_rate": 4.993515937280823e-06, "loss": 0.2599, "step": 27323 }, { "epoch": 2.0307692307692307, "grad_norm": 2.2599493184506727, "learning_rate": 4.992821382740352e-06, "loss": 0.3081, "step": 27324 }, { "epoch": 2.030843552582683, "grad_norm": 2.6682448311253273, "learning_rate": 4.992126860435838e-06, "loss": 0.3494, "step": 27325 }, { "epoch": 2.030917874396135, "grad_norm": 1.9311775703670697, "learning_rate": 4.9914323703717506e-06, "loss": 0.2891, "step": 27326 }, { "epoch": 2.0309921962095876, "grad_norm": 1.606682301171112, "learning_rate": 4.990737912552559e-06, "loss": 0.2025, "step": 27327 }, { "epoch": 2.0310665180230396, "grad_norm": 1.6643023440473406, "learning_rate": 4.9900434869827396e-06, "loss": 0.2162, "step": 27328 }, { "epoch": 2.031140839836492, "grad_norm": 2.0766929746924263, "learning_rate": 4.989349093666756e-06, "loss": 0.2254, "step": 27329 }, { "epoch": 2.031215161649944, "grad_norm": 2.8737332765585064, "learning_rate": 4.9886547326090886e-06, "loss": 0.383, "step": 27330 }, { "epoch": 2.0312894834633966, "grad_norm": 1.9542969814916424, "learning_rate": 4.9879604038142e-06, "loss": 0.2619, "step": 27331 }, { "epoch": 2.0313638052768486, "grad_norm": 1.990537621591581, "learning_rate": 4.98726610728656e-06, "loss": 0.2684, "step": 27332 }, { "epoch": 2.031438127090301, "grad_norm": 1.9232930038678533, "learning_rate": 4.986571843030644e-06, "loss": 0.3076, "step": 27333 }, { "epoch": 2.031512448903753, "grad_norm": 2.2344616374582356, "learning_rate": 4.985877611050915e-06, "loss": 0.2697, "step": 27334 }, { "epoch": 2.0315867707172055, "grad_norm": 2.6098813171957644, "learning_rate": 4.985183411351853e-06, "loss": 0.3161, "step": 27335 }, { "epoch": 2.031661092530658, "grad_norm": 3.066217476501608, "learning_rate": 4.984489243937912e-06, "loss": 0.3958, "step": 27336 }, { "epoch": 2.03173541434411, "grad_norm": 3.5610611171720676, "learning_rate": 4.9837951088135735e-06, "loss": 0.3646, "step": 27337 }, { "epoch": 2.0318097361575624, "grad_norm": 2.695488956502817, "learning_rate": 4.983101005983302e-06, "loss": 0.2778, "step": 27338 }, { "epoch": 2.0318840579710145, "grad_norm": 2.126181315602198, "learning_rate": 4.982406935451561e-06, "loss": 0.2903, "step": 27339 }, { "epoch": 2.031958379784467, "grad_norm": 2.2774127744084756, "learning_rate": 4.981712897222828e-06, "loss": 0.3136, "step": 27340 }, { "epoch": 2.032032701597919, "grad_norm": 2.2570384001382684, "learning_rate": 4.981018891301568e-06, "loss": 0.249, "step": 27341 }, { "epoch": 2.0321070234113714, "grad_norm": 2.274169081179858, "learning_rate": 4.980324917692243e-06, "loss": 0.2445, "step": 27342 }, { "epoch": 2.0321813452248234, "grad_norm": 2.0961036284017625, "learning_rate": 4.979630976399329e-06, "loss": 0.351, "step": 27343 }, { "epoch": 2.032255667038276, "grad_norm": 1.9732248197789677, "learning_rate": 4.978937067427287e-06, "loss": 0.2293, "step": 27344 }, { "epoch": 2.032329988851728, "grad_norm": 2.2531877819867625, "learning_rate": 4.978243190780593e-06, "loss": 0.2952, "step": 27345 }, { "epoch": 2.0324043106651803, "grad_norm": 2.151034146872621, "learning_rate": 4.977549346463707e-06, "loss": 0.2739, "step": 27346 }, { "epoch": 2.0324786324786324, "grad_norm": 1.95346761682389, "learning_rate": 4.976855534481099e-06, "loss": 0.2127, "step": 27347 }, { "epoch": 2.032552954292085, "grad_norm": 2.6283707565619014, "learning_rate": 4.9761617548372335e-06, "loss": 0.2488, "step": 27348 }, { "epoch": 2.032627276105537, "grad_norm": 2.4125735529673924, "learning_rate": 4.975468007536574e-06, "loss": 0.2667, "step": 27349 }, { "epoch": 2.0327015979189893, "grad_norm": 2.318249363245699, "learning_rate": 4.974774292583595e-06, "loss": 0.3331, "step": 27350 }, { "epoch": 2.0327759197324413, "grad_norm": 2.815782107241165, "learning_rate": 4.9740806099827545e-06, "loss": 0.3376, "step": 27351 }, { "epoch": 2.0328502415458938, "grad_norm": 1.6444494214372594, "learning_rate": 4.973386959738525e-06, "loss": 0.2211, "step": 27352 }, { "epoch": 2.032924563359346, "grad_norm": 1.9746177891850296, "learning_rate": 4.972693341855371e-06, "loss": 0.2703, "step": 27353 }, { "epoch": 2.0329988851727983, "grad_norm": 2.4738284008740004, "learning_rate": 4.971999756337752e-06, "loss": 0.3831, "step": 27354 }, { "epoch": 2.0330732069862503, "grad_norm": 1.9844620989111033, "learning_rate": 4.971306203190142e-06, "loss": 0.2841, "step": 27355 }, { "epoch": 2.0331475287997027, "grad_norm": 1.955214068216974, "learning_rate": 4.970612682416997e-06, "loss": 0.2719, "step": 27356 }, { "epoch": 2.0332218506131547, "grad_norm": 2.0695010839864287, "learning_rate": 4.969919194022795e-06, "loss": 0.244, "step": 27357 }, { "epoch": 2.033296172426607, "grad_norm": 1.3013842322882008, "learning_rate": 4.9692257380119844e-06, "loss": 0.1617, "step": 27358 }, { "epoch": 2.0333704942400597, "grad_norm": 1.7590454740139292, "learning_rate": 4.968532314389041e-06, "loss": 0.2593, "step": 27359 }, { "epoch": 2.0334448160535117, "grad_norm": 8.297438080558717, "learning_rate": 4.967838923158426e-06, "loss": 0.3625, "step": 27360 }, { "epoch": 2.033519137866964, "grad_norm": 2.1268301357761117, "learning_rate": 4.967145564324599e-06, "loss": 0.3046, "step": 27361 }, { "epoch": 2.033593459680416, "grad_norm": 2.199750879039208, "learning_rate": 4.966452237892031e-06, "loss": 0.2897, "step": 27362 }, { "epoch": 2.0336677814938686, "grad_norm": 2.360481131239756, "learning_rate": 4.965758943865182e-06, "loss": 0.3328, "step": 27363 }, { "epoch": 2.0337421033073206, "grad_norm": 1.7413480106354726, "learning_rate": 4.9650656822485135e-06, "loss": 0.2582, "step": 27364 }, { "epoch": 2.033816425120773, "grad_norm": 1.8506474880875092, "learning_rate": 4.964372453046493e-06, "loss": 0.29, "step": 27365 }, { "epoch": 2.033890746934225, "grad_norm": 2.072179249218223, "learning_rate": 4.963679256263579e-06, "loss": 0.3139, "step": 27366 }, { "epoch": 2.0339650687476776, "grad_norm": 2.1207529429822944, "learning_rate": 4.962986091904239e-06, "loss": 0.2798, "step": 27367 }, { "epoch": 2.0340393905611296, "grad_norm": 1.7603064656910048, "learning_rate": 4.9622929599729345e-06, "loss": 0.2176, "step": 27368 }, { "epoch": 2.034113712374582, "grad_norm": 2.050417619379996, "learning_rate": 4.961599860474126e-06, "loss": 0.2968, "step": 27369 }, { "epoch": 2.034188034188034, "grad_norm": 2.812624845240402, "learning_rate": 4.960906793412275e-06, "loss": 0.3909, "step": 27370 }, { "epoch": 2.0342623560014865, "grad_norm": 2.0165903492731316, "learning_rate": 4.960213758791843e-06, "loss": 0.3071, "step": 27371 }, { "epoch": 2.0343366778149385, "grad_norm": 2.1315367807358614, "learning_rate": 4.9595207566172965e-06, "loss": 0.3229, "step": 27372 }, { "epoch": 2.034410999628391, "grad_norm": 2.464165396996028, "learning_rate": 4.958827786893089e-06, "loss": 0.3092, "step": 27373 }, { "epoch": 2.034485321441843, "grad_norm": 2.898499623199209, "learning_rate": 4.958134849623691e-06, "loss": 0.3429, "step": 27374 }, { "epoch": 2.0345596432552955, "grad_norm": 2.1592845429805023, "learning_rate": 4.957441944813559e-06, "loss": 0.2673, "step": 27375 }, { "epoch": 2.0346339650687475, "grad_norm": 2.1522491989971253, "learning_rate": 4.956749072467151e-06, "loss": 0.3214, "step": 27376 }, { "epoch": 2.0347082868822, "grad_norm": 2.056251507886965, "learning_rate": 4.956056232588934e-06, "loss": 0.2664, "step": 27377 }, { "epoch": 2.034782608695652, "grad_norm": 2.139085003190021, "learning_rate": 4.955363425183361e-06, "loss": 0.3387, "step": 27378 }, { "epoch": 2.0348569305091044, "grad_norm": 2.1395549425944846, "learning_rate": 4.954670650254901e-06, "loss": 0.2976, "step": 27379 }, { "epoch": 2.034931252322557, "grad_norm": 2.5494942037724524, "learning_rate": 4.953977907808011e-06, "loss": 0.3187, "step": 27380 }, { "epoch": 2.035005574136009, "grad_norm": 2.2249113479712914, "learning_rate": 4.953285197847147e-06, "loss": 0.2565, "step": 27381 }, { "epoch": 2.0350798959494614, "grad_norm": 2.434604551522645, "learning_rate": 4.952592520376772e-06, "loss": 0.2711, "step": 27382 }, { "epoch": 2.0351542177629134, "grad_norm": 1.6450774968015833, "learning_rate": 4.951899875401342e-06, "loss": 0.2304, "step": 27383 }, { "epoch": 2.035228539576366, "grad_norm": 2.4901861891310197, "learning_rate": 4.95120726292532e-06, "loss": 0.3019, "step": 27384 }, { "epoch": 2.035302861389818, "grad_norm": 2.5669251702680542, "learning_rate": 4.9505146829531645e-06, "loss": 0.3066, "step": 27385 }, { "epoch": 2.0353771832032703, "grad_norm": 1.8309928323969817, "learning_rate": 4.949822135489331e-06, "loss": 0.1969, "step": 27386 }, { "epoch": 2.0354515050167223, "grad_norm": 2.5672974573603575, "learning_rate": 4.9491296205382836e-06, "loss": 0.3072, "step": 27387 }, { "epoch": 2.035525826830175, "grad_norm": 2.478468081978839, "learning_rate": 4.948437138104473e-06, "loss": 0.3667, "step": 27388 }, { "epoch": 2.035600148643627, "grad_norm": 1.8697817184476642, "learning_rate": 4.947744688192367e-06, "loss": 0.2924, "step": 27389 }, { "epoch": 2.0356744704570793, "grad_norm": 2.0783562018313537, "learning_rate": 4.947052270806417e-06, "loss": 0.27, "step": 27390 }, { "epoch": 2.0357487922705313, "grad_norm": 2.336461750029394, "learning_rate": 4.946359885951083e-06, "loss": 0.3298, "step": 27391 }, { "epoch": 2.0358231140839838, "grad_norm": 2.0701865626057434, "learning_rate": 4.9456675336308225e-06, "loss": 0.2952, "step": 27392 }, { "epoch": 2.0358974358974358, "grad_norm": 5.16448582158206, "learning_rate": 4.944975213850087e-06, "loss": 0.2895, "step": 27393 }, { "epoch": 2.0359717577108882, "grad_norm": 2.5633887215483813, "learning_rate": 4.944282926613344e-06, "loss": 0.4482, "step": 27394 }, { "epoch": 2.0360460795243402, "grad_norm": 2.0623023827502522, "learning_rate": 4.943590671925041e-06, "loss": 0.2869, "step": 27395 }, { "epoch": 2.0361204013377927, "grad_norm": 2.3100165334892533, "learning_rate": 4.9428984497896426e-06, "loss": 0.3566, "step": 27396 }, { "epoch": 2.0361947231512447, "grad_norm": 3.320513709345035, "learning_rate": 4.942206260211602e-06, "loss": 0.3191, "step": 27397 }, { "epoch": 2.036269044964697, "grad_norm": 1.93111435770945, "learning_rate": 4.941514103195371e-06, "loss": 0.2408, "step": 27398 }, { "epoch": 2.036343366778149, "grad_norm": 2.604507749022653, "learning_rate": 4.940821978745413e-06, "loss": 0.3218, "step": 27399 }, { "epoch": 2.0364176885916017, "grad_norm": 3.2522771219533166, "learning_rate": 4.940129886866177e-06, "loss": 0.3484, "step": 27400 }, { "epoch": 2.0364920104050537, "grad_norm": 2.4232774195983153, "learning_rate": 4.939437827562127e-06, "loss": 0.3161, "step": 27401 }, { "epoch": 2.036566332218506, "grad_norm": 2.3489727210502225, "learning_rate": 4.938745800837712e-06, "loss": 0.2862, "step": 27402 }, { "epoch": 2.0366406540319586, "grad_norm": 2.389627580783219, "learning_rate": 4.938053806697391e-06, "loss": 0.3455, "step": 27403 }, { "epoch": 2.0367149758454106, "grad_norm": 2.322846068165911, "learning_rate": 4.937361845145615e-06, "loss": 0.2948, "step": 27404 }, { "epoch": 2.036789297658863, "grad_norm": 3.004310333256629, "learning_rate": 4.936669916186838e-06, "loss": 0.3206, "step": 27405 }, { "epoch": 2.036863619472315, "grad_norm": 1.9893794619209093, "learning_rate": 4.935978019825521e-06, "loss": 0.2989, "step": 27406 }, { "epoch": 2.0369379412857676, "grad_norm": 2.2415189813698793, "learning_rate": 4.9352861560661145e-06, "loss": 0.3125, "step": 27407 }, { "epoch": 2.0370122630992196, "grad_norm": 2.324297962510148, "learning_rate": 4.934594324913069e-06, "loss": 0.3614, "step": 27408 }, { "epoch": 2.037086584912672, "grad_norm": 2.2810497605581537, "learning_rate": 4.933902526370846e-06, "loss": 0.3338, "step": 27409 }, { "epoch": 2.037160906726124, "grad_norm": 2.920761106379576, "learning_rate": 4.933210760443892e-06, "loss": 0.3562, "step": 27410 }, { "epoch": 2.0372352285395765, "grad_norm": 2.0595613203788146, "learning_rate": 4.93251902713667e-06, "loss": 0.2685, "step": 27411 }, { "epoch": 2.0373095503530285, "grad_norm": 2.013776232977124, "learning_rate": 4.931827326453625e-06, "loss": 0.2513, "step": 27412 }, { "epoch": 2.037383872166481, "grad_norm": 1.7966917961688869, "learning_rate": 4.93113565839921e-06, "loss": 0.2279, "step": 27413 }, { "epoch": 2.037458193979933, "grad_norm": 2.150481590532569, "learning_rate": 4.930444022977889e-06, "loss": 0.2985, "step": 27414 }, { "epoch": 2.0375325157933855, "grad_norm": 2.0659359725608577, "learning_rate": 4.929752420194097e-06, "loss": 0.2214, "step": 27415 }, { "epoch": 2.0376068376068375, "grad_norm": 2.000158066143676, "learning_rate": 4.929060850052301e-06, "loss": 0.3108, "step": 27416 }, { "epoch": 2.03768115942029, "grad_norm": 2.4566122974738342, "learning_rate": 4.928369312556943e-06, "loss": 0.3169, "step": 27417 }, { "epoch": 2.037755481233742, "grad_norm": 2.0219946562201825, "learning_rate": 4.927677807712485e-06, "loss": 0.3, "step": 27418 }, { "epoch": 2.0378298030471944, "grad_norm": 2.49491963154293, "learning_rate": 4.9269863355233746e-06, "loss": 0.3703, "step": 27419 }, { "epoch": 2.0379041248606464, "grad_norm": 2.185401723666398, "learning_rate": 4.926294895994058e-06, "loss": 0.2737, "step": 27420 }, { "epoch": 2.037978446674099, "grad_norm": 1.8581012177363472, "learning_rate": 4.9256034891289965e-06, "loss": 0.2684, "step": 27421 }, { "epoch": 2.038052768487551, "grad_norm": 3.1876788341010824, "learning_rate": 4.924912114932635e-06, "loss": 0.296, "step": 27422 }, { "epoch": 2.0381270903010034, "grad_norm": 2.4427688025764462, "learning_rate": 4.924220773409424e-06, "loss": 0.3771, "step": 27423 }, { "epoch": 2.0382014121144554, "grad_norm": 1.233915266704649, "learning_rate": 4.923529464563819e-06, "loss": 0.144, "step": 27424 }, { "epoch": 2.038275733927908, "grad_norm": 1.7052734541270955, "learning_rate": 4.922838188400268e-06, "loss": 0.2072, "step": 27425 }, { "epoch": 2.0383500557413603, "grad_norm": 2.357950217486135, "learning_rate": 4.922146944923221e-06, "loss": 0.2509, "step": 27426 }, { "epoch": 2.0384243775548123, "grad_norm": 2.320940954844617, "learning_rate": 4.921455734137125e-06, "loss": 0.2678, "step": 27427 }, { "epoch": 2.0384986993682648, "grad_norm": 2.4712536713917217, "learning_rate": 4.920764556046437e-06, "loss": 0.3804, "step": 27428 }, { "epoch": 2.038573021181717, "grad_norm": 2.393273576564494, "learning_rate": 4.920073410655604e-06, "loss": 0.3642, "step": 27429 }, { "epoch": 2.0386473429951693, "grad_norm": 2.083127623759449, "learning_rate": 4.919382297969071e-06, "loss": 0.2846, "step": 27430 }, { "epoch": 2.0387216648086213, "grad_norm": 7.1296847015813745, "learning_rate": 4.9186912179912935e-06, "loss": 0.4217, "step": 27431 }, { "epoch": 2.0387959866220737, "grad_norm": 3.3197383138958503, "learning_rate": 4.918000170726716e-06, "loss": 0.4277, "step": 27432 }, { "epoch": 2.0388703084355257, "grad_norm": 1.84104240160129, "learning_rate": 4.917309156179792e-06, "loss": 0.2249, "step": 27433 }, { "epoch": 2.038944630248978, "grad_norm": 2.514757893484263, "learning_rate": 4.91661817435497e-06, "loss": 0.3, "step": 27434 }, { "epoch": 2.0390189520624302, "grad_norm": 2.7494651156296417, "learning_rate": 4.915927225256691e-06, "loss": 0.3119, "step": 27435 }, { "epoch": 2.0390932738758827, "grad_norm": 2.281058403305952, "learning_rate": 4.9152363088894125e-06, "loss": 0.2941, "step": 27436 }, { "epoch": 2.0391675956893347, "grad_norm": 2.37047129182914, "learning_rate": 4.914545425257579e-06, "loss": 0.2775, "step": 27437 }, { "epoch": 2.039241917502787, "grad_norm": 2.3415612628086153, "learning_rate": 4.91385457436564e-06, "loss": 0.3295, "step": 27438 }, { "epoch": 2.039316239316239, "grad_norm": 2.021333972534601, "learning_rate": 4.913163756218036e-06, "loss": 0.2616, "step": 27439 }, { "epoch": 2.0393905611296916, "grad_norm": 1.8109536982123728, "learning_rate": 4.912472970819223e-06, "loss": 0.2956, "step": 27440 }, { "epoch": 2.0394648829431437, "grad_norm": 1.82717903876042, "learning_rate": 4.911782218173647e-06, "loss": 0.2507, "step": 27441 }, { "epoch": 2.039539204756596, "grad_norm": 2.42422670229276, "learning_rate": 4.911091498285747e-06, "loss": 0.3583, "step": 27442 }, { "epoch": 2.039613526570048, "grad_norm": 2.338629114371285, "learning_rate": 4.910400811159981e-06, "loss": 0.3624, "step": 27443 }, { "epoch": 2.0396878483835006, "grad_norm": 2.2952041306528668, "learning_rate": 4.909710156800791e-06, "loss": 0.2411, "step": 27444 }, { "epoch": 2.0397621701969526, "grad_norm": 1.886658367987872, "learning_rate": 4.909019535212618e-06, "loss": 0.2211, "step": 27445 }, { "epoch": 2.039836492010405, "grad_norm": 2.1987783138147807, "learning_rate": 4.908328946399917e-06, "loss": 0.3138, "step": 27446 }, { "epoch": 2.039910813823857, "grad_norm": 2.3836521752746536, "learning_rate": 4.907638390367127e-06, "loss": 0.2572, "step": 27447 }, { "epoch": 2.0399851356373095, "grad_norm": 3.649360619559425, "learning_rate": 4.906947867118703e-06, "loss": 0.2699, "step": 27448 }, { "epoch": 2.040059457450762, "grad_norm": 2.294093001830483, "learning_rate": 4.906257376659078e-06, "loss": 0.3791, "step": 27449 }, { "epoch": 2.040133779264214, "grad_norm": 2.0117716743111314, "learning_rate": 4.905566918992707e-06, "loss": 0.2741, "step": 27450 }, { "epoch": 2.0402081010776665, "grad_norm": 2.4719183222739725, "learning_rate": 4.904876494124033e-06, "loss": 0.3105, "step": 27451 }, { "epoch": 2.0402824228911185, "grad_norm": 2.0061091415570522, "learning_rate": 4.904186102057494e-06, "loss": 0.2848, "step": 27452 }, { "epoch": 2.040356744704571, "grad_norm": 2.3075605607198746, "learning_rate": 4.903495742797545e-06, "loss": 0.2724, "step": 27453 }, { "epoch": 2.040431066518023, "grad_norm": 1.9995830564513135, "learning_rate": 4.902805416348623e-06, "loss": 0.2878, "step": 27454 }, { "epoch": 2.0405053883314754, "grad_norm": 2.203045002743239, "learning_rate": 4.902115122715178e-06, "loss": 0.2879, "step": 27455 }, { "epoch": 2.0405797101449274, "grad_norm": 2.3480980057777936, "learning_rate": 4.901424861901651e-06, "loss": 0.2998, "step": 27456 }, { "epoch": 2.04065403195838, "grad_norm": 2.085514315424233, "learning_rate": 4.900734633912484e-06, "loss": 0.2645, "step": 27457 }, { "epoch": 2.040728353771832, "grad_norm": 2.048075564208691, "learning_rate": 4.9000444387521264e-06, "loss": 0.2018, "step": 27458 }, { "epoch": 2.0408026755852844, "grad_norm": 2.190298363523563, "learning_rate": 4.899354276425017e-06, "loss": 0.2907, "step": 27459 }, { "epoch": 2.0408769973987364, "grad_norm": 2.077015338206638, "learning_rate": 4.898664146935601e-06, "loss": 0.3329, "step": 27460 }, { "epoch": 2.040951319212189, "grad_norm": 2.233475360558665, "learning_rate": 4.897974050288316e-06, "loss": 0.4044, "step": 27461 }, { "epoch": 2.041025641025641, "grad_norm": 2.870440112663452, "learning_rate": 4.897283986487613e-06, "loss": 0.2487, "step": 27462 }, { "epoch": 2.0410999628390933, "grad_norm": 2.5802800090991895, "learning_rate": 4.896593955537931e-06, "loss": 0.3556, "step": 27463 }, { "epoch": 2.0411742846525454, "grad_norm": 2.2937899756448155, "learning_rate": 4.89590395744371e-06, "loss": 0.2113, "step": 27464 }, { "epoch": 2.041248606465998, "grad_norm": 1.902790940349502, "learning_rate": 4.895213992209397e-06, "loss": 0.308, "step": 27465 }, { "epoch": 2.04132292827945, "grad_norm": 1.941749433557263, "learning_rate": 4.894524059839431e-06, "loss": 0.3413, "step": 27466 }, { "epoch": 2.0413972500929023, "grad_norm": 2.201876882985422, "learning_rate": 4.893834160338252e-06, "loss": 0.268, "step": 27467 }, { "epoch": 2.0414715719063543, "grad_norm": 2.038386749111084, "learning_rate": 4.893144293710307e-06, "loss": 0.3049, "step": 27468 }, { "epoch": 2.0415458937198068, "grad_norm": 2.6993227084174243, "learning_rate": 4.892454459960029e-06, "loss": 0.3065, "step": 27469 }, { "epoch": 2.041620215533259, "grad_norm": 2.002998648341097, "learning_rate": 4.8917646590918736e-06, "loss": 0.3308, "step": 27470 }, { "epoch": 2.0416945373467112, "grad_norm": 2.0653599820756394, "learning_rate": 4.8910748911102635e-06, "loss": 0.3388, "step": 27471 }, { "epoch": 2.0417688591601637, "grad_norm": 2.6032468291209208, "learning_rate": 4.890385156019652e-06, "loss": 0.2654, "step": 27472 }, { "epoch": 2.0418431809736157, "grad_norm": 2.0762295089735123, "learning_rate": 4.889695453824476e-06, "loss": 0.2023, "step": 27473 }, { "epoch": 2.041917502787068, "grad_norm": 2.3080439218740136, "learning_rate": 4.889005784529172e-06, "loss": 0.3014, "step": 27474 }, { "epoch": 2.04199182460052, "grad_norm": 2.1758856500704415, "learning_rate": 4.888316148138186e-06, "loss": 0.2987, "step": 27475 }, { "epoch": 2.0420661464139727, "grad_norm": 2.3016893847986157, "learning_rate": 4.887626544655953e-06, "loss": 0.3343, "step": 27476 }, { "epoch": 2.0421404682274247, "grad_norm": 2.6064464026285403, "learning_rate": 4.8869369740869185e-06, "loss": 0.3509, "step": 27477 }, { "epoch": 2.042214790040877, "grad_norm": 2.0929613401529825, "learning_rate": 4.8862474364355185e-06, "loss": 0.293, "step": 27478 }, { "epoch": 2.042289111854329, "grad_norm": 2.2100057663373716, "learning_rate": 4.8855579317061886e-06, "loss": 0.2529, "step": 27479 }, { "epoch": 2.0423634336677816, "grad_norm": 2.012261036050744, "learning_rate": 4.884868459903375e-06, "loss": 0.27, "step": 27480 }, { "epoch": 2.0424377554812336, "grad_norm": 2.2196845474190714, "learning_rate": 4.8841790210315086e-06, "loss": 0.2735, "step": 27481 }, { "epoch": 2.042512077294686, "grad_norm": 2.6756158420039955, "learning_rate": 4.88348961509504e-06, "loss": 0.3458, "step": 27482 }, { "epoch": 2.042586399108138, "grad_norm": 2.1283466238878663, "learning_rate": 4.882800242098392e-06, "loss": 0.265, "step": 27483 }, { "epoch": 2.0426607209215906, "grad_norm": 2.045220394628346, "learning_rate": 4.882110902046014e-06, "loss": 0.2937, "step": 27484 }, { "epoch": 2.0427350427350426, "grad_norm": 2.2948794190740833, "learning_rate": 4.881421594942342e-06, "loss": 0.278, "step": 27485 }, { "epoch": 2.042809364548495, "grad_norm": 1.8605038324517262, "learning_rate": 4.880732320791808e-06, "loss": 0.3052, "step": 27486 }, { "epoch": 2.042883686361947, "grad_norm": 2.216385377873694, "learning_rate": 4.8800430795988565e-06, "loss": 0.2957, "step": 27487 }, { "epoch": 2.0429580081753995, "grad_norm": 2.0165442595135876, "learning_rate": 4.879353871367923e-06, "loss": 0.3088, "step": 27488 }, { "epoch": 2.0430323299888515, "grad_norm": 1.8072012336662207, "learning_rate": 4.878664696103439e-06, "loss": 0.2156, "step": 27489 }, { "epoch": 2.043106651802304, "grad_norm": 2.2521915442105422, "learning_rate": 4.877975553809849e-06, "loss": 0.3321, "step": 27490 }, { "epoch": 2.043180973615756, "grad_norm": 1.9505078282992665, "learning_rate": 4.877286444491584e-06, "loss": 0.1338, "step": 27491 }, { "epoch": 2.0432552954292085, "grad_norm": 2.234103540491691, "learning_rate": 4.8765973681530875e-06, "loss": 0.3328, "step": 27492 }, { "epoch": 2.043329617242661, "grad_norm": 1.9763009783021854, "learning_rate": 4.8759083247987905e-06, "loss": 0.2481, "step": 27493 }, { "epoch": 2.043403939056113, "grad_norm": 3.819740508291143, "learning_rate": 4.875219314433129e-06, "loss": 0.4198, "step": 27494 }, { "epoch": 2.0434782608695654, "grad_norm": 1.9933245204736814, "learning_rate": 4.874530337060542e-06, "loss": 0.2911, "step": 27495 }, { "epoch": 2.0435525826830174, "grad_norm": 2.2063098580780953, "learning_rate": 4.873841392685456e-06, "loss": 0.3122, "step": 27496 }, { "epoch": 2.04362690449647, "grad_norm": 2.507461966164975, "learning_rate": 4.873152481312319e-06, "loss": 0.291, "step": 27497 }, { "epoch": 2.043701226309922, "grad_norm": 2.5472315980897653, "learning_rate": 4.872463602945557e-06, "loss": 0.3758, "step": 27498 }, { "epoch": 2.0437755481233744, "grad_norm": 1.629264677626235, "learning_rate": 4.87177475758961e-06, "loss": 0.2147, "step": 27499 }, { "epoch": 2.0438498699368264, "grad_norm": 1.9642215302691153, "learning_rate": 4.871085945248912e-06, "loss": 0.2708, "step": 27500 }, { "epoch": 2.043924191750279, "grad_norm": 2.2228402161169427, "learning_rate": 4.870397165927892e-06, "loss": 0.2828, "step": 27501 }, { "epoch": 2.043998513563731, "grad_norm": 2.5068777493652905, "learning_rate": 4.869708419630994e-06, "loss": 0.3633, "step": 27502 }, { "epoch": 2.0440728353771833, "grad_norm": 2.354860147785215, "learning_rate": 4.869019706362642e-06, "loss": 0.3456, "step": 27503 }, { "epoch": 2.0441471571906353, "grad_norm": 2.046961471025924, "learning_rate": 4.868331026127285e-06, "loss": 0.2823, "step": 27504 }, { "epoch": 2.044221479004088, "grad_norm": 2.4209861207172514, "learning_rate": 4.867642378929336e-06, "loss": 0.3244, "step": 27505 }, { "epoch": 2.04429580081754, "grad_norm": 2.517379860511538, "learning_rate": 4.866953764773243e-06, "loss": 0.3617, "step": 27506 }, { "epoch": 2.0443701226309923, "grad_norm": 2.243477178967285, "learning_rate": 4.866265183663438e-06, "loss": 0.2842, "step": 27507 }, { "epoch": 2.0444444444444443, "grad_norm": 2.2776873209841546, "learning_rate": 4.865576635604346e-06, "loss": 0.3165, "step": 27508 }, { "epoch": 2.0445187662578967, "grad_norm": 2.16149646812692, "learning_rate": 4.864888120600408e-06, "loss": 0.2556, "step": 27509 }, { "epoch": 2.0445930880713488, "grad_norm": 2.1662186988842316, "learning_rate": 4.8641996386560555e-06, "loss": 0.3757, "step": 27510 }, { "epoch": 2.0446674098848012, "grad_norm": 2.4083586884596784, "learning_rate": 4.8635111897757136e-06, "loss": 0.3148, "step": 27511 }, { "epoch": 2.0447417316982532, "grad_norm": 2.384938926311484, "learning_rate": 4.862822773963826e-06, "loss": 0.3317, "step": 27512 }, { "epoch": 2.0448160535117057, "grad_norm": 2.099176861031638, "learning_rate": 4.862134391224813e-06, "loss": 0.2981, "step": 27513 }, { "epoch": 2.0448903753251577, "grad_norm": 1.7725441714329004, "learning_rate": 4.861446041563118e-06, "loss": 0.1779, "step": 27514 }, { "epoch": 2.04496469713861, "grad_norm": 2.2986929230366044, "learning_rate": 4.860757724983166e-06, "loss": 0.3221, "step": 27515 }, { "epoch": 2.0450390189520626, "grad_norm": 2.5055141810321313, "learning_rate": 4.860069441489389e-06, "loss": 0.238, "step": 27516 }, { "epoch": 2.0451133407655147, "grad_norm": 2.788554303449083, "learning_rate": 4.8593811910862174e-06, "loss": 0.4257, "step": 27517 }, { "epoch": 2.045187662578967, "grad_norm": 1.7857157088668778, "learning_rate": 4.858692973778081e-06, "loss": 0.2908, "step": 27518 }, { "epoch": 2.045261984392419, "grad_norm": 1.6648420749551054, "learning_rate": 4.858004789569415e-06, "loss": 0.2048, "step": 27519 }, { "epoch": 2.0453363062058716, "grad_norm": 2.1766289878301253, "learning_rate": 4.857316638464643e-06, "loss": 0.2578, "step": 27520 }, { "epoch": 2.0454106280193236, "grad_norm": 1.9480175296525546, "learning_rate": 4.8566285204682054e-06, "loss": 0.251, "step": 27521 }, { "epoch": 2.045484949832776, "grad_norm": 2.2077802283907655, "learning_rate": 4.8559404355845245e-06, "loss": 0.326, "step": 27522 }, { "epoch": 2.045559271646228, "grad_norm": 2.2156096361074167, "learning_rate": 4.8552523838180285e-06, "loss": 0.3419, "step": 27523 }, { "epoch": 2.0456335934596805, "grad_norm": 2.3170299489951955, "learning_rate": 4.854564365173156e-06, "loss": 0.3516, "step": 27524 }, { "epoch": 2.0457079152731326, "grad_norm": 1.8921794252880373, "learning_rate": 4.853876379654326e-06, "loss": 0.2685, "step": 27525 }, { "epoch": 2.045782237086585, "grad_norm": 2.554547076999784, "learning_rate": 4.853188427265978e-06, "loss": 0.3769, "step": 27526 }, { "epoch": 2.045856558900037, "grad_norm": 2.0194573288168822, "learning_rate": 4.852500508012539e-06, "loss": 0.2841, "step": 27527 }, { "epoch": 2.0459308807134895, "grad_norm": 4.583252079374328, "learning_rate": 4.8518126218984265e-06, "loss": 0.4345, "step": 27528 }, { "epoch": 2.0460052025269415, "grad_norm": 2.5869474800969785, "learning_rate": 4.851124768928081e-06, "loss": 0.4102, "step": 27529 }, { "epoch": 2.046079524340394, "grad_norm": 1.7451682549861864, "learning_rate": 4.850436949105924e-06, "loss": 0.2215, "step": 27530 }, { "epoch": 2.046153846153846, "grad_norm": 1.9778430404698715, "learning_rate": 4.84974916243639e-06, "loss": 0.2672, "step": 27531 }, { "epoch": 2.0462281679672985, "grad_norm": 1.9134735245898669, "learning_rate": 4.849061408923904e-06, "loss": 0.2306, "step": 27532 }, { "epoch": 2.0463024897807505, "grad_norm": 2.120526245362782, "learning_rate": 4.84837368857289e-06, "loss": 0.3054, "step": 27533 }, { "epoch": 2.046376811594203, "grad_norm": 2.192104147054741, "learning_rate": 4.847686001387782e-06, "loss": 0.2617, "step": 27534 }, { "epoch": 2.046451133407655, "grad_norm": 2.099303206397247, "learning_rate": 4.846998347373001e-06, "loss": 0.2611, "step": 27535 }, { "epoch": 2.0465254552211074, "grad_norm": 2.6900125562479764, "learning_rate": 4.84631072653298e-06, "loss": 0.3766, "step": 27536 }, { "epoch": 2.04659977703456, "grad_norm": 1.4500062713442239, "learning_rate": 4.845623138872144e-06, "loss": 0.2149, "step": 27537 }, { "epoch": 2.046674098848012, "grad_norm": 2.111610737258569, "learning_rate": 4.844935584394918e-06, "loss": 0.3064, "step": 27538 }, { "epoch": 2.0467484206614643, "grad_norm": 2.985198988541715, "learning_rate": 4.8442480631057306e-06, "loss": 0.3356, "step": 27539 }, { "epoch": 2.0468227424749164, "grad_norm": 2.588208854787869, "learning_rate": 4.843560575009002e-06, "loss": 0.4045, "step": 27540 }, { "epoch": 2.046897064288369, "grad_norm": 2.1390728767911, "learning_rate": 4.842873120109167e-06, "loss": 0.2781, "step": 27541 }, { "epoch": 2.046971386101821, "grad_norm": 2.273732786493742, "learning_rate": 4.842185698410643e-06, "loss": 0.3379, "step": 27542 }, { "epoch": 2.0470457079152733, "grad_norm": 1.8821697834252686, "learning_rate": 4.841498309917863e-06, "loss": 0.2836, "step": 27543 }, { "epoch": 2.0471200297287253, "grad_norm": 2.1890904695348374, "learning_rate": 4.840810954635251e-06, "loss": 0.369, "step": 27544 }, { "epoch": 2.0471943515421778, "grad_norm": 1.9804411395325843, "learning_rate": 4.840123632567226e-06, "loss": 0.2512, "step": 27545 }, { "epoch": 2.04726867335563, "grad_norm": 2.8150735372224447, "learning_rate": 4.83943634371822e-06, "loss": 0.3078, "step": 27546 }, { "epoch": 2.0473429951690822, "grad_norm": 1.6570940907339073, "learning_rate": 4.838749088092657e-06, "loss": 0.2549, "step": 27547 }, { "epoch": 2.0474173169825343, "grad_norm": 1.8169340596174508, "learning_rate": 4.8380618656949545e-06, "loss": 0.225, "step": 27548 }, { "epoch": 2.0474916387959867, "grad_norm": 2.894548534049819, "learning_rate": 4.837374676529545e-06, "loss": 0.3663, "step": 27549 }, { "epoch": 2.0475659606094387, "grad_norm": 2.6946930860105662, "learning_rate": 4.83668752060085e-06, "loss": 0.2707, "step": 27550 }, { "epoch": 2.047640282422891, "grad_norm": 2.2899684935404188, "learning_rate": 4.836000397913294e-06, "loss": 0.2622, "step": 27551 }, { "epoch": 2.047714604236343, "grad_norm": 2.7200777915632064, "learning_rate": 4.835313308471295e-06, "loss": 0.3208, "step": 27552 }, { "epoch": 2.0477889260497957, "grad_norm": 1.8319149752614305, "learning_rate": 4.834626252279284e-06, "loss": 0.2139, "step": 27553 }, { "epoch": 2.0478632478632477, "grad_norm": 2.20562164826718, "learning_rate": 4.833939229341682e-06, "loss": 0.2963, "step": 27554 }, { "epoch": 2.0479375696767, "grad_norm": 2.6212199373746605, "learning_rate": 4.833252239662907e-06, "loss": 0.3431, "step": 27555 }, { "epoch": 2.048011891490152, "grad_norm": 1.7675350857415302, "learning_rate": 4.83256528324739e-06, "loss": 0.243, "step": 27556 }, { "epoch": 2.0480862133036046, "grad_norm": 1.9764435557175928, "learning_rate": 4.8318783600995456e-06, "loss": 0.268, "step": 27557 }, { "epoch": 2.0481605351170566, "grad_norm": 3.161592067806738, "learning_rate": 4.831191470223804e-06, "loss": 0.3384, "step": 27558 }, { "epoch": 2.048234856930509, "grad_norm": 2.5044996400477686, "learning_rate": 4.830504613624584e-06, "loss": 0.1863, "step": 27559 }, { "epoch": 2.0483091787439616, "grad_norm": 2.3294747789198045, "learning_rate": 4.829817790306303e-06, "loss": 0.3382, "step": 27560 }, { "epoch": 2.0483835005574136, "grad_norm": 2.402616486417589, "learning_rate": 4.829131000273394e-06, "loss": 0.3257, "step": 27561 }, { "epoch": 2.048457822370866, "grad_norm": 2.282910324478646, "learning_rate": 4.828444243530264e-06, "loss": 0.2829, "step": 27562 }, { "epoch": 2.048532144184318, "grad_norm": 1.8253621801709912, "learning_rate": 4.8277575200813455e-06, "loss": 0.2528, "step": 27563 }, { "epoch": 2.0486064659977705, "grad_norm": 2.2832854261998583, "learning_rate": 4.827070829931051e-06, "loss": 0.2645, "step": 27564 }, { "epoch": 2.0486807878112225, "grad_norm": 2.717198347388664, "learning_rate": 4.8263841730838115e-06, "loss": 0.3522, "step": 27565 }, { "epoch": 2.048755109624675, "grad_norm": 2.063842004133504, "learning_rate": 4.8256975495440405e-06, "loss": 0.3198, "step": 27566 }, { "epoch": 2.048829431438127, "grad_norm": 2.273121320960854, "learning_rate": 4.825010959316158e-06, "loss": 0.2917, "step": 27567 }, { "epoch": 2.0489037532515795, "grad_norm": 1.8190449006215936, "learning_rate": 4.824324402404589e-06, "loss": 0.2165, "step": 27568 }, { "epoch": 2.0489780750650315, "grad_norm": 1.982804989906606, "learning_rate": 4.823637878813752e-06, "loss": 0.2333, "step": 27569 }, { "epoch": 2.049052396878484, "grad_norm": 2.3960865244531684, "learning_rate": 4.822951388548061e-06, "loss": 0.3283, "step": 27570 }, { "epoch": 2.049126718691936, "grad_norm": 1.8525547121018842, "learning_rate": 4.822264931611945e-06, "loss": 0.1887, "step": 27571 }, { "epoch": 2.0492010405053884, "grad_norm": 1.885196889329932, "learning_rate": 4.821578508009819e-06, "loss": 0.2734, "step": 27572 }, { "epoch": 2.0492753623188404, "grad_norm": 2.4007669444436037, "learning_rate": 4.820892117746101e-06, "loss": 0.3201, "step": 27573 }, { "epoch": 2.049349684132293, "grad_norm": 1.8720294942188587, "learning_rate": 4.820205760825207e-06, "loss": 0.2797, "step": 27574 }, { "epoch": 2.049424005945745, "grad_norm": 2.3128559608165404, "learning_rate": 4.819519437251563e-06, "loss": 0.2993, "step": 27575 }, { "epoch": 2.0494983277591974, "grad_norm": 3.08303061307427, "learning_rate": 4.818833147029585e-06, "loss": 0.2836, "step": 27576 }, { "epoch": 2.0495726495726494, "grad_norm": 3.6400424875153576, "learning_rate": 4.818146890163687e-06, "loss": 0.3254, "step": 27577 }, { "epoch": 2.049646971386102, "grad_norm": 4.805542696851067, "learning_rate": 4.817460666658292e-06, "loss": 0.2883, "step": 27578 }, { "epoch": 2.049721293199554, "grad_norm": 2.232662055440801, "learning_rate": 4.8167744765178145e-06, "loss": 0.2858, "step": 27579 }, { "epoch": 2.0497956150130063, "grad_norm": 2.304834331249002, "learning_rate": 4.816088319746678e-06, "loss": 0.3333, "step": 27580 }, { "epoch": 2.0498699368264583, "grad_norm": 1.819065404554509, "learning_rate": 4.815402196349295e-06, "loss": 0.2288, "step": 27581 }, { "epoch": 2.049944258639911, "grad_norm": 2.0276121302058865, "learning_rate": 4.81471610633008e-06, "loss": 0.2731, "step": 27582 }, { "epoch": 2.0500185804533633, "grad_norm": 1.7506314202498945, "learning_rate": 4.814030049693461e-06, "loss": 0.2904, "step": 27583 }, { "epoch": 2.0500929022668153, "grad_norm": 1.828857779909139, "learning_rate": 4.813344026443841e-06, "loss": 0.2088, "step": 27584 }, { "epoch": 2.0501672240802677, "grad_norm": 2.783640815657407, "learning_rate": 4.812658036585646e-06, "loss": 0.3082, "step": 27585 }, { "epoch": 2.0502415458937198, "grad_norm": 2.885660618880169, "learning_rate": 4.811972080123285e-06, "loss": 0.317, "step": 27586 }, { "epoch": 2.0503158677071722, "grad_norm": 2.0665996911207456, "learning_rate": 4.8112861570611825e-06, "loss": 0.2329, "step": 27587 }, { "epoch": 2.0503901895206242, "grad_norm": 2.542412517510808, "learning_rate": 4.810600267403751e-06, "loss": 0.306, "step": 27588 }, { "epoch": 2.0504645113340767, "grad_norm": 1.8909055630024403, "learning_rate": 4.809914411155402e-06, "loss": 0.231, "step": 27589 }, { "epoch": 2.0505388331475287, "grad_norm": 2.7217247625530883, "learning_rate": 4.809228588320559e-06, "loss": 0.2957, "step": 27590 }, { "epoch": 2.050613154960981, "grad_norm": 2.2432036102845294, "learning_rate": 4.808542798903633e-06, "loss": 0.315, "step": 27591 }, { "epoch": 2.050687476774433, "grad_norm": 2.5082804958268796, "learning_rate": 4.807857042909034e-06, "loss": 0.3416, "step": 27592 }, { "epoch": 2.0507617985878857, "grad_norm": 2.077032604990459, "learning_rate": 4.807171320341186e-06, "loss": 0.325, "step": 27593 }, { "epoch": 2.0508361204013377, "grad_norm": 2.592326924069666, "learning_rate": 4.806485631204497e-06, "loss": 0.2309, "step": 27594 }, { "epoch": 2.05091044221479, "grad_norm": 2.5709685280590175, "learning_rate": 4.80579997550339e-06, "loss": 0.3083, "step": 27595 }, { "epoch": 2.050984764028242, "grad_norm": 2.605492941256074, "learning_rate": 4.805114353242267e-06, "loss": 0.3587, "step": 27596 }, { "epoch": 2.0510590858416946, "grad_norm": 2.356347605912262, "learning_rate": 4.804428764425552e-06, "loss": 0.3236, "step": 27597 }, { "epoch": 2.0511334076551466, "grad_norm": 2.083630892973276, "learning_rate": 4.803743209057654e-06, "loss": 0.3133, "step": 27598 }, { "epoch": 2.051207729468599, "grad_norm": 1.7199874321257944, "learning_rate": 4.803057687142984e-06, "loss": 0.2177, "step": 27599 }, { "epoch": 2.051282051282051, "grad_norm": 2.1267156499202917, "learning_rate": 4.802372198685964e-06, "loss": 0.3032, "step": 27600 }, { "epoch": 2.0513563730955036, "grad_norm": 2.180161481636266, "learning_rate": 4.801686743690998e-06, "loss": 0.2065, "step": 27601 }, { "epoch": 2.0514306949089556, "grad_norm": 2.4117506185343243, "learning_rate": 4.8010013221625064e-06, "loss": 0.3295, "step": 27602 }, { "epoch": 2.051505016722408, "grad_norm": 7.323356159735942, "learning_rate": 4.800315934104897e-06, "loss": 0.3337, "step": 27603 }, { "epoch": 2.05157933853586, "grad_norm": 2.0156664960798567, "learning_rate": 4.799630579522583e-06, "loss": 0.2871, "step": 27604 }, { "epoch": 2.0516536603493125, "grad_norm": 1.9111955678942074, "learning_rate": 4.798945258419979e-06, "loss": 0.2578, "step": 27605 }, { "epoch": 2.051727982162765, "grad_norm": 2.4499877280000484, "learning_rate": 4.798259970801496e-06, "loss": 0.3103, "step": 27606 }, { "epoch": 2.051802303976217, "grad_norm": 2.990862711257915, "learning_rate": 4.797574716671547e-06, "loss": 0.4591, "step": 27607 }, { "epoch": 2.0518766257896695, "grad_norm": 3.0019295039265885, "learning_rate": 4.796889496034537e-06, "loss": 0.4067, "step": 27608 }, { "epoch": 2.0519509476031215, "grad_norm": 2.3302784288676226, "learning_rate": 4.796204308894886e-06, "loss": 0.3169, "step": 27609 }, { "epoch": 2.052025269416574, "grad_norm": 2.080249154713155, "learning_rate": 4.795519155257001e-06, "loss": 0.2968, "step": 27610 }, { "epoch": 2.052099591230026, "grad_norm": 5.245484138650496, "learning_rate": 4.794834035125291e-06, "loss": 0.2227, "step": 27611 }, { "epoch": 2.0521739130434784, "grad_norm": 2.248683159177754, "learning_rate": 4.794148948504174e-06, "loss": 0.3252, "step": 27612 }, { "epoch": 2.0522482348569304, "grad_norm": 4.220973338900995, "learning_rate": 4.793463895398054e-06, "loss": 0.3378, "step": 27613 }, { "epoch": 2.052322556670383, "grad_norm": 2.6158472614196784, "learning_rate": 4.7927788758113405e-06, "loss": 0.3735, "step": 27614 }, { "epoch": 2.052396878483835, "grad_norm": 1.9795913816882262, "learning_rate": 4.792093889748451e-06, "loss": 0.3238, "step": 27615 }, { "epoch": 2.0524712002972874, "grad_norm": 2.2779359267154593, "learning_rate": 4.791408937213785e-06, "loss": 0.335, "step": 27616 }, { "epoch": 2.0525455221107394, "grad_norm": 2.1144305948844617, "learning_rate": 4.790724018211768e-06, "loss": 0.2499, "step": 27617 }, { "epoch": 2.052619843924192, "grad_norm": 2.5058093353160893, "learning_rate": 4.79003913274679e-06, "loss": 0.3539, "step": 27618 }, { "epoch": 2.052694165737644, "grad_norm": 1.7208841716412246, "learning_rate": 4.789354280823274e-06, "loss": 0.2486, "step": 27619 }, { "epoch": 2.0527684875510963, "grad_norm": 1.9150952675332842, "learning_rate": 4.788669462445626e-06, "loss": 0.3017, "step": 27620 }, { "epoch": 2.0528428093645483, "grad_norm": 2.498914897632018, "learning_rate": 4.7879846776182484e-06, "loss": 0.3479, "step": 27621 }, { "epoch": 2.052917131178001, "grad_norm": 2.2291531136486684, "learning_rate": 4.787299926345559e-06, "loss": 0.2727, "step": 27622 }, { "epoch": 2.052991452991453, "grad_norm": 1.9651068906577907, "learning_rate": 4.786615208631959e-06, "loss": 0.2791, "step": 27623 }, { "epoch": 2.0530657748049053, "grad_norm": 2.62058201081012, "learning_rate": 4.785930524481862e-06, "loss": 0.3698, "step": 27624 }, { "epoch": 2.0531400966183573, "grad_norm": 2.3664486148297468, "learning_rate": 4.785245873899675e-06, "loss": 0.2931, "step": 27625 }, { "epoch": 2.0532144184318097, "grad_norm": 2.449271772438983, "learning_rate": 4.7845612568898004e-06, "loss": 0.3008, "step": 27626 }, { "epoch": 2.053288740245262, "grad_norm": 2.5626702317925414, "learning_rate": 4.7838766734566546e-06, "loss": 0.3626, "step": 27627 }, { "epoch": 2.053363062058714, "grad_norm": 1.8711355808823589, "learning_rate": 4.7831921236046355e-06, "loss": 0.2347, "step": 27628 }, { "epoch": 2.0534373838721667, "grad_norm": 3.1719205756283375, "learning_rate": 4.7825076073381615e-06, "loss": 0.2802, "step": 27629 }, { "epoch": 2.0535117056856187, "grad_norm": 4.822519923982329, "learning_rate": 4.781823124661626e-06, "loss": 0.3093, "step": 27630 }, { "epoch": 2.053586027499071, "grad_norm": 2.601217280152481, "learning_rate": 4.781138675579446e-06, "loss": 0.2886, "step": 27631 }, { "epoch": 2.053660349312523, "grad_norm": 2.782260820882773, "learning_rate": 4.7804542600960255e-06, "loss": 0.3635, "step": 27632 }, { "epoch": 2.0537346711259756, "grad_norm": 2.40145399324046, "learning_rate": 4.779769878215765e-06, "loss": 0.362, "step": 27633 }, { "epoch": 2.0538089929394276, "grad_norm": 2.3739424016299995, "learning_rate": 4.77908552994308e-06, "loss": 0.3213, "step": 27634 }, { "epoch": 2.05388331475288, "grad_norm": 2.301552823081494, "learning_rate": 4.77840121528237e-06, "loss": 0.2369, "step": 27635 }, { "epoch": 2.053957636566332, "grad_norm": 2.5569076537042728, "learning_rate": 4.777716934238039e-06, "loss": 0.3155, "step": 27636 }, { "epoch": 2.0540319583797846, "grad_norm": 2.3476940899385204, "learning_rate": 4.777032686814499e-06, "loss": 0.3042, "step": 27637 }, { "epoch": 2.0541062801932366, "grad_norm": 2.5721066520661426, "learning_rate": 4.776348473016148e-06, "loss": 0.3488, "step": 27638 }, { "epoch": 2.054180602006689, "grad_norm": 1.9963595432344705, "learning_rate": 4.775664292847398e-06, "loss": 0.257, "step": 27639 }, { "epoch": 2.054254923820141, "grad_norm": 2.2149284762799706, "learning_rate": 4.77498014631265e-06, "loss": 0.3359, "step": 27640 }, { "epoch": 2.0543292456335935, "grad_norm": 3.2295464613337703, "learning_rate": 4.774296033416309e-06, "loss": 0.29, "step": 27641 }, { "epoch": 2.0544035674470456, "grad_norm": 1.618722522451097, "learning_rate": 4.773611954162779e-06, "loss": 0.2287, "step": 27642 }, { "epoch": 2.054477889260498, "grad_norm": 2.3900703641692775, "learning_rate": 4.7729279085564605e-06, "loss": 0.3356, "step": 27643 }, { "epoch": 2.05455221107395, "grad_norm": 2.638936308940855, "learning_rate": 4.772243896601765e-06, "loss": 0.333, "step": 27644 }, { "epoch": 2.0546265328874025, "grad_norm": 2.2268743964441144, "learning_rate": 4.771559918303088e-06, "loss": 0.2871, "step": 27645 }, { "epoch": 2.0547008547008545, "grad_norm": 1.9499079646017368, "learning_rate": 4.77087597366484e-06, "loss": 0.1504, "step": 27646 }, { "epoch": 2.054775176514307, "grad_norm": 1.7678691101158688, "learning_rate": 4.770192062691422e-06, "loss": 0.2397, "step": 27647 }, { "epoch": 2.054849498327759, "grad_norm": 2.1866806822887037, "learning_rate": 4.7695081853872315e-06, "loss": 0.4109, "step": 27648 }, { "epoch": 2.0549238201412114, "grad_norm": 1.8141868314793794, "learning_rate": 4.76882434175668e-06, "loss": 0.2691, "step": 27649 }, { "epoch": 2.054998141954664, "grad_norm": 2.5837464116816107, "learning_rate": 4.768140531804163e-06, "loss": 0.3545, "step": 27650 }, { "epoch": 2.055072463768116, "grad_norm": 2.210314113997993, "learning_rate": 4.767456755534092e-06, "loss": 0.2957, "step": 27651 }, { "epoch": 2.0551467855815684, "grad_norm": 2.3660631710227955, "learning_rate": 4.766773012950857e-06, "loss": 0.2825, "step": 27652 }, { "epoch": 2.0552211073950204, "grad_norm": 2.6728397607601386, "learning_rate": 4.766089304058868e-06, "loss": 0.278, "step": 27653 }, { "epoch": 2.055295429208473, "grad_norm": 2.235899265017448, "learning_rate": 4.765405628862526e-06, "loss": 0.2737, "step": 27654 }, { "epoch": 2.055369751021925, "grad_norm": 2.1084014314315174, "learning_rate": 4.764721987366226e-06, "loss": 0.2658, "step": 27655 }, { "epoch": 2.0554440728353773, "grad_norm": 2.359250865505351, "learning_rate": 4.764038379574378e-06, "loss": 0.3112, "step": 27656 }, { "epoch": 2.0555183946488294, "grad_norm": 1.9763072774773576, "learning_rate": 4.76335480549138e-06, "loss": 0.1718, "step": 27657 }, { "epoch": 2.055592716462282, "grad_norm": 2.0717136078388667, "learning_rate": 4.762671265121628e-06, "loss": 0.2927, "step": 27658 }, { "epoch": 2.055667038275734, "grad_norm": 2.215711264141035, "learning_rate": 4.761987758469531e-06, "loss": 0.2569, "step": 27659 }, { "epoch": 2.0557413600891863, "grad_norm": 2.1865725335157915, "learning_rate": 4.76130428553948e-06, "loss": 0.2623, "step": 27660 }, { "epoch": 2.0558156819026383, "grad_norm": 2.0919125642190557, "learning_rate": 4.7606208463358835e-06, "loss": 0.3021, "step": 27661 }, { "epoch": 2.0558900037160908, "grad_norm": 2.5765423309432567, "learning_rate": 4.75993744086314e-06, "loss": 0.3653, "step": 27662 }, { "epoch": 2.055964325529543, "grad_norm": 2.3515276733693473, "learning_rate": 4.759254069125646e-06, "loss": 0.2483, "step": 27663 }, { "epoch": 2.0560386473429952, "grad_norm": 2.1042166868174945, "learning_rate": 4.758570731127803e-06, "loss": 0.2759, "step": 27664 }, { "epoch": 2.0561129691564473, "grad_norm": 1.9140138290993303, "learning_rate": 4.7578874268740064e-06, "loss": 0.2535, "step": 27665 }, { "epoch": 2.0561872909698997, "grad_norm": 2.1285921881390064, "learning_rate": 4.757204156368661e-06, "loss": 0.2877, "step": 27666 }, { "epoch": 2.0562616127833517, "grad_norm": 2.1179552922353153, "learning_rate": 4.756520919616161e-06, "loss": 0.3481, "step": 27667 }, { "epoch": 2.056335934596804, "grad_norm": 2.7909790281067375, "learning_rate": 4.75583771662091e-06, "loss": 0.3759, "step": 27668 }, { "epoch": 2.056410256410256, "grad_norm": 1.7824823468280084, "learning_rate": 4.755154547387304e-06, "loss": 0.2611, "step": 27669 }, { "epoch": 2.0564845782237087, "grad_norm": 2.367646687783716, "learning_rate": 4.754471411919737e-06, "loss": 0.3201, "step": 27670 }, { "epoch": 2.056558900037161, "grad_norm": 2.288144288007673, "learning_rate": 4.753788310222615e-06, "loss": 0.334, "step": 27671 }, { "epoch": 2.056633221850613, "grad_norm": 2.1315470123191496, "learning_rate": 4.753105242300331e-06, "loss": 0.3442, "step": 27672 }, { "epoch": 2.0567075436640656, "grad_norm": 1.864429721378359, "learning_rate": 4.752422208157279e-06, "loss": 0.2145, "step": 27673 }, { "epoch": 2.0567818654775176, "grad_norm": 2.205422640015776, "learning_rate": 4.751739207797869e-06, "loss": 0.299, "step": 27674 }, { "epoch": 2.05685618729097, "grad_norm": 2.170953683794962, "learning_rate": 4.7510562412264815e-06, "loss": 0.263, "step": 27675 }, { "epoch": 2.056930509104422, "grad_norm": 1.9457042430798883, "learning_rate": 4.750373308447527e-06, "loss": 0.2344, "step": 27676 }, { "epoch": 2.0570048309178746, "grad_norm": 2.1362018206968285, "learning_rate": 4.74969040946539e-06, "loss": 0.3115, "step": 27677 }, { "epoch": 2.0570791527313266, "grad_norm": 1.955770452484292, "learning_rate": 4.749007544284481e-06, "loss": 0.2718, "step": 27678 }, { "epoch": 2.057153474544779, "grad_norm": 1.8206251860021168, "learning_rate": 4.748324712909187e-06, "loss": 0.2929, "step": 27679 }, { "epoch": 2.057227796358231, "grad_norm": 1.7450092261952928, "learning_rate": 4.747641915343903e-06, "loss": 0.2435, "step": 27680 }, { "epoch": 2.0573021181716835, "grad_norm": 1.9537475222320988, "learning_rate": 4.74695915159303e-06, "loss": 0.2657, "step": 27681 }, { "epoch": 2.0573764399851355, "grad_norm": 1.7294294828604684, "learning_rate": 4.746276421660959e-06, "loss": 0.2657, "step": 27682 }, { "epoch": 2.057450761798588, "grad_norm": 2.3729101892115025, "learning_rate": 4.745593725552091e-06, "loss": 0.2753, "step": 27683 }, { "epoch": 2.05752508361204, "grad_norm": 2.2679782220552553, "learning_rate": 4.7449110632708185e-06, "loss": 0.3549, "step": 27684 }, { "epoch": 2.0575994054254925, "grad_norm": 2.098007607588368, "learning_rate": 4.744228434821535e-06, "loss": 0.2747, "step": 27685 }, { "epoch": 2.0576737272389445, "grad_norm": 2.1767278659338896, "learning_rate": 4.743545840208638e-06, "loss": 0.3318, "step": 27686 }, { "epoch": 2.057748049052397, "grad_norm": 2.00197176664594, "learning_rate": 4.7428632794365144e-06, "loss": 0.2481, "step": 27687 }, { "epoch": 2.057822370865849, "grad_norm": 2.665119693238063, "learning_rate": 4.742180752509569e-06, "loss": 0.3495, "step": 27688 }, { "epoch": 2.0578966926793014, "grad_norm": 1.8927010328603495, "learning_rate": 4.741498259432187e-06, "loss": 0.3647, "step": 27689 }, { "epoch": 2.0579710144927534, "grad_norm": 2.597933964810222, "learning_rate": 4.74081580020877e-06, "loss": 0.3656, "step": 27690 }, { "epoch": 2.058045336306206, "grad_norm": 2.2915564776829753, "learning_rate": 4.740133374843708e-06, "loss": 0.3639, "step": 27691 }, { "epoch": 2.058119658119658, "grad_norm": 1.773712251431674, "learning_rate": 4.739450983341391e-06, "loss": 0.3263, "step": 27692 }, { "epoch": 2.0581939799331104, "grad_norm": 1.8307811511917071, "learning_rate": 4.738768625706219e-06, "loss": 0.2778, "step": 27693 }, { "epoch": 2.058268301746563, "grad_norm": 2.926264168181142, "learning_rate": 4.738086301942583e-06, "loss": 0.3126, "step": 27694 }, { "epoch": 2.058342623560015, "grad_norm": 2.625134286444395, "learning_rate": 4.737404012054868e-06, "loss": 0.42, "step": 27695 }, { "epoch": 2.0584169453734673, "grad_norm": 2.143750373828475, "learning_rate": 4.736721756047482e-06, "loss": 0.3484, "step": 27696 }, { "epoch": 2.0584912671869193, "grad_norm": 2.3262179830325675, "learning_rate": 4.736039533924801e-06, "loss": 0.3097, "step": 27697 }, { "epoch": 2.058565589000372, "grad_norm": 2.4693508872116494, "learning_rate": 4.735357345691228e-06, "loss": 0.2535, "step": 27698 }, { "epoch": 2.058639910813824, "grad_norm": 2.1778430640254838, "learning_rate": 4.734675191351147e-06, "loss": 0.3273, "step": 27699 }, { "epoch": 2.0587142326272763, "grad_norm": 2.19613446922913, "learning_rate": 4.733993070908957e-06, "loss": 0.2582, "step": 27700 }, { "epoch": 2.0587885544407283, "grad_norm": 2.0835508097856086, "learning_rate": 4.733310984369048e-06, "loss": 0.2512, "step": 27701 }, { "epoch": 2.0588628762541807, "grad_norm": 2.281505798739837, "learning_rate": 4.732628931735805e-06, "loss": 0.3386, "step": 27702 }, { "epoch": 2.0589371980676328, "grad_norm": 1.8346257673762254, "learning_rate": 4.731946913013628e-06, "loss": 0.2774, "step": 27703 }, { "epoch": 2.059011519881085, "grad_norm": 2.014528395757269, "learning_rate": 4.7312649282068986e-06, "loss": 0.2174, "step": 27704 }, { "epoch": 2.0590858416945372, "grad_norm": 2.0866231223970177, "learning_rate": 4.730582977320017e-06, "loss": 0.2583, "step": 27705 }, { "epoch": 2.0591601635079897, "grad_norm": 1.8875350614593187, "learning_rate": 4.729901060357368e-06, "loss": 0.24, "step": 27706 }, { "epoch": 2.0592344853214417, "grad_norm": 2.7317978491823047, "learning_rate": 4.7292191773233396e-06, "loss": 0.3391, "step": 27707 }, { "epoch": 2.059308807134894, "grad_norm": 3.747856097258301, "learning_rate": 4.728537328222331e-06, "loss": 0.368, "step": 27708 }, { "epoch": 2.059383128948346, "grad_norm": 2.3198469244767996, "learning_rate": 4.727855513058719e-06, "loss": 0.3502, "step": 27709 }, { "epoch": 2.0594574507617986, "grad_norm": 2.2244414167837587, "learning_rate": 4.727173731836904e-06, "loss": 0.3463, "step": 27710 }, { "epoch": 2.0595317725752507, "grad_norm": 2.28724261570536, "learning_rate": 4.726491984561265e-06, "loss": 0.2204, "step": 27711 }, { "epoch": 2.059606094388703, "grad_norm": 2.001142320568702, "learning_rate": 4.725810271236203e-06, "loss": 0.2743, "step": 27712 }, { "epoch": 2.059680416202155, "grad_norm": 2.538886293761375, "learning_rate": 4.7251285918661e-06, "loss": 0.3106, "step": 27713 }, { "epoch": 2.0597547380156076, "grad_norm": 2.018175701079941, "learning_rate": 4.724446946455341e-06, "loss": 0.2871, "step": 27714 }, { "epoch": 2.0598290598290596, "grad_norm": 2.2325007828527155, "learning_rate": 4.7237653350083235e-06, "loss": 0.323, "step": 27715 }, { "epoch": 2.059903381642512, "grad_norm": 2.741751880962167, "learning_rate": 4.7230837575294295e-06, "loss": 0.3641, "step": 27716 }, { "epoch": 2.0599777034559645, "grad_norm": 2.362423200052749, "learning_rate": 4.7224022140230466e-06, "loss": 0.2893, "step": 27717 }, { "epoch": 2.0600520252694166, "grad_norm": 3.366446232169386, "learning_rate": 4.7217207044935676e-06, "loss": 0.4166, "step": 27718 }, { "epoch": 2.060126347082869, "grad_norm": 1.853707765487, "learning_rate": 4.721039228945377e-06, "loss": 0.3048, "step": 27719 }, { "epoch": 2.060200668896321, "grad_norm": 3.3241657074009985, "learning_rate": 4.720357787382861e-06, "loss": 0.2896, "step": 27720 }, { "epoch": 2.0602749907097735, "grad_norm": 2.0237798885005263, "learning_rate": 4.719676379810404e-06, "loss": 0.2863, "step": 27721 }, { "epoch": 2.0603493125232255, "grad_norm": 1.601948206672077, "learning_rate": 4.718995006232401e-06, "loss": 0.2596, "step": 27722 }, { "epoch": 2.060423634336678, "grad_norm": 2.8258508298040073, "learning_rate": 4.718313666653234e-06, "loss": 0.3653, "step": 27723 }, { "epoch": 2.06049795615013, "grad_norm": 3.082117468616882, "learning_rate": 4.717632361077285e-06, "loss": 0.251, "step": 27724 }, { "epoch": 2.0605722779635824, "grad_norm": 1.963225369739851, "learning_rate": 4.71695108950895e-06, "loss": 0.2819, "step": 27725 }, { "epoch": 2.0606465997770345, "grad_norm": 2.4067278727114876, "learning_rate": 4.7162698519526055e-06, "loss": 0.3359, "step": 27726 }, { "epoch": 2.060720921590487, "grad_norm": 2.2457015256483137, "learning_rate": 4.7155886484126445e-06, "loss": 0.3388, "step": 27727 }, { "epoch": 2.060795243403939, "grad_norm": 2.211253673640115, "learning_rate": 4.71490747889345e-06, "loss": 0.2608, "step": 27728 }, { "epoch": 2.0608695652173914, "grad_norm": 2.341987242515259, "learning_rate": 4.714226343399403e-06, "loss": 0.3031, "step": 27729 }, { "epoch": 2.0609438870308434, "grad_norm": 1.9855008797049518, "learning_rate": 4.713545241934901e-06, "loss": 0.2594, "step": 27730 }, { "epoch": 2.061018208844296, "grad_norm": 2.089525737235359, "learning_rate": 4.712864174504312e-06, "loss": 0.3339, "step": 27731 }, { "epoch": 2.061092530657748, "grad_norm": 2.308794473585046, "learning_rate": 4.712183141112032e-06, "loss": 0.3317, "step": 27732 }, { "epoch": 2.0611668524712004, "grad_norm": 3.029510601411372, "learning_rate": 4.71150214176244e-06, "loss": 0.3518, "step": 27733 }, { "epoch": 2.0612411742846524, "grad_norm": 2.7941031329322255, "learning_rate": 4.710821176459927e-06, "loss": 0.3526, "step": 27734 }, { "epoch": 2.061315496098105, "grad_norm": 2.1097464836453246, "learning_rate": 4.710140245208872e-06, "loss": 0.3296, "step": 27735 }, { "epoch": 2.061389817911557, "grad_norm": 1.8354161701093095, "learning_rate": 4.709459348013657e-06, "loss": 0.2304, "step": 27736 }, { "epoch": 2.0614641397250093, "grad_norm": 2.1625360360935524, "learning_rate": 4.708778484878671e-06, "loss": 0.3047, "step": 27737 }, { "epoch": 2.0615384615384613, "grad_norm": 1.9869378028657332, "learning_rate": 4.708097655808295e-06, "loss": 0.2981, "step": 27738 }, { "epoch": 2.061612783351914, "grad_norm": 2.513023226940695, "learning_rate": 4.707416860806907e-06, "loss": 0.3625, "step": 27739 }, { "epoch": 2.0616871051653662, "grad_norm": 2.3464595317687866, "learning_rate": 4.706736099878899e-06, "loss": 0.2838, "step": 27740 }, { "epoch": 2.0617614269788183, "grad_norm": 2.7053851988411903, "learning_rate": 4.706055373028645e-06, "loss": 0.2407, "step": 27741 }, { "epoch": 2.0618357487922707, "grad_norm": 1.7532467080763312, "learning_rate": 4.70537468026054e-06, "loss": 0.2484, "step": 27742 }, { "epoch": 2.0619100706057227, "grad_norm": 2.337355595686945, "learning_rate": 4.7046940215789515e-06, "loss": 0.2657, "step": 27743 }, { "epoch": 2.061984392419175, "grad_norm": 2.2522555904661017, "learning_rate": 4.704013396988271e-06, "loss": 0.2824, "step": 27744 }, { "epoch": 2.062058714232627, "grad_norm": 1.956740973591015, "learning_rate": 4.703332806492878e-06, "loss": 0.3045, "step": 27745 }, { "epoch": 2.0621330360460797, "grad_norm": 2.228385059886146, "learning_rate": 4.702652250097148e-06, "loss": 0.2877, "step": 27746 }, { "epoch": 2.0622073578595317, "grad_norm": 2.0637774543150402, "learning_rate": 4.701971727805473e-06, "loss": 0.3062, "step": 27747 }, { "epoch": 2.062281679672984, "grad_norm": 2.4095448185711748, "learning_rate": 4.701291239622226e-06, "loss": 0.2193, "step": 27748 }, { "epoch": 2.062356001486436, "grad_norm": 1.7145432878924205, "learning_rate": 4.700610785551794e-06, "loss": 0.2784, "step": 27749 }, { "epoch": 2.0624303232998886, "grad_norm": 3.025103361642786, "learning_rate": 4.699930365598554e-06, "loss": 0.379, "step": 27750 }, { "epoch": 2.0625046451133406, "grad_norm": 2.6491129834613103, "learning_rate": 4.699249979766884e-06, "loss": 0.3377, "step": 27751 }, { "epoch": 2.062578966926793, "grad_norm": 2.296711842773152, "learning_rate": 4.6985696280611724e-06, "loss": 0.2142, "step": 27752 }, { "epoch": 2.062653288740245, "grad_norm": 2.389997779201638, "learning_rate": 4.697889310485794e-06, "loss": 0.2625, "step": 27753 }, { "epoch": 2.0627276105536976, "grad_norm": 1.8444245025408308, "learning_rate": 4.6972090270451286e-06, "loss": 0.2803, "step": 27754 }, { "epoch": 2.0628019323671496, "grad_norm": 6.154778947919108, "learning_rate": 4.696528777743553e-06, "loss": 0.3314, "step": 27755 }, { "epoch": 2.062876254180602, "grad_norm": 2.713212671561643, "learning_rate": 4.6958485625854545e-06, "loss": 0.2791, "step": 27756 }, { "epoch": 2.062950575994054, "grad_norm": 2.5718324828889134, "learning_rate": 4.695168381575207e-06, "loss": 0.3776, "step": 27757 }, { "epoch": 2.0630248978075065, "grad_norm": 2.5337928856362018, "learning_rate": 4.694488234717186e-06, "loss": 0.3425, "step": 27758 }, { "epoch": 2.0630992196209585, "grad_norm": 1.7243761995913933, "learning_rate": 4.693808122015778e-06, "loss": 0.2099, "step": 27759 }, { "epoch": 2.063173541434411, "grad_norm": 2.4950281532257863, "learning_rate": 4.693128043475359e-06, "loss": 0.2569, "step": 27760 }, { "epoch": 2.063247863247863, "grad_norm": 2.753969505610061, "learning_rate": 4.692447999100303e-06, "loss": 0.3346, "step": 27761 }, { "epoch": 2.0633221850613155, "grad_norm": 2.35077273660253, "learning_rate": 4.691767988894993e-06, "loss": 0.2893, "step": 27762 }, { "epoch": 2.063396506874768, "grad_norm": 2.254507600159845, "learning_rate": 4.6910880128638025e-06, "loss": 0.2974, "step": 27763 }, { "epoch": 2.06347082868822, "grad_norm": 1.632263494892044, "learning_rate": 4.69040807101112e-06, "loss": 0.198, "step": 27764 }, { "epoch": 2.0635451505016724, "grad_norm": 2.758888805324287, "learning_rate": 4.689728163341307e-06, "loss": 0.3181, "step": 27765 }, { "epoch": 2.0636194723151244, "grad_norm": 1.7763263173976074, "learning_rate": 4.689048289858753e-06, "loss": 0.1793, "step": 27766 }, { "epoch": 2.063693794128577, "grad_norm": 2.029291382546672, "learning_rate": 4.688368450567829e-06, "loss": 0.3594, "step": 27767 }, { "epoch": 2.063768115942029, "grad_norm": 2.388939609563297, "learning_rate": 4.687688645472911e-06, "loss": 0.3294, "step": 27768 }, { "epoch": 2.0638424377554814, "grad_norm": 2.068584744920847, "learning_rate": 4.68700887457838e-06, "loss": 0.2704, "step": 27769 }, { "epoch": 2.0639167595689334, "grad_norm": 2.164912242569474, "learning_rate": 4.686329137888609e-06, "loss": 0.2897, "step": 27770 }, { "epoch": 2.063991081382386, "grad_norm": 3.037856604229408, "learning_rate": 4.685649435407977e-06, "loss": 0.4058, "step": 27771 }, { "epoch": 2.064065403195838, "grad_norm": 2.746575503991992, "learning_rate": 4.684969767140859e-06, "loss": 0.2602, "step": 27772 }, { "epoch": 2.0641397250092903, "grad_norm": 2.0490644871476476, "learning_rate": 4.684290133091626e-06, "loss": 0.2349, "step": 27773 }, { "epoch": 2.0642140468227423, "grad_norm": 2.3155518677094844, "learning_rate": 4.683610533264661e-06, "loss": 0.3552, "step": 27774 }, { "epoch": 2.064288368636195, "grad_norm": 2.455784508474391, "learning_rate": 4.6829309676643354e-06, "loss": 0.3329, "step": 27775 }, { "epoch": 2.064362690449647, "grad_norm": 2.4166853345005546, "learning_rate": 4.682251436295023e-06, "loss": 0.3262, "step": 27776 }, { "epoch": 2.0644370122630993, "grad_norm": 2.4126413055233957, "learning_rate": 4.681571939161098e-06, "loss": 0.2848, "step": 27777 }, { "epoch": 2.0645113340765513, "grad_norm": 2.39398690851926, "learning_rate": 4.6808924762669405e-06, "loss": 0.3251, "step": 27778 }, { "epoch": 2.0645856558900038, "grad_norm": 2.172935879911562, "learning_rate": 4.680213047616921e-06, "loss": 0.2666, "step": 27779 }, { "epoch": 2.0646599777034558, "grad_norm": 2.151173975671817, "learning_rate": 4.6795336532154105e-06, "loss": 0.2945, "step": 27780 }, { "epoch": 2.0647342995169082, "grad_norm": 2.6566148232943574, "learning_rate": 4.678854293066789e-06, "loss": 0.4035, "step": 27781 }, { "epoch": 2.0648086213303602, "grad_norm": 2.3066078666604795, "learning_rate": 4.678174967175428e-06, "loss": 0.2251, "step": 27782 }, { "epoch": 2.0648829431438127, "grad_norm": 2.401601556549567, "learning_rate": 4.677495675545697e-06, "loss": 0.284, "step": 27783 }, { "epoch": 2.064957264957265, "grad_norm": 1.8647928105058051, "learning_rate": 4.6768164181819765e-06, "loss": 0.2193, "step": 27784 }, { "epoch": 2.065031586770717, "grad_norm": 2.4499235914789215, "learning_rate": 4.676137195088631e-06, "loss": 0.3183, "step": 27785 }, { "epoch": 2.0651059085841696, "grad_norm": 2.1141908682202737, "learning_rate": 4.6754580062700425e-06, "loss": 0.2636, "step": 27786 }, { "epoch": 2.0651802303976217, "grad_norm": 2.7021961334130076, "learning_rate": 4.67477885173058e-06, "loss": 0.4093, "step": 27787 }, { "epoch": 2.065254552211074, "grad_norm": 2.7067125672691668, "learning_rate": 4.674099731474613e-06, "loss": 0.3582, "step": 27788 }, { "epoch": 2.065328874024526, "grad_norm": 1.8606854207615149, "learning_rate": 4.673420645506517e-06, "loss": 0.232, "step": 27789 }, { "epoch": 2.0654031958379786, "grad_norm": 2.3703148336464626, "learning_rate": 4.672741593830659e-06, "loss": 0.3602, "step": 27790 }, { "epoch": 2.0654775176514306, "grad_norm": 3.135280404838385, "learning_rate": 4.672062576451416e-06, "loss": 0.3427, "step": 27791 }, { "epoch": 2.065551839464883, "grad_norm": 2.192454913149981, "learning_rate": 4.671383593373155e-06, "loss": 0.2956, "step": 27792 }, { "epoch": 2.065626161278335, "grad_norm": 2.0223096076854605, "learning_rate": 4.6707046446002545e-06, "loss": 0.2769, "step": 27793 }, { "epoch": 2.0657004830917876, "grad_norm": 1.8567692789437742, "learning_rate": 4.67002573013708e-06, "loss": 0.2318, "step": 27794 }, { "epoch": 2.0657748049052396, "grad_norm": 2.2395417286558548, "learning_rate": 4.669346849988e-06, "loss": 0.2543, "step": 27795 }, { "epoch": 2.065849126718692, "grad_norm": 2.3070209335806324, "learning_rate": 4.668668004157392e-06, "loss": 0.2771, "step": 27796 }, { "epoch": 2.065923448532144, "grad_norm": 2.471992521613565, "learning_rate": 4.667989192649619e-06, "loss": 0.3338, "step": 27797 }, { "epoch": 2.0659977703455965, "grad_norm": 1.9448394419283843, "learning_rate": 4.667310415469063e-06, "loss": 0.2583, "step": 27798 }, { "epoch": 2.0660720921590485, "grad_norm": 1.5942685088220785, "learning_rate": 4.6666316726200815e-06, "loss": 0.1822, "step": 27799 }, { "epoch": 2.066146413972501, "grad_norm": 2.657746504571525, "learning_rate": 4.6659529641070446e-06, "loss": 0.3752, "step": 27800 }, { "epoch": 2.066220735785953, "grad_norm": 2.7082801173016158, "learning_rate": 4.665274289934329e-06, "loss": 0.3393, "step": 27801 }, { "epoch": 2.0662950575994055, "grad_norm": 1.7837354235684189, "learning_rate": 4.664595650106299e-06, "loss": 0.2899, "step": 27802 }, { "epoch": 2.0663693794128575, "grad_norm": 1.7516282574573776, "learning_rate": 4.663917044627328e-06, "loss": 0.2034, "step": 27803 }, { "epoch": 2.06644370122631, "grad_norm": 1.739791939725361, "learning_rate": 4.663238473501781e-06, "loss": 0.1843, "step": 27804 }, { "epoch": 2.0665180230397624, "grad_norm": 2.457623471373445, "learning_rate": 4.662559936734026e-06, "loss": 0.3477, "step": 27805 }, { "epoch": 2.0665923448532144, "grad_norm": 2.5701169909577044, "learning_rate": 4.661881434328436e-06, "loss": 0.2229, "step": 27806 }, { "epoch": 2.066666666666667, "grad_norm": 2.8391318675950235, "learning_rate": 4.661202966289372e-06, "loss": 0.4273, "step": 27807 }, { "epoch": 2.066740988480119, "grad_norm": 1.836866289330932, "learning_rate": 4.6605245326212116e-06, "loss": 0.2802, "step": 27808 }, { "epoch": 2.0668153102935714, "grad_norm": 2.4086641325132447, "learning_rate": 4.659846133328315e-06, "loss": 0.2973, "step": 27809 }, { "epoch": 2.0668896321070234, "grad_norm": 2.2355427014018554, "learning_rate": 4.659167768415054e-06, "loss": 0.3354, "step": 27810 }, { "epoch": 2.066963953920476, "grad_norm": 2.697705789595173, "learning_rate": 4.658489437885792e-06, "loss": 0.3655, "step": 27811 }, { "epoch": 2.067038275733928, "grad_norm": 2.3357976378923166, "learning_rate": 4.657811141744895e-06, "loss": 0.2388, "step": 27812 }, { "epoch": 2.0671125975473803, "grad_norm": 2.3477051223724468, "learning_rate": 4.657132879996738e-06, "loss": 0.273, "step": 27813 }, { "epoch": 2.0671869193608323, "grad_norm": 2.5833967663529864, "learning_rate": 4.656454652645677e-06, "loss": 0.2912, "step": 27814 }, { "epoch": 2.067261241174285, "grad_norm": 2.4965815277575967, "learning_rate": 4.655776459696086e-06, "loss": 0.2043, "step": 27815 }, { "epoch": 2.067335562987737, "grad_norm": 2.517896983496343, "learning_rate": 4.655098301152331e-06, "loss": 0.4363, "step": 27816 }, { "epoch": 2.0674098848011893, "grad_norm": 2.81298528531765, "learning_rate": 4.654420177018772e-06, "loss": 0.3453, "step": 27817 }, { "epoch": 2.0674842066146413, "grad_norm": 2.5358878958347164, "learning_rate": 4.653742087299782e-06, "loss": 0.2881, "step": 27818 }, { "epoch": 2.0675585284280937, "grad_norm": 2.3647277629006744, "learning_rate": 4.6530640319997225e-06, "loss": 0.2863, "step": 27819 }, { "epoch": 2.0676328502415457, "grad_norm": 2.5128506108807307, "learning_rate": 4.652386011122956e-06, "loss": 0.3312, "step": 27820 }, { "epoch": 2.067707172054998, "grad_norm": 2.585047840152045, "learning_rate": 4.651708024673858e-06, "loss": 0.3107, "step": 27821 }, { "epoch": 2.0677814938684502, "grad_norm": 2.267478972734047, "learning_rate": 4.651030072656779e-06, "loss": 0.2636, "step": 27822 }, { "epoch": 2.0678558156819027, "grad_norm": 1.7441786557413566, "learning_rate": 4.650352155076095e-06, "loss": 0.2029, "step": 27823 }, { "epoch": 2.0679301374953547, "grad_norm": 1.9699657550115643, "learning_rate": 4.649674271936161e-06, "loss": 0.3046, "step": 27824 }, { "epoch": 2.068004459308807, "grad_norm": 2.203812310467346, "learning_rate": 4.648996423241352e-06, "loss": 0.3109, "step": 27825 }, { "epoch": 2.068078781122259, "grad_norm": 2.0313229171075147, "learning_rate": 4.648318608996025e-06, "loss": 0.254, "step": 27826 }, { "epoch": 2.0681531029357116, "grad_norm": 2.824957749719178, "learning_rate": 4.647640829204542e-06, "loss": 0.3251, "step": 27827 }, { "epoch": 2.068227424749164, "grad_norm": 2.2430351103158657, "learning_rate": 4.646963083871273e-06, "loss": 0.3194, "step": 27828 }, { "epoch": 2.068301746562616, "grad_norm": 2.0551755186406413, "learning_rate": 4.646285373000575e-06, "loss": 0.2816, "step": 27829 }, { "epoch": 2.0683760683760686, "grad_norm": 2.6058228719582113, "learning_rate": 4.645607696596818e-06, "loss": 0.3609, "step": 27830 }, { "epoch": 2.0684503901895206, "grad_norm": 2.5259234026258746, "learning_rate": 4.6449300546643594e-06, "loss": 0.3285, "step": 27831 }, { "epoch": 2.068524712002973, "grad_norm": 2.034791909821825, "learning_rate": 4.644252447207564e-06, "loss": 0.2699, "step": 27832 }, { "epoch": 2.068599033816425, "grad_norm": 2.0758860817152756, "learning_rate": 4.643574874230794e-06, "loss": 0.3517, "step": 27833 }, { "epoch": 2.0686733556298775, "grad_norm": 1.8732941897661175, "learning_rate": 4.642897335738406e-06, "loss": 0.2175, "step": 27834 }, { "epoch": 2.0687476774433295, "grad_norm": 2.245479167830448, "learning_rate": 4.642219831734771e-06, "loss": 0.2752, "step": 27835 }, { "epoch": 2.068821999256782, "grad_norm": 2.7542490502649866, "learning_rate": 4.641542362224244e-06, "loss": 0.275, "step": 27836 }, { "epoch": 2.068896321070234, "grad_norm": 2.3142818492310093, "learning_rate": 4.640864927211193e-06, "loss": 0.3469, "step": 27837 }, { "epoch": 2.0689706428836865, "grad_norm": 1.8234768088740272, "learning_rate": 4.640187526699974e-06, "loss": 0.2376, "step": 27838 }, { "epoch": 2.0690449646971385, "grad_norm": 1.8132350914797557, "learning_rate": 4.639510160694946e-06, "loss": 0.2814, "step": 27839 }, { "epoch": 2.069119286510591, "grad_norm": 2.5679497749726203, "learning_rate": 4.638832829200479e-06, "loss": 0.3502, "step": 27840 }, { "epoch": 2.069193608324043, "grad_norm": 1.9764915813407795, "learning_rate": 4.638155532220927e-06, "loss": 0.2279, "step": 27841 }, { "epoch": 2.0692679301374954, "grad_norm": 2.230974309811855, "learning_rate": 4.637478269760648e-06, "loss": 0.2629, "step": 27842 }, { "epoch": 2.0693422519509475, "grad_norm": 1.9406514613126584, "learning_rate": 4.636801041824013e-06, "loss": 0.2603, "step": 27843 }, { "epoch": 2.0694165737644, "grad_norm": 2.138513539406875, "learning_rate": 4.636123848415367e-06, "loss": 0.3555, "step": 27844 }, { "epoch": 2.069490895577852, "grad_norm": 1.6776956856884895, "learning_rate": 4.635446689539081e-06, "loss": 0.2108, "step": 27845 }, { "epoch": 2.0695652173913044, "grad_norm": 3.391530204400088, "learning_rate": 4.634769565199507e-06, "loss": 0.3332, "step": 27846 }, { "epoch": 2.0696395392047564, "grad_norm": 2.328310989462182, "learning_rate": 4.634092475401013e-06, "loss": 0.3132, "step": 27847 }, { "epoch": 2.069713861018209, "grad_norm": 2.9023270700963275, "learning_rate": 4.633415420147952e-06, "loss": 0.3527, "step": 27848 }, { "epoch": 2.069788182831661, "grad_norm": 1.9267366879964534, "learning_rate": 4.63273839944468e-06, "loss": 0.3161, "step": 27849 }, { "epoch": 2.0698625046451133, "grad_norm": 2.479428601455624, "learning_rate": 4.632061413295564e-06, "loss": 0.3886, "step": 27850 }, { "epoch": 2.069936826458566, "grad_norm": 2.5800749087130894, "learning_rate": 4.6313844617049556e-06, "loss": 0.2839, "step": 27851 }, { "epoch": 2.070011148272018, "grad_norm": 2.9128045028099634, "learning_rate": 4.630707544677218e-06, "loss": 0.3488, "step": 27852 }, { "epoch": 2.0700854700854703, "grad_norm": 2.179447253102572, "learning_rate": 4.630030662216707e-06, "loss": 0.3048, "step": 27853 }, { "epoch": 2.0701597918989223, "grad_norm": 2.252767092507766, "learning_rate": 4.629353814327776e-06, "loss": 0.2751, "step": 27854 }, { "epoch": 2.0702341137123748, "grad_norm": 2.2471327745639114, "learning_rate": 4.6286770010147944e-06, "loss": 0.1974, "step": 27855 }, { "epoch": 2.0703084355258268, "grad_norm": 2.432231257165911, "learning_rate": 4.628000222282103e-06, "loss": 0.3535, "step": 27856 }, { "epoch": 2.0703827573392792, "grad_norm": 1.8886337756225404, "learning_rate": 4.627323478134071e-06, "loss": 0.1982, "step": 27857 }, { "epoch": 2.0704570791527313, "grad_norm": 2.777908380499919, "learning_rate": 4.626646768575049e-06, "loss": 0.3308, "step": 27858 }, { "epoch": 2.0705314009661837, "grad_norm": 1.9565574013258427, "learning_rate": 4.6259700936094e-06, "loss": 0.2705, "step": 27859 }, { "epoch": 2.0706057227796357, "grad_norm": 2.257266424519264, "learning_rate": 4.6252934532414775e-06, "loss": 0.3085, "step": 27860 }, { "epoch": 2.070680044593088, "grad_norm": 1.9751862221041345, "learning_rate": 4.624616847475631e-06, "loss": 0.252, "step": 27861 }, { "epoch": 2.07075436640654, "grad_norm": 2.2172494690682405, "learning_rate": 4.623940276316228e-06, "loss": 0.3115, "step": 27862 }, { "epoch": 2.0708286882199927, "grad_norm": 2.296577056357701, "learning_rate": 4.623263739767617e-06, "loss": 0.3337, "step": 27863 }, { "epoch": 2.0709030100334447, "grad_norm": 2.388221376825264, "learning_rate": 4.622587237834153e-06, "loss": 0.3217, "step": 27864 }, { "epoch": 2.070977331846897, "grad_norm": 3.4264798437675794, "learning_rate": 4.621910770520196e-06, "loss": 0.2851, "step": 27865 }, { "epoch": 2.071051653660349, "grad_norm": 1.8713416050310387, "learning_rate": 4.6212343378300985e-06, "loss": 0.2486, "step": 27866 }, { "epoch": 2.0711259754738016, "grad_norm": 2.509950901837071, "learning_rate": 4.620557939768215e-06, "loss": 0.2623, "step": 27867 }, { "epoch": 2.0712002972872536, "grad_norm": 2.994239853870304, "learning_rate": 4.619881576338897e-06, "loss": 0.3291, "step": 27868 }, { "epoch": 2.071274619100706, "grad_norm": 2.303983585477722, "learning_rate": 4.6192052475465055e-06, "loss": 0.298, "step": 27869 }, { "epoch": 2.071348940914158, "grad_norm": 2.6660309039794354, "learning_rate": 4.618528953395393e-06, "loss": 0.3613, "step": 27870 }, { "epoch": 2.0714232627276106, "grad_norm": 2.4040538284685553, "learning_rate": 4.617852693889906e-06, "loss": 0.3781, "step": 27871 }, { "epoch": 2.0714975845410626, "grad_norm": 1.9315256499355515, "learning_rate": 4.617176469034409e-06, "loss": 0.2404, "step": 27872 }, { "epoch": 2.071571906354515, "grad_norm": 2.2679617558839094, "learning_rate": 4.616500278833247e-06, "loss": 0.3939, "step": 27873 }, { "epoch": 2.0716462281679675, "grad_norm": 1.9268936401138144, "learning_rate": 4.61582412329078e-06, "loss": 0.2518, "step": 27874 }, { "epoch": 2.0717205499814195, "grad_norm": 2.3387046588718445, "learning_rate": 4.6151480024113585e-06, "loss": 0.3043, "step": 27875 }, { "epoch": 2.071794871794872, "grad_norm": 2.406733881323463, "learning_rate": 4.614471916199331e-06, "loss": 0.2423, "step": 27876 }, { "epoch": 2.071869193608324, "grad_norm": 1.8119920076469676, "learning_rate": 4.613795864659061e-06, "loss": 0.1851, "step": 27877 }, { "epoch": 2.0719435154217765, "grad_norm": 2.4195239723241966, "learning_rate": 4.613119847794886e-06, "loss": 0.3278, "step": 27878 }, { "epoch": 2.0720178372352285, "grad_norm": 2.7285042208230013, "learning_rate": 4.61244386561117e-06, "loss": 0.2714, "step": 27879 }, { "epoch": 2.072092159048681, "grad_norm": 1.9351590966709584, "learning_rate": 4.611767918112258e-06, "loss": 0.3006, "step": 27880 }, { "epoch": 2.072166480862133, "grad_norm": 4.433309485583409, "learning_rate": 4.611092005302506e-06, "loss": 0.4087, "step": 27881 }, { "epoch": 2.0722408026755854, "grad_norm": 2.3160544067489712, "learning_rate": 4.6104161271862665e-06, "loss": 0.3088, "step": 27882 }, { "epoch": 2.0723151244890374, "grad_norm": 1.6926879927854785, "learning_rate": 4.609740283767883e-06, "loss": 0.273, "step": 27883 }, { "epoch": 2.07238944630249, "grad_norm": 2.1724895495108796, "learning_rate": 4.609064475051717e-06, "loss": 0.3199, "step": 27884 }, { "epoch": 2.072463768115942, "grad_norm": 1.7489649140760128, "learning_rate": 4.608388701042114e-06, "loss": 0.2489, "step": 27885 }, { "epoch": 2.0725380899293944, "grad_norm": 2.729913558891395, "learning_rate": 4.6077129617434195e-06, "loss": 0.3507, "step": 27886 }, { "epoch": 2.0726124117428464, "grad_norm": 2.4598467113497824, "learning_rate": 4.607037257159994e-06, "loss": 0.3448, "step": 27887 }, { "epoch": 2.072686733556299, "grad_norm": 2.2955653173363855, "learning_rate": 4.606361587296182e-06, "loss": 0.2807, "step": 27888 }, { "epoch": 2.072761055369751, "grad_norm": 2.2079960379803047, "learning_rate": 4.605685952156336e-06, "loss": 0.3037, "step": 27889 }, { "epoch": 2.0728353771832033, "grad_norm": 3.414086902687893, "learning_rate": 4.6050103517447985e-06, "loss": 0.3877, "step": 27890 }, { "epoch": 2.0729096989966553, "grad_norm": 1.8326430084242888, "learning_rate": 4.6043347860659295e-06, "loss": 0.2232, "step": 27891 }, { "epoch": 2.072984020810108, "grad_norm": 1.9934002575405154, "learning_rate": 4.603659255124072e-06, "loss": 0.2802, "step": 27892 }, { "epoch": 2.07305834262356, "grad_norm": 4.370968121305298, "learning_rate": 4.602983758923573e-06, "loss": 0.3056, "step": 27893 }, { "epoch": 2.0731326644370123, "grad_norm": 2.023226315855279, "learning_rate": 4.602308297468789e-06, "loss": 0.3, "step": 27894 }, { "epoch": 2.0732069862504643, "grad_norm": 2.547247813977275, "learning_rate": 4.601632870764059e-06, "loss": 0.3422, "step": 27895 }, { "epoch": 2.0732813080639168, "grad_norm": 2.3407816286272363, "learning_rate": 4.60095747881374e-06, "loss": 0.2897, "step": 27896 }, { "epoch": 2.073355629877369, "grad_norm": 2.0603577540287628, "learning_rate": 4.600282121622178e-06, "loss": 0.2389, "step": 27897 }, { "epoch": 2.0734299516908212, "grad_norm": 2.2150618066039414, "learning_rate": 4.599606799193715e-06, "loss": 0.2904, "step": 27898 }, { "epoch": 2.0735042735042737, "grad_norm": 1.7275410671601459, "learning_rate": 4.598931511532707e-06, "loss": 0.295, "step": 27899 }, { "epoch": 2.0735785953177257, "grad_norm": 2.629153939746905, "learning_rate": 4.598256258643498e-06, "loss": 0.2745, "step": 27900 }, { "epoch": 2.073652917131178, "grad_norm": 1.928047691236707, "learning_rate": 4.5975810405304346e-06, "loss": 0.3144, "step": 27901 }, { "epoch": 2.07372723894463, "grad_norm": 2.2992891990096713, "learning_rate": 4.5969058571978605e-06, "loss": 0.3185, "step": 27902 }, { "epoch": 2.0738015607580826, "grad_norm": 2.4543672645268595, "learning_rate": 4.59623070865013e-06, "loss": 0.2573, "step": 27903 }, { "epoch": 2.0738758825715347, "grad_norm": 2.948145901261095, "learning_rate": 4.595555594891586e-06, "loss": 0.3867, "step": 27904 }, { "epoch": 2.073950204384987, "grad_norm": 1.9759623473437646, "learning_rate": 4.594880515926571e-06, "loss": 0.2898, "step": 27905 }, { "epoch": 2.074024526198439, "grad_norm": 2.0765276309155607, "learning_rate": 4.594205471759439e-06, "loss": 0.3098, "step": 27906 }, { "epoch": 2.0740988480118916, "grad_norm": 2.475225718940352, "learning_rate": 4.59353046239453e-06, "loss": 0.3621, "step": 27907 }, { "epoch": 2.0741731698253436, "grad_norm": 2.4019662456095863, "learning_rate": 4.592855487836189e-06, "loss": 0.251, "step": 27908 }, { "epoch": 2.074247491638796, "grad_norm": 2.218112166583004, "learning_rate": 4.592180548088767e-06, "loss": 0.2734, "step": 27909 }, { "epoch": 2.074321813452248, "grad_norm": 2.316500905483614, "learning_rate": 4.591505643156602e-06, "loss": 0.3581, "step": 27910 }, { "epoch": 2.0743961352657005, "grad_norm": 1.5741117526191661, "learning_rate": 4.5908307730440515e-06, "loss": 0.1958, "step": 27911 }, { "epoch": 2.0744704570791526, "grad_norm": 1.9379560636227542, "learning_rate": 4.5901559377554435e-06, "loss": 0.2944, "step": 27912 }, { "epoch": 2.074544778892605, "grad_norm": 2.2135251832766905, "learning_rate": 4.589481137295135e-06, "loss": 0.2573, "step": 27913 }, { "epoch": 2.074619100706057, "grad_norm": 2.041288854876773, "learning_rate": 4.588806371667466e-06, "loss": 0.2725, "step": 27914 }, { "epoch": 2.0746934225195095, "grad_norm": 2.0009251256297027, "learning_rate": 4.588131640876777e-06, "loss": 0.3003, "step": 27915 }, { "epoch": 2.0747677443329615, "grad_norm": 1.8284326358610155, "learning_rate": 4.58745694492742e-06, "loss": 0.2504, "step": 27916 }, { "epoch": 2.074842066146414, "grad_norm": 2.1397985789618015, "learning_rate": 4.5867822838237294e-06, "loss": 0.2597, "step": 27917 }, { "epoch": 2.074916387959866, "grad_norm": 2.585160053427341, "learning_rate": 4.586107657570058e-06, "loss": 0.3729, "step": 27918 }, { "epoch": 2.0749907097733185, "grad_norm": 2.2792122543886695, "learning_rate": 4.5854330661707445e-06, "loss": 0.2848, "step": 27919 }, { "epoch": 2.075065031586771, "grad_norm": 1.521915883384192, "learning_rate": 4.58475850963013e-06, "loss": 0.1551, "step": 27920 }, { "epoch": 2.075139353400223, "grad_norm": 2.4484435519655268, "learning_rate": 4.584083987952561e-06, "loss": 0.3569, "step": 27921 }, { "epoch": 2.0752136752136754, "grad_norm": 3.387002205975351, "learning_rate": 4.583409501142378e-06, "loss": 0.3306, "step": 27922 }, { "epoch": 2.0752879970271274, "grad_norm": 2.0200164190080345, "learning_rate": 4.582735049203925e-06, "loss": 0.2375, "step": 27923 }, { "epoch": 2.07536231884058, "grad_norm": 2.1292921460494876, "learning_rate": 4.582060632141542e-06, "loss": 0.2657, "step": 27924 }, { "epoch": 2.075436640654032, "grad_norm": 2.3265836673242917, "learning_rate": 4.5813862499595675e-06, "loss": 0.2831, "step": 27925 }, { "epoch": 2.0755109624674843, "grad_norm": 4.685154952890742, "learning_rate": 4.580711902662351e-06, "loss": 0.2947, "step": 27926 }, { "epoch": 2.0755852842809364, "grad_norm": 2.278556913264934, "learning_rate": 4.580037590254226e-06, "loss": 0.3305, "step": 27927 }, { "epoch": 2.075659606094389, "grad_norm": 2.3782730324951284, "learning_rate": 4.579363312739542e-06, "loss": 0.3546, "step": 27928 }, { "epoch": 2.075733927907841, "grad_norm": 2.129289646569231, "learning_rate": 4.578689070122636e-06, "loss": 0.218, "step": 27929 }, { "epoch": 2.0758082497212933, "grad_norm": 2.8989357477791566, "learning_rate": 4.578014862407845e-06, "loss": 0.3839, "step": 27930 }, { "epoch": 2.0758825715347453, "grad_norm": 2.168061779534224, "learning_rate": 4.577340689599516e-06, "loss": 0.33, "step": 27931 }, { "epoch": 2.0759568933481978, "grad_norm": 1.7180291828517036, "learning_rate": 4.576666551701983e-06, "loss": 0.202, "step": 27932 }, { "epoch": 2.07603121516165, "grad_norm": 2.4032838787296718, "learning_rate": 4.575992448719593e-06, "loss": 0.3294, "step": 27933 }, { "epoch": 2.0761055369751023, "grad_norm": 1.9273065260319397, "learning_rate": 4.575318380656681e-06, "loss": 0.2817, "step": 27934 }, { "epoch": 2.0761798587885543, "grad_norm": 2.392246322186244, "learning_rate": 4.574644347517589e-06, "loss": 0.3276, "step": 27935 }, { "epoch": 2.0762541806020067, "grad_norm": 2.58715268737173, "learning_rate": 4.573970349306655e-06, "loss": 0.3643, "step": 27936 }, { "epoch": 2.0763285024154587, "grad_norm": 2.7441265715756513, "learning_rate": 4.573296386028216e-06, "loss": 0.3636, "step": 27937 }, { "epoch": 2.076402824228911, "grad_norm": 1.9638163579683556, "learning_rate": 4.572622457686615e-06, "loss": 0.2261, "step": 27938 }, { "epoch": 2.076477146042363, "grad_norm": 2.1519780220670013, "learning_rate": 4.571948564286186e-06, "loss": 0.2854, "step": 27939 }, { "epoch": 2.0765514678558157, "grad_norm": 2.2928862015084124, "learning_rate": 4.571274705831274e-06, "loss": 0.3128, "step": 27940 }, { "epoch": 2.076625789669268, "grad_norm": 2.912436092360561, "learning_rate": 4.570600882326214e-06, "loss": 0.2941, "step": 27941 }, { "epoch": 2.07670011148272, "grad_norm": 2.699833383468702, "learning_rate": 4.569927093775339e-06, "loss": 0.3678, "step": 27942 }, { "epoch": 2.0767744332961726, "grad_norm": 2.1179627338786684, "learning_rate": 4.569253340182997e-06, "loss": 0.2942, "step": 27943 }, { "epoch": 2.0768487551096246, "grad_norm": 3.4247934998990015, "learning_rate": 4.568579621553519e-06, "loss": 0.2987, "step": 27944 }, { "epoch": 2.076923076923077, "grad_norm": 2.5842803671138808, "learning_rate": 4.567905937891245e-06, "loss": 0.29, "step": 27945 }, { "epoch": 2.076997398736529, "grad_norm": 1.8580959369651355, "learning_rate": 4.5672322892005094e-06, "loss": 0.2811, "step": 27946 }, { "epoch": 2.0770717205499816, "grad_norm": 2.0341869820093383, "learning_rate": 4.566558675485647e-06, "loss": 0.2562, "step": 27947 }, { "epoch": 2.0771460423634336, "grad_norm": 2.3708583344532568, "learning_rate": 4.565885096751003e-06, "loss": 0.3184, "step": 27948 }, { "epoch": 2.077220364176886, "grad_norm": 2.0321420149985134, "learning_rate": 4.565211553000904e-06, "loss": 0.3247, "step": 27949 }, { "epoch": 2.077294685990338, "grad_norm": 2.2559033216818944, "learning_rate": 4.564538044239694e-06, "loss": 0.3396, "step": 27950 }, { "epoch": 2.0773690078037905, "grad_norm": 2.168868469227307, "learning_rate": 4.563864570471706e-06, "loss": 0.3443, "step": 27951 }, { "epoch": 2.0774433296172425, "grad_norm": 2.2944560618237975, "learning_rate": 4.563191131701274e-06, "loss": 0.2677, "step": 27952 }, { "epoch": 2.077517651430695, "grad_norm": 2.117815448323788, "learning_rate": 4.562517727932737e-06, "loss": 0.323, "step": 27953 }, { "epoch": 2.077591973244147, "grad_norm": 1.77940122646692, "learning_rate": 4.561844359170426e-06, "loss": 0.2583, "step": 27954 }, { "epoch": 2.0776662950575995, "grad_norm": 2.366920976477012, "learning_rate": 4.561171025418682e-06, "loss": 0.2658, "step": 27955 }, { "epoch": 2.0777406168710515, "grad_norm": 2.1164063524800434, "learning_rate": 4.560497726681838e-06, "loss": 0.2593, "step": 27956 }, { "epoch": 2.077814938684504, "grad_norm": 1.8508253237065113, "learning_rate": 4.559824462964227e-06, "loss": 0.2627, "step": 27957 }, { "epoch": 2.077889260497956, "grad_norm": 2.09723071141314, "learning_rate": 4.559151234270183e-06, "loss": 0.2828, "step": 27958 }, { "epoch": 2.0779635823114084, "grad_norm": 1.852085310562223, "learning_rate": 4.5584780406040374e-06, "loss": 0.2474, "step": 27959 }, { "epoch": 2.0780379041248604, "grad_norm": 1.9884504778178314, "learning_rate": 4.557804881970132e-06, "loss": 0.2572, "step": 27960 }, { "epoch": 2.078112225938313, "grad_norm": 1.7889323212189092, "learning_rate": 4.557131758372793e-06, "loss": 0.2751, "step": 27961 }, { "epoch": 2.0781865477517654, "grad_norm": 1.9286891001138131, "learning_rate": 4.55645866981636e-06, "loss": 0.2615, "step": 27962 }, { "epoch": 2.0782608695652174, "grad_norm": 1.8884645761431542, "learning_rate": 4.555785616305165e-06, "loss": 0.2213, "step": 27963 }, { "epoch": 2.07833519137867, "grad_norm": 2.392475275805335, "learning_rate": 4.555112597843535e-06, "loss": 0.3488, "step": 27964 }, { "epoch": 2.078409513192122, "grad_norm": 2.6331249010017337, "learning_rate": 4.554439614435812e-06, "loss": 0.3536, "step": 27965 }, { "epoch": 2.0784838350055743, "grad_norm": 2.1575991755447306, "learning_rate": 4.553766666086323e-06, "loss": 0.2531, "step": 27966 }, { "epoch": 2.0785581568190263, "grad_norm": 2.0584885230720236, "learning_rate": 4.553093752799403e-06, "loss": 0.3013, "step": 27967 }, { "epoch": 2.078632478632479, "grad_norm": 2.3197409513215335, "learning_rate": 4.552420874579383e-06, "loss": 0.3563, "step": 27968 }, { "epoch": 2.078706800445931, "grad_norm": 1.927257509176261, "learning_rate": 4.5517480314305894e-06, "loss": 0.2376, "step": 27969 }, { "epoch": 2.0787811222593833, "grad_norm": 1.888733166247492, "learning_rate": 4.551075223357364e-06, "loss": 0.2086, "step": 27970 }, { "epoch": 2.0788554440728353, "grad_norm": 2.2999290695254904, "learning_rate": 4.55040245036403e-06, "loss": 0.4024, "step": 27971 }, { "epoch": 2.0789297658862878, "grad_norm": 2.5672451498126536, "learning_rate": 4.549729712454925e-06, "loss": 0.3975, "step": 27972 }, { "epoch": 2.0790040876997398, "grad_norm": 2.5257138351617963, "learning_rate": 4.549057009634379e-06, "loss": 0.406, "step": 27973 }, { "epoch": 2.0790784095131922, "grad_norm": 2.382547946361518, "learning_rate": 4.548384341906717e-06, "loss": 0.2882, "step": 27974 }, { "epoch": 2.0791527313266442, "grad_norm": 2.232618814952644, "learning_rate": 4.547711709276276e-06, "loss": 0.3481, "step": 27975 }, { "epoch": 2.0792270531400967, "grad_norm": 2.41335510358822, "learning_rate": 4.547039111747381e-06, "loss": 0.298, "step": 27976 }, { "epoch": 2.0793013749535487, "grad_norm": 1.852977184965974, "learning_rate": 4.54636654932437e-06, "loss": 0.2638, "step": 27977 }, { "epoch": 2.079375696767001, "grad_norm": 2.2064947229945093, "learning_rate": 4.545694022011567e-06, "loss": 0.3242, "step": 27978 }, { "epoch": 2.079450018580453, "grad_norm": 2.250653247834814, "learning_rate": 4.545021529813303e-06, "loss": 0.2272, "step": 27979 }, { "epoch": 2.0795243403939057, "grad_norm": 2.0031957274460335, "learning_rate": 4.544349072733908e-06, "loss": 0.2334, "step": 27980 }, { "epoch": 2.0795986622073577, "grad_norm": 2.6094016847425885, "learning_rate": 4.5436766507777075e-06, "loss": 0.4025, "step": 27981 }, { "epoch": 2.07967298402081, "grad_norm": 2.2633085653186793, "learning_rate": 4.5430042639490355e-06, "loss": 0.319, "step": 27982 }, { "epoch": 2.079747305834262, "grad_norm": 2.438153019602917, "learning_rate": 4.542331912252217e-06, "loss": 0.3203, "step": 27983 }, { "epoch": 2.0798216276477146, "grad_norm": 2.095377231054565, "learning_rate": 4.541659595691586e-06, "loss": 0.1859, "step": 27984 }, { "epoch": 2.079895949461167, "grad_norm": 1.9289160385180284, "learning_rate": 4.540987314271465e-06, "loss": 0.3101, "step": 27985 }, { "epoch": 2.079970271274619, "grad_norm": 3.582837248281561, "learning_rate": 4.540315067996183e-06, "loss": 0.1806, "step": 27986 }, { "epoch": 2.0800445930880715, "grad_norm": 3.014993545038758, "learning_rate": 4.539642856870072e-06, "loss": 0.2419, "step": 27987 }, { "epoch": 2.0801189149015236, "grad_norm": 2.1794485079432637, "learning_rate": 4.538970680897457e-06, "loss": 0.3259, "step": 27988 }, { "epoch": 2.080193236714976, "grad_norm": 3.2654816124539323, "learning_rate": 4.538298540082663e-06, "loss": 0.4135, "step": 27989 }, { "epoch": 2.080267558528428, "grad_norm": 2.0930939773474626, "learning_rate": 4.5376264344300254e-06, "loss": 0.2733, "step": 27990 }, { "epoch": 2.0803418803418805, "grad_norm": 1.8932918886054653, "learning_rate": 4.536954363943859e-06, "loss": 0.2366, "step": 27991 }, { "epoch": 2.0804162021553325, "grad_norm": 2.4126299018782302, "learning_rate": 4.5362823286285e-06, "loss": 0.2951, "step": 27992 }, { "epoch": 2.080490523968785, "grad_norm": 2.085086251931083, "learning_rate": 4.535610328488268e-06, "loss": 0.2597, "step": 27993 }, { "epoch": 2.080564845782237, "grad_norm": 2.9122245949988637, "learning_rate": 4.534938363527498e-06, "loss": 0.2875, "step": 27994 }, { "epoch": 2.0806391675956895, "grad_norm": 2.463987796951171, "learning_rate": 4.534266433750511e-06, "loss": 0.2846, "step": 27995 }, { "epoch": 2.0807134894091415, "grad_norm": 2.217088826600513, "learning_rate": 4.533594539161629e-06, "loss": 0.2361, "step": 27996 }, { "epoch": 2.080787811222594, "grad_norm": 2.2193011089846526, "learning_rate": 4.532922679765185e-06, "loss": 0.3027, "step": 27997 }, { "epoch": 2.080862133036046, "grad_norm": 2.133776027626263, "learning_rate": 4.532250855565498e-06, "loss": 0.2572, "step": 27998 }, { "epoch": 2.0809364548494984, "grad_norm": 2.6912286276256583, "learning_rate": 4.531579066566899e-06, "loss": 0.322, "step": 27999 }, { "epoch": 2.0810107766629504, "grad_norm": 1.8030705925121953, "learning_rate": 4.530907312773712e-06, "loss": 0.2817, "step": 28000 }, { "epoch": 2.081085098476403, "grad_norm": 2.1789617121583476, "learning_rate": 4.530235594190258e-06, "loss": 0.2841, "step": 28001 }, { "epoch": 2.081159420289855, "grad_norm": 2.03565759057797, "learning_rate": 4.529563910820864e-06, "loss": 0.2753, "step": 28002 }, { "epoch": 2.0812337421033074, "grad_norm": 2.5070029711238107, "learning_rate": 4.5288922626698515e-06, "loss": 0.349, "step": 28003 }, { "epoch": 2.0813080639167594, "grad_norm": 1.967334239975587, "learning_rate": 4.528220649741549e-06, "loss": 0.2751, "step": 28004 }, { "epoch": 2.081382385730212, "grad_norm": 2.4604505658066222, "learning_rate": 4.527549072040275e-06, "loss": 0.331, "step": 28005 }, { "epoch": 2.081456707543664, "grad_norm": 2.0345335385476235, "learning_rate": 4.52687752957036e-06, "loss": 0.2635, "step": 28006 }, { "epoch": 2.0815310293571163, "grad_norm": 1.9232815941339458, "learning_rate": 4.526206022336123e-06, "loss": 0.2929, "step": 28007 }, { "epoch": 2.0816053511705688, "grad_norm": 2.8457289783513184, "learning_rate": 4.525534550341883e-06, "loss": 0.3091, "step": 28008 }, { "epoch": 2.081679672984021, "grad_norm": 2.608947694959582, "learning_rate": 4.524863113591973e-06, "loss": 0.3487, "step": 28009 }, { "epoch": 2.0817539947974733, "grad_norm": 2.3690521280234282, "learning_rate": 4.524191712090709e-06, "loss": 0.2899, "step": 28010 }, { "epoch": 2.0818283166109253, "grad_norm": 2.9636756755912055, "learning_rate": 4.523520345842411e-06, "loss": 0.3679, "step": 28011 }, { "epoch": 2.0819026384243777, "grad_norm": 2.8024401420206115, "learning_rate": 4.522849014851409e-06, "loss": 0.3395, "step": 28012 }, { "epoch": 2.0819769602378297, "grad_norm": 2.5977668189771608, "learning_rate": 4.522177719122021e-06, "loss": 0.3821, "step": 28013 }, { "epoch": 2.082051282051282, "grad_norm": 3.1873128394891306, "learning_rate": 4.5215064586585674e-06, "loss": 0.3998, "step": 28014 }, { "epoch": 2.082125603864734, "grad_norm": 2.219019237099892, "learning_rate": 4.520835233465369e-06, "loss": 0.2685, "step": 28015 }, { "epoch": 2.0821999256781867, "grad_norm": 1.9819833937868774, "learning_rate": 4.520164043546752e-06, "loss": 0.2964, "step": 28016 }, { "epoch": 2.0822742474916387, "grad_norm": 1.9794426868556836, "learning_rate": 4.519492888907034e-06, "loss": 0.3056, "step": 28017 }, { "epoch": 2.082348569305091, "grad_norm": 2.025933657156893, "learning_rate": 4.518821769550533e-06, "loss": 0.2683, "step": 28018 }, { "epoch": 2.082422891118543, "grad_norm": 2.437388237111252, "learning_rate": 4.518150685481577e-06, "loss": 0.3023, "step": 28019 }, { "epoch": 2.0824972129319956, "grad_norm": 1.881990530211096, "learning_rate": 4.51747963670448e-06, "loss": 0.2159, "step": 28020 }, { "epoch": 2.0825715347454477, "grad_norm": 2.151773852619564, "learning_rate": 4.516808623223566e-06, "loss": 0.2923, "step": 28021 }, { "epoch": 2.0826458565589, "grad_norm": 2.279705748456383, "learning_rate": 4.5161376450431546e-06, "loss": 0.3468, "step": 28022 }, { "epoch": 2.082720178372352, "grad_norm": 2.005155316191503, "learning_rate": 4.515466702167562e-06, "loss": 0.2888, "step": 28023 }, { "epoch": 2.0827945001858046, "grad_norm": 1.9715354936449243, "learning_rate": 4.514795794601116e-06, "loss": 0.2855, "step": 28024 }, { "epoch": 2.0828688219992566, "grad_norm": 2.479142400672957, "learning_rate": 4.514124922348122e-06, "loss": 0.2812, "step": 28025 }, { "epoch": 2.082943143812709, "grad_norm": 2.1993260621130046, "learning_rate": 4.513454085412913e-06, "loss": 0.2801, "step": 28026 }, { "epoch": 2.083017465626161, "grad_norm": 2.537173429431399, "learning_rate": 4.512783283799796e-06, "loss": 0.3627, "step": 28027 }, { "epoch": 2.0830917874396135, "grad_norm": 2.2006371439671675, "learning_rate": 4.512112517513101e-06, "loss": 0.3153, "step": 28028 }, { "epoch": 2.0831661092530656, "grad_norm": 2.2116368913569477, "learning_rate": 4.5114417865571405e-06, "loss": 0.2821, "step": 28029 }, { "epoch": 2.083240431066518, "grad_norm": 2.707468781965836, "learning_rate": 4.510771090936228e-06, "loss": 0.3292, "step": 28030 }, { "epoch": 2.0833147528799705, "grad_norm": 2.5255951179382548, "learning_rate": 4.5101004306546916e-06, "loss": 0.4105, "step": 28031 }, { "epoch": 2.0833890746934225, "grad_norm": 1.9935497449397477, "learning_rate": 4.509429805716843e-06, "loss": 0.3254, "step": 28032 }, { "epoch": 2.083463396506875, "grad_norm": 2.6812359661903744, "learning_rate": 4.508759216126997e-06, "loss": 0.3868, "step": 28033 }, { "epoch": 2.083537718320327, "grad_norm": 2.69799982449547, "learning_rate": 4.508088661889478e-06, "loss": 0.3393, "step": 28034 }, { "epoch": 2.0836120401337794, "grad_norm": 1.6115021004393135, "learning_rate": 4.5074181430086e-06, "loss": 0.2485, "step": 28035 }, { "epoch": 2.0836863619472314, "grad_norm": 2.1139598087367544, "learning_rate": 4.506747659488678e-06, "loss": 0.2471, "step": 28036 }, { "epoch": 2.083760683760684, "grad_norm": 2.1315200513913566, "learning_rate": 4.506077211334025e-06, "loss": 0.2737, "step": 28037 }, { "epoch": 2.083835005574136, "grad_norm": 2.575282947668714, "learning_rate": 4.505406798548967e-06, "loss": 0.2879, "step": 28038 }, { "epoch": 2.0839093273875884, "grad_norm": 2.444701355054849, "learning_rate": 4.504736421137814e-06, "loss": 0.3518, "step": 28039 }, { "epoch": 2.0839836492010404, "grad_norm": 1.9986759873099655, "learning_rate": 4.50406607910488e-06, "loss": 0.2918, "step": 28040 }, { "epoch": 2.084057971014493, "grad_norm": 2.0996124867688324, "learning_rate": 4.503395772454486e-06, "loss": 0.2742, "step": 28041 }, { "epoch": 2.084132292827945, "grad_norm": 1.6255318405156745, "learning_rate": 4.5027255011909434e-06, "loss": 0.2305, "step": 28042 }, { "epoch": 2.0842066146413973, "grad_norm": 2.400064954442059, "learning_rate": 4.50205526531857e-06, "loss": 0.3512, "step": 28043 }, { "epoch": 2.0842809364548494, "grad_norm": 2.2818502461808206, "learning_rate": 4.50138506484168e-06, "loss": 0.2324, "step": 28044 }, { "epoch": 2.084355258268302, "grad_norm": 2.1080194427434638, "learning_rate": 4.500714899764585e-06, "loss": 0.378, "step": 28045 }, { "epoch": 2.084429580081754, "grad_norm": 2.2418532928891923, "learning_rate": 4.500044770091605e-06, "loss": 0.3094, "step": 28046 }, { "epoch": 2.0845039018952063, "grad_norm": 2.1471275645005146, "learning_rate": 4.499374675827052e-06, "loss": 0.3369, "step": 28047 }, { "epoch": 2.0845782237086583, "grad_norm": 2.4403454434916148, "learning_rate": 4.4987046169752395e-06, "loss": 0.2927, "step": 28048 }, { "epoch": 2.0846525455221108, "grad_norm": 2.2861340647326847, "learning_rate": 4.49803459354048e-06, "loss": 0.2649, "step": 28049 }, { "epoch": 2.084726867335563, "grad_norm": 2.0396951309574476, "learning_rate": 4.4973646055270855e-06, "loss": 0.3164, "step": 28050 }, { "epoch": 2.0848011891490152, "grad_norm": 2.1064615234004678, "learning_rate": 4.496694652939377e-06, "loss": 0.2632, "step": 28051 }, { "epoch": 2.0848755109624673, "grad_norm": 2.1650396667293363, "learning_rate": 4.496024735781657e-06, "loss": 0.2513, "step": 28052 }, { "epoch": 2.0849498327759197, "grad_norm": 2.071073660405948, "learning_rate": 4.495354854058248e-06, "loss": 0.2703, "step": 28053 }, { "epoch": 2.085024154589372, "grad_norm": 2.2407644110238096, "learning_rate": 4.49468500777346e-06, "loss": 0.2563, "step": 28054 }, { "epoch": 2.085098476402824, "grad_norm": 1.861568719348941, "learning_rate": 4.4940151969316e-06, "loss": 0.2565, "step": 28055 }, { "epoch": 2.0851727982162767, "grad_norm": 2.251939833368922, "learning_rate": 4.4933454215369886e-06, "loss": 0.29, "step": 28056 }, { "epoch": 2.0852471200297287, "grad_norm": 2.7391310757987575, "learning_rate": 4.49267568159393e-06, "loss": 0.3541, "step": 28057 }, { "epoch": 2.085321441843181, "grad_norm": 1.8781246136325866, "learning_rate": 4.492005977106746e-06, "loss": 0.2763, "step": 28058 }, { "epoch": 2.085395763656633, "grad_norm": 2.216438889132566, "learning_rate": 4.491336308079736e-06, "loss": 0.3581, "step": 28059 }, { "epoch": 2.0854700854700856, "grad_norm": 4.0518261868750125, "learning_rate": 4.490666674517219e-06, "loss": 0.3833, "step": 28060 }, { "epoch": 2.0855444072835376, "grad_norm": 3.090967891960609, "learning_rate": 4.489997076423505e-06, "loss": 0.3852, "step": 28061 }, { "epoch": 2.08561872909699, "grad_norm": 1.9847735936395474, "learning_rate": 4.4893275138029005e-06, "loss": 0.265, "step": 28062 }, { "epoch": 2.085693050910442, "grad_norm": 2.375206398828682, "learning_rate": 4.488657986659722e-06, "loss": 0.304, "step": 28063 }, { "epoch": 2.0857673727238946, "grad_norm": 2.220205733046113, "learning_rate": 4.487988494998274e-06, "loss": 0.2647, "step": 28064 }, { "epoch": 2.0858416945373466, "grad_norm": 2.2067094802320635, "learning_rate": 4.487319038822875e-06, "loss": 0.3378, "step": 28065 }, { "epoch": 2.085916016350799, "grad_norm": 2.1478194988544392, "learning_rate": 4.48664961813783e-06, "loss": 0.2983, "step": 28066 }, { "epoch": 2.085990338164251, "grad_norm": 2.415373511676964, "learning_rate": 4.485980232947443e-06, "loss": 0.3036, "step": 28067 }, { "epoch": 2.0860646599777035, "grad_norm": 2.1241749335930735, "learning_rate": 4.485310883256034e-06, "loss": 0.3149, "step": 28068 }, { "epoch": 2.0861389817911555, "grad_norm": 1.748732333199875, "learning_rate": 4.484641569067908e-06, "loss": 0.2273, "step": 28069 }, { "epoch": 2.086213303604608, "grad_norm": 2.8490449704622156, "learning_rate": 4.4839722903873726e-06, "loss": 0.3749, "step": 28070 }, { "epoch": 2.08628762541806, "grad_norm": 2.0521879506889813, "learning_rate": 4.4833030472187374e-06, "loss": 0.2946, "step": 28071 }, { "epoch": 2.0863619472315125, "grad_norm": 2.369101064617468, "learning_rate": 4.482633839566308e-06, "loss": 0.3281, "step": 28072 }, { "epoch": 2.0864362690449645, "grad_norm": 2.1405143563723383, "learning_rate": 4.481964667434399e-06, "loss": 0.2986, "step": 28073 }, { "epoch": 2.086510590858417, "grad_norm": 2.695369352388914, "learning_rate": 4.481295530827311e-06, "loss": 0.3179, "step": 28074 }, { "epoch": 2.0865849126718694, "grad_norm": 2.9535169950629374, "learning_rate": 4.48062642974936e-06, "loss": 0.2669, "step": 28075 }, { "epoch": 2.0866592344853214, "grad_norm": 2.05408001760655, "learning_rate": 4.47995736420485e-06, "loss": 0.2653, "step": 28076 }, { "epoch": 2.086733556298774, "grad_norm": 2.1386346210337996, "learning_rate": 4.4792883341980844e-06, "loss": 0.3135, "step": 28077 }, { "epoch": 2.086807878112226, "grad_norm": 2.6958013552573927, "learning_rate": 4.478619339733377e-06, "loss": 0.3367, "step": 28078 }, { "epoch": 2.0868821999256784, "grad_norm": 1.757321020946058, "learning_rate": 4.477950380815028e-06, "loss": 0.2428, "step": 28079 }, { "epoch": 2.0869565217391304, "grad_norm": 2.735730448893922, "learning_rate": 4.4772814574473565e-06, "loss": 0.3958, "step": 28080 }, { "epoch": 2.087030843552583, "grad_norm": 2.4475052540601925, "learning_rate": 4.476612569634652e-06, "loss": 0.2982, "step": 28081 }, { "epoch": 2.087105165366035, "grad_norm": 2.0400371853682513, "learning_rate": 4.475943717381233e-06, "loss": 0.2369, "step": 28082 }, { "epoch": 2.0871794871794873, "grad_norm": 1.8415187294217534, "learning_rate": 4.4752749006914035e-06, "loss": 0.2265, "step": 28083 }, { "epoch": 2.0872538089929393, "grad_norm": 1.8221225369453304, "learning_rate": 4.474606119569461e-06, "loss": 0.2235, "step": 28084 }, { "epoch": 2.087328130806392, "grad_norm": 2.683053932530346, "learning_rate": 4.473937374019723e-06, "loss": 0.4344, "step": 28085 }, { "epoch": 2.087402452619844, "grad_norm": 2.2112718721917113, "learning_rate": 4.473268664046486e-06, "loss": 0.2916, "step": 28086 }, { "epoch": 2.0874767744332963, "grad_norm": 1.9509560099479253, "learning_rate": 4.472599989654062e-06, "loss": 0.27, "step": 28087 }, { "epoch": 2.0875510962467483, "grad_norm": 2.5918161331762133, "learning_rate": 4.4719313508467525e-06, "loss": 0.3255, "step": 28088 }, { "epoch": 2.0876254180602007, "grad_norm": 2.1798224163895843, "learning_rate": 4.471262747628857e-06, "loss": 0.2858, "step": 28089 }, { "epoch": 2.0876997398736528, "grad_norm": 2.0457858209984727, "learning_rate": 4.4705941800046905e-06, "loss": 0.2805, "step": 28090 }, { "epoch": 2.087774061687105, "grad_norm": 2.506147459750853, "learning_rate": 4.469925647978551e-06, "loss": 0.3294, "step": 28091 }, { "epoch": 2.0878483835005572, "grad_norm": 1.9503518997418117, "learning_rate": 4.469257151554744e-06, "loss": 0.2269, "step": 28092 }, { "epoch": 2.0879227053140097, "grad_norm": 2.51064693899453, "learning_rate": 4.468588690737571e-06, "loss": 0.2939, "step": 28093 }, { "epoch": 2.0879970271274617, "grad_norm": 2.18725171981804, "learning_rate": 4.467920265531334e-06, "loss": 0.3558, "step": 28094 }, { "epoch": 2.088071348940914, "grad_norm": 2.364355854427461, "learning_rate": 4.467251875940344e-06, "loss": 0.3121, "step": 28095 }, { "epoch": 2.0881456707543666, "grad_norm": 2.5113172194704254, "learning_rate": 4.4665835219688934e-06, "loss": 0.2839, "step": 28096 }, { "epoch": 2.0882199925678187, "grad_norm": 1.917023150294068, "learning_rate": 4.465915203621297e-06, "loss": 0.2705, "step": 28097 }, { "epoch": 2.088294314381271, "grad_norm": 2.551609012926105, "learning_rate": 4.46524692090185e-06, "loss": 0.3637, "step": 28098 }, { "epoch": 2.088368636194723, "grad_norm": 1.988478990460723, "learning_rate": 4.464578673814853e-06, "loss": 0.2761, "step": 28099 }, { "epoch": 2.0884429580081756, "grad_norm": 1.9602497618778987, "learning_rate": 4.4639104623646144e-06, "loss": 0.3083, "step": 28100 }, { "epoch": 2.0885172798216276, "grad_norm": 2.50877485856665, "learning_rate": 4.46324228655543e-06, "loss": 0.3268, "step": 28101 }, { "epoch": 2.08859160163508, "grad_norm": 2.0273157048506643, "learning_rate": 4.462574146391608e-06, "loss": 0.2971, "step": 28102 }, { "epoch": 2.088665923448532, "grad_norm": 2.546991610979002, "learning_rate": 4.461906041877446e-06, "loss": 0.3922, "step": 28103 }, { "epoch": 2.0887402452619845, "grad_norm": 2.1341442176825582, "learning_rate": 4.461237973017246e-06, "loss": 0.2529, "step": 28104 }, { "epoch": 2.0888145670754366, "grad_norm": 2.1974821228704124, "learning_rate": 4.4605699398153095e-06, "loss": 0.2404, "step": 28105 }, { "epoch": 2.088888888888889, "grad_norm": 2.0823107149472078, "learning_rate": 4.45990194227593e-06, "loss": 0.2456, "step": 28106 }, { "epoch": 2.088963210702341, "grad_norm": 2.4019629073876705, "learning_rate": 4.45923398040342e-06, "loss": 0.2193, "step": 28107 }, { "epoch": 2.0890375325157935, "grad_norm": 3.190308945393677, "learning_rate": 4.458566054202071e-06, "loss": 0.3232, "step": 28108 }, { "epoch": 2.0891118543292455, "grad_norm": 1.8062162196562654, "learning_rate": 4.4578981636761884e-06, "loss": 0.2423, "step": 28109 }, { "epoch": 2.089186176142698, "grad_norm": 2.1685487238389034, "learning_rate": 4.457230308830071e-06, "loss": 0.3387, "step": 28110 }, { "epoch": 2.08926049795615, "grad_norm": 3.0669395482122193, "learning_rate": 4.456562489668013e-06, "loss": 0.3962, "step": 28111 }, { "epoch": 2.0893348197696024, "grad_norm": 1.8206369597728989, "learning_rate": 4.45589470619432e-06, "loss": 0.2348, "step": 28112 }, { "epoch": 2.0894091415830545, "grad_norm": 2.110875365910387, "learning_rate": 4.455226958413291e-06, "loss": 0.3162, "step": 28113 }, { "epoch": 2.089483463396507, "grad_norm": 2.237410421921057, "learning_rate": 4.454559246329224e-06, "loss": 0.2975, "step": 28114 }, { "epoch": 2.089557785209959, "grad_norm": 2.4510186565077294, "learning_rate": 4.453891569946416e-06, "loss": 0.324, "step": 28115 }, { "epoch": 2.0896321070234114, "grad_norm": 2.1004572467531557, "learning_rate": 4.453223929269162e-06, "loss": 0.2638, "step": 28116 }, { "epoch": 2.0897064288368634, "grad_norm": 2.263769272593597, "learning_rate": 4.452556324301769e-06, "loss": 0.3081, "step": 28117 }, { "epoch": 2.089780750650316, "grad_norm": 2.289414712789209, "learning_rate": 4.451888755048527e-06, "loss": 0.2758, "step": 28118 }, { "epoch": 2.0898550724637683, "grad_norm": 2.0274865373799056, "learning_rate": 4.4512212215137395e-06, "loss": 0.3095, "step": 28119 }, { "epoch": 2.0899293942772204, "grad_norm": 2.5092173859541607, "learning_rate": 4.4505537237017034e-06, "loss": 0.377, "step": 28120 }, { "epoch": 2.090003716090673, "grad_norm": 3.5647801297185864, "learning_rate": 4.449886261616711e-06, "loss": 0.2804, "step": 28121 }, { "epoch": 2.090078037904125, "grad_norm": 2.263147627819192, "learning_rate": 4.449218835263065e-06, "loss": 0.2377, "step": 28122 }, { "epoch": 2.0901523597175773, "grad_norm": 2.3303430964971352, "learning_rate": 4.448551444645058e-06, "loss": 0.2636, "step": 28123 }, { "epoch": 2.0902266815310293, "grad_norm": 2.3008769090125134, "learning_rate": 4.447884089766991e-06, "loss": 0.2575, "step": 28124 }, { "epoch": 2.0903010033444818, "grad_norm": 2.2888514236944735, "learning_rate": 4.44721677063316e-06, "loss": 0.2132, "step": 28125 }, { "epoch": 2.090375325157934, "grad_norm": 3.443176027073745, "learning_rate": 4.446549487247857e-06, "loss": 0.3562, "step": 28126 }, { "epoch": 2.0904496469713862, "grad_norm": 2.523809569601386, "learning_rate": 4.445882239615382e-06, "loss": 0.3324, "step": 28127 }, { "epoch": 2.0905239687848383, "grad_norm": 2.2004661375781276, "learning_rate": 4.445215027740025e-06, "loss": 0.2733, "step": 28128 }, { "epoch": 2.0905982905982907, "grad_norm": 2.3335583700433227, "learning_rate": 4.444547851626089e-06, "loss": 0.4018, "step": 28129 }, { "epoch": 2.0906726124117427, "grad_norm": 2.501417256168721, "learning_rate": 4.443880711277862e-06, "loss": 0.3445, "step": 28130 }, { "epoch": 2.090746934225195, "grad_norm": 2.041038406264838, "learning_rate": 4.443213606699646e-06, "loss": 0.3221, "step": 28131 }, { "epoch": 2.090821256038647, "grad_norm": 2.105178525469818, "learning_rate": 4.442546537895733e-06, "loss": 0.3292, "step": 28132 }, { "epoch": 2.0908955778520997, "grad_norm": 1.8395191369667871, "learning_rate": 4.4418795048704136e-06, "loss": 0.2567, "step": 28133 }, { "epoch": 2.0909698996655517, "grad_norm": 2.588136997697842, "learning_rate": 4.441212507627989e-06, "loss": 0.3371, "step": 28134 }, { "epoch": 2.091044221479004, "grad_norm": 2.5970514048223965, "learning_rate": 4.440545546172749e-06, "loss": 0.2926, "step": 28135 }, { "epoch": 2.091118543292456, "grad_norm": 2.341946911077826, "learning_rate": 4.439878620508985e-06, "loss": 0.2993, "step": 28136 }, { "epoch": 2.0911928651059086, "grad_norm": 1.9354618572096653, "learning_rate": 4.439211730641002e-06, "loss": 0.2748, "step": 28137 }, { "epoch": 2.0912671869193606, "grad_norm": 2.0473082004241387, "learning_rate": 4.438544876573078e-06, "loss": 0.3064, "step": 28138 }, { "epoch": 2.091341508732813, "grad_norm": 2.2347758909756488, "learning_rate": 4.437878058309516e-06, "loss": 0.2896, "step": 28139 }, { "epoch": 2.091415830546265, "grad_norm": 2.10730669075091, "learning_rate": 4.437211275854604e-06, "loss": 0.2227, "step": 28140 }, { "epoch": 2.0914901523597176, "grad_norm": 2.3458723791038985, "learning_rate": 4.43654452921264e-06, "loss": 0.2672, "step": 28141 }, { "epoch": 2.09156447417317, "grad_norm": 2.6309989231981428, "learning_rate": 4.435877818387914e-06, "loss": 0.3298, "step": 28142 }, { "epoch": 2.091638795986622, "grad_norm": 2.200655622565237, "learning_rate": 4.4352111433847145e-06, "loss": 0.2981, "step": 28143 }, { "epoch": 2.0917131178000745, "grad_norm": 2.0200136060596496, "learning_rate": 4.43454450420734e-06, "loss": 0.2437, "step": 28144 }, { "epoch": 2.0917874396135265, "grad_norm": 2.445313129541119, "learning_rate": 4.433877900860076e-06, "loss": 0.22, "step": 28145 }, { "epoch": 2.091861761426979, "grad_norm": 2.512361434065122, "learning_rate": 4.4332113333472205e-06, "loss": 0.3094, "step": 28146 }, { "epoch": 2.091936083240431, "grad_norm": 2.1349122106949454, "learning_rate": 4.432544801673062e-06, "loss": 0.2804, "step": 28147 }, { "epoch": 2.0920104050538835, "grad_norm": 2.2412445118651574, "learning_rate": 4.431878305841891e-06, "loss": 0.296, "step": 28148 }, { "epoch": 2.0920847268673355, "grad_norm": 2.028867665329826, "learning_rate": 4.431211845857999e-06, "loss": 0.2841, "step": 28149 }, { "epoch": 2.092159048680788, "grad_norm": 1.985583759274709, "learning_rate": 4.4305454217256714e-06, "loss": 0.2304, "step": 28150 }, { "epoch": 2.09223337049424, "grad_norm": 2.2579886307483084, "learning_rate": 4.429879033449208e-06, "loss": 0.2284, "step": 28151 }, { "epoch": 2.0923076923076924, "grad_norm": 2.5848651572339394, "learning_rate": 4.429212681032891e-06, "loss": 0.2832, "step": 28152 }, { "epoch": 2.0923820141211444, "grad_norm": 2.4803001898563917, "learning_rate": 4.4285463644810165e-06, "loss": 0.3216, "step": 28153 }, { "epoch": 2.092456335934597, "grad_norm": 2.2856187891224504, "learning_rate": 4.427880083797872e-06, "loss": 0.28, "step": 28154 }, { "epoch": 2.092530657748049, "grad_norm": 2.681455458768858, "learning_rate": 4.427213838987743e-06, "loss": 0.2903, "step": 28155 }, { "epoch": 2.0926049795615014, "grad_norm": 2.344646327032191, "learning_rate": 4.426547630054925e-06, "loss": 0.3332, "step": 28156 }, { "epoch": 2.0926793013749534, "grad_norm": 3.1546182891377437, "learning_rate": 4.425881457003704e-06, "loss": 0.3691, "step": 28157 }, { "epoch": 2.092753623188406, "grad_norm": 2.5109372771216307, "learning_rate": 4.425215319838366e-06, "loss": 0.3382, "step": 28158 }, { "epoch": 2.092827945001858, "grad_norm": 2.6602257957038242, "learning_rate": 4.424549218563207e-06, "loss": 0.3489, "step": 28159 }, { "epoch": 2.0929022668153103, "grad_norm": 1.9819117438242853, "learning_rate": 4.42388315318251e-06, "loss": 0.2436, "step": 28160 }, { "epoch": 2.0929765886287623, "grad_norm": 2.277068695510956, "learning_rate": 4.423217123700564e-06, "loss": 0.2851, "step": 28161 }, { "epoch": 2.093050910442215, "grad_norm": 2.3369001793769333, "learning_rate": 4.422551130121653e-06, "loss": 0.2944, "step": 28162 }, { "epoch": 2.093125232255667, "grad_norm": 2.6064948212846155, "learning_rate": 4.421885172450071e-06, "loss": 0.2463, "step": 28163 }, { "epoch": 2.0931995540691193, "grad_norm": 2.395215778965676, "learning_rate": 4.421219250690104e-06, "loss": 0.279, "step": 28164 }, { "epoch": 2.0932738758825717, "grad_norm": 2.1012729930344465, "learning_rate": 4.4205533648460355e-06, "loss": 0.2508, "step": 28165 }, { "epoch": 2.0933481976960238, "grad_norm": 2.55908157353485, "learning_rate": 4.419887514922156e-06, "loss": 0.2939, "step": 28166 }, { "epoch": 2.093422519509476, "grad_norm": 2.4961152030664238, "learning_rate": 4.419221700922749e-06, "loss": 0.3526, "step": 28167 }, { "epoch": 2.0934968413229282, "grad_norm": 2.7200809360113753, "learning_rate": 4.418555922852107e-06, "loss": 0.3365, "step": 28168 }, { "epoch": 2.0935711631363807, "grad_norm": 2.0294814812077946, "learning_rate": 4.417890180714511e-06, "loss": 0.2554, "step": 28169 }, { "epoch": 2.0936454849498327, "grad_norm": 2.5255508882229205, "learning_rate": 4.417224474514247e-06, "loss": 0.4648, "step": 28170 }, { "epoch": 2.093719806763285, "grad_norm": 2.221317730357799, "learning_rate": 4.416558804255608e-06, "loss": 0.3001, "step": 28171 }, { "epoch": 2.093794128576737, "grad_norm": 1.7861738501314532, "learning_rate": 4.415893169942867e-06, "loss": 0.2594, "step": 28172 }, { "epoch": 2.0938684503901897, "grad_norm": 2.301176870122222, "learning_rate": 4.415227571580318e-06, "loss": 0.3124, "step": 28173 }, { "epoch": 2.0939427722036417, "grad_norm": 2.8578252383954013, "learning_rate": 4.414562009172242e-06, "loss": 0.2548, "step": 28174 }, { "epoch": 2.094017094017094, "grad_norm": 2.321573449531402, "learning_rate": 4.413896482722929e-06, "loss": 0.2573, "step": 28175 }, { "epoch": 2.094091415830546, "grad_norm": 2.400826164818401, "learning_rate": 4.413230992236661e-06, "loss": 0.3319, "step": 28176 }, { "epoch": 2.0941657376439986, "grad_norm": 2.298689925772426, "learning_rate": 4.412565537717719e-06, "loss": 0.2929, "step": 28177 }, { "epoch": 2.0942400594574506, "grad_norm": 2.632407566016967, "learning_rate": 4.411900119170393e-06, "loss": 0.3201, "step": 28178 }, { "epoch": 2.094314381270903, "grad_norm": 2.1994169787971014, "learning_rate": 4.411234736598964e-06, "loss": 0.3149, "step": 28179 }, { "epoch": 2.094388703084355, "grad_norm": 1.8807105408100444, "learning_rate": 4.410569390007713e-06, "loss": 0.2023, "step": 28180 }, { "epoch": 2.0944630248978076, "grad_norm": 2.1636895326672008, "learning_rate": 4.409904079400929e-06, "loss": 0.2864, "step": 28181 }, { "epoch": 2.0945373467112596, "grad_norm": 2.1495449981981176, "learning_rate": 4.409238804782893e-06, "loss": 0.2987, "step": 28182 }, { "epoch": 2.094611668524712, "grad_norm": 2.1972493331694696, "learning_rate": 4.408573566157887e-06, "loss": 0.2963, "step": 28183 }, { "epoch": 2.094685990338164, "grad_norm": 1.9044044156569293, "learning_rate": 4.40790836353019e-06, "loss": 0.3072, "step": 28184 }, { "epoch": 2.0947603121516165, "grad_norm": 2.550576416909369, "learning_rate": 4.407243196904094e-06, "loss": 0.3729, "step": 28185 }, { "epoch": 2.0948346339650685, "grad_norm": 2.578968815225761, "learning_rate": 4.406578066283876e-06, "loss": 0.3506, "step": 28186 }, { "epoch": 2.094908955778521, "grad_norm": 2.4537690628039255, "learning_rate": 4.405912971673812e-06, "loss": 0.315, "step": 28187 }, { "epoch": 2.0949832775919734, "grad_norm": 2.4713366764895586, "learning_rate": 4.405247913078197e-06, "loss": 0.2111, "step": 28188 }, { "epoch": 2.0950575994054255, "grad_norm": 2.1496688359048663, "learning_rate": 4.4045828905013e-06, "loss": 0.2681, "step": 28189 }, { "epoch": 2.095131921218878, "grad_norm": 2.0343404804611374, "learning_rate": 4.403917903947412e-06, "loss": 0.2919, "step": 28190 }, { "epoch": 2.09520624303233, "grad_norm": 2.5287799592945506, "learning_rate": 4.40325295342081e-06, "loss": 0.3315, "step": 28191 }, { "epoch": 2.0952805648457824, "grad_norm": 2.0423562170607514, "learning_rate": 4.402588038925772e-06, "loss": 0.2405, "step": 28192 }, { "epoch": 2.0953548866592344, "grad_norm": 2.23414173993171, "learning_rate": 4.401923160466589e-06, "loss": 0.2708, "step": 28193 }, { "epoch": 2.095429208472687, "grad_norm": 2.1556087931220906, "learning_rate": 4.401258318047527e-06, "loss": 0.2918, "step": 28194 }, { "epoch": 2.095503530286139, "grad_norm": 2.143279230584527, "learning_rate": 4.400593511672877e-06, "loss": 0.29, "step": 28195 }, { "epoch": 2.0955778520995914, "grad_norm": 2.590277917693844, "learning_rate": 4.399928741346916e-06, "loss": 0.3265, "step": 28196 }, { "epoch": 2.0956521739130434, "grad_norm": 1.8430105385911955, "learning_rate": 4.399264007073919e-06, "loss": 0.2333, "step": 28197 }, { "epoch": 2.095726495726496, "grad_norm": 2.122600530742104, "learning_rate": 4.398599308858173e-06, "loss": 0.2796, "step": 28198 }, { "epoch": 2.095800817539948, "grad_norm": 1.7806224951952685, "learning_rate": 4.397934646703951e-06, "loss": 0.1863, "step": 28199 }, { "epoch": 2.0958751393534003, "grad_norm": 1.9983252710247636, "learning_rate": 4.397270020615538e-06, "loss": 0.3084, "step": 28200 }, { "epoch": 2.0959494611668523, "grad_norm": 1.8718688656011329, "learning_rate": 4.396605430597211e-06, "loss": 0.2419, "step": 28201 }, { "epoch": 2.096023782980305, "grad_norm": 2.806024874369656, "learning_rate": 4.395940876653243e-06, "loss": 0.3618, "step": 28202 }, { "epoch": 2.096098104793757, "grad_norm": 1.8273924753645987, "learning_rate": 4.39527635878792e-06, "loss": 0.2283, "step": 28203 }, { "epoch": 2.0961724266072093, "grad_norm": 2.166960965692614, "learning_rate": 4.394611877005514e-06, "loss": 0.2667, "step": 28204 }, { "epoch": 2.0962467484206613, "grad_norm": 2.5135689921608124, "learning_rate": 4.393947431310313e-06, "loss": 0.3735, "step": 28205 }, { "epoch": 2.0963210702341137, "grad_norm": 2.1725710634049276, "learning_rate": 4.393283021706581e-06, "loss": 0.2456, "step": 28206 }, { "epoch": 2.0963953920475658, "grad_norm": 2.3430606272882626, "learning_rate": 4.3926186481986055e-06, "loss": 0.3421, "step": 28207 }, { "epoch": 2.096469713861018, "grad_norm": 2.820503160307958, "learning_rate": 4.39195431079066e-06, "loss": 0.3475, "step": 28208 }, { "epoch": 2.0965440356744702, "grad_norm": 2.2473746860931447, "learning_rate": 4.391290009487017e-06, "loss": 0.2616, "step": 28209 }, { "epoch": 2.0966183574879227, "grad_norm": 3.000057276021634, "learning_rate": 4.390625744291962e-06, "loss": 0.4097, "step": 28210 }, { "epoch": 2.096692679301375, "grad_norm": 2.2053927023720727, "learning_rate": 4.389961515209764e-06, "loss": 0.2627, "step": 28211 }, { "epoch": 2.096767001114827, "grad_norm": 2.7214064972605647, "learning_rate": 4.389297322244707e-06, "loss": 0.3056, "step": 28212 }, { "epoch": 2.0968413229282796, "grad_norm": 2.689946891984739, "learning_rate": 4.388633165401063e-06, "loss": 0.2613, "step": 28213 }, { "epoch": 2.0969156447417316, "grad_norm": 2.2698633901917216, "learning_rate": 4.3879690446831025e-06, "loss": 0.3136, "step": 28214 }, { "epoch": 2.096989966555184, "grad_norm": 2.432580125471836, "learning_rate": 4.3873049600951105e-06, "loss": 0.2712, "step": 28215 }, { "epoch": 2.097064288368636, "grad_norm": 2.7525305526801542, "learning_rate": 4.386640911641358e-06, "loss": 0.2415, "step": 28216 }, { "epoch": 2.0971386101820886, "grad_norm": 2.2182975921775694, "learning_rate": 4.385976899326119e-06, "loss": 0.2403, "step": 28217 }, { "epoch": 2.0972129319955406, "grad_norm": 1.5397704742098708, "learning_rate": 4.385312923153671e-06, "loss": 0.2141, "step": 28218 }, { "epoch": 2.097287253808993, "grad_norm": 1.902417387110814, "learning_rate": 4.384648983128282e-06, "loss": 0.1888, "step": 28219 }, { "epoch": 2.097361575622445, "grad_norm": 2.230243477920662, "learning_rate": 4.383985079254236e-06, "loss": 0.2275, "step": 28220 }, { "epoch": 2.0974358974358975, "grad_norm": 2.834714322546864, "learning_rate": 4.383321211535798e-06, "loss": 0.394, "step": 28221 }, { "epoch": 2.0975102192493496, "grad_norm": 2.0690902190689076, "learning_rate": 4.3826573799772525e-06, "loss": 0.1909, "step": 28222 }, { "epoch": 2.097584541062802, "grad_norm": 2.193206234604938, "learning_rate": 4.3819935845828656e-06, "loss": 0.2613, "step": 28223 }, { "epoch": 2.097658862876254, "grad_norm": 2.665214644473701, "learning_rate": 4.381329825356908e-06, "loss": 0.3302, "step": 28224 }, { "epoch": 2.0977331846897065, "grad_norm": 1.7116159246134603, "learning_rate": 4.380666102303663e-06, "loss": 0.2237, "step": 28225 }, { "epoch": 2.0978075065031585, "grad_norm": 2.5660513181267133, "learning_rate": 4.380002415427394e-06, "loss": 0.3222, "step": 28226 }, { "epoch": 2.097881828316611, "grad_norm": 2.321769879645357, "learning_rate": 4.379338764732385e-06, "loss": 0.3254, "step": 28227 }, { "epoch": 2.097956150130063, "grad_norm": 2.115222122464343, "learning_rate": 4.378675150222893e-06, "loss": 0.3024, "step": 28228 }, { "epoch": 2.0980304719435154, "grad_norm": 2.306578564240138, "learning_rate": 4.378011571903203e-06, "loss": 0.2407, "step": 28229 }, { "epoch": 2.0981047937569675, "grad_norm": 2.3477417931451523, "learning_rate": 4.377348029777584e-06, "loss": 0.2762, "step": 28230 }, { "epoch": 2.09817911557042, "grad_norm": 2.1062421378042404, "learning_rate": 4.376684523850302e-06, "loss": 0.221, "step": 28231 }, { "epoch": 2.0982534373838724, "grad_norm": 2.235928114553162, "learning_rate": 4.376021054125637e-06, "loss": 0.2694, "step": 28232 }, { "epoch": 2.0983277591973244, "grad_norm": 2.016060591354557, "learning_rate": 4.375357620607853e-06, "loss": 0.2702, "step": 28233 }, { "epoch": 2.098402081010777, "grad_norm": 2.622383736808206, "learning_rate": 4.374694223301228e-06, "loss": 0.3687, "step": 28234 }, { "epoch": 2.098476402824229, "grad_norm": 2.0763891979833975, "learning_rate": 4.374030862210031e-06, "loss": 0.2075, "step": 28235 }, { "epoch": 2.0985507246376813, "grad_norm": 2.400830580434938, "learning_rate": 4.373367537338527e-06, "loss": 0.3004, "step": 28236 }, { "epoch": 2.0986250464511333, "grad_norm": 1.7219355940653334, "learning_rate": 4.372704248690993e-06, "loss": 0.1964, "step": 28237 }, { "epoch": 2.098699368264586, "grad_norm": 2.397730997438387, "learning_rate": 4.372040996271699e-06, "loss": 0.3456, "step": 28238 }, { "epoch": 2.098773690078038, "grad_norm": 2.2506656151136144, "learning_rate": 4.371377780084912e-06, "loss": 0.3234, "step": 28239 }, { "epoch": 2.0988480118914903, "grad_norm": 2.7203147857820777, "learning_rate": 4.3707146001349035e-06, "loss": 0.2893, "step": 28240 }, { "epoch": 2.0989223337049423, "grad_norm": 3.396836852608811, "learning_rate": 4.370051456425939e-06, "loss": 0.3556, "step": 28241 }, { "epoch": 2.0989966555183948, "grad_norm": 2.8238262936097227, "learning_rate": 4.3693883489622935e-06, "loss": 0.3363, "step": 28242 }, { "epoch": 2.0990709773318468, "grad_norm": 2.9028988200074397, "learning_rate": 4.368725277748231e-06, "loss": 0.3403, "step": 28243 }, { "epoch": 2.0991452991452992, "grad_norm": 2.2218670575996815, "learning_rate": 4.368062242788026e-06, "loss": 0.2893, "step": 28244 }, { "epoch": 2.0992196209587513, "grad_norm": 1.6620050279535632, "learning_rate": 4.367399244085944e-06, "loss": 0.1849, "step": 28245 }, { "epoch": 2.0992939427722037, "grad_norm": 3.0206946046620327, "learning_rate": 4.366736281646251e-06, "loss": 0.2512, "step": 28246 }, { "epoch": 2.0993682645856557, "grad_norm": 2.404391298971115, "learning_rate": 4.366073355473221e-06, "loss": 0.3287, "step": 28247 }, { "epoch": 2.099442586399108, "grad_norm": 2.1729153193081663, "learning_rate": 4.3654104655711136e-06, "loss": 0.2439, "step": 28248 }, { "epoch": 2.09951690821256, "grad_norm": 4.015381282389618, "learning_rate": 4.364747611944205e-06, "loss": 0.2995, "step": 28249 }, { "epoch": 2.0995912300260127, "grad_norm": 1.9408827411776615, "learning_rate": 4.364084794596759e-06, "loss": 0.2123, "step": 28250 }, { "epoch": 2.0996655518394647, "grad_norm": 2.401392706373365, "learning_rate": 4.363422013533043e-06, "loss": 0.2874, "step": 28251 }, { "epoch": 2.099739873652917, "grad_norm": 2.5592830738605596, "learning_rate": 4.362759268757324e-06, "loss": 0.3472, "step": 28252 }, { "epoch": 2.0998141954663696, "grad_norm": 2.469105156619603, "learning_rate": 4.362096560273864e-06, "loss": 0.3815, "step": 28253 }, { "epoch": 2.0998885172798216, "grad_norm": 2.1361763748167704, "learning_rate": 4.361433888086938e-06, "loss": 0.2719, "step": 28254 }, { "epoch": 2.099962839093274, "grad_norm": 2.615436890214873, "learning_rate": 4.360771252200804e-06, "loss": 0.3062, "step": 28255 }, { "epoch": 2.100037160906726, "grad_norm": 2.540932148256053, "learning_rate": 4.3601086526197355e-06, "loss": 0.3086, "step": 28256 }, { "epoch": 2.1001114827201786, "grad_norm": 2.238945561240849, "learning_rate": 4.359446089347996e-06, "loss": 0.2911, "step": 28257 }, { "epoch": 2.1001858045336306, "grad_norm": 2.9244935418768336, "learning_rate": 4.358783562389845e-06, "loss": 0.3908, "step": 28258 }, { "epoch": 2.100260126347083, "grad_norm": 2.473485183841021, "learning_rate": 4.358121071749557e-06, "loss": 0.3275, "step": 28259 }, { "epoch": 2.100334448160535, "grad_norm": 2.5918749001897257, "learning_rate": 4.3574586174313925e-06, "loss": 0.3595, "step": 28260 }, { "epoch": 2.1004087699739875, "grad_norm": 1.7208262482840275, "learning_rate": 4.356796199439617e-06, "loss": 0.2347, "step": 28261 }, { "epoch": 2.1004830917874395, "grad_norm": 1.9975714402534819, "learning_rate": 4.356133817778495e-06, "loss": 0.2487, "step": 28262 }, { "epoch": 2.100557413600892, "grad_norm": 3.2129336273250044, "learning_rate": 4.355471472452286e-06, "loss": 0.2345, "step": 28263 }, { "epoch": 2.100631735414344, "grad_norm": 2.521825675488129, "learning_rate": 4.354809163465263e-06, "loss": 0.2752, "step": 28264 }, { "epoch": 2.1007060572277965, "grad_norm": 1.9617973720987114, "learning_rate": 4.354146890821682e-06, "loss": 0.2835, "step": 28265 }, { "epoch": 2.1007803790412485, "grad_norm": 2.093443223338011, "learning_rate": 4.353484654525815e-06, "loss": 0.251, "step": 28266 }, { "epoch": 2.100854700854701, "grad_norm": 1.6524228834001997, "learning_rate": 4.35282245458192e-06, "loss": 0.2207, "step": 28267 }, { "epoch": 2.100929022668153, "grad_norm": 2.006705756918812, "learning_rate": 4.352160290994256e-06, "loss": 0.2786, "step": 28268 }, { "epoch": 2.1010033444816054, "grad_norm": 2.3715394459275116, "learning_rate": 4.351498163767096e-06, "loss": 0.2648, "step": 28269 }, { "epoch": 2.1010776662950574, "grad_norm": 1.7655239628483705, "learning_rate": 4.350836072904694e-06, "loss": 0.2538, "step": 28270 }, { "epoch": 2.10115198810851, "grad_norm": 2.405960321778931, "learning_rate": 4.350174018411321e-06, "loss": 0.3235, "step": 28271 }, { "epoch": 2.101226309921962, "grad_norm": 2.109930287163405, "learning_rate": 4.349512000291233e-06, "loss": 0.1837, "step": 28272 }, { "epoch": 2.1013006317354144, "grad_norm": 2.5044399629248537, "learning_rate": 4.348850018548694e-06, "loss": 0.2653, "step": 28273 }, { "epoch": 2.1013749535488664, "grad_norm": 2.554638846216896, "learning_rate": 4.3481880731879655e-06, "loss": 0.2658, "step": 28274 }, { "epoch": 2.101449275362319, "grad_norm": 2.3299380408548602, "learning_rate": 4.347526164213305e-06, "loss": 0.3031, "step": 28275 }, { "epoch": 2.1015235971757713, "grad_norm": 2.3299220987618443, "learning_rate": 4.346864291628981e-06, "loss": 0.2673, "step": 28276 }, { "epoch": 2.1015979189892233, "grad_norm": 2.183636520809573, "learning_rate": 4.346202455439249e-06, "loss": 0.2782, "step": 28277 }, { "epoch": 2.101672240802676, "grad_norm": 2.4480995735999187, "learning_rate": 4.345540655648375e-06, "loss": 0.2859, "step": 28278 }, { "epoch": 2.101746562616128, "grad_norm": 2.439669517594579, "learning_rate": 4.3448788922606166e-06, "loss": 0.3161, "step": 28279 }, { "epoch": 2.1018208844295803, "grad_norm": 2.3166241642702707, "learning_rate": 4.344217165280231e-06, "loss": 0.3013, "step": 28280 }, { "epoch": 2.1018952062430323, "grad_norm": 1.965930067539178, "learning_rate": 4.343555474711486e-06, "loss": 0.2286, "step": 28281 }, { "epoch": 2.1019695280564847, "grad_norm": 1.7032781190734625, "learning_rate": 4.3428938205586364e-06, "loss": 0.2349, "step": 28282 }, { "epoch": 2.1020438498699368, "grad_norm": 1.791560338720764, "learning_rate": 4.34223220282594e-06, "loss": 0.2233, "step": 28283 }, { "epoch": 2.102118171683389, "grad_norm": 2.6594706608620307, "learning_rate": 4.341570621517667e-06, "loss": 0.389, "step": 28284 }, { "epoch": 2.1021924934968412, "grad_norm": 2.6176824728807087, "learning_rate": 4.34090907663806e-06, "loss": 0.3471, "step": 28285 }, { "epoch": 2.1022668153102937, "grad_norm": 1.8865513681517327, "learning_rate": 4.3402475681913915e-06, "loss": 0.2028, "step": 28286 }, { "epoch": 2.1023411371237457, "grad_norm": 2.3668045484723526, "learning_rate": 4.339586096181912e-06, "loss": 0.3412, "step": 28287 }, { "epoch": 2.102415458937198, "grad_norm": 2.194024930431745, "learning_rate": 4.3389246606138865e-06, "loss": 0.3002, "step": 28288 }, { "epoch": 2.10248978075065, "grad_norm": 2.2340547750112445, "learning_rate": 4.33826326149157e-06, "loss": 0.2545, "step": 28289 }, { "epoch": 2.1025641025641026, "grad_norm": 2.052787849432238, "learning_rate": 4.337601898819218e-06, "loss": 0.2719, "step": 28290 }, { "epoch": 2.1026384243775547, "grad_norm": 1.9414447679618319, "learning_rate": 4.3369405726010945e-06, "loss": 0.2689, "step": 28291 }, { "epoch": 2.102712746191007, "grad_norm": 2.144011408300582, "learning_rate": 4.3362792828414505e-06, "loss": 0.2991, "step": 28292 }, { "epoch": 2.102787068004459, "grad_norm": 1.9227395475648796, "learning_rate": 4.3356180295445496e-06, "loss": 0.2858, "step": 28293 }, { "epoch": 2.1028613898179116, "grad_norm": 2.3281576893718023, "learning_rate": 4.3349568127146455e-06, "loss": 0.2723, "step": 28294 }, { "epoch": 2.1029357116313636, "grad_norm": 2.10788403934118, "learning_rate": 4.334295632355996e-06, "loss": 0.3136, "step": 28295 }, { "epoch": 2.103010033444816, "grad_norm": 1.45891774528367, "learning_rate": 4.333634488472858e-06, "loss": 0.1766, "step": 28296 }, { "epoch": 2.103084355258268, "grad_norm": 2.3053416976486987, "learning_rate": 4.332973381069483e-06, "loss": 0.3037, "step": 28297 }, { "epoch": 2.1031586770717206, "grad_norm": 2.459250973363967, "learning_rate": 4.332312310150135e-06, "loss": 0.2861, "step": 28298 }, { "epoch": 2.103232998885173, "grad_norm": 1.9104479899447726, "learning_rate": 4.331651275719062e-06, "loss": 0.2684, "step": 28299 }, { "epoch": 2.103307320698625, "grad_norm": 2.075598753454405, "learning_rate": 4.330990277780528e-06, "loss": 0.2955, "step": 28300 }, { "epoch": 2.1033816425120775, "grad_norm": 2.7170250246023264, "learning_rate": 4.330329316338785e-06, "loss": 0.3146, "step": 28301 }, { "epoch": 2.1034559643255295, "grad_norm": 2.982664814012708, "learning_rate": 4.329668391398083e-06, "loss": 0.2425, "step": 28302 }, { "epoch": 2.103530286138982, "grad_norm": 2.051657371716277, "learning_rate": 4.329007502962687e-06, "loss": 0.204, "step": 28303 }, { "epoch": 2.103604607952434, "grad_norm": 2.098192592914097, "learning_rate": 4.328346651036845e-06, "loss": 0.3418, "step": 28304 }, { "epoch": 2.1036789297658864, "grad_norm": 2.4658676975095517, "learning_rate": 4.32768583562481e-06, "loss": 0.3713, "step": 28305 }, { "epoch": 2.1037532515793385, "grad_norm": 2.1486114951920805, "learning_rate": 4.327025056730846e-06, "loss": 0.2131, "step": 28306 }, { "epoch": 2.103827573392791, "grad_norm": 2.0348687941105763, "learning_rate": 4.3263643143591925e-06, "loss": 0.3121, "step": 28307 }, { "epoch": 2.103901895206243, "grad_norm": 2.220464309296522, "learning_rate": 4.3257036085141154e-06, "loss": 0.3339, "step": 28308 }, { "epoch": 2.1039762170196954, "grad_norm": 2.378431597568107, "learning_rate": 4.325042939199862e-06, "loss": 0.2876, "step": 28309 }, { "epoch": 2.1040505388331474, "grad_norm": 2.0176388103535476, "learning_rate": 4.3243823064206895e-06, "loss": 0.2847, "step": 28310 }, { "epoch": 2.1041248606466, "grad_norm": 2.730200864366469, "learning_rate": 4.323721710180849e-06, "loss": 0.2941, "step": 28311 }, { "epoch": 2.104199182460052, "grad_norm": 2.5125794518258067, "learning_rate": 4.323061150484591e-06, "loss": 0.2967, "step": 28312 }, { "epoch": 2.1042735042735043, "grad_norm": 2.2130071217217826, "learning_rate": 4.322400627336174e-06, "loss": 0.3083, "step": 28313 }, { "epoch": 2.1043478260869564, "grad_norm": 2.395341543425444, "learning_rate": 4.321740140739844e-06, "loss": 0.3797, "step": 28314 }, { "epoch": 2.104422147900409, "grad_norm": 2.7767818703586853, "learning_rate": 4.32107969069986e-06, "loss": 0.3934, "step": 28315 }, { "epoch": 2.104496469713861, "grad_norm": 2.424036251884414, "learning_rate": 4.320419277220469e-06, "loss": 0.3083, "step": 28316 }, { "epoch": 2.1045707915273133, "grad_norm": 2.400993215138802, "learning_rate": 4.319758900305921e-06, "loss": 0.3188, "step": 28317 }, { "epoch": 2.1046451133407653, "grad_norm": 2.359959332091138, "learning_rate": 4.319098559960479e-06, "loss": 0.2771, "step": 28318 }, { "epoch": 2.1047194351542178, "grad_norm": 2.005202876339877, "learning_rate": 4.3184382561883775e-06, "loss": 0.2271, "step": 28319 }, { "epoch": 2.10479375696767, "grad_norm": 2.5011136590069327, "learning_rate": 4.3177779889938786e-06, "loss": 0.3262, "step": 28320 }, { "epoch": 2.1048680787811223, "grad_norm": 2.39873999819294, "learning_rate": 4.317117758381231e-06, "loss": 0.329, "step": 28321 }, { "epoch": 2.1049424005945747, "grad_norm": 1.9785078273990435, "learning_rate": 4.316457564354681e-06, "loss": 0.294, "step": 28322 }, { "epoch": 2.1050167224080267, "grad_norm": 1.9526810019867304, "learning_rate": 4.315797406918485e-06, "loss": 0.2405, "step": 28323 }, { "epoch": 2.105091044221479, "grad_norm": 2.1607514240193266, "learning_rate": 4.315137286076888e-06, "loss": 0.2392, "step": 28324 }, { "epoch": 2.105165366034931, "grad_norm": 2.175819277958802, "learning_rate": 4.314477201834145e-06, "loss": 0.3469, "step": 28325 }, { "epoch": 2.1052396878483837, "grad_norm": 2.4566590672005635, "learning_rate": 4.313817154194503e-06, "loss": 0.3543, "step": 28326 }, { "epoch": 2.1053140096618357, "grad_norm": 2.587258912403914, "learning_rate": 4.3131571431622065e-06, "loss": 0.3089, "step": 28327 }, { "epoch": 2.105388331475288, "grad_norm": 2.303347688507918, "learning_rate": 4.312497168741514e-06, "loss": 0.3402, "step": 28328 }, { "epoch": 2.10546265328874, "grad_norm": 1.9905765999828366, "learning_rate": 4.31183723093667e-06, "loss": 0.2185, "step": 28329 }, { "epoch": 2.1055369751021926, "grad_norm": 2.315274707402478, "learning_rate": 4.311177329751922e-06, "loss": 0.2991, "step": 28330 }, { "epoch": 2.1056112969156446, "grad_norm": 2.2444269556871195, "learning_rate": 4.310517465191515e-06, "loss": 0.2651, "step": 28331 }, { "epoch": 2.105685618729097, "grad_norm": 1.8879986341926975, "learning_rate": 4.309857637259706e-06, "loss": 0.2678, "step": 28332 }, { "epoch": 2.105759940542549, "grad_norm": 2.185029362134029, "learning_rate": 4.309197845960738e-06, "loss": 0.2773, "step": 28333 }, { "epoch": 2.1058342623560016, "grad_norm": 2.3401316186709, "learning_rate": 4.308538091298855e-06, "loss": 0.3319, "step": 28334 }, { "epoch": 2.1059085841694536, "grad_norm": 2.3455800145459382, "learning_rate": 4.307878373278312e-06, "loss": 0.3287, "step": 28335 }, { "epoch": 2.105982905982906, "grad_norm": 2.0986597249480163, "learning_rate": 4.30721869190335e-06, "loss": 0.3186, "step": 28336 }, { "epoch": 2.106057227796358, "grad_norm": 2.4031555475715627, "learning_rate": 4.3065590471782214e-06, "loss": 0.38, "step": 28337 }, { "epoch": 2.1061315496098105, "grad_norm": 1.89184848209481, "learning_rate": 4.305899439107171e-06, "loss": 0.2346, "step": 28338 }, { "epoch": 2.1062058714232625, "grad_norm": 2.9400420103825047, "learning_rate": 4.3052398676944406e-06, "loss": 0.3214, "step": 28339 }, { "epoch": 2.106280193236715, "grad_norm": 1.7816783927833417, "learning_rate": 4.304580332944288e-06, "loss": 0.2326, "step": 28340 }, { "epoch": 2.106354515050167, "grad_norm": 2.705007112949686, "learning_rate": 4.303920834860944e-06, "loss": 0.3005, "step": 28341 }, { "epoch": 2.1064288368636195, "grad_norm": 2.405938846433558, "learning_rate": 4.303261373448666e-06, "loss": 0.3447, "step": 28342 }, { "epoch": 2.1065031586770715, "grad_norm": 2.4123170961016847, "learning_rate": 4.302601948711696e-06, "loss": 0.326, "step": 28343 }, { "epoch": 2.106577480490524, "grad_norm": 2.0619293138812576, "learning_rate": 4.301942560654274e-06, "loss": 0.3238, "step": 28344 }, { "epoch": 2.1066518023039764, "grad_norm": 2.5591161728364553, "learning_rate": 4.301283209280654e-06, "loss": 0.3296, "step": 28345 }, { "epoch": 2.1067261241174284, "grad_norm": 1.839563882609627, "learning_rate": 4.300623894595075e-06, "loss": 0.2315, "step": 28346 }, { "epoch": 2.106800445930881, "grad_norm": 2.272645440935856, "learning_rate": 4.299964616601786e-06, "loss": 0.2333, "step": 28347 }, { "epoch": 2.106874767744333, "grad_norm": 2.5739860921063586, "learning_rate": 4.2993053753050295e-06, "loss": 0.3556, "step": 28348 }, { "epoch": 2.1069490895577854, "grad_norm": 1.9722887944633172, "learning_rate": 4.2986461707090456e-06, "loss": 0.2245, "step": 28349 }, { "epoch": 2.1070234113712374, "grad_norm": 2.3798591324996026, "learning_rate": 4.297987002818086e-06, "loss": 0.2504, "step": 28350 }, { "epoch": 2.10709773318469, "grad_norm": 2.0416731035369584, "learning_rate": 4.2973278716363855e-06, "loss": 0.3067, "step": 28351 }, { "epoch": 2.107172054998142, "grad_norm": 2.3863693794884724, "learning_rate": 4.2966687771682e-06, "loss": 0.301, "step": 28352 }, { "epoch": 2.1072463768115943, "grad_norm": 2.29583158341758, "learning_rate": 4.296009719417758e-06, "loss": 0.3251, "step": 28353 }, { "epoch": 2.1073206986250463, "grad_norm": 2.311720709831094, "learning_rate": 4.2953506983893125e-06, "loss": 0.3012, "step": 28354 }, { "epoch": 2.107395020438499, "grad_norm": 1.7166292887359291, "learning_rate": 4.294691714087104e-06, "loss": 0.1551, "step": 28355 }, { "epoch": 2.107469342251951, "grad_norm": 2.2712263707438027, "learning_rate": 4.294032766515369e-06, "loss": 0.2747, "step": 28356 }, { "epoch": 2.1075436640654033, "grad_norm": 2.3172194733875884, "learning_rate": 4.29337385567836e-06, "loss": 0.2991, "step": 28357 }, { "epoch": 2.1076179858788553, "grad_norm": 2.40220106590577, "learning_rate": 4.29271498158031e-06, "loss": 0.316, "step": 28358 }, { "epoch": 2.1076923076923078, "grad_norm": 2.528699421193432, "learning_rate": 4.292056144225469e-06, "loss": 0.262, "step": 28359 }, { "epoch": 2.1077666295057598, "grad_norm": 2.394258991359668, "learning_rate": 4.291397343618074e-06, "loss": 0.2948, "step": 28360 }, { "epoch": 2.1078409513192122, "grad_norm": 3.2199587761888058, "learning_rate": 4.290738579762363e-06, "loss": 0.3868, "step": 28361 }, { "epoch": 2.1079152731326642, "grad_norm": 2.1971613991209487, "learning_rate": 4.290079852662584e-06, "loss": 0.2362, "step": 28362 }, { "epoch": 2.1079895949461167, "grad_norm": 2.525111048921038, "learning_rate": 4.289421162322976e-06, "loss": 0.2948, "step": 28363 }, { "epoch": 2.1080639167595687, "grad_norm": 2.37469479527278, "learning_rate": 4.288762508747777e-06, "loss": 0.3281, "step": 28364 }, { "epoch": 2.108138238573021, "grad_norm": 2.6924840060711412, "learning_rate": 4.288103891941228e-06, "loss": 0.3271, "step": 28365 }, { "epoch": 2.1082125603864736, "grad_norm": 3.05484566590776, "learning_rate": 4.287445311907567e-06, "loss": 0.3541, "step": 28366 }, { "epoch": 2.1082868821999257, "grad_norm": 2.2906344226005224, "learning_rate": 4.286786768651041e-06, "loss": 0.3302, "step": 28367 }, { "epoch": 2.108361204013378, "grad_norm": 2.5877525604210625, "learning_rate": 4.286128262175882e-06, "loss": 0.3645, "step": 28368 }, { "epoch": 2.10843552582683, "grad_norm": 2.4487703399391294, "learning_rate": 4.285469792486335e-06, "loss": 0.3629, "step": 28369 }, { "epoch": 2.1085098476402826, "grad_norm": 2.179957473359573, "learning_rate": 4.284811359586637e-06, "loss": 0.3055, "step": 28370 }, { "epoch": 2.1085841694537346, "grad_norm": 2.5046595535485143, "learning_rate": 4.284152963481024e-06, "loss": 0.3276, "step": 28371 }, { "epoch": 2.108658491267187, "grad_norm": 2.358573503445495, "learning_rate": 4.283494604173741e-06, "loss": 0.3301, "step": 28372 }, { "epoch": 2.108732813080639, "grad_norm": 2.0948528431354867, "learning_rate": 4.282836281669019e-06, "loss": 0.2705, "step": 28373 }, { "epoch": 2.1088071348940916, "grad_norm": 2.4243786347975096, "learning_rate": 4.282177995971108e-06, "loss": 0.3488, "step": 28374 }, { "epoch": 2.1088814567075436, "grad_norm": 2.8378914024720814, "learning_rate": 4.281519747084232e-06, "loss": 0.3191, "step": 28375 }, { "epoch": 2.108955778520996, "grad_norm": 2.552480435282926, "learning_rate": 4.2808615350126384e-06, "loss": 0.3712, "step": 28376 }, { "epoch": 2.109030100334448, "grad_norm": 2.4002812044565127, "learning_rate": 4.280203359760561e-06, "loss": 0.3127, "step": 28377 }, { "epoch": 2.1091044221479005, "grad_norm": 2.155878748070717, "learning_rate": 4.279545221332234e-06, "loss": 0.2297, "step": 28378 }, { "epoch": 2.1091787439613525, "grad_norm": 2.3043960094025357, "learning_rate": 4.278887119731903e-06, "loss": 0.3296, "step": 28379 }, { "epoch": 2.109253065774805, "grad_norm": 2.0233108412234606, "learning_rate": 4.278229054963794e-06, "loss": 0.275, "step": 28380 }, { "epoch": 2.109327387588257, "grad_norm": 2.172556971440762, "learning_rate": 4.277571027032155e-06, "loss": 0.2946, "step": 28381 }, { "epoch": 2.1094017094017095, "grad_norm": 2.6779356470439213, "learning_rate": 4.276913035941216e-06, "loss": 0.2812, "step": 28382 }, { "epoch": 2.1094760312151615, "grad_norm": 2.1043779038381465, "learning_rate": 4.27625508169521e-06, "loss": 0.2155, "step": 28383 }, { "epoch": 2.109550353028614, "grad_norm": 2.447232379660916, "learning_rate": 4.2755971642983806e-06, "loss": 0.2996, "step": 28384 }, { "epoch": 2.109624674842066, "grad_norm": 2.1878905596194373, "learning_rate": 4.2749392837549594e-06, "loss": 0.2657, "step": 28385 }, { "epoch": 2.1096989966555184, "grad_norm": 2.417987500663112, "learning_rate": 4.274281440069183e-06, "loss": 0.3053, "step": 28386 }, { "epoch": 2.109773318468971, "grad_norm": 2.1960075013636686, "learning_rate": 4.273623633245284e-06, "loss": 0.2828, "step": 28387 }, { "epoch": 2.109847640282423, "grad_norm": 1.9631683490232148, "learning_rate": 4.272965863287497e-06, "loss": 0.1661, "step": 28388 }, { "epoch": 2.1099219620958753, "grad_norm": 2.379323685786573, "learning_rate": 4.272308130200061e-06, "loss": 0.301, "step": 28389 }, { "epoch": 2.1099962839093274, "grad_norm": 2.759819216974315, "learning_rate": 4.271650433987205e-06, "loss": 0.4041, "step": 28390 }, { "epoch": 2.11007060572278, "grad_norm": 2.240280250594623, "learning_rate": 4.27099277465317e-06, "loss": 0.3534, "step": 28391 }, { "epoch": 2.110144927536232, "grad_norm": 2.705973162298753, "learning_rate": 4.270335152202186e-06, "loss": 0.2804, "step": 28392 }, { "epoch": 2.1102192493496843, "grad_norm": 3.1655522379635994, "learning_rate": 4.269677566638483e-06, "loss": 0.3113, "step": 28393 }, { "epoch": 2.1102935711631363, "grad_norm": 2.1582069915592066, "learning_rate": 4.269020017966303e-06, "loss": 0.3054, "step": 28394 }, { "epoch": 2.1103678929765888, "grad_norm": 2.9959237852193645, "learning_rate": 4.2683625061898716e-06, "loss": 0.339, "step": 28395 }, { "epoch": 2.110442214790041, "grad_norm": 2.0891569853746788, "learning_rate": 4.2677050313134276e-06, "loss": 0.3025, "step": 28396 }, { "epoch": 2.1105165366034933, "grad_norm": 2.2387035478385973, "learning_rate": 4.267047593341201e-06, "loss": 0.2678, "step": 28397 }, { "epoch": 2.1105908584169453, "grad_norm": 2.4562942446907314, "learning_rate": 4.266390192277426e-06, "loss": 0.2946, "step": 28398 }, { "epoch": 2.1106651802303977, "grad_norm": 2.1554350900255423, "learning_rate": 4.265732828126331e-06, "loss": 0.3099, "step": 28399 }, { "epoch": 2.1107395020438497, "grad_norm": 1.8849372714440698, "learning_rate": 4.265075500892148e-06, "loss": 0.2783, "step": 28400 }, { "epoch": 2.110813823857302, "grad_norm": 1.925425215683467, "learning_rate": 4.264418210579116e-06, "loss": 0.2546, "step": 28401 }, { "epoch": 2.1108881456707542, "grad_norm": 2.34593116490878, "learning_rate": 4.263760957191458e-06, "loss": 0.3076, "step": 28402 }, { "epoch": 2.1109624674842067, "grad_norm": 1.87657586881386, "learning_rate": 4.263103740733412e-06, "loss": 0.2343, "step": 28403 }, { "epoch": 2.1110367892976587, "grad_norm": 2.2278979806752446, "learning_rate": 4.262446561209207e-06, "loss": 0.3008, "step": 28404 }, { "epoch": 2.111111111111111, "grad_norm": 3.297528463504379, "learning_rate": 4.26178941862307e-06, "loss": 0.4079, "step": 28405 }, { "epoch": 2.111185432924563, "grad_norm": 2.193736685918143, "learning_rate": 4.261132312979238e-06, "loss": 0.3091, "step": 28406 }, { "epoch": 2.1112597547380156, "grad_norm": 2.1506327374278005, "learning_rate": 4.260475244281939e-06, "loss": 0.3424, "step": 28407 }, { "epoch": 2.1113340765514677, "grad_norm": 1.5749188211402627, "learning_rate": 4.259818212535402e-06, "loss": 0.1841, "step": 28408 }, { "epoch": 2.11140839836492, "grad_norm": 1.9399742664995177, "learning_rate": 4.259161217743858e-06, "loss": 0.245, "step": 28409 }, { "epoch": 2.1114827201783726, "grad_norm": 2.287333217722458, "learning_rate": 4.258504259911532e-06, "loss": 0.2874, "step": 28410 }, { "epoch": 2.1115570419918246, "grad_norm": 2.4415098125567565, "learning_rate": 4.2578473390426625e-06, "loss": 0.2988, "step": 28411 }, { "epoch": 2.111631363805277, "grad_norm": 2.627839621590885, "learning_rate": 4.257190455141469e-06, "loss": 0.3103, "step": 28412 }, { "epoch": 2.111705685618729, "grad_norm": 2.1124368110751743, "learning_rate": 4.256533608212189e-06, "loss": 0.2389, "step": 28413 }, { "epoch": 2.1117800074321815, "grad_norm": 2.446063641528048, "learning_rate": 4.255876798259049e-06, "loss": 0.2769, "step": 28414 }, { "epoch": 2.1118543292456335, "grad_norm": 2.158415244567497, "learning_rate": 4.2552200252862705e-06, "loss": 0.2958, "step": 28415 }, { "epoch": 2.111928651059086, "grad_norm": 2.1306384311004116, "learning_rate": 4.254563289298093e-06, "loss": 0.3241, "step": 28416 }, { "epoch": 2.112002972872538, "grad_norm": 2.589205514070797, "learning_rate": 4.2539065902987346e-06, "loss": 0.2563, "step": 28417 }, { "epoch": 2.1120772946859905, "grad_norm": 2.702459570213653, "learning_rate": 4.25324992829243e-06, "loss": 0.3226, "step": 28418 }, { "epoch": 2.1121516164994425, "grad_norm": 2.506637329153471, "learning_rate": 4.252593303283405e-06, "loss": 0.2892, "step": 28419 }, { "epoch": 2.112225938312895, "grad_norm": 2.1175254294399535, "learning_rate": 4.2519367152758865e-06, "loss": 0.28, "step": 28420 }, { "epoch": 2.112300260126347, "grad_norm": 2.6718801617440273, "learning_rate": 4.251280164274101e-06, "loss": 0.3163, "step": 28421 }, { "epoch": 2.1123745819397994, "grad_norm": 2.2611287157012923, "learning_rate": 4.250623650282271e-06, "loss": 0.2661, "step": 28422 }, { "epoch": 2.1124489037532515, "grad_norm": 3.596123083027427, "learning_rate": 4.249967173304632e-06, "loss": 0.3071, "step": 28423 }, { "epoch": 2.112523225566704, "grad_norm": 1.8150133246564595, "learning_rate": 4.249310733345402e-06, "loss": 0.2428, "step": 28424 }, { "epoch": 2.112597547380156, "grad_norm": 2.3591841345984506, "learning_rate": 4.248654330408816e-06, "loss": 0.2792, "step": 28425 }, { "epoch": 2.1126718691936084, "grad_norm": 2.343782773682229, "learning_rate": 4.2479979644990935e-06, "loss": 0.3196, "step": 28426 }, { "epoch": 2.1127461910070604, "grad_norm": 2.286696385735853, "learning_rate": 4.247341635620459e-06, "loss": 0.37, "step": 28427 }, { "epoch": 2.112820512820513, "grad_norm": 2.172522486512565, "learning_rate": 4.246685343777144e-06, "loss": 0.2322, "step": 28428 }, { "epoch": 2.112894834633965, "grad_norm": 2.0282828708674865, "learning_rate": 4.246029088973369e-06, "loss": 0.2243, "step": 28429 }, { "epoch": 2.1129691564474173, "grad_norm": 3.443772890992429, "learning_rate": 4.245372871213358e-06, "loss": 0.3195, "step": 28430 }, { "epoch": 2.1130434782608694, "grad_norm": 1.8319071353821, "learning_rate": 4.244716690501345e-06, "loss": 0.2634, "step": 28431 }, { "epoch": 2.113117800074322, "grad_norm": 1.9483122965107649, "learning_rate": 4.2440605468415395e-06, "loss": 0.2327, "step": 28432 }, { "epoch": 2.1131921218877743, "grad_norm": 2.0436426942655124, "learning_rate": 4.243404440238177e-06, "loss": 0.2243, "step": 28433 }, { "epoch": 2.1132664437012263, "grad_norm": 2.656725544338388, "learning_rate": 4.242748370695475e-06, "loss": 0.424, "step": 28434 }, { "epoch": 2.1133407655146788, "grad_norm": 2.19940071156383, "learning_rate": 4.242092338217664e-06, "loss": 0.2893, "step": 28435 }, { "epoch": 2.1134150873281308, "grad_norm": 2.1640150741553366, "learning_rate": 4.2414363428089635e-06, "loss": 0.1933, "step": 28436 }, { "epoch": 2.1134894091415832, "grad_norm": 2.3671817816138696, "learning_rate": 4.240780384473593e-06, "loss": 0.2592, "step": 28437 }, { "epoch": 2.1135637309550352, "grad_norm": 2.880670177066439, "learning_rate": 4.240124463215783e-06, "loss": 0.3801, "step": 28438 }, { "epoch": 2.1136380527684877, "grad_norm": 2.171501164643299, "learning_rate": 4.239468579039751e-06, "loss": 0.1792, "step": 28439 }, { "epoch": 2.1137123745819397, "grad_norm": 2.482851573464233, "learning_rate": 4.238812731949723e-06, "loss": 0.1976, "step": 28440 }, { "epoch": 2.113786696395392, "grad_norm": 2.3163020343366467, "learning_rate": 4.238156921949921e-06, "loss": 0.3576, "step": 28441 }, { "epoch": 2.113861018208844, "grad_norm": 2.3279608349074654, "learning_rate": 4.237501149044565e-06, "loss": 0.3355, "step": 28442 }, { "epoch": 2.1139353400222967, "grad_norm": 2.330767137776168, "learning_rate": 4.2368454132378775e-06, "loss": 0.26, "step": 28443 }, { "epoch": 2.1140096618357487, "grad_norm": 1.8785634523355637, "learning_rate": 4.236189714534077e-06, "loss": 0.2259, "step": 28444 }, { "epoch": 2.114083983649201, "grad_norm": 2.503405517390023, "learning_rate": 4.2355340529373925e-06, "loss": 0.2926, "step": 28445 }, { "epoch": 2.114158305462653, "grad_norm": 9.716884119911983, "learning_rate": 4.2348784284520395e-06, "loss": 0.2698, "step": 28446 }, { "epoch": 2.1142326272761056, "grad_norm": 2.2723402374693915, "learning_rate": 4.234222841082236e-06, "loss": 0.2867, "step": 28447 }, { "epoch": 2.1143069490895576, "grad_norm": 1.7453055302078344, "learning_rate": 4.233567290832211e-06, "loss": 0.2721, "step": 28448 }, { "epoch": 2.11438127090301, "grad_norm": 2.3657567181919257, "learning_rate": 4.232911777706178e-06, "loss": 0.3098, "step": 28449 }, { "epoch": 2.114455592716462, "grad_norm": 2.0245827829726157, "learning_rate": 4.232256301708362e-06, "loss": 0.2281, "step": 28450 }, { "epoch": 2.1145299145299146, "grad_norm": 2.967043089772646, "learning_rate": 4.231600862842981e-06, "loss": 0.3725, "step": 28451 }, { "epoch": 2.1146042363433666, "grad_norm": 2.250981951709673, "learning_rate": 4.230945461114251e-06, "loss": 0.2362, "step": 28452 }, { "epoch": 2.114678558156819, "grad_norm": 2.216572624248405, "learning_rate": 4.230290096526402e-06, "loss": 0.1817, "step": 28453 }, { "epoch": 2.114752879970271, "grad_norm": 2.284217388667676, "learning_rate": 4.229634769083638e-06, "loss": 0.3194, "step": 28454 }, { "epoch": 2.1148272017837235, "grad_norm": 3.2722311394192714, "learning_rate": 4.22897947879019e-06, "loss": 0.4032, "step": 28455 }, { "epoch": 2.114901523597176, "grad_norm": 2.202335183629794, "learning_rate": 4.22832422565027e-06, "loss": 0.2838, "step": 28456 }, { "epoch": 2.114975845410628, "grad_norm": 2.33228038859554, "learning_rate": 4.227669009668101e-06, "loss": 0.3266, "step": 28457 }, { "epoch": 2.1150501672240805, "grad_norm": 1.8513370581854742, "learning_rate": 4.2270138308479e-06, "loss": 0.2108, "step": 28458 }, { "epoch": 2.1151244890375325, "grad_norm": 1.790859765624911, "learning_rate": 4.226358689193883e-06, "loss": 0.2391, "step": 28459 }, { "epoch": 2.115198810850985, "grad_norm": 2.3536667051808724, "learning_rate": 4.2257035847102705e-06, "loss": 0.2792, "step": 28460 }, { "epoch": 2.115273132664437, "grad_norm": 2.167341536437205, "learning_rate": 4.225048517401274e-06, "loss": 0.2713, "step": 28461 }, { "epoch": 2.1153474544778894, "grad_norm": 2.0934374973406036, "learning_rate": 4.224393487271121e-06, "loss": 0.322, "step": 28462 }, { "epoch": 2.1154217762913414, "grad_norm": 2.0147263108450906, "learning_rate": 4.223738494324023e-06, "loss": 0.2993, "step": 28463 }, { "epoch": 2.115496098104794, "grad_norm": 2.124058246929349, "learning_rate": 4.223083538564193e-06, "loss": 0.2999, "step": 28464 }, { "epoch": 2.115570419918246, "grad_norm": 2.6966031791386174, "learning_rate": 4.2224286199958585e-06, "loss": 0.2801, "step": 28465 }, { "epoch": 2.1156447417316984, "grad_norm": 2.957080029086974, "learning_rate": 4.221773738623222e-06, "loss": 0.3513, "step": 28466 }, { "epoch": 2.1157190635451504, "grad_norm": 2.0739442518714646, "learning_rate": 4.22111889445051e-06, "loss": 0.3299, "step": 28467 }, { "epoch": 2.115793385358603, "grad_norm": 2.229063521117621, "learning_rate": 4.220464087481933e-06, "loss": 0.3415, "step": 28468 }, { "epoch": 2.115867707172055, "grad_norm": 2.4566520132051104, "learning_rate": 4.219809317721707e-06, "loss": 0.3669, "step": 28469 }, { "epoch": 2.1159420289855073, "grad_norm": 2.5877200446644664, "learning_rate": 4.219154585174051e-06, "loss": 0.2309, "step": 28470 }, { "epoch": 2.1160163507989593, "grad_norm": 2.2489205754125603, "learning_rate": 4.218499889843175e-06, "loss": 0.2443, "step": 28471 }, { "epoch": 2.116090672612412, "grad_norm": 1.9578798849101084, "learning_rate": 4.217845231733299e-06, "loss": 0.262, "step": 28472 }, { "epoch": 2.116164994425864, "grad_norm": 2.0707471809806863, "learning_rate": 4.217190610848637e-06, "loss": 0.2386, "step": 28473 }, { "epoch": 2.1162393162393163, "grad_norm": 2.081237650265918, "learning_rate": 4.216536027193398e-06, "loss": 0.271, "step": 28474 }, { "epoch": 2.1163136380527683, "grad_norm": 1.8728133475702764, "learning_rate": 4.215881480771803e-06, "loss": 0.2182, "step": 28475 }, { "epoch": 2.1163879598662207, "grad_norm": 1.75343522724481, "learning_rate": 4.215226971588063e-06, "loss": 0.2269, "step": 28476 }, { "epoch": 2.1164622816796728, "grad_norm": 2.2152487826889247, "learning_rate": 4.214572499646391e-06, "loss": 0.262, "step": 28477 }, { "epoch": 2.1165366034931252, "grad_norm": 2.802699004338399, "learning_rate": 4.2139180649509985e-06, "loss": 0.3443, "step": 28478 }, { "epoch": 2.1166109253065777, "grad_norm": 2.1884021540166914, "learning_rate": 4.213263667506104e-06, "loss": 0.315, "step": 28479 }, { "epoch": 2.1166852471200297, "grad_norm": 2.029104424691933, "learning_rate": 4.212609307315919e-06, "loss": 0.2237, "step": 28480 }, { "epoch": 2.116759568933482, "grad_norm": 2.3598196657705226, "learning_rate": 4.211954984384651e-06, "loss": 0.3322, "step": 28481 }, { "epoch": 2.116833890746934, "grad_norm": 2.132431312487385, "learning_rate": 4.211300698716519e-06, "loss": 0.2475, "step": 28482 }, { "epoch": 2.1169082125603866, "grad_norm": 1.9442774797320206, "learning_rate": 4.2106464503157295e-06, "loss": 0.2329, "step": 28483 }, { "epoch": 2.1169825343738387, "grad_norm": 3.835590545556099, "learning_rate": 4.209992239186502e-06, "loss": 0.2585, "step": 28484 }, { "epoch": 2.117056856187291, "grad_norm": 4.03246725342805, "learning_rate": 4.209338065333044e-06, "loss": 0.2855, "step": 28485 }, { "epoch": 2.117131178000743, "grad_norm": 2.281851862188225, "learning_rate": 4.208683928759563e-06, "loss": 0.2905, "step": 28486 }, { "epoch": 2.1172054998141956, "grad_norm": 2.5354306122424672, "learning_rate": 4.208029829470282e-06, "loss": 0.3206, "step": 28487 }, { "epoch": 2.1172798216276476, "grad_norm": 2.3643670799305685, "learning_rate": 4.207375767469397e-06, "loss": 0.302, "step": 28488 }, { "epoch": 2.1173541434411, "grad_norm": 2.1357492148056223, "learning_rate": 4.20672174276113e-06, "loss": 0.3089, "step": 28489 }, { "epoch": 2.117428465254552, "grad_norm": 2.3036922938726088, "learning_rate": 4.206067755349688e-06, "loss": 0.3363, "step": 28490 }, { "epoch": 2.1175027870680045, "grad_norm": 2.7385693307299803, "learning_rate": 4.205413805239277e-06, "loss": 0.336, "step": 28491 }, { "epoch": 2.1175771088814566, "grad_norm": 1.6022681492995945, "learning_rate": 4.204759892434115e-06, "loss": 0.2542, "step": 28492 }, { "epoch": 2.117651430694909, "grad_norm": 2.766051109483065, "learning_rate": 4.204106016938404e-06, "loss": 0.3224, "step": 28493 }, { "epoch": 2.117725752508361, "grad_norm": 2.241483544126663, "learning_rate": 4.203452178756362e-06, "loss": 0.3295, "step": 28494 }, { "epoch": 2.1178000743218135, "grad_norm": 2.19773299032016, "learning_rate": 4.202798377892194e-06, "loss": 0.286, "step": 28495 }, { "epoch": 2.1178743961352655, "grad_norm": 2.5881098547351677, "learning_rate": 4.202144614350107e-06, "loss": 0.2633, "step": 28496 }, { "epoch": 2.117948717948718, "grad_norm": 2.728343945835603, "learning_rate": 4.201490888134314e-06, "loss": 0.3334, "step": 28497 }, { "epoch": 2.11802303976217, "grad_norm": 2.32979698328125, "learning_rate": 4.200837199249021e-06, "loss": 0.3453, "step": 28498 }, { "epoch": 2.1180973615756225, "grad_norm": 2.1025705894079114, "learning_rate": 4.20018354769844e-06, "loss": 0.297, "step": 28499 }, { "epoch": 2.1181716833890745, "grad_norm": 2.208244653895144, "learning_rate": 4.1995299334867715e-06, "loss": 0.267, "step": 28500 }, { "epoch": 2.118246005202527, "grad_norm": 2.3166998213893217, "learning_rate": 4.1988763566182315e-06, "loss": 0.3112, "step": 28501 }, { "epoch": 2.1183203270159794, "grad_norm": 2.155778750715608, "learning_rate": 4.198222817097024e-06, "loss": 0.295, "step": 28502 }, { "epoch": 2.1183946488294314, "grad_norm": 2.034034272753651, "learning_rate": 4.197569314927354e-06, "loss": 0.2693, "step": 28503 }, { "epoch": 2.118468970642884, "grad_norm": 2.2681258521881587, "learning_rate": 4.196915850113436e-06, "loss": 0.2944, "step": 28504 }, { "epoch": 2.118543292456336, "grad_norm": 2.481935588201968, "learning_rate": 4.196262422659469e-06, "loss": 0.3723, "step": 28505 }, { "epoch": 2.1186176142697883, "grad_norm": 2.235476409011649, "learning_rate": 4.195609032569665e-06, "loss": 0.2886, "step": 28506 }, { "epoch": 2.1186919360832404, "grad_norm": 1.6061741141134451, "learning_rate": 4.19495567984823e-06, "loss": 0.1942, "step": 28507 }, { "epoch": 2.118766257896693, "grad_norm": 2.456468992774199, "learning_rate": 4.194302364499366e-06, "loss": 0.3247, "step": 28508 }, { "epoch": 2.118840579710145, "grad_norm": 2.4433116145638607, "learning_rate": 4.193649086527286e-06, "loss": 0.2918, "step": 28509 }, { "epoch": 2.1189149015235973, "grad_norm": 1.8578219417951882, "learning_rate": 4.192995845936192e-06, "loss": 0.232, "step": 28510 }, { "epoch": 2.1189892233370493, "grad_norm": 2.5288087994545574, "learning_rate": 4.192342642730288e-06, "loss": 0.2524, "step": 28511 }, { "epoch": 2.1190635451505018, "grad_norm": 2.451423433182226, "learning_rate": 4.191689476913782e-06, "loss": 0.3199, "step": 28512 }, { "epoch": 2.119137866963954, "grad_norm": 1.8088380935672552, "learning_rate": 4.1910363484908746e-06, "loss": 0.2159, "step": 28513 }, { "epoch": 2.1192121887774062, "grad_norm": 3.2637987639552213, "learning_rate": 4.190383257465775e-06, "loss": 0.2194, "step": 28514 }, { "epoch": 2.1192865105908583, "grad_norm": 2.3247308531312245, "learning_rate": 4.189730203842685e-06, "loss": 0.3172, "step": 28515 }, { "epoch": 2.1193608324043107, "grad_norm": 2.1613702571148687, "learning_rate": 4.189077187625814e-06, "loss": 0.2802, "step": 28516 }, { "epoch": 2.1194351542177627, "grad_norm": 2.0861974566068744, "learning_rate": 4.188424208819363e-06, "loss": 0.2529, "step": 28517 }, { "epoch": 2.119509476031215, "grad_norm": 2.0974848899768728, "learning_rate": 4.187771267427531e-06, "loss": 0.2903, "step": 28518 }, { "epoch": 2.119583797844667, "grad_norm": 2.4294082495500744, "learning_rate": 4.187118363454529e-06, "loss": 0.2844, "step": 28519 }, { "epoch": 2.1196581196581197, "grad_norm": 2.1539458849607045, "learning_rate": 4.186465496904554e-06, "loss": 0.2194, "step": 28520 }, { "epoch": 2.1197324414715717, "grad_norm": 2.9654249365173144, "learning_rate": 4.18581266778182e-06, "loss": 0.3462, "step": 28521 }, { "epoch": 2.119806763285024, "grad_norm": 2.5158401164591937, "learning_rate": 4.1851598760905155e-06, "loss": 0.2952, "step": 28522 }, { "epoch": 2.1198810850984766, "grad_norm": 2.209339903090862, "learning_rate": 4.184507121834853e-06, "loss": 0.2519, "step": 28523 }, { "epoch": 2.1199554069119286, "grad_norm": 2.6332822227620447, "learning_rate": 4.183854405019032e-06, "loss": 0.3939, "step": 28524 }, { "epoch": 2.120029728725381, "grad_norm": 2.319627357345049, "learning_rate": 4.183201725647251e-06, "loss": 0.331, "step": 28525 }, { "epoch": 2.120104050538833, "grad_norm": 3.3517599123694604, "learning_rate": 4.182549083723721e-06, "loss": 0.2581, "step": 28526 }, { "epoch": 2.1201783723522856, "grad_norm": 2.2320902967319305, "learning_rate": 4.181896479252632e-06, "loss": 0.3122, "step": 28527 }, { "epoch": 2.1202526941657376, "grad_norm": 2.744814385250304, "learning_rate": 4.181243912238196e-06, "loss": 0.2986, "step": 28528 }, { "epoch": 2.12032701597919, "grad_norm": 2.025631337113069, "learning_rate": 4.180591382684609e-06, "loss": 0.2758, "step": 28529 }, { "epoch": 2.120401337792642, "grad_norm": 2.2849743954893427, "learning_rate": 4.1799388905960705e-06, "loss": 0.3107, "step": 28530 }, { "epoch": 2.1204756596060945, "grad_norm": 2.821138129395155, "learning_rate": 4.179286435976786e-06, "loss": 0.3342, "step": 28531 }, { "epoch": 2.1205499814195465, "grad_norm": 1.9673579400688224, "learning_rate": 4.178634018830953e-06, "loss": 0.2901, "step": 28532 }, { "epoch": 2.120624303232999, "grad_norm": 2.2877313205179215, "learning_rate": 4.177981639162774e-06, "loss": 0.2768, "step": 28533 }, { "epoch": 2.120698625046451, "grad_norm": 2.4565337487979684, "learning_rate": 4.177329296976445e-06, "loss": 0.3226, "step": 28534 }, { "epoch": 2.1207729468599035, "grad_norm": 2.6067214114393558, "learning_rate": 4.176676992276166e-06, "loss": 0.2975, "step": 28535 }, { "epoch": 2.1208472686733555, "grad_norm": 1.9250324111603896, "learning_rate": 4.176024725066142e-06, "loss": 0.244, "step": 28536 }, { "epoch": 2.120921590486808, "grad_norm": 2.1579347660529398, "learning_rate": 4.175372495350565e-06, "loss": 0.3032, "step": 28537 }, { "epoch": 2.12099591230026, "grad_norm": 2.1733456346347615, "learning_rate": 4.17472030313364e-06, "loss": 0.3317, "step": 28538 }, { "epoch": 2.1210702341137124, "grad_norm": 1.749539956693501, "learning_rate": 4.174068148419565e-06, "loss": 0.2955, "step": 28539 }, { "epoch": 2.1211445559271644, "grad_norm": 2.3641820271470984, "learning_rate": 4.1734160312125315e-06, "loss": 0.3057, "step": 28540 }, { "epoch": 2.121218877740617, "grad_norm": 1.8685108837319613, "learning_rate": 4.172763951516747e-06, "loss": 0.2445, "step": 28541 }, { "epoch": 2.121293199554069, "grad_norm": 2.2292110520492874, "learning_rate": 4.1721119093364035e-06, "loss": 0.2866, "step": 28542 }, { "epoch": 2.1213675213675214, "grad_norm": 2.459926217836485, "learning_rate": 4.1714599046757045e-06, "loss": 0.3289, "step": 28543 }, { "epoch": 2.121441843180974, "grad_norm": 2.293157643130296, "learning_rate": 4.1708079375388445e-06, "loss": 0.3472, "step": 28544 }, { "epoch": 2.121516164994426, "grad_norm": 2.1614281224221763, "learning_rate": 4.17015600793002e-06, "loss": 0.2615, "step": 28545 }, { "epoch": 2.1215904868078783, "grad_norm": 1.9027359429662876, "learning_rate": 4.1695041158534275e-06, "loss": 0.2252, "step": 28546 }, { "epoch": 2.1216648086213303, "grad_norm": 2.361413930674986, "learning_rate": 4.168852261313263e-06, "loss": 0.2336, "step": 28547 }, { "epoch": 2.121739130434783, "grad_norm": 1.983208039642155, "learning_rate": 4.168200444313728e-06, "loss": 0.2494, "step": 28548 }, { "epoch": 2.121813452248235, "grad_norm": 2.1674201955829737, "learning_rate": 4.167548664859012e-06, "loss": 0.3174, "step": 28549 }, { "epoch": 2.1218877740616873, "grad_norm": 2.3474291802716127, "learning_rate": 4.166896922953321e-06, "loss": 0.3117, "step": 28550 }, { "epoch": 2.1219620958751393, "grad_norm": 1.9189533129661107, "learning_rate": 4.166245218600843e-06, "loss": 0.2451, "step": 28551 }, { "epoch": 2.1220364176885917, "grad_norm": 1.9954480593659616, "learning_rate": 4.165593551805771e-06, "loss": 0.2409, "step": 28552 }, { "epoch": 2.1221107395020438, "grad_norm": 2.0376355491046767, "learning_rate": 4.1649419225723105e-06, "loss": 0.2341, "step": 28553 }, { "epoch": 2.1221850613154962, "grad_norm": 2.391903543787202, "learning_rate": 4.16429033090465e-06, "loss": 0.3625, "step": 28554 }, { "epoch": 2.1222593831289482, "grad_norm": 2.6314668775436965, "learning_rate": 4.163638776806986e-06, "loss": 0.298, "step": 28555 }, { "epoch": 2.1223337049424007, "grad_norm": 2.1475340942141274, "learning_rate": 4.162987260283513e-06, "loss": 0.2704, "step": 28556 }, { "epoch": 2.1224080267558527, "grad_norm": 2.4821163559511765, "learning_rate": 4.162335781338421e-06, "loss": 0.3165, "step": 28557 }, { "epoch": 2.122482348569305, "grad_norm": 2.4837747439061357, "learning_rate": 4.161684339975912e-06, "loss": 0.3036, "step": 28558 }, { "epoch": 2.122556670382757, "grad_norm": 1.9240661450872907, "learning_rate": 4.161032936200173e-06, "loss": 0.2128, "step": 28559 }, { "epoch": 2.1226309921962097, "grad_norm": 2.384097957145986, "learning_rate": 4.160381570015405e-06, "loss": 0.3282, "step": 28560 }, { "epoch": 2.1227053140096617, "grad_norm": 2.071953212790374, "learning_rate": 4.159730241425797e-06, "loss": 0.2483, "step": 28561 }, { "epoch": 2.122779635823114, "grad_norm": 2.5537845021408203, "learning_rate": 4.159078950435539e-06, "loss": 0.2617, "step": 28562 }, { "epoch": 2.122853957636566, "grad_norm": 2.348841404430805, "learning_rate": 4.158427697048831e-06, "loss": 0.3472, "step": 28563 }, { "epoch": 2.1229282794500186, "grad_norm": 2.2045312389001706, "learning_rate": 4.157776481269859e-06, "loss": 0.2555, "step": 28564 }, { "epoch": 2.1230026012634706, "grad_norm": 2.562532092844707, "learning_rate": 4.1571253031028226e-06, "loss": 0.3485, "step": 28565 }, { "epoch": 2.123076923076923, "grad_norm": 2.5947589059139275, "learning_rate": 4.15647416255191e-06, "loss": 0.3066, "step": 28566 }, { "epoch": 2.1231512448903755, "grad_norm": 2.013666397350822, "learning_rate": 4.155823059621315e-06, "loss": 0.2204, "step": 28567 }, { "epoch": 2.1232255667038276, "grad_norm": 2.477557908155597, "learning_rate": 4.155171994315226e-06, "loss": 0.2882, "step": 28568 }, { "epoch": 2.12329988851728, "grad_norm": 2.090335289067124, "learning_rate": 4.1545209666378326e-06, "loss": 0.2293, "step": 28569 }, { "epoch": 2.123374210330732, "grad_norm": 2.4358464488377845, "learning_rate": 4.153869976593335e-06, "loss": 0.2875, "step": 28570 }, { "epoch": 2.1234485321441845, "grad_norm": 2.83386829110359, "learning_rate": 4.1532190241859185e-06, "loss": 0.3168, "step": 28571 }, { "epoch": 2.1235228539576365, "grad_norm": 1.564613496654067, "learning_rate": 4.152568109419771e-06, "loss": 0.2071, "step": 28572 }, { "epoch": 2.123597175771089, "grad_norm": 2.175607335335457, "learning_rate": 4.1519172322990906e-06, "loss": 0.3329, "step": 28573 }, { "epoch": 2.123671497584541, "grad_norm": 2.906874905107511, "learning_rate": 4.15126639282806e-06, "loss": 0.3584, "step": 28574 }, { "epoch": 2.1237458193979935, "grad_norm": 1.8429008727933351, "learning_rate": 4.150615591010876e-06, "loss": 0.2585, "step": 28575 }, { "epoch": 2.1238201412114455, "grad_norm": 2.427557869966059, "learning_rate": 4.149964826851726e-06, "loss": 0.3961, "step": 28576 }, { "epoch": 2.123894463024898, "grad_norm": 2.521866528744939, "learning_rate": 4.149314100354794e-06, "loss": 0.2693, "step": 28577 }, { "epoch": 2.12396878483835, "grad_norm": 2.2840939015075308, "learning_rate": 4.148663411524282e-06, "loss": 0.3085, "step": 28578 }, { "epoch": 2.1240431066518024, "grad_norm": 2.1561548001326565, "learning_rate": 4.148012760364365e-06, "loss": 0.2557, "step": 28579 }, { "epoch": 2.1241174284652544, "grad_norm": 2.013168852438623, "learning_rate": 4.147362146879241e-06, "loss": 0.2821, "step": 28580 }, { "epoch": 2.124191750278707, "grad_norm": 1.9546875297856035, "learning_rate": 4.146711571073092e-06, "loss": 0.2042, "step": 28581 }, { "epoch": 2.124266072092159, "grad_norm": 2.1133065120759236, "learning_rate": 4.146061032950115e-06, "loss": 0.2941, "step": 28582 }, { "epoch": 2.1243403939056114, "grad_norm": 2.2070483604027062, "learning_rate": 4.145410532514492e-06, "loss": 0.3371, "step": 28583 }, { "epoch": 2.1244147157190634, "grad_norm": 2.1149994229206803, "learning_rate": 4.144760069770409e-06, "loss": 0.2654, "step": 28584 }, { "epoch": 2.124489037532516, "grad_norm": 2.3481538056260645, "learning_rate": 4.1441096447220605e-06, "loss": 0.234, "step": 28585 }, { "epoch": 2.124563359345968, "grad_norm": 2.588029082756778, "learning_rate": 4.143459257373628e-06, "loss": 0.2873, "step": 28586 }, { "epoch": 2.1246376811594203, "grad_norm": 4.326792426808779, "learning_rate": 4.142808907729304e-06, "loss": 0.2041, "step": 28587 }, { "epoch": 2.1247120029728723, "grad_norm": 2.343863853385457, "learning_rate": 4.142158595793271e-06, "loss": 0.2916, "step": 28588 }, { "epoch": 2.124786324786325, "grad_norm": 2.525927788803067, "learning_rate": 4.141508321569717e-06, "loss": 0.3173, "step": 28589 }, { "epoch": 2.1248606465997772, "grad_norm": 2.353839954175129, "learning_rate": 4.14085808506283e-06, "loss": 0.3577, "step": 28590 }, { "epoch": 2.1249349684132293, "grad_norm": 1.8142827605479745, "learning_rate": 4.140207886276789e-06, "loss": 0.2364, "step": 28591 }, { "epoch": 2.1250092902266817, "grad_norm": 2.466181657359012, "learning_rate": 4.139557725215792e-06, "loss": 0.3075, "step": 28592 }, { "epoch": 2.1250836120401337, "grad_norm": 2.9903157386638055, "learning_rate": 4.1389076018840155e-06, "loss": 0.2573, "step": 28593 }, { "epoch": 2.125157933853586, "grad_norm": 2.366379822489633, "learning_rate": 4.138257516285645e-06, "loss": 0.3155, "step": 28594 }, { "epoch": 2.125232255667038, "grad_norm": 2.801086739832553, "learning_rate": 4.137607468424872e-06, "loss": 0.3218, "step": 28595 }, { "epoch": 2.1253065774804907, "grad_norm": 2.2037831687033393, "learning_rate": 4.1369574583058755e-06, "loss": 0.3565, "step": 28596 }, { "epoch": 2.1253808992939427, "grad_norm": 2.462129941266986, "learning_rate": 4.1363074859328455e-06, "loss": 0.2844, "step": 28597 }, { "epoch": 2.125455221107395, "grad_norm": 2.9069719828079807, "learning_rate": 4.135657551309964e-06, "loss": 0.4211, "step": 28598 }, { "epoch": 2.125529542920847, "grad_norm": 2.4701296327470796, "learning_rate": 4.135007654441411e-06, "loss": 0.2584, "step": 28599 }, { "epoch": 2.1256038647342996, "grad_norm": 1.5891760313402918, "learning_rate": 4.134357795331382e-06, "loss": 0.2081, "step": 28600 }, { "epoch": 2.1256781865477516, "grad_norm": 2.1655642104569415, "learning_rate": 4.133707973984045e-06, "loss": 0.3243, "step": 28601 }, { "epoch": 2.125752508361204, "grad_norm": 1.984200330836462, "learning_rate": 4.133058190403596e-06, "loss": 0.2079, "step": 28602 }, { "epoch": 2.125826830174656, "grad_norm": 2.2736905551263575, "learning_rate": 4.132408444594211e-06, "loss": 0.2551, "step": 28603 }, { "epoch": 2.1259011519881086, "grad_norm": 2.157506900574916, "learning_rate": 4.131758736560078e-06, "loss": 0.2558, "step": 28604 }, { "epoch": 2.1259754738015606, "grad_norm": 2.2056789419923137, "learning_rate": 4.131109066305378e-06, "loss": 0.3163, "step": 28605 }, { "epoch": 2.126049795615013, "grad_norm": 2.298393328684584, "learning_rate": 4.13045943383429e-06, "loss": 0.2523, "step": 28606 }, { "epoch": 2.126124117428465, "grad_norm": 2.0473594092155647, "learning_rate": 4.129809839151004e-06, "loss": 0.2487, "step": 28607 }, { "epoch": 2.1261984392419175, "grad_norm": 2.7430781665932584, "learning_rate": 4.1291602822596934e-06, "loss": 0.3983, "step": 28608 }, { "epoch": 2.1262727610553696, "grad_norm": 2.239065685724621, "learning_rate": 4.1285107631645486e-06, "loss": 0.3067, "step": 28609 }, { "epoch": 2.126347082868822, "grad_norm": 1.8464766622177298, "learning_rate": 4.1278612818697465e-06, "loss": 0.253, "step": 28610 }, { "epoch": 2.126421404682274, "grad_norm": 1.925963099989198, "learning_rate": 4.127211838379468e-06, "loss": 0.2798, "step": 28611 }, { "epoch": 2.1264957264957265, "grad_norm": 2.4713087024194653, "learning_rate": 4.126562432697895e-06, "loss": 0.34, "step": 28612 }, { "epoch": 2.126570048309179, "grad_norm": 2.146907823679028, "learning_rate": 4.125913064829206e-06, "loss": 0.3618, "step": 28613 }, { "epoch": 2.126644370122631, "grad_norm": 3.2904659593144334, "learning_rate": 4.125263734777586e-06, "loss": 0.3012, "step": 28614 }, { "epoch": 2.1267186919360834, "grad_norm": 2.8909471651203917, "learning_rate": 4.1246144425472146e-06, "loss": 0.3722, "step": 28615 }, { "epoch": 2.1267930137495354, "grad_norm": 2.482893400741159, "learning_rate": 4.123965188142266e-06, "loss": 0.3992, "step": 28616 }, { "epoch": 2.126867335562988, "grad_norm": 2.410583621095382, "learning_rate": 4.123315971566929e-06, "loss": 0.367, "step": 28617 }, { "epoch": 2.12694165737644, "grad_norm": 2.363228700952528, "learning_rate": 4.122666792825374e-06, "loss": 0.2696, "step": 28618 }, { "epoch": 2.1270159791898924, "grad_norm": 2.1992591181749055, "learning_rate": 4.122017651921791e-06, "loss": 0.3249, "step": 28619 }, { "epoch": 2.1270903010033444, "grad_norm": 2.5670507765335584, "learning_rate": 4.1213685488603516e-06, "loss": 0.2879, "step": 28620 }, { "epoch": 2.127164622816797, "grad_norm": 1.9524391769344795, "learning_rate": 4.120719483645232e-06, "loss": 0.2508, "step": 28621 }, { "epoch": 2.127238944630249, "grad_norm": 1.9803458098236657, "learning_rate": 4.120070456280621e-06, "loss": 0.2276, "step": 28622 }, { "epoch": 2.1273132664437013, "grad_norm": 2.0678199960531995, "learning_rate": 4.11942146677069e-06, "loss": 0.2912, "step": 28623 }, { "epoch": 2.1273875882571534, "grad_norm": 2.107259177690673, "learning_rate": 4.11877251511962e-06, "loss": 0.2633, "step": 28624 }, { "epoch": 2.127461910070606, "grad_norm": 2.2302134053642457, "learning_rate": 4.118123601331583e-06, "loss": 0.2894, "step": 28625 }, { "epoch": 2.127536231884058, "grad_norm": 2.5394985749212, "learning_rate": 4.117474725410764e-06, "loss": 0.3782, "step": 28626 }, { "epoch": 2.1276105536975103, "grad_norm": 2.300768322432519, "learning_rate": 4.116825887361337e-06, "loss": 0.2564, "step": 28627 }, { "epoch": 2.1276848755109623, "grad_norm": 1.9274970548133286, "learning_rate": 4.116177087187477e-06, "loss": 0.2899, "step": 28628 }, { "epoch": 2.1277591973244148, "grad_norm": 2.4732961596132004, "learning_rate": 4.115528324893368e-06, "loss": 0.2648, "step": 28629 }, { "epoch": 2.127833519137867, "grad_norm": 2.3145213086439536, "learning_rate": 4.114879600483178e-06, "loss": 0.3311, "step": 28630 }, { "epoch": 2.1279078409513192, "grad_norm": 2.2102136273020347, "learning_rate": 4.114230913961091e-06, "loss": 0.2761, "step": 28631 }, { "epoch": 2.1279821627647713, "grad_norm": 2.2161914504473064, "learning_rate": 4.113582265331281e-06, "loss": 0.2767, "step": 28632 }, { "epoch": 2.1280564845782237, "grad_norm": 2.132235714995679, "learning_rate": 4.112933654597918e-06, "loss": 0.2222, "step": 28633 }, { "epoch": 2.1281308063916757, "grad_norm": 2.259863059981892, "learning_rate": 4.11228508176519e-06, "loss": 0.2814, "step": 28634 }, { "epoch": 2.128205128205128, "grad_norm": 3.0193268343977357, "learning_rate": 4.11163654683726e-06, "loss": 0.2826, "step": 28635 }, { "epoch": 2.1282794500185807, "grad_norm": 3.4802198943891147, "learning_rate": 4.1109880498183095e-06, "loss": 0.3097, "step": 28636 }, { "epoch": 2.1283537718320327, "grad_norm": 2.0186172716486523, "learning_rate": 4.110339590712514e-06, "loss": 0.2159, "step": 28637 }, { "epoch": 2.128428093645485, "grad_norm": 2.142951961501139, "learning_rate": 4.109691169524043e-06, "loss": 0.2401, "step": 28638 }, { "epoch": 2.128502415458937, "grad_norm": 2.3067467387921146, "learning_rate": 4.109042786257077e-06, "loss": 0.2803, "step": 28639 }, { "epoch": 2.1285767372723896, "grad_norm": 2.77381675589577, "learning_rate": 4.1083944409157846e-06, "loss": 0.3345, "step": 28640 }, { "epoch": 2.1286510590858416, "grad_norm": 2.045537620511049, "learning_rate": 4.107746133504346e-06, "loss": 0.2657, "step": 28641 }, { "epoch": 2.128725380899294, "grad_norm": 2.5775458373446782, "learning_rate": 4.107097864026935e-06, "loss": 0.3671, "step": 28642 }, { "epoch": 2.128799702712746, "grad_norm": 2.4424147007894796, "learning_rate": 4.1064496324877155e-06, "loss": 0.316, "step": 28643 }, { "epoch": 2.1288740245261986, "grad_norm": 2.0332851677517225, "learning_rate": 4.105801438890873e-06, "loss": 0.2351, "step": 28644 }, { "epoch": 2.1289483463396506, "grad_norm": 3.23722066242441, "learning_rate": 4.105153283240574e-06, "loss": 0.3411, "step": 28645 }, { "epoch": 2.129022668153103, "grad_norm": 2.50600927740767, "learning_rate": 4.104505165540992e-06, "loss": 0.2816, "step": 28646 }, { "epoch": 2.129096989966555, "grad_norm": 1.8001119032564066, "learning_rate": 4.103857085796296e-06, "loss": 0.2757, "step": 28647 }, { "epoch": 2.1291713117800075, "grad_norm": 2.493869786064447, "learning_rate": 4.103209044010668e-06, "loss": 0.3016, "step": 28648 }, { "epoch": 2.1292456335934595, "grad_norm": 1.7573563164700678, "learning_rate": 4.102561040188272e-06, "loss": 0.2589, "step": 28649 }, { "epoch": 2.129319955406912, "grad_norm": 3.079347708942631, "learning_rate": 4.101913074333279e-06, "loss": 0.3376, "step": 28650 }, { "epoch": 2.129394277220364, "grad_norm": 2.1178979831029676, "learning_rate": 4.101265146449867e-06, "loss": 0.2548, "step": 28651 }, { "epoch": 2.1294685990338165, "grad_norm": 2.2617067289711246, "learning_rate": 4.100617256542201e-06, "loss": 0.3232, "step": 28652 }, { "epoch": 2.1295429208472685, "grad_norm": 1.8016230590311293, "learning_rate": 4.099969404614459e-06, "loss": 0.2856, "step": 28653 }, { "epoch": 2.129617242660721, "grad_norm": 2.9283343051963064, "learning_rate": 4.099321590670807e-06, "loss": 0.3389, "step": 28654 }, { "epoch": 2.1296915644741734, "grad_norm": 1.9932770614965216, "learning_rate": 4.098673814715413e-06, "loss": 0.2663, "step": 28655 }, { "epoch": 2.1297658862876254, "grad_norm": 1.924735649286678, "learning_rate": 4.098026076752454e-06, "loss": 0.2539, "step": 28656 }, { "epoch": 2.1298402081010774, "grad_norm": 1.8948635898153197, "learning_rate": 4.097378376786098e-06, "loss": 0.2029, "step": 28657 }, { "epoch": 2.12991452991453, "grad_norm": 2.6638342064031764, "learning_rate": 4.096730714820513e-06, "loss": 0.3438, "step": 28658 }, { "epoch": 2.1299888517279824, "grad_norm": 2.224523831773493, "learning_rate": 4.096083090859868e-06, "loss": 0.3074, "step": 28659 }, { "epoch": 2.1300631735414344, "grad_norm": 2.7243098100044354, "learning_rate": 4.095435504908332e-06, "loss": 0.3901, "step": 28660 }, { "epoch": 2.130137495354887, "grad_norm": 2.8117066550809007, "learning_rate": 4.094787956970079e-06, "loss": 0.3343, "step": 28661 }, { "epoch": 2.130211817168339, "grad_norm": 2.1879281172535423, "learning_rate": 4.09414044704927e-06, "loss": 0.3456, "step": 28662 }, { "epoch": 2.1302861389817913, "grad_norm": 2.7385658697705337, "learning_rate": 4.093492975150084e-06, "loss": 0.2648, "step": 28663 }, { "epoch": 2.1303604607952433, "grad_norm": 2.5402967922675708, "learning_rate": 4.092845541276683e-06, "loss": 0.2906, "step": 28664 }, { "epoch": 2.130434782608696, "grad_norm": 2.138890106107549, "learning_rate": 4.092198145433232e-06, "loss": 0.28, "step": 28665 }, { "epoch": 2.130509104422148, "grad_norm": 2.0713074993315, "learning_rate": 4.091550787623906e-06, "loss": 0.1967, "step": 28666 }, { "epoch": 2.1305834262356003, "grad_norm": 2.059940235407306, "learning_rate": 4.090903467852867e-06, "loss": 0.2888, "step": 28667 }, { "epoch": 2.1306577480490523, "grad_norm": 2.5734189900527817, "learning_rate": 4.090256186124291e-06, "loss": 0.348, "step": 28668 }, { "epoch": 2.1307320698625047, "grad_norm": 2.42692673507336, "learning_rate": 4.089608942442333e-06, "loss": 0.3375, "step": 28669 }, { "epoch": 2.1308063916759568, "grad_norm": 2.192774721363171, "learning_rate": 4.088961736811168e-06, "loss": 0.2581, "step": 28670 }, { "epoch": 2.130880713489409, "grad_norm": 2.616275474306919, "learning_rate": 4.088314569234961e-06, "loss": 0.259, "step": 28671 }, { "epoch": 2.1309550353028612, "grad_norm": 3.4307493971931953, "learning_rate": 4.087667439717875e-06, "loss": 0.3389, "step": 28672 }, { "epoch": 2.1310293571163137, "grad_norm": 2.5865712766160747, "learning_rate": 4.087020348264083e-06, "loss": 0.3045, "step": 28673 }, { "epoch": 2.1311036789297657, "grad_norm": 2.365584745022997, "learning_rate": 4.086373294877743e-06, "loss": 0.2958, "step": 28674 }, { "epoch": 2.131178000743218, "grad_norm": 2.550096152488509, "learning_rate": 4.085726279563029e-06, "loss": 0.368, "step": 28675 }, { "epoch": 2.13125232255667, "grad_norm": 2.2816564695302377, "learning_rate": 4.085079302324102e-06, "loss": 0.245, "step": 28676 }, { "epoch": 2.1313266443701226, "grad_norm": 2.148164844900856, "learning_rate": 4.084432363165125e-06, "loss": 0.2307, "step": 28677 }, { "epoch": 2.131400966183575, "grad_norm": 2.7938551689602265, "learning_rate": 4.083785462090268e-06, "loss": 0.314, "step": 28678 }, { "epoch": 2.131475287997027, "grad_norm": 2.098425405666939, "learning_rate": 4.083138599103693e-06, "loss": 0.257, "step": 28679 }, { "epoch": 2.131549609810479, "grad_norm": 1.9753796897494107, "learning_rate": 4.082491774209566e-06, "loss": 0.2692, "step": 28680 }, { "epoch": 2.1316239316239316, "grad_norm": 1.681108551519228, "learning_rate": 4.081844987412048e-06, "loss": 0.2375, "step": 28681 }, { "epoch": 2.131698253437384, "grad_norm": 2.7779760837389116, "learning_rate": 4.081198238715303e-06, "loss": 0.2794, "step": 28682 }, { "epoch": 2.131772575250836, "grad_norm": 2.3136979485568205, "learning_rate": 4.0805515281235e-06, "loss": 0.2923, "step": 28683 }, { "epoch": 2.1318468970642885, "grad_norm": 2.026742327323234, "learning_rate": 4.079904855640795e-06, "loss": 0.2372, "step": 28684 }, { "epoch": 2.1319212188777406, "grad_norm": 1.8971675913451431, "learning_rate": 4.079258221271358e-06, "loss": 0.2439, "step": 28685 }, { "epoch": 2.131995540691193, "grad_norm": 2.11388255612963, "learning_rate": 4.078611625019351e-06, "loss": 0.2053, "step": 28686 }, { "epoch": 2.132069862504645, "grad_norm": 2.117594817685919, "learning_rate": 4.077965066888931e-06, "loss": 0.3011, "step": 28687 }, { "epoch": 2.1321441843180975, "grad_norm": 2.4099095283975136, "learning_rate": 4.077318546884268e-06, "loss": 0.294, "step": 28688 }, { "epoch": 2.1322185061315495, "grad_norm": 2.1538951173330765, "learning_rate": 4.076672065009518e-06, "loss": 0.2162, "step": 28689 }, { "epoch": 2.132292827945002, "grad_norm": 2.785682004501505, "learning_rate": 4.076025621268852e-06, "loss": 0.3365, "step": 28690 }, { "epoch": 2.132367149758454, "grad_norm": 1.754179750424928, "learning_rate": 4.07537921566642e-06, "loss": 0.2607, "step": 28691 }, { "epoch": 2.1324414715719064, "grad_norm": 2.3488909211071083, "learning_rate": 4.074732848206392e-06, "loss": 0.299, "step": 28692 }, { "epoch": 2.1325157933853585, "grad_norm": 2.410219064124721, "learning_rate": 4.074086518892926e-06, "loss": 0.3786, "step": 28693 }, { "epoch": 2.132590115198811, "grad_norm": 2.2355376814255514, "learning_rate": 4.07344022773018e-06, "loss": 0.2976, "step": 28694 }, { "epoch": 2.132664437012263, "grad_norm": 2.211884317282091, "learning_rate": 4.072793974722322e-06, "loss": 0.2986, "step": 28695 }, { "epoch": 2.1327387588257154, "grad_norm": 2.4466221082884174, "learning_rate": 4.072147759873506e-06, "loss": 0.3657, "step": 28696 }, { "epoch": 2.1328130806391674, "grad_norm": 2.8162024214047103, "learning_rate": 4.0715015831878976e-06, "loss": 0.2965, "step": 28697 }, { "epoch": 2.13288740245262, "grad_norm": 1.9357835356030766, "learning_rate": 4.070855444669655e-06, "loss": 0.1823, "step": 28698 }, { "epoch": 2.132961724266072, "grad_norm": 2.0570288425852374, "learning_rate": 4.070209344322934e-06, "loss": 0.2862, "step": 28699 }, { "epoch": 2.1330360460795244, "grad_norm": 2.301563607366453, "learning_rate": 4.0695632821519005e-06, "loss": 0.2869, "step": 28700 }, { "epoch": 2.133110367892977, "grad_norm": 2.4014881617838286, "learning_rate": 4.068917258160712e-06, "loss": 0.2342, "step": 28701 }, { "epoch": 2.133184689706429, "grad_norm": 2.332415916818258, "learning_rate": 4.068271272353526e-06, "loss": 0.264, "step": 28702 }, { "epoch": 2.1332590115198813, "grad_norm": 2.1358776466729448, "learning_rate": 4.067625324734501e-06, "loss": 0.2903, "step": 28703 }, { "epoch": 2.1333333333333333, "grad_norm": 2.905678759148031, "learning_rate": 4.066979415307794e-06, "loss": 0.3509, "step": 28704 }, { "epoch": 2.1334076551467858, "grad_norm": 2.5066654501883043, "learning_rate": 4.066333544077568e-06, "loss": 0.2703, "step": 28705 }, { "epoch": 2.133481976960238, "grad_norm": 2.08441916082371, "learning_rate": 4.065687711047975e-06, "loss": 0.22, "step": 28706 }, { "epoch": 2.1335562987736902, "grad_norm": 3.086775926253308, "learning_rate": 4.065041916223181e-06, "loss": 0.389, "step": 28707 }, { "epoch": 2.1336306205871423, "grad_norm": 2.1806231089326173, "learning_rate": 4.06439615960734e-06, "loss": 0.3675, "step": 28708 }, { "epoch": 2.1337049424005947, "grad_norm": 2.560841060783457, "learning_rate": 4.063750441204603e-06, "loss": 0.3287, "step": 28709 }, { "epoch": 2.1337792642140467, "grad_norm": 2.28227975229127, "learning_rate": 4.063104761019138e-06, "loss": 0.3268, "step": 28710 }, { "epoch": 2.133853586027499, "grad_norm": 2.168981045988252, "learning_rate": 4.062459119055092e-06, "loss": 0.288, "step": 28711 }, { "epoch": 2.133927907840951, "grad_norm": 2.501464961175887, "learning_rate": 4.061813515316631e-06, "loss": 0.2335, "step": 28712 }, { "epoch": 2.1340022296544037, "grad_norm": 2.8850823573693636, "learning_rate": 4.061167949807905e-06, "loss": 0.3493, "step": 28713 }, { "epoch": 2.1340765514678557, "grad_norm": 2.1549356238840063, "learning_rate": 4.060522422533072e-06, "loss": 0.2063, "step": 28714 }, { "epoch": 2.134150873281308, "grad_norm": 2.0813646346633354, "learning_rate": 4.0598769334962876e-06, "loss": 0.2805, "step": 28715 }, { "epoch": 2.13422519509476, "grad_norm": 2.192989132340156, "learning_rate": 4.059231482701703e-06, "loss": 0.2742, "step": 28716 }, { "epoch": 2.1342995169082126, "grad_norm": 2.1064189761088348, "learning_rate": 4.058586070153482e-06, "loss": 0.2939, "step": 28717 }, { "epoch": 2.1343738387216646, "grad_norm": 2.4101967865495006, "learning_rate": 4.057940695855776e-06, "loss": 0.3375, "step": 28718 }, { "epoch": 2.134448160535117, "grad_norm": 2.41230018028066, "learning_rate": 4.057295359812735e-06, "loss": 0.2704, "step": 28719 }, { "epoch": 2.134522482348569, "grad_norm": 1.9084561675023595, "learning_rate": 4.056650062028523e-06, "loss": 0.2592, "step": 28720 }, { "epoch": 2.1345968041620216, "grad_norm": 1.9591251195360622, "learning_rate": 4.056004802507285e-06, "loss": 0.2223, "step": 28721 }, { "epoch": 2.1346711259754736, "grad_norm": 1.7951368015160514, "learning_rate": 4.055359581253183e-06, "loss": 0.1976, "step": 28722 }, { "epoch": 2.134745447788926, "grad_norm": 2.386205579266595, "learning_rate": 4.054714398270368e-06, "loss": 0.2918, "step": 28723 }, { "epoch": 2.1348197696023785, "grad_norm": 2.1761394028950933, "learning_rate": 4.054069253562993e-06, "loss": 0.289, "step": 28724 }, { "epoch": 2.1348940914158305, "grad_norm": 2.000849319414213, "learning_rate": 4.053424147135213e-06, "loss": 0.2528, "step": 28725 }, { "epoch": 2.134968413229283, "grad_norm": 2.085043727486684, "learning_rate": 4.052779078991174e-06, "loss": 0.32, "step": 28726 }, { "epoch": 2.135042735042735, "grad_norm": 2.0755906240502884, "learning_rate": 4.052134049135041e-06, "loss": 0.2233, "step": 28727 }, { "epoch": 2.1351170568561875, "grad_norm": 2.04165542729952, "learning_rate": 4.051489057570954e-06, "loss": 0.2303, "step": 28728 }, { "epoch": 2.1351913786696395, "grad_norm": 2.235477345944156, "learning_rate": 4.050844104303077e-06, "loss": 0.3018, "step": 28729 }, { "epoch": 2.135265700483092, "grad_norm": 2.2529220850469467, "learning_rate": 4.0501991893355565e-06, "loss": 0.3335, "step": 28730 }, { "epoch": 2.135340022296544, "grad_norm": 2.071567813197606, "learning_rate": 4.049554312672541e-06, "loss": 0.3103, "step": 28731 }, { "epoch": 2.1354143441099964, "grad_norm": 2.1536219926191067, "learning_rate": 4.04890947431819e-06, "loss": 0.3409, "step": 28732 }, { "epoch": 2.1354886659234484, "grad_norm": 2.3727494816724697, "learning_rate": 4.048264674276647e-06, "loss": 0.4103, "step": 28733 }, { "epoch": 2.135562987736901, "grad_norm": 2.4117741651557014, "learning_rate": 4.047619912552072e-06, "loss": 0.2589, "step": 28734 }, { "epoch": 2.135637309550353, "grad_norm": 1.9123515708652716, "learning_rate": 4.046975189148611e-06, "loss": 0.2682, "step": 28735 }, { "epoch": 2.1357116313638054, "grad_norm": 2.1998265618817854, "learning_rate": 4.046330504070414e-06, "loss": 0.2324, "step": 28736 }, { "epoch": 2.1357859531772574, "grad_norm": 2.3238748444701294, "learning_rate": 4.045685857321632e-06, "loss": 0.2846, "step": 28737 }, { "epoch": 2.13586027499071, "grad_norm": 2.4753319498522153, "learning_rate": 4.0450412489064124e-06, "loss": 0.342, "step": 28738 }, { "epoch": 2.135934596804162, "grad_norm": 2.0482916739337647, "learning_rate": 4.044396678828912e-06, "loss": 0.288, "step": 28739 }, { "epoch": 2.1360089186176143, "grad_norm": 1.9833698598779241, "learning_rate": 4.043752147093277e-06, "loss": 0.2298, "step": 28740 }, { "epoch": 2.1360832404310663, "grad_norm": 2.552601435129916, "learning_rate": 4.043107653703653e-06, "loss": 0.3029, "step": 28741 }, { "epoch": 2.136157562244519, "grad_norm": 2.0272371642921057, "learning_rate": 4.042463198664196e-06, "loss": 0.2521, "step": 28742 }, { "epoch": 2.136231884057971, "grad_norm": 2.2143670747994983, "learning_rate": 4.0418187819790475e-06, "loss": 0.2664, "step": 28743 }, { "epoch": 2.1363062058714233, "grad_norm": 1.9979934101680188, "learning_rate": 4.041174403652366e-06, "loss": 0.2585, "step": 28744 }, { "epoch": 2.1363805276848753, "grad_norm": 1.9238632218791043, "learning_rate": 4.040530063688294e-06, "loss": 0.1802, "step": 28745 }, { "epoch": 2.1364548494983278, "grad_norm": 1.8557359561658642, "learning_rate": 4.039885762090976e-06, "loss": 0.2471, "step": 28746 }, { "epoch": 2.13652917131178, "grad_norm": 2.0117865049031347, "learning_rate": 4.039241498864571e-06, "loss": 0.2558, "step": 28747 }, { "epoch": 2.1366034931252322, "grad_norm": 2.209846325187383, "learning_rate": 4.038597274013213e-06, "loss": 0.3011, "step": 28748 }, { "epoch": 2.1366778149386847, "grad_norm": 2.5395200969413096, "learning_rate": 4.037953087541061e-06, "loss": 0.2938, "step": 28749 }, { "epoch": 2.1367521367521367, "grad_norm": 2.997884527057494, "learning_rate": 4.0373089394522536e-06, "loss": 0.4048, "step": 28750 }, { "epoch": 2.136826458565589, "grad_norm": 1.618635087271905, "learning_rate": 4.036664829750945e-06, "loss": 0.2254, "step": 28751 }, { "epoch": 2.136900780379041, "grad_norm": 2.214576180981442, "learning_rate": 4.036020758441279e-06, "loss": 0.2929, "step": 28752 }, { "epoch": 2.1369751021924936, "grad_norm": 2.3254007576356615, "learning_rate": 4.035376725527398e-06, "loss": 0.3347, "step": 28753 }, { "epoch": 2.1370494240059457, "grad_norm": 2.8313115984231167, "learning_rate": 4.034732731013457e-06, "loss": 0.3678, "step": 28754 }, { "epoch": 2.137123745819398, "grad_norm": 2.387771830320581, "learning_rate": 4.034088774903592e-06, "loss": 0.3141, "step": 28755 }, { "epoch": 2.13719806763285, "grad_norm": 2.1021892465478653, "learning_rate": 4.033444857201958e-06, "loss": 0.28, "step": 28756 }, { "epoch": 2.1372723894463026, "grad_norm": 1.8456797504800384, "learning_rate": 4.032800977912698e-06, "loss": 0.1754, "step": 28757 }, { "epoch": 2.1373467112597546, "grad_norm": 2.430906659305356, "learning_rate": 4.032157137039954e-06, "loss": 0.3314, "step": 28758 }, { "epoch": 2.137421033073207, "grad_norm": 2.266707595069619, "learning_rate": 4.031513334587872e-06, "loss": 0.3609, "step": 28759 }, { "epoch": 2.137495354886659, "grad_norm": 2.1229985706977192, "learning_rate": 4.030869570560596e-06, "loss": 0.2229, "step": 28760 }, { "epoch": 2.1375696767001116, "grad_norm": 2.9267577012975443, "learning_rate": 4.0302258449622744e-06, "loss": 0.3396, "step": 28761 }, { "epoch": 2.1376439985135636, "grad_norm": 3.1594454960807794, "learning_rate": 4.029582157797049e-06, "loss": 0.3351, "step": 28762 }, { "epoch": 2.137718320327016, "grad_norm": 2.376146532604653, "learning_rate": 4.028938509069061e-06, "loss": 0.3037, "step": 28763 }, { "epoch": 2.137792642140468, "grad_norm": 1.84683069347929, "learning_rate": 4.0282948987824585e-06, "loss": 0.2659, "step": 28764 }, { "epoch": 2.1378669639539205, "grad_norm": 1.897833182829411, "learning_rate": 4.027651326941382e-06, "loss": 0.2271, "step": 28765 }, { "epoch": 2.1379412857673725, "grad_norm": 2.57632507151476, "learning_rate": 4.02700779354998e-06, "loss": 0.3313, "step": 28766 }, { "epoch": 2.138015607580825, "grad_norm": 2.8245725208147707, "learning_rate": 4.026364298612392e-06, "loss": 0.2678, "step": 28767 }, { "epoch": 2.138089929394277, "grad_norm": 2.4570221913339845, "learning_rate": 4.025720842132757e-06, "loss": 0.2574, "step": 28768 }, { "epoch": 2.1381642512077295, "grad_norm": 2.154363755808566, "learning_rate": 4.025077424115224e-06, "loss": 0.2804, "step": 28769 }, { "epoch": 2.138238573021182, "grad_norm": 1.8686398656966974, "learning_rate": 4.024434044563933e-06, "loss": 0.2537, "step": 28770 }, { "epoch": 2.138312894834634, "grad_norm": 1.74938273293999, "learning_rate": 4.023790703483025e-06, "loss": 0.2044, "step": 28771 }, { "epoch": 2.1383872166480864, "grad_norm": 2.447024424440218, "learning_rate": 4.023147400876639e-06, "loss": 0.3044, "step": 28772 }, { "epoch": 2.1384615384615384, "grad_norm": 2.056138411104237, "learning_rate": 4.0225041367489234e-06, "loss": 0.2656, "step": 28773 }, { "epoch": 2.138535860274991, "grad_norm": 2.5494207178118002, "learning_rate": 4.021860911104017e-06, "loss": 0.3221, "step": 28774 }, { "epoch": 2.138610182088443, "grad_norm": 2.2539672706161387, "learning_rate": 4.021217723946056e-06, "loss": 0.2248, "step": 28775 }, { "epoch": 2.1386845039018954, "grad_norm": 2.0793352695027014, "learning_rate": 4.020574575279188e-06, "loss": 0.1973, "step": 28776 }, { "epoch": 2.1387588257153474, "grad_norm": 1.6929451643406173, "learning_rate": 4.019931465107547e-06, "loss": 0.1962, "step": 28777 }, { "epoch": 2.1388331475288, "grad_norm": 3.1985518374424964, "learning_rate": 4.019288393435281e-06, "loss": 0.2958, "step": 28778 }, { "epoch": 2.138907469342252, "grad_norm": 3.801600027218623, "learning_rate": 4.018645360266527e-06, "loss": 0.2894, "step": 28779 }, { "epoch": 2.1389817911557043, "grad_norm": 2.4124048914300777, "learning_rate": 4.018002365605419e-06, "loss": 0.2334, "step": 28780 }, { "epoch": 2.1390561129691563, "grad_norm": 2.4335759863263724, "learning_rate": 4.017359409456109e-06, "loss": 0.2906, "step": 28781 }, { "epoch": 2.139130434782609, "grad_norm": 3.1669550283643075, "learning_rate": 4.016716491822721e-06, "loss": 0.3503, "step": 28782 }, { "epoch": 2.139204756596061, "grad_norm": 2.020749313320165, "learning_rate": 4.0160736127094045e-06, "loss": 0.243, "step": 28783 }, { "epoch": 2.1392790784095133, "grad_norm": 2.0858978883647095, "learning_rate": 4.015430772120298e-06, "loss": 0.3187, "step": 28784 }, { "epoch": 2.1393534002229653, "grad_norm": 2.714886918690866, "learning_rate": 4.014787970059532e-06, "loss": 0.2499, "step": 28785 }, { "epoch": 2.1394277220364177, "grad_norm": 2.5409702377934633, "learning_rate": 4.014145206531255e-06, "loss": 0.3274, "step": 28786 }, { "epoch": 2.1395020438498698, "grad_norm": 2.4788554699511036, "learning_rate": 4.013502481539597e-06, "loss": 0.2995, "step": 28787 }, { "epoch": 2.139576365663322, "grad_norm": 2.97673800807786, "learning_rate": 4.012859795088703e-06, "loss": 0.335, "step": 28788 }, { "epoch": 2.1396506874767742, "grad_norm": 2.0413247404032084, "learning_rate": 4.012217147182707e-06, "loss": 0.2566, "step": 28789 }, { "epoch": 2.1397250092902267, "grad_norm": 2.233615219359863, "learning_rate": 4.011574537825742e-06, "loss": 0.2771, "step": 28790 }, { "epoch": 2.1397993311036787, "grad_norm": 2.054173143082625, "learning_rate": 4.010931967021954e-06, "loss": 0.286, "step": 28791 }, { "epoch": 2.139873652917131, "grad_norm": 3.763015024791031, "learning_rate": 4.010289434775475e-06, "loss": 0.2491, "step": 28792 }, { "epoch": 2.1399479747305836, "grad_norm": 2.205278905992782, "learning_rate": 4.0096469410904416e-06, "loss": 0.3286, "step": 28793 }, { "epoch": 2.1400222965440356, "grad_norm": 1.927844275764716, "learning_rate": 4.009004485970986e-06, "loss": 0.2136, "step": 28794 }, { "epoch": 2.140096618357488, "grad_norm": 2.1939470540287065, "learning_rate": 4.008362069421253e-06, "loss": 0.2829, "step": 28795 }, { "epoch": 2.14017094017094, "grad_norm": 2.5404371687985097, "learning_rate": 4.007719691445375e-06, "loss": 0.3278, "step": 28796 }, { "epoch": 2.1402452619843926, "grad_norm": 1.987487024020312, "learning_rate": 4.007077352047482e-06, "loss": 0.2413, "step": 28797 }, { "epoch": 2.1403195837978446, "grad_norm": 2.314484945023444, "learning_rate": 4.006435051231718e-06, "loss": 0.2552, "step": 28798 }, { "epoch": 2.140393905611297, "grad_norm": 1.9153119809400048, "learning_rate": 4.005792789002211e-06, "loss": 0.2616, "step": 28799 }, { "epoch": 2.140468227424749, "grad_norm": 2.3896466811239576, "learning_rate": 4.005150565363101e-06, "loss": 0.2562, "step": 28800 }, { "epoch": 2.1405425492382015, "grad_norm": 2.1651247379435627, "learning_rate": 4.004508380318522e-06, "loss": 0.2455, "step": 28801 }, { "epoch": 2.1406168710516535, "grad_norm": 2.384950106441676, "learning_rate": 4.0038662338726046e-06, "loss": 0.3589, "step": 28802 }, { "epoch": 2.140691192865106, "grad_norm": 2.1698060171667897, "learning_rate": 4.00322412602949e-06, "loss": 0.2626, "step": 28803 }, { "epoch": 2.140765514678558, "grad_norm": 1.8262723696143972, "learning_rate": 4.002582056793302e-06, "loss": 0.2218, "step": 28804 }, { "epoch": 2.1408398364920105, "grad_norm": 2.0715615115134924, "learning_rate": 4.001940026168182e-06, "loss": 0.2756, "step": 28805 }, { "epoch": 2.1409141583054625, "grad_norm": 2.335362510617861, "learning_rate": 4.001298034158261e-06, "loss": 0.3203, "step": 28806 }, { "epoch": 2.140988480118915, "grad_norm": 2.326912287099975, "learning_rate": 4.000656080767669e-06, "loss": 0.3054, "step": 28807 }, { "epoch": 2.141062801932367, "grad_norm": 2.597400318094728, "learning_rate": 4.000014166000544e-06, "loss": 0.3565, "step": 28808 }, { "epoch": 2.1411371237458194, "grad_norm": 1.9980026665551063, "learning_rate": 3.999372289861014e-06, "loss": 0.2756, "step": 28809 }, { "epoch": 2.1412114455592715, "grad_norm": 2.248589479424568, "learning_rate": 3.998730452353218e-06, "loss": 0.2935, "step": 28810 }, { "epoch": 2.141285767372724, "grad_norm": 1.648671343478284, "learning_rate": 3.9980886534812826e-06, "loss": 0.1947, "step": 28811 }, { "epoch": 2.1413600891861764, "grad_norm": 1.9340189658233855, "learning_rate": 3.997446893249338e-06, "loss": 0.2615, "step": 28812 }, { "epoch": 2.1414344109996284, "grad_norm": 2.4634236740522346, "learning_rate": 3.996805171661523e-06, "loss": 0.2794, "step": 28813 }, { "epoch": 2.1415087328130804, "grad_norm": 2.78488614124085, "learning_rate": 3.9961634887219606e-06, "loss": 0.317, "step": 28814 }, { "epoch": 2.141583054626533, "grad_norm": 1.967070104033886, "learning_rate": 3.995521844434793e-06, "loss": 0.232, "step": 28815 }, { "epoch": 2.1416573764399853, "grad_norm": 2.2934055777599083, "learning_rate": 3.9948802388041385e-06, "loss": 0.2866, "step": 28816 }, { "epoch": 2.1417316982534373, "grad_norm": 3.294875539941608, "learning_rate": 3.994238671834135e-06, "loss": 0.2896, "step": 28817 }, { "epoch": 2.14180602006689, "grad_norm": 2.1508646860138034, "learning_rate": 3.993597143528913e-06, "loss": 0.2605, "step": 28818 }, { "epoch": 2.141880341880342, "grad_norm": 2.677713296611622, "learning_rate": 3.992955653892597e-06, "loss": 0.3266, "step": 28819 }, { "epoch": 2.1419546636937943, "grad_norm": 2.516651433225224, "learning_rate": 3.992314202929323e-06, "loss": 0.3058, "step": 28820 }, { "epoch": 2.1420289855072463, "grad_norm": 1.9710271454670238, "learning_rate": 3.991672790643217e-06, "loss": 0.2054, "step": 28821 }, { "epoch": 2.1421033073206988, "grad_norm": 1.7143323109026374, "learning_rate": 3.9910314170384124e-06, "loss": 0.1687, "step": 28822 }, { "epoch": 2.1421776291341508, "grad_norm": 2.301802372495535, "learning_rate": 3.990390082119035e-06, "loss": 0.3134, "step": 28823 }, { "epoch": 2.1422519509476032, "grad_norm": 2.38969370156067, "learning_rate": 3.989748785889212e-06, "loss": 0.3928, "step": 28824 }, { "epoch": 2.1423262727610553, "grad_norm": 2.2049676303844654, "learning_rate": 3.989107528353078e-06, "loss": 0.299, "step": 28825 }, { "epoch": 2.1424005945745077, "grad_norm": 2.0539910262786014, "learning_rate": 3.988466309514758e-06, "loss": 0.2682, "step": 28826 }, { "epoch": 2.1424749163879597, "grad_norm": 4.38281654921904, "learning_rate": 3.987825129378379e-06, "loss": 0.2659, "step": 28827 }, { "epoch": 2.142549238201412, "grad_norm": 2.325507865356734, "learning_rate": 3.98718398794807e-06, "loss": 0.3114, "step": 28828 }, { "epoch": 2.142623560014864, "grad_norm": 1.8961482073858258, "learning_rate": 3.9865428852279555e-06, "loss": 0.2287, "step": 28829 }, { "epoch": 2.1426978818283167, "grad_norm": 2.5735775227989004, "learning_rate": 3.98590182122217e-06, "loss": 0.2837, "step": 28830 }, { "epoch": 2.1427722036417687, "grad_norm": 2.2033941477598016, "learning_rate": 3.985260795934832e-06, "loss": 0.2497, "step": 28831 }, { "epoch": 2.142846525455221, "grad_norm": 2.3735126915053657, "learning_rate": 3.9846198093700764e-06, "loss": 0.3764, "step": 28832 }, { "epoch": 2.142920847268673, "grad_norm": 2.5034142880933117, "learning_rate": 3.983978861532026e-06, "loss": 0.3281, "step": 28833 }, { "epoch": 2.1429951690821256, "grad_norm": 3.294737707730359, "learning_rate": 3.983337952424805e-06, "loss": 0.3715, "step": 28834 }, { "epoch": 2.143069490895578, "grad_norm": 2.2065969323216716, "learning_rate": 3.982697082052546e-06, "loss": 0.2911, "step": 28835 }, { "epoch": 2.14314381270903, "grad_norm": 2.2334417374647466, "learning_rate": 3.982056250419366e-06, "loss": 0.2813, "step": 28836 }, { "epoch": 2.1432181345224826, "grad_norm": 2.6117086251553894, "learning_rate": 3.981415457529404e-06, "loss": 0.2339, "step": 28837 }, { "epoch": 2.1432924563359346, "grad_norm": 3.313313406380858, "learning_rate": 3.980774703386769e-06, "loss": 0.3744, "step": 28838 }, { "epoch": 2.143366778149387, "grad_norm": 1.8961393593808267, "learning_rate": 3.980133987995598e-06, "loss": 0.2742, "step": 28839 }, { "epoch": 2.143441099962839, "grad_norm": 1.788199237740079, "learning_rate": 3.9794933113600125e-06, "loss": 0.2122, "step": 28840 }, { "epoch": 2.1435154217762915, "grad_norm": 2.3653488643718665, "learning_rate": 3.9788526734841326e-06, "loss": 0.2888, "step": 28841 }, { "epoch": 2.1435897435897435, "grad_norm": 2.4598817343229875, "learning_rate": 3.9782120743720896e-06, "loss": 0.2788, "step": 28842 }, { "epoch": 2.143664065403196, "grad_norm": 2.408640073467569, "learning_rate": 3.977571514028006e-06, "loss": 0.2548, "step": 28843 }, { "epoch": 2.143738387216648, "grad_norm": 1.5433161471053238, "learning_rate": 3.976930992456001e-06, "loss": 0.1535, "step": 28844 }, { "epoch": 2.1438127090301005, "grad_norm": 2.462593480284471, "learning_rate": 3.976290509660204e-06, "loss": 0.2463, "step": 28845 }, { "epoch": 2.1438870308435525, "grad_norm": 2.168416354906746, "learning_rate": 3.975650065644734e-06, "loss": 0.2633, "step": 28846 }, { "epoch": 2.143961352657005, "grad_norm": 1.864830566958889, "learning_rate": 3.975009660413719e-06, "loss": 0.2689, "step": 28847 }, { "epoch": 2.144035674470457, "grad_norm": 2.2393901070324715, "learning_rate": 3.97436929397128e-06, "loss": 0.3501, "step": 28848 }, { "epoch": 2.1441099962839094, "grad_norm": 2.2911825649604194, "learning_rate": 3.973728966321537e-06, "loss": 0.2911, "step": 28849 }, { "epoch": 2.1441843180973614, "grad_norm": 1.8642806861595795, "learning_rate": 3.973088677468616e-06, "loss": 0.2761, "step": 28850 }, { "epoch": 2.144258639910814, "grad_norm": 2.4497632315367768, "learning_rate": 3.972448427416634e-06, "loss": 0.3178, "step": 28851 }, { "epoch": 2.144332961724266, "grad_norm": 2.319434734537625, "learning_rate": 3.971808216169719e-06, "loss": 0.3034, "step": 28852 }, { "epoch": 2.1444072835377184, "grad_norm": 2.750139862066855, "learning_rate": 3.971168043731987e-06, "loss": 0.3188, "step": 28853 }, { "epoch": 2.1444816053511704, "grad_norm": 2.8020175841407218, "learning_rate": 3.970527910107564e-06, "loss": 0.3033, "step": 28854 }, { "epoch": 2.144555927164623, "grad_norm": 1.6856637283723264, "learning_rate": 3.969887815300572e-06, "loss": 0.1893, "step": 28855 }, { "epoch": 2.144630248978075, "grad_norm": 2.028143495789808, "learning_rate": 3.969247759315124e-06, "loss": 0.2673, "step": 28856 }, { "epoch": 2.1447045707915273, "grad_norm": 1.873170473108401, "learning_rate": 3.96860774215535e-06, "loss": 0.2448, "step": 28857 }, { "epoch": 2.14477889260498, "grad_norm": 1.8440543089578425, "learning_rate": 3.967967763825364e-06, "loss": 0.2358, "step": 28858 }, { "epoch": 2.144853214418432, "grad_norm": 2.3318633413698024, "learning_rate": 3.967327824329291e-06, "loss": 0.2748, "step": 28859 }, { "epoch": 2.1449275362318843, "grad_norm": 1.9968977363020957, "learning_rate": 3.966687923671248e-06, "loss": 0.2628, "step": 28860 }, { "epoch": 2.1450018580453363, "grad_norm": 3.0249070511537997, "learning_rate": 3.966048061855356e-06, "loss": 0.3757, "step": 28861 }, { "epoch": 2.1450761798587887, "grad_norm": 1.9818225621924876, "learning_rate": 3.965408238885734e-06, "loss": 0.2856, "step": 28862 }, { "epoch": 2.1451505016722408, "grad_norm": 4.0054976988811655, "learning_rate": 3.964768454766497e-06, "loss": 0.3383, "step": 28863 }, { "epoch": 2.145224823485693, "grad_norm": 2.3263051951403266, "learning_rate": 3.96412870950177e-06, "loss": 0.2832, "step": 28864 }, { "epoch": 2.1452991452991452, "grad_norm": 2.2753412675755293, "learning_rate": 3.963489003095669e-06, "loss": 0.2807, "step": 28865 }, { "epoch": 2.1453734671125977, "grad_norm": 3.1232790769340273, "learning_rate": 3.96284933555231e-06, "loss": 0.3232, "step": 28866 }, { "epoch": 2.1454477889260497, "grad_norm": 2.26058813849006, "learning_rate": 3.962209706875817e-06, "loss": 0.2897, "step": 28867 }, { "epoch": 2.145522110739502, "grad_norm": 2.687776008459672, "learning_rate": 3.9615701170703014e-06, "loss": 0.2802, "step": 28868 }, { "epoch": 2.145596432552954, "grad_norm": 1.9182646892205892, "learning_rate": 3.960930566139888e-06, "loss": 0.2672, "step": 28869 }, { "epoch": 2.1456707543664066, "grad_norm": 2.3211328075322646, "learning_rate": 3.9602910540886885e-06, "loss": 0.3269, "step": 28870 }, { "epoch": 2.1457450761798587, "grad_norm": 2.896154483109346, "learning_rate": 3.959651580920824e-06, "loss": 0.2969, "step": 28871 }, { "epoch": 2.145819397993311, "grad_norm": 2.3988166033885645, "learning_rate": 3.959012146640407e-06, "loss": 0.3121, "step": 28872 }, { "epoch": 2.145893719806763, "grad_norm": 1.775087881719124, "learning_rate": 3.958372751251554e-06, "loss": 0.1907, "step": 28873 }, { "epoch": 2.1459680416202156, "grad_norm": 1.9488274638011882, "learning_rate": 3.957733394758388e-06, "loss": 0.2185, "step": 28874 }, { "epoch": 2.1460423634336676, "grad_norm": 2.031576908332454, "learning_rate": 3.957094077165016e-06, "loss": 0.2588, "step": 28875 }, { "epoch": 2.14611668524712, "grad_norm": 3.3521645060706096, "learning_rate": 3.956454798475563e-06, "loss": 0.2733, "step": 28876 }, { "epoch": 2.146191007060572, "grad_norm": 2.6819876931129834, "learning_rate": 3.95581555869414e-06, "loss": 0.2416, "step": 28877 }, { "epoch": 2.1462653288740245, "grad_norm": 2.1796800680228823, "learning_rate": 3.9551763578248605e-06, "loss": 0.272, "step": 28878 }, { "epoch": 2.1463396506874766, "grad_norm": 2.068991227398095, "learning_rate": 3.954537195871844e-06, "loss": 0.2796, "step": 28879 }, { "epoch": 2.146413972500929, "grad_norm": 1.9688573191568977, "learning_rate": 3.953898072839201e-06, "loss": 0.29, "step": 28880 }, { "epoch": 2.1464882943143815, "grad_norm": 2.5429466508547844, "learning_rate": 3.95325898873105e-06, "loss": 0.3169, "step": 28881 }, { "epoch": 2.1465626161278335, "grad_norm": 2.0288055144150126, "learning_rate": 3.952619943551506e-06, "loss": 0.241, "step": 28882 }, { "epoch": 2.146636937941286, "grad_norm": 2.195471811805322, "learning_rate": 3.9519809373046805e-06, "loss": 0.2603, "step": 28883 }, { "epoch": 2.146711259754738, "grad_norm": 1.9851683093845258, "learning_rate": 3.951341969994688e-06, "loss": 0.2125, "step": 28884 }, { "epoch": 2.1467855815681904, "grad_norm": 2.7899342258890942, "learning_rate": 3.950703041625638e-06, "loss": 0.3181, "step": 28885 }, { "epoch": 2.1468599033816425, "grad_norm": 2.3125927400026134, "learning_rate": 3.9500641522016515e-06, "loss": 0.272, "step": 28886 }, { "epoch": 2.146934225195095, "grad_norm": 2.3429778027235693, "learning_rate": 3.949425301726838e-06, "loss": 0.3165, "step": 28887 }, { "epoch": 2.147008547008547, "grad_norm": 2.087411932179529, "learning_rate": 3.948786490205307e-06, "loss": 0.2923, "step": 28888 }, { "epoch": 2.1470828688219994, "grad_norm": 2.3684951648023973, "learning_rate": 3.948147717641178e-06, "loss": 0.3171, "step": 28889 }, { "epoch": 2.1471571906354514, "grad_norm": 2.3249319383483584, "learning_rate": 3.947508984038558e-06, "loss": 0.3036, "step": 28890 }, { "epoch": 2.147231512448904, "grad_norm": 2.5782179339976783, "learning_rate": 3.9468702894015635e-06, "loss": 0.3204, "step": 28891 }, { "epoch": 2.147305834262356, "grad_norm": 2.48658037163652, "learning_rate": 3.946231633734304e-06, "loss": 0.3035, "step": 28892 }, { "epoch": 2.1473801560758083, "grad_norm": 2.21642516651733, "learning_rate": 3.9455930170408875e-06, "loss": 0.3128, "step": 28893 }, { "epoch": 2.1474544778892604, "grad_norm": 1.8902298621546962, "learning_rate": 3.944954439325437e-06, "loss": 0.2605, "step": 28894 }, { "epoch": 2.147528799702713, "grad_norm": 3.3825281685627715, "learning_rate": 3.944315900592047e-06, "loss": 0.4027, "step": 28895 }, { "epoch": 2.147603121516165, "grad_norm": 2.0193870245089296, "learning_rate": 3.943677400844842e-06, "loss": 0.2165, "step": 28896 }, { "epoch": 2.1476774433296173, "grad_norm": 2.7383348846328754, "learning_rate": 3.943038940087923e-06, "loss": 0.344, "step": 28897 }, { "epoch": 2.1477517651430693, "grad_norm": 2.5121253810783504, "learning_rate": 3.942400518325409e-06, "loss": 0.2808, "step": 28898 }, { "epoch": 2.1478260869565218, "grad_norm": 2.980542149395079, "learning_rate": 3.9417621355614055e-06, "loss": 0.3734, "step": 28899 }, { "epoch": 2.147900408769974, "grad_norm": 2.1885295118399437, "learning_rate": 3.941123791800021e-06, "loss": 0.3357, "step": 28900 }, { "epoch": 2.1479747305834263, "grad_norm": 2.4336880106468786, "learning_rate": 3.940485487045369e-06, "loss": 0.3586, "step": 28901 }, { "epoch": 2.1480490523968783, "grad_norm": 2.386192384680669, "learning_rate": 3.939847221301555e-06, "loss": 0.2416, "step": 28902 }, { "epoch": 2.1481233742103307, "grad_norm": 2.1697589387550162, "learning_rate": 3.939208994572693e-06, "loss": 0.2205, "step": 28903 }, { "epoch": 2.148197696023783, "grad_norm": 2.511176489018607, "learning_rate": 3.9385708068628895e-06, "loss": 0.2473, "step": 28904 }, { "epoch": 2.148272017837235, "grad_norm": 2.4441416452910243, "learning_rate": 3.937932658176251e-06, "loss": 0.3359, "step": 28905 }, { "epoch": 2.1483463396506877, "grad_norm": 2.1843889718095073, "learning_rate": 3.937294548516889e-06, "loss": 0.2622, "step": 28906 }, { "epoch": 2.1484206614641397, "grad_norm": 2.4380915596373582, "learning_rate": 3.936656477888906e-06, "loss": 0.2885, "step": 28907 }, { "epoch": 2.148494983277592, "grad_norm": 2.4603932061542975, "learning_rate": 3.936018446296418e-06, "loss": 0.3488, "step": 28908 }, { "epoch": 2.148569305091044, "grad_norm": 1.8550213917828933, "learning_rate": 3.9353804537435295e-06, "loss": 0.2562, "step": 28909 }, { "epoch": 2.1486436269044966, "grad_norm": 2.0840949954784564, "learning_rate": 3.934742500234342e-06, "loss": 0.2433, "step": 28910 }, { "epoch": 2.1487179487179486, "grad_norm": 2.2545534836449974, "learning_rate": 3.934104585772971e-06, "loss": 0.2899, "step": 28911 }, { "epoch": 2.148792270531401, "grad_norm": 2.5820702406728664, "learning_rate": 3.933466710363517e-06, "loss": 0.3223, "step": 28912 }, { "epoch": 2.148866592344853, "grad_norm": 2.383901556163476, "learning_rate": 3.932828874010093e-06, "loss": 0.2687, "step": 28913 }, { "epoch": 2.1489409141583056, "grad_norm": 1.7395804515980187, "learning_rate": 3.932191076716801e-06, "loss": 0.2542, "step": 28914 }, { "epoch": 2.1490152359717576, "grad_norm": 2.533844102035065, "learning_rate": 3.931553318487746e-06, "loss": 0.2474, "step": 28915 }, { "epoch": 2.14908955778521, "grad_norm": 2.1939370345384863, "learning_rate": 3.930915599327042e-06, "loss": 0.2181, "step": 28916 }, { "epoch": 2.149163879598662, "grad_norm": 2.6630399249778733, "learning_rate": 3.9302779192387815e-06, "loss": 0.265, "step": 28917 }, { "epoch": 2.1492382014121145, "grad_norm": 2.180827556541897, "learning_rate": 3.9296402782270805e-06, "loss": 0.3209, "step": 28918 }, { "epoch": 2.1493125232255665, "grad_norm": 2.280194222329068, "learning_rate": 3.929002676296038e-06, "loss": 0.2517, "step": 28919 }, { "epoch": 2.149386845039019, "grad_norm": 2.520284875906436, "learning_rate": 3.928365113449762e-06, "loss": 0.3163, "step": 28920 }, { "epoch": 2.149461166852471, "grad_norm": 2.3644439765698086, "learning_rate": 3.927727589692359e-06, "loss": 0.3543, "step": 28921 }, { "epoch": 2.1495354886659235, "grad_norm": 2.311104424599269, "learning_rate": 3.927090105027927e-06, "loss": 0.3537, "step": 28922 }, { "epoch": 2.1496098104793755, "grad_norm": 2.997016726445655, "learning_rate": 3.926452659460576e-06, "loss": 0.3581, "step": 28923 }, { "epoch": 2.149684132292828, "grad_norm": 2.531512953763488, "learning_rate": 3.925815252994405e-06, "loss": 0.4197, "step": 28924 }, { "epoch": 2.14975845410628, "grad_norm": 2.323974213862162, "learning_rate": 3.925177885633523e-06, "loss": 0.2415, "step": 28925 }, { "epoch": 2.1498327759197324, "grad_norm": 2.028366594807353, "learning_rate": 3.92454055738203e-06, "loss": 0.2976, "step": 28926 }, { "epoch": 2.149907097733185, "grad_norm": 2.141887961791942, "learning_rate": 3.923903268244028e-06, "loss": 0.2996, "step": 28927 }, { "epoch": 2.149981419546637, "grad_norm": 1.8758296878233292, "learning_rate": 3.923266018223628e-06, "loss": 0.208, "step": 28928 }, { "epoch": 2.1500557413600894, "grad_norm": 2.092234595328585, "learning_rate": 3.922628807324919e-06, "loss": 0.2916, "step": 28929 }, { "epoch": 2.1501300631735414, "grad_norm": 2.074834410036769, "learning_rate": 3.921991635552012e-06, "loss": 0.272, "step": 28930 }, { "epoch": 2.150204384986994, "grad_norm": 2.752053570444976, "learning_rate": 3.921354502909011e-06, "loss": 0.3335, "step": 28931 }, { "epoch": 2.150278706800446, "grad_norm": 2.2627164250946388, "learning_rate": 3.9207174094000074e-06, "loss": 0.2574, "step": 28932 }, { "epoch": 2.1503530286138983, "grad_norm": 1.9006028015818315, "learning_rate": 3.920080355029116e-06, "loss": 0.2484, "step": 28933 }, { "epoch": 2.1504273504273503, "grad_norm": 2.1461672577461504, "learning_rate": 3.9194433398004265e-06, "loss": 0.2849, "step": 28934 }, { "epoch": 2.150501672240803, "grad_norm": 2.600071875383141, "learning_rate": 3.918806363718049e-06, "loss": 0.3581, "step": 28935 }, { "epoch": 2.150575994054255, "grad_norm": 2.5588735215545615, "learning_rate": 3.918169426786081e-06, "loss": 0.3099, "step": 28936 }, { "epoch": 2.1506503158677073, "grad_norm": 2.230405857287179, "learning_rate": 3.917532529008619e-06, "loss": 0.3285, "step": 28937 }, { "epoch": 2.1507246376811593, "grad_norm": 2.179152859058924, "learning_rate": 3.916895670389771e-06, "loss": 0.2214, "step": 28938 }, { "epoch": 2.1507989594946118, "grad_norm": 2.339480975734288, "learning_rate": 3.916258850933632e-06, "loss": 0.3018, "step": 28939 }, { "epoch": 2.1508732813080638, "grad_norm": 1.7271311631848492, "learning_rate": 3.915622070644304e-06, "loss": 0.2317, "step": 28940 }, { "epoch": 2.1509476031215162, "grad_norm": 2.3646571435304824, "learning_rate": 3.914985329525881e-06, "loss": 0.2691, "step": 28941 }, { "epoch": 2.1510219249349682, "grad_norm": 2.0667091556974553, "learning_rate": 3.91434862758247e-06, "loss": 0.2645, "step": 28942 }, { "epoch": 2.1510962467484207, "grad_norm": 1.9880005243319887, "learning_rate": 3.913711964818168e-06, "loss": 0.2894, "step": 28943 }, { "epoch": 2.1511705685618727, "grad_norm": 2.5276680965767717, "learning_rate": 3.9130753412370685e-06, "loss": 0.3125, "step": 28944 }, { "epoch": 2.151244890375325, "grad_norm": 2.9096128506031995, "learning_rate": 3.912438756843278e-06, "loss": 0.3393, "step": 28945 }, { "epoch": 2.1513192121887776, "grad_norm": 2.2387543300757935, "learning_rate": 3.911802211640886e-06, "loss": 0.3089, "step": 28946 }, { "epoch": 2.1513935340022297, "grad_norm": 3.0789454180222395, "learning_rate": 3.911165705634e-06, "loss": 0.3167, "step": 28947 }, { "epoch": 2.1514678558156817, "grad_norm": 2.0238390527422556, "learning_rate": 3.910529238826714e-06, "loss": 0.28, "step": 28948 }, { "epoch": 2.151542177629134, "grad_norm": 2.6783306943552603, "learning_rate": 3.90989281122312e-06, "loss": 0.3362, "step": 28949 }, { "epoch": 2.1516164994425866, "grad_norm": 2.3031035559296265, "learning_rate": 3.909256422827328e-06, "loss": 0.361, "step": 28950 }, { "epoch": 2.1516908212560386, "grad_norm": 1.9788503589866222, "learning_rate": 3.90862007364342e-06, "loss": 0.2837, "step": 28951 }, { "epoch": 2.151765143069491, "grad_norm": 2.6793182430439875, "learning_rate": 3.907983763675502e-06, "loss": 0.29, "step": 28952 }, { "epoch": 2.151839464882943, "grad_norm": 2.354973249445804, "learning_rate": 3.90734749292767e-06, "loss": 0.2944, "step": 28953 }, { "epoch": 2.1519137866963955, "grad_norm": 2.5672759711551056, "learning_rate": 3.906711261404015e-06, "loss": 0.3609, "step": 28954 }, { "epoch": 2.1519881085098476, "grad_norm": 3.3541801866550824, "learning_rate": 3.90607506910864e-06, "loss": 0.2985, "step": 28955 }, { "epoch": 2.1520624303233, "grad_norm": 2.3022803353692938, "learning_rate": 3.905438916045634e-06, "loss": 0.2993, "step": 28956 }, { "epoch": 2.152136752136752, "grad_norm": 2.45233318756806, "learning_rate": 3.9048028022190995e-06, "loss": 0.3513, "step": 28957 }, { "epoch": 2.1522110739502045, "grad_norm": 2.1220716662131625, "learning_rate": 3.904166727633128e-06, "loss": 0.2193, "step": 28958 }, { "epoch": 2.1522853957636565, "grad_norm": 2.064334407579155, "learning_rate": 3.903530692291811e-06, "loss": 0.2899, "step": 28959 }, { "epoch": 2.152359717577109, "grad_norm": 2.3472995756094064, "learning_rate": 3.902894696199252e-06, "loss": 0.3082, "step": 28960 }, { "epoch": 2.152434039390561, "grad_norm": 2.2693166676349397, "learning_rate": 3.902258739359537e-06, "loss": 0.4117, "step": 28961 }, { "epoch": 2.1525083612040135, "grad_norm": 2.6981008183443964, "learning_rate": 3.90162282177677e-06, "loss": 0.3122, "step": 28962 }, { "epoch": 2.1525826830174655, "grad_norm": 1.530509964384941, "learning_rate": 3.900986943455032e-06, "loss": 0.1809, "step": 28963 }, { "epoch": 2.152657004830918, "grad_norm": 2.2430894133087063, "learning_rate": 3.900351104398427e-06, "loss": 0.3145, "step": 28964 }, { "epoch": 2.15273132664437, "grad_norm": 2.1261231675176204, "learning_rate": 3.899715304611045e-06, "loss": 0.2109, "step": 28965 }, { "epoch": 2.1528056484578224, "grad_norm": 2.7031116740922814, "learning_rate": 3.899079544096977e-06, "loss": 0.3707, "step": 28966 }, { "epoch": 2.1528799702712744, "grad_norm": 2.245229450107239, "learning_rate": 3.898443822860321e-06, "loss": 0.2844, "step": 28967 }, { "epoch": 2.152954292084727, "grad_norm": 2.3107909333097623, "learning_rate": 3.897808140905166e-06, "loss": 0.3538, "step": 28968 }, { "epoch": 2.1530286138981793, "grad_norm": 2.6483265186261598, "learning_rate": 3.897172498235603e-06, "loss": 0.3859, "step": 28969 }, { "epoch": 2.1531029357116314, "grad_norm": 2.3942057156328085, "learning_rate": 3.89653689485573e-06, "loss": 0.3032, "step": 28970 }, { "epoch": 2.1531772575250834, "grad_norm": 4.6112987551130304, "learning_rate": 3.895901330769633e-06, "loss": 0.3068, "step": 28971 }, { "epoch": 2.153251579338536, "grad_norm": 2.069793955186103, "learning_rate": 3.89526580598141e-06, "loss": 0.2823, "step": 28972 }, { "epoch": 2.1533259011519883, "grad_norm": 2.085895777792952, "learning_rate": 3.894630320495148e-06, "loss": 0.2504, "step": 28973 }, { "epoch": 2.1534002229654403, "grad_norm": 2.1121728056596836, "learning_rate": 3.8939948743149406e-06, "loss": 0.3192, "step": 28974 }, { "epoch": 2.1534745447788928, "grad_norm": 2.144620810005317, "learning_rate": 3.893359467444876e-06, "loss": 0.2209, "step": 28975 }, { "epoch": 2.153548866592345, "grad_norm": 2.2057653626237164, "learning_rate": 3.8927240998890435e-06, "loss": 0.254, "step": 28976 }, { "epoch": 2.1536231884057973, "grad_norm": 2.641730455780665, "learning_rate": 3.89208877165154e-06, "loss": 0.2744, "step": 28977 }, { "epoch": 2.1536975102192493, "grad_norm": 1.9535402797794603, "learning_rate": 3.891453482736448e-06, "loss": 0.3063, "step": 28978 }, { "epoch": 2.1537718320327017, "grad_norm": 1.9930840459094354, "learning_rate": 3.890818233147866e-06, "loss": 0.2535, "step": 28979 }, { "epoch": 2.1538461538461537, "grad_norm": 1.924648955842129, "learning_rate": 3.8901830228898785e-06, "loss": 0.2816, "step": 28980 }, { "epoch": 2.153920475659606, "grad_norm": 2.058045646056057, "learning_rate": 3.889547851966572e-06, "loss": 0.281, "step": 28981 }, { "epoch": 2.153994797473058, "grad_norm": 2.239624753379903, "learning_rate": 3.888912720382043e-06, "loss": 0.298, "step": 28982 }, { "epoch": 2.1540691192865107, "grad_norm": 2.793091652536501, "learning_rate": 3.888277628140373e-06, "loss": 0.4056, "step": 28983 }, { "epoch": 2.1541434410999627, "grad_norm": 2.171339055790383, "learning_rate": 3.887642575245662e-06, "loss": 0.2547, "step": 28984 }, { "epoch": 2.154217762913415, "grad_norm": 2.495437425494717, "learning_rate": 3.887007561701985e-06, "loss": 0.3038, "step": 28985 }, { "epoch": 2.154292084726867, "grad_norm": 1.888647791705661, "learning_rate": 3.886372587513439e-06, "loss": 0.2677, "step": 28986 }, { "epoch": 2.1543664065403196, "grad_norm": 2.044154039466615, "learning_rate": 3.885737652684108e-06, "loss": 0.2407, "step": 28987 }, { "epoch": 2.1544407283537717, "grad_norm": 1.605483218219925, "learning_rate": 3.885102757218078e-06, "loss": 0.2161, "step": 28988 }, { "epoch": 2.154515050167224, "grad_norm": 2.689924803207361, "learning_rate": 3.884467901119443e-06, "loss": 0.3812, "step": 28989 }, { "epoch": 2.154589371980676, "grad_norm": 3.709140259412493, "learning_rate": 3.883833084392286e-06, "loss": 0.4169, "step": 28990 }, { "epoch": 2.1546636937941286, "grad_norm": 1.8957746058488594, "learning_rate": 3.88319830704069e-06, "loss": 0.2152, "step": 28991 }, { "epoch": 2.154738015607581, "grad_norm": 2.096275270348094, "learning_rate": 3.8825635690687505e-06, "loss": 0.2719, "step": 28992 }, { "epoch": 2.154812337421033, "grad_norm": 2.6913080715122844, "learning_rate": 3.8819288704805455e-06, "loss": 0.3199, "step": 28993 }, { "epoch": 2.1548866592344855, "grad_norm": 2.290555117488421, "learning_rate": 3.881294211280168e-06, "loss": 0.3092, "step": 28994 }, { "epoch": 2.1549609810479375, "grad_norm": 2.330726686673238, "learning_rate": 3.880659591471701e-06, "loss": 0.2895, "step": 28995 }, { "epoch": 2.15503530286139, "grad_norm": 2.106332060790078, "learning_rate": 3.880025011059231e-06, "loss": 0.2819, "step": 28996 }, { "epoch": 2.155109624674842, "grad_norm": 2.243983163899815, "learning_rate": 3.879390470046842e-06, "loss": 0.2391, "step": 28997 }, { "epoch": 2.1551839464882945, "grad_norm": 2.3559476068227596, "learning_rate": 3.878755968438615e-06, "loss": 0.2959, "step": 28998 }, { "epoch": 2.1552582683017465, "grad_norm": 2.4859016886397356, "learning_rate": 3.878121506238645e-06, "loss": 0.2912, "step": 28999 }, { "epoch": 2.155332590115199, "grad_norm": 2.2340406440279614, "learning_rate": 3.877487083451006e-06, "loss": 0.2989, "step": 29000 }, { "epoch": 2.155406911928651, "grad_norm": 2.9421033304059816, "learning_rate": 3.876852700079791e-06, "loss": 0.3411, "step": 29001 }, { "epoch": 2.1554812337421034, "grad_norm": 2.9188141536790426, "learning_rate": 3.876218356129081e-06, "loss": 0.2859, "step": 29002 }, { "epoch": 2.1555555555555554, "grad_norm": 1.9495471904033053, "learning_rate": 3.875584051602957e-06, "loss": 0.2827, "step": 29003 }, { "epoch": 2.155629877369008, "grad_norm": 1.7201637999523565, "learning_rate": 3.874949786505507e-06, "loss": 0.2205, "step": 29004 }, { "epoch": 2.15570419918246, "grad_norm": 2.5091114931111553, "learning_rate": 3.874315560840809e-06, "loss": 0.3726, "step": 29005 }, { "epoch": 2.1557785209959124, "grad_norm": 2.805941651617954, "learning_rate": 3.873681374612954e-06, "loss": 0.2852, "step": 29006 }, { "epoch": 2.1558528428093644, "grad_norm": 2.285976995588675, "learning_rate": 3.87304722782602e-06, "loss": 0.2644, "step": 29007 }, { "epoch": 2.155927164622817, "grad_norm": 2.2632513550837574, "learning_rate": 3.87241312048409e-06, "loss": 0.272, "step": 29008 }, { "epoch": 2.156001486436269, "grad_norm": 2.6894162681823417, "learning_rate": 3.8717790525912465e-06, "loss": 0.3314, "step": 29009 }, { "epoch": 2.1560758082497213, "grad_norm": 2.0283948126262086, "learning_rate": 3.871145024151568e-06, "loss": 0.2639, "step": 29010 }, { "epoch": 2.1561501300631734, "grad_norm": 2.250032817596666, "learning_rate": 3.870511035169143e-06, "loss": 0.3196, "step": 29011 }, { "epoch": 2.156224451876626, "grad_norm": 2.2175630200143255, "learning_rate": 3.86987708564805e-06, "loss": 0.2738, "step": 29012 }, { "epoch": 2.156298773690078, "grad_norm": 2.0163398969557873, "learning_rate": 3.869243175592368e-06, "loss": 0.2443, "step": 29013 }, { "epoch": 2.1563730955035303, "grad_norm": 2.301176065001095, "learning_rate": 3.868609305006182e-06, "loss": 0.2726, "step": 29014 }, { "epoch": 2.1564474173169828, "grad_norm": 2.175050608426318, "learning_rate": 3.867975473893569e-06, "loss": 0.2484, "step": 29015 }, { "epoch": 2.1565217391304348, "grad_norm": 2.9258419962208704, "learning_rate": 3.867341682258615e-06, "loss": 0.3569, "step": 29016 }, { "epoch": 2.1565960609438872, "grad_norm": 1.8522567126708531, "learning_rate": 3.866707930105397e-06, "loss": 0.2122, "step": 29017 }, { "epoch": 2.1566703827573392, "grad_norm": 1.9881992290565274, "learning_rate": 3.866074217437995e-06, "loss": 0.2513, "step": 29018 }, { "epoch": 2.1567447045707917, "grad_norm": 2.7075289408703314, "learning_rate": 3.865440544260488e-06, "loss": 0.331, "step": 29019 }, { "epoch": 2.1568190263842437, "grad_norm": 2.299830042913777, "learning_rate": 3.864806910576954e-06, "loss": 0.3004, "step": 29020 }, { "epoch": 2.156893348197696, "grad_norm": 2.6386532930780247, "learning_rate": 3.864173316391479e-06, "loss": 0.2954, "step": 29021 }, { "epoch": 2.156967670011148, "grad_norm": 1.6824138730510825, "learning_rate": 3.863539761708132e-06, "loss": 0.1529, "step": 29022 }, { "epoch": 2.1570419918246007, "grad_norm": 2.3038408783010698, "learning_rate": 3.862906246531003e-06, "loss": 0.3748, "step": 29023 }, { "epoch": 2.1571163136380527, "grad_norm": 2.350117934872802, "learning_rate": 3.8622727708641646e-06, "loss": 0.3259, "step": 29024 }, { "epoch": 2.157190635451505, "grad_norm": 2.1797386707094, "learning_rate": 3.861639334711692e-06, "loss": 0.3044, "step": 29025 }, { "epoch": 2.157264957264957, "grad_norm": 2.8013121260854414, "learning_rate": 3.861005938077671e-06, "loss": 0.3018, "step": 29026 }, { "epoch": 2.1573392790784096, "grad_norm": 2.2863862832782202, "learning_rate": 3.860372580966171e-06, "loss": 0.2622, "step": 29027 }, { "epoch": 2.1574136008918616, "grad_norm": 2.0363467130467536, "learning_rate": 3.859739263381278e-06, "loss": 0.3153, "step": 29028 }, { "epoch": 2.157487922705314, "grad_norm": 1.886719591341976, "learning_rate": 3.859105985327064e-06, "loss": 0.2368, "step": 29029 }, { "epoch": 2.157562244518766, "grad_norm": 2.046405025534033, "learning_rate": 3.858472746807607e-06, "loss": 0.2886, "step": 29030 }, { "epoch": 2.1576365663322186, "grad_norm": 2.356152111692395, "learning_rate": 3.857839547826985e-06, "loss": 0.3034, "step": 29031 }, { "epoch": 2.1577108881456706, "grad_norm": 2.1679952223807915, "learning_rate": 3.857206388389269e-06, "loss": 0.3533, "step": 29032 }, { "epoch": 2.157785209959123, "grad_norm": 2.2051681761422715, "learning_rate": 3.856573268498544e-06, "loss": 0.3003, "step": 29033 }, { "epoch": 2.157859531772575, "grad_norm": 2.814121429800427, "learning_rate": 3.855940188158881e-06, "loss": 0.4298, "step": 29034 }, { "epoch": 2.1579338535860275, "grad_norm": 2.2579858915819098, "learning_rate": 3.855307147374352e-06, "loss": 0.3053, "step": 29035 }, { "epoch": 2.1580081753994795, "grad_norm": 2.238373469186197, "learning_rate": 3.854674146149041e-06, "loss": 0.2514, "step": 29036 }, { "epoch": 2.158082497212932, "grad_norm": 2.196363114401404, "learning_rate": 3.854041184487015e-06, "loss": 0.2641, "step": 29037 }, { "epoch": 2.1581568190263845, "grad_norm": 1.9887507614965731, "learning_rate": 3.8534082623923574e-06, "loss": 0.2852, "step": 29038 }, { "epoch": 2.1582311408398365, "grad_norm": 2.270110017696152, "learning_rate": 3.852775379869137e-06, "loss": 0.2998, "step": 29039 }, { "epoch": 2.158305462653289, "grad_norm": 2.4507288000408316, "learning_rate": 3.852142536921427e-06, "loss": 0.3516, "step": 29040 }, { "epoch": 2.158379784466741, "grad_norm": 2.3772091575090104, "learning_rate": 3.851509733553312e-06, "loss": 0.3214, "step": 29041 }, { "epoch": 2.1584541062801934, "grad_norm": 1.8813623563351192, "learning_rate": 3.850876969768851e-06, "loss": 0.2345, "step": 29042 }, { "epoch": 2.1585284280936454, "grad_norm": 2.8823057530071394, "learning_rate": 3.850244245572128e-06, "loss": 0.3924, "step": 29043 }, { "epoch": 2.158602749907098, "grad_norm": 1.701247377867037, "learning_rate": 3.84961156096721e-06, "loss": 0.2472, "step": 29044 }, { "epoch": 2.15867707172055, "grad_norm": 1.9474425584051058, "learning_rate": 3.848978915958178e-06, "loss": 0.2623, "step": 29045 }, { "epoch": 2.1587513935340024, "grad_norm": 1.7715603569894118, "learning_rate": 3.848346310549099e-06, "loss": 0.2512, "step": 29046 }, { "epoch": 2.1588257153474544, "grad_norm": 2.5975525147125986, "learning_rate": 3.847713744744045e-06, "loss": 0.3024, "step": 29047 }, { "epoch": 2.158900037160907, "grad_norm": 2.496478840973587, "learning_rate": 3.847081218547094e-06, "loss": 0.3339, "step": 29048 }, { "epoch": 2.158974358974359, "grad_norm": 1.699838978286423, "learning_rate": 3.8464487319623116e-06, "loss": 0.231, "step": 29049 }, { "epoch": 2.1590486807878113, "grad_norm": 2.463163247733193, "learning_rate": 3.845816284993776e-06, "loss": 0.3341, "step": 29050 }, { "epoch": 2.1591230026012633, "grad_norm": 2.2519276288409515, "learning_rate": 3.845183877645557e-06, "loss": 0.315, "step": 29051 }, { "epoch": 2.159197324414716, "grad_norm": 2.352875566064809, "learning_rate": 3.844551509921723e-06, "loss": 0.3497, "step": 29052 }, { "epoch": 2.159271646228168, "grad_norm": 2.525986294512568, "learning_rate": 3.843919181826347e-06, "loss": 0.3924, "step": 29053 }, { "epoch": 2.1593459680416203, "grad_norm": 2.265452685804105, "learning_rate": 3.843286893363498e-06, "loss": 0.2371, "step": 29054 }, { "epoch": 2.1594202898550723, "grad_norm": 1.7218571134043597, "learning_rate": 3.842654644537252e-06, "loss": 0.2125, "step": 29055 }, { "epoch": 2.1594946116685247, "grad_norm": 3.2862341620865627, "learning_rate": 3.8420224353516745e-06, "loss": 0.3505, "step": 29056 }, { "epoch": 2.1595689334819768, "grad_norm": 1.9425771509120302, "learning_rate": 3.841390265810834e-06, "loss": 0.2204, "step": 29057 }, { "epoch": 2.159643255295429, "grad_norm": 1.9695458127080285, "learning_rate": 3.840758135918807e-06, "loss": 0.2198, "step": 29058 }, { "epoch": 2.1597175771088812, "grad_norm": 2.3206282977664183, "learning_rate": 3.840126045679656e-06, "loss": 0.2831, "step": 29059 }, { "epoch": 2.1597918989223337, "grad_norm": 2.4127772269289, "learning_rate": 3.8394939950974585e-06, "loss": 0.3302, "step": 29060 }, { "epoch": 2.159866220735786, "grad_norm": 2.475400865996596, "learning_rate": 3.838861984176278e-06, "loss": 0.3145, "step": 29061 }, { "epoch": 2.159940542549238, "grad_norm": 2.4214486038874923, "learning_rate": 3.838230012920181e-06, "loss": 0.3146, "step": 29062 }, { "epoch": 2.1600148643626906, "grad_norm": 1.7859926805868271, "learning_rate": 3.837598081333246e-06, "loss": 0.2283, "step": 29063 }, { "epoch": 2.1600891861761427, "grad_norm": 2.4924960069122526, "learning_rate": 3.8369661894195286e-06, "loss": 0.3861, "step": 29064 }, { "epoch": 2.160163507989595, "grad_norm": 2.1272667117969175, "learning_rate": 3.836334337183106e-06, "loss": 0.2756, "step": 29065 }, { "epoch": 2.160237829803047, "grad_norm": 2.3825207840687317, "learning_rate": 3.83570252462804e-06, "loss": 0.3202, "step": 29066 }, { "epoch": 2.1603121516164996, "grad_norm": 1.992429936093293, "learning_rate": 3.835070751758404e-06, "loss": 0.2682, "step": 29067 }, { "epoch": 2.1603864734299516, "grad_norm": 2.0561378397358285, "learning_rate": 3.834439018578263e-06, "loss": 0.339, "step": 29068 }, { "epoch": 2.160460795243404, "grad_norm": 2.682549542339782, "learning_rate": 3.833807325091681e-06, "loss": 0.2644, "step": 29069 }, { "epoch": 2.160535117056856, "grad_norm": 2.0394331911292056, "learning_rate": 3.83317567130273e-06, "loss": 0.237, "step": 29070 }, { "epoch": 2.1606094388703085, "grad_norm": 3.185273543430404, "learning_rate": 3.832544057215472e-06, "loss": 0.2493, "step": 29071 }, { "epoch": 2.1606837606837606, "grad_norm": 2.906185266838606, "learning_rate": 3.8319124828339775e-06, "loss": 0.3304, "step": 29072 }, { "epoch": 2.160758082497213, "grad_norm": 2.9684874920383604, "learning_rate": 3.83128094816231e-06, "loss": 0.3639, "step": 29073 }, { "epoch": 2.160832404310665, "grad_norm": 2.190383248143536, "learning_rate": 3.830649453204534e-06, "loss": 0.3262, "step": 29074 }, { "epoch": 2.1609067261241175, "grad_norm": 2.9121425990443193, "learning_rate": 3.830017997964722e-06, "loss": 0.4121, "step": 29075 }, { "epoch": 2.1609810479375695, "grad_norm": 2.344567142957874, "learning_rate": 3.8293865824469276e-06, "loss": 0.2796, "step": 29076 }, { "epoch": 2.161055369751022, "grad_norm": 2.5354964983093042, "learning_rate": 3.828755206655226e-06, "loss": 0.3139, "step": 29077 }, { "epoch": 2.161129691564474, "grad_norm": 2.715011203059405, "learning_rate": 3.828123870593679e-06, "loss": 0.3037, "step": 29078 }, { "epoch": 2.1612040133779264, "grad_norm": 2.822391502839538, "learning_rate": 3.827492574266345e-06, "loss": 0.3797, "step": 29079 }, { "epoch": 2.1612783351913785, "grad_norm": 2.1908707390818853, "learning_rate": 3.826861317677298e-06, "loss": 0.2806, "step": 29080 }, { "epoch": 2.161352657004831, "grad_norm": 2.405300021306582, "learning_rate": 3.826230100830593e-06, "loss": 0.2608, "step": 29081 }, { "epoch": 2.161426978818283, "grad_norm": 1.824473599724288, "learning_rate": 3.825598923730302e-06, "loss": 0.2195, "step": 29082 }, { "epoch": 2.1615013006317354, "grad_norm": 2.1363599446495396, "learning_rate": 3.824967786380487e-06, "loss": 0.26, "step": 29083 }, { "epoch": 2.161575622445188, "grad_norm": 2.256103531399998, "learning_rate": 3.824336688785203e-06, "loss": 0.2823, "step": 29084 }, { "epoch": 2.16164994425864, "grad_norm": 2.200406662845129, "learning_rate": 3.823705630948523e-06, "loss": 0.2535, "step": 29085 }, { "epoch": 2.1617242660720923, "grad_norm": 2.2813289473609775, "learning_rate": 3.823074612874507e-06, "loss": 0.3367, "step": 29086 }, { "epoch": 2.1617985878855444, "grad_norm": 2.0456958667181326, "learning_rate": 3.8224436345672145e-06, "loss": 0.1764, "step": 29087 }, { "epoch": 2.161872909698997, "grad_norm": 2.444314104056073, "learning_rate": 3.8218126960307065e-06, "loss": 0.241, "step": 29088 }, { "epoch": 2.161947231512449, "grad_norm": 2.3957657898290505, "learning_rate": 3.821181797269051e-06, "loss": 0.3232, "step": 29089 }, { "epoch": 2.1620215533259013, "grad_norm": 1.6825593702549844, "learning_rate": 3.820550938286307e-06, "loss": 0.2088, "step": 29090 }, { "epoch": 2.1620958751393533, "grad_norm": 1.810237548636678, "learning_rate": 3.819920119086532e-06, "loss": 0.2066, "step": 29091 }, { "epoch": 2.1621701969528058, "grad_norm": 2.2378093187171935, "learning_rate": 3.819289339673794e-06, "loss": 0.2808, "step": 29092 }, { "epoch": 2.162244518766258, "grad_norm": 2.4827824047950533, "learning_rate": 3.818658600052149e-06, "loss": 0.2795, "step": 29093 }, { "epoch": 2.1623188405797102, "grad_norm": 2.074026447571844, "learning_rate": 3.818027900225659e-06, "loss": 0.2436, "step": 29094 }, { "epoch": 2.1623931623931623, "grad_norm": 2.7971163397231735, "learning_rate": 3.817397240198385e-06, "loss": 0.3638, "step": 29095 }, { "epoch": 2.1624674842066147, "grad_norm": 2.4167000498535267, "learning_rate": 3.816766619974385e-06, "loss": 0.307, "step": 29096 }, { "epoch": 2.1625418060200667, "grad_norm": 2.466423798346435, "learning_rate": 3.816136039557726e-06, "loss": 0.2869, "step": 29097 }, { "epoch": 2.162616127833519, "grad_norm": 2.149166458233289, "learning_rate": 3.815505498952457e-06, "loss": 0.3457, "step": 29098 }, { "epoch": 2.162690449646971, "grad_norm": 1.957344841587822, "learning_rate": 3.814874998162645e-06, "loss": 0.2545, "step": 29099 }, { "epoch": 2.1627647714604237, "grad_norm": 2.294824158160415, "learning_rate": 3.8142445371923476e-06, "loss": 0.3252, "step": 29100 }, { "epoch": 2.1628390932738757, "grad_norm": 3.3407899290051666, "learning_rate": 3.8136141160456197e-06, "loss": 0.3761, "step": 29101 }, { "epoch": 2.162913415087328, "grad_norm": 2.8976448169702644, "learning_rate": 3.8129837347265262e-06, "loss": 0.3466, "step": 29102 }, { "epoch": 2.1629877369007806, "grad_norm": 2.355835502910198, "learning_rate": 3.8123533932391186e-06, "loss": 0.2728, "step": 29103 }, { "epoch": 2.1630620587142326, "grad_norm": 2.3516578429978745, "learning_rate": 3.8117230915874625e-06, "loss": 0.3387, "step": 29104 }, { "epoch": 2.1631363805276846, "grad_norm": 2.5151765523015284, "learning_rate": 3.8110928297756124e-06, "loss": 0.2943, "step": 29105 }, { "epoch": 2.163210702341137, "grad_norm": 2.1308258282256003, "learning_rate": 3.8104626078076224e-06, "loss": 0.3001, "step": 29106 }, { "epoch": 2.1632850241545896, "grad_norm": 2.008188040410294, "learning_rate": 3.809832425687555e-06, "loss": 0.2178, "step": 29107 }, { "epoch": 2.1633593459680416, "grad_norm": 1.9896177101268315, "learning_rate": 3.8092022834194666e-06, "loss": 0.2036, "step": 29108 }, { "epoch": 2.163433667781494, "grad_norm": 1.8459631735711977, "learning_rate": 3.8085721810074116e-06, "loss": 0.2362, "step": 29109 }, { "epoch": 2.163507989594946, "grad_norm": 2.1301764837848736, "learning_rate": 3.8079421184554455e-06, "loss": 0.3782, "step": 29110 }, { "epoch": 2.1635823114083985, "grad_norm": 1.6262685435156508, "learning_rate": 3.807312095767629e-06, "loss": 0.195, "step": 29111 }, { "epoch": 2.1636566332218505, "grad_norm": 1.9042169256718178, "learning_rate": 3.806682112948017e-06, "loss": 0.2576, "step": 29112 }, { "epoch": 2.163730955035303, "grad_norm": 2.370344475565144, "learning_rate": 3.8060521700006605e-06, "loss": 0.3197, "step": 29113 }, { "epoch": 2.163805276848755, "grad_norm": 2.100501819574621, "learning_rate": 3.8054222669296213e-06, "loss": 0.2278, "step": 29114 }, { "epoch": 2.1638795986622075, "grad_norm": 2.531649569496546, "learning_rate": 3.804792403738953e-06, "loss": 0.3107, "step": 29115 }, { "epoch": 2.1639539204756595, "grad_norm": 2.451924613552368, "learning_rate": 3.8041625804327052e-06, "loss": 0.2291, "step": 29116 }, { "epoch": 2.164028242289112, "grad_norm": 2.6421398638133833, "learning_rate": 3.8035327970149416e-06, "loss": 0.3303, "step": 29117 }, { "epoch": 2.164102564102564, "grad_norm": 2.4861193438045093, "learning_rate": 3.802903053489709e-06, "loss": 0.3172, "step": 29118 }, { "epoch": 2.1641768859160164, "grad_norm": 2.212425602521914, "learning_rate": 3.8022733498610685e-06, "loss": 0.2882, "step": 29119 }, { "epoch": 2.1642512077294684, "grad_norm": 2.9801617111829124, "learning_rate": 3.8016436861330697e-06, "loss": 0.4009, "step": 29120 }, { "epoch": 2.164325529542921, "grad_norm": 2.0533725009104975, "learning_rate": 3.801014062309768e-06, "loss": 0.2018, "step": 29121 }, { "epoch": 2.164399851356373, "grad_norm": 2.351669246570792, "learning_rate": 3.800384478395216e-06, "loss": 0.2758, "step": 29122 }, { "epoch": 2.1644741731698254, "grad_norm": 2.152181369415799, "learning_rate": 3.7997549343934636e-06, "loss": 0.2852, "step": 29123 }, { "epoch": 2.1645484949832774, "grad_norm": 2.341893606077445, "learning_rate": 3.799125430308571e-06, "loss": 0.3472, "step": 29124 }, { "epoch": 2.16462281679673, "grad_norm": 2.7939435510726898, "learning_rate": 3.798495966144583e-06, "loss": 0.2236, "step": 29125 }, { "epoch": 2.1646971386101823, "grad_norm": 2.3853741318750408, "learning_rate": 3.7978665419055605e-06, "loss": 0.4135, "step": 29126 }, { "epoch": 2.1647714604236343, "grad_norm": 3.0827986578144833, "learning_rate": 3.7972371575955504e-06, "loss": 0.2986, "step": 29127 }, { "epoch": 2.164845782237087, "grad_norm": 2.525031096511506, "learning_rate": 3.7966078132186023e-06, "loss": 0.2729, "step": 29128 }, { "epoch": 2.164920104050539, "grad_norm": 2.262145068279311, "learning_rate": 3.7959785087787758e-06, "loss": 0.2564, "step": 29129 }, { "epoch": 2.1649944258639913, "grad_norm": 2.5268033999272386, "learning_rate": 3.795349244280113e-06, "loss": 0.3855, "step": 29130 }, { "epoch": 2.1650687476774433, "grad_norm": 1.8358348514173706, "learning_rate": 3.7947200197266778e-06, "loss": 0.2389, "step": 29131 }, { "epoch": 2.1651430694908957, "grad_norm": 2.483213675912923, "learning_rate": 3.794090835122506e-06, "loss": 0.3315, "step": 29132 }, { "epoch": 2.1652173913043478, "grad_norm": 2.0066661456773245, "learning_rate": 3.793461690471658e-06, "loss": 0.2188, "step": 29133 }, { "epoch": 2.1652917131178, "grad_norm": 2.1192478250136735, "learning_rate": 3.792832585778182e-06, "loss": 0.3049, "step": 29134 }, { "epoch": 2.1653660349312522, "grad_norm": 1.8103790043304884, "learning_rate": 3.792203521046125e-06, "loss": 0.2533, "step": 29135 }, { "epoch": 2.1654403567447047, "grad_norm": 2.6289782739388143, "learning_rate": 3.791574496279542e-06, "loss": 0.3812, "step": 29136 }, { "epoch": 2.1655146785581567, "grad_norm": 2.851081708460056, "learning_rate": 3.790945511482481e-06, "loss": 0.2815, "step": 29137 }, { "epoch": 2.165589000371609, "grad_norm": 2.458756990545896, "learning_rate": 3.790316566658987e-06, "loss": 0.3088, "step": 29138 }, { "epoch": 2.165663322185061, "grad_norm": 2.1924681440136276, "learning_rate": 3.7896876618131164e-06, "loss": 0.3125, "step": 29139 }, { "epoch": 2.1657376439985137, "grad_norm": 1.8561850048480228, "learning_rate": 3.78905879694891e-06, "loss": 0.2379, "step": 29140 }, { "epoch": 2.1658119658119657, "grad_norm": 1.963814877930326, "learning_rate": 3.7884299720704256e-06, "loss": 0.2857, "step": 29141 }, { "epoch": 2.165886287625418, "grad_norm": 2.1316366027358855, "learning_rate": 3.7878011871817057e-06, "loss": 0.2936, "step": 29142 }, { "epoch": 2.16596060943887, "grad_norm": 2.305184885208505, "learning_rate": 3.7871724422868004e-06, "loss": 0.2802, "step": 29143 }, { "epoch": 2.1660349312523226, "grad_norm": 2.2817683617592213, "learning_rate": 3.786543737389756e-06, "loss": 0.3047, "step": 29144 }, { "epoch": 2.1661092530657746, "grad_norm": 2.001392677637111, "learning_rate": 3.785915072494617e-06, "loss": 0.2605, "step": 29145 }, { "epoch": 2.166183574879227, "grad_norm": 2.7027708038768044, "learning_rate": 3.7852864476054385e-06, "loss": 0.3346, "step": 29146 }, { "epoch": 2.166257896692679, "grad_norm": 2.1399395433931545, "learning_rate": 3.7846578627262585e-06, "loss": 0.309, "step": 29147 }, { "epoch": 2.1663322185061316, "grad_norm": 2.3600296356440067, "learning_rate": 3.784029317861133e-06, "loss": 0.3542, "step": 29148 }, { "epoch": 2.166406540319584, "grad_norm": 2.394104500535678, "learning_rate": 3.7834008130141043e-06, "loss": 0.3105, "step": 29149 }, { "epoch": 2.166480862133036, "grad_norm": 2.366280570001358, "learning_rate": 3.7827723481892154e-06, "loss": 0.2524, "step": 29150 }, { "epoch": 2.1665551839464885, "grad_norm": 2.336081876189944, "learning_rate": 3.782143923390519e-06, "loss": 0.2729, "step": 29151 }, { "epoch": 2.1666295057599405, "grad_norm": 2.4443701242121603, "learning_rate": 3.7815155386220538e-06, "loss": 0.4072, "step": 29152 }, { "epoch": 2.166703827573393, "grad_norm": 2.1542940763181155, "learning_rate": 3.7808871938878723e-06, "loss": 0.2749, "step": 29153 }, { "epoch": 2.166778149386845, "grad_norm": 2.4131634486371665, "learning_rate": 3.7802588891920155e-06, "loss": 0.3335, "step": 29154 }, { "epoch": 2.1668524712002974, "grad_norm": 1.9363132233023428, "learning_rate": 3.7796306245385307e-06, "loss": 0.2137, "step": 29155 }, { "epoch": 2.1669267930137495, "grad_norm": 2.453364243147907, "learning_rate": 3.7790023999314606e-06, "loss": 0.259, "step": 29156 }, { "epoch": 2.167001114827202, "grad_norm": 1.8734040902463844, "learning_rate": 3.778374215374846e-06, "loss": 0.2458, "step": 29157 }, { "epoch": 2.167075436640654, "grad_norm": 2.217477498673905, "learning_rate": 3.77774607087274e-06, "loss": 0.2646, "step": 29158 }, { "epoch": 2.1671497584541064, "grad_norm": 1.958385043436052, "learning_rate": 3.7771179664291814e-06, "loss": 0.2387, "step": 29159 }, { "epoch": 2.1672240802675584, "grad_norm": 2.975828565235091, "learning_rate": 3.7764899020482116e-06, "loss": 0.4374, "step": 29160 }, { "epoch": 2.167298402081011, "grad_norm": 2.4311154119556773, "learning_rate": 3.7758618777338797e-06, "loss": 0.2745, "step": 29161 }, { "epoch": 2.167372723894463, "grad_norm": 2.9458232759272653, "learning_rate": 3.7752338934902223e-06, "loss": 0.3543, "step": 29162 }, { "epoch": 2.1674470457079154, "grad_norm": 2.941342017029801, "learning_rate": 3.7746059493212907e-06, "loss": 0.3413, "step": 29163 }, { "epoch": 2.1675213675213674, "grad_norm": 2.2564652685849333, "learning_rate": 3.7739780452311215e-06, "loss": 0.3378, "step": 29164 }, { "epoch": 2.16759568933482, "grad_norm": 1.6607745747353329, "learning_rate": 3.7733501812237596e-06, "loss": 0.2154, "step": 29165 }, { "epoch": 2.167670011148272, "grad_norm": 2.108645788934419, "learning_rate": 3.7727223573032467e-06, "loss": 0.2458, "step": 29166 }, { "epoch": 2.1677443329617243, "grad_norm": 2.0633508698215692, "learning_rate": 3.7720945734736193e-06, "loss": 0.3031, "step": 29167 }, { "epoch": 2.1678186547751763, "grad_norm": 2.0959224512816155, "learning_rate": 3.771466829738928e-06, "loss": 0.2549, "step": 29168 }, { "epoch": 2.167892976588629, "grad_norm": 2.5277418606749005, "learning_rate": 3.7708391261032074e-06, "loss": 0.3199, "step": 29169 }, { "epoch": 2.167967298402081, "grad_norm": 2.439108637404514, "learning_rate": 3.770211462570503e-06, "loss": 0.2809, "step": 29170 }, { "epoch": 2.1680416202155333, "grad_norm": 4.374987854304154, "learning_rate": 3.769583839144856e-06, "loss": 0.2678, "step": 29171 }, { "epoch": 2.1681159420289857, "grad_norm": 2.465840576250131, "learning_rate": 3.7689562558302995e-06, "loss": 0.3024, "step": 29172 }, { "epoch": 2.1681902638424377, "grad_norm": 2.098349754108533, "learning_rate": 3.7683287126308845e-06, "loss": 0.3095, "step": 29173 }, { "epoch": 2.16826458565589, "grad_norm": 2.133473574030373, "learning_rate": 3.7677012095506417e-06, "loss": 0.288, "step": 29174 }, { "epoch": 2.168338907469342, "grad_norm": 2.4664000809741045, "learning_rate": 3.7670737465936182e-06, "loss": 0.2747, "step": 29175 }, { "epoch": 2.1684132292827947, "grad_norm": 2.0492860627311242, "learning_rate": 3.7664463237638514e-06, "loss": 0.3033, "step": 29176 }, { "epoch": 2.1684875510962467, "grad_norm": 3.1604093249160834, "learning_rate": 3.765818941065379e-06, "loss": 0.2655, "step": 29177 }, { "epoch": 2.168561872909699, "grad_norm": 2.0000722405695797, "learning_rate": 3.765191598502241e-06, "loss": 0.2321, "step": 29178 }, { "epoch": 2.168636194723151, "grad_norm": 1.584398003181271, "learning_rate": 3.764564296078472e-06, "loss": 0.1826, "step": 29179 }, { "epoch": 2.1687105165366036, "grad_norm": 2.6930754132758765, "learning_rate": 3.7639370337981194e-06, "loss": 0.2501, "step": 29180 }, { "epoch": 2.1687848383500556, "grad_norm": 2.1870404202995575, "learning_rate": 3.7633098116652158e-06, "loss": 0.3437, "step": 29181 }, { "epoch": 2.168859160163508, "grad_norm": 2.261842771073567, "learning_rate": 3.762682629683796e-06, "loss": 0.2683, "step": 29182 }, { "epoch": 2.16893348197696, "grad_norm": 2.0683805360287058, "learning_rate": 3.7620554878579062e-06, "loss": 0.2814, "step": 29183 }, { "epoch": 2.1690078037904126, "grad_norm": 2.1798630305045075, "learning_rate": 3.7614283861915755e-06, "loss": 0.3133, "step": 29184 }, { "epoch": 2.1690821256038646, "grad_norm": 1.7546424552888185, "learning_rate": 3.7608013246888485e-06, "loss": 0.2251, "step": 29185 }, { "epoch": 2.169156447417317, "grad_norm": 2.1872160622922503, "learning_rate": 3.7601743033537596e-06, "loss": 0.2479, "step": 29186 }, { "epoch": 2.169230769230769, "grad_norm": 2.451954504207737, "learning_rate": 3.75954732219034e-06, "loss": 0.3291, "step": 29187 }, { "epoch": 2.1693050910442215, "grad_norm": 2.0459282475006453, "learning_rate": 3.7589203812026385e-06, "loss": 0.2658, "step": 29188 }, { "epoch": 2.1693794128576736, "grad_norm": 2.0974200918886656, "learning_rate": 3.758293480394678e-06, "loss": 0.2958, "step": 29189 }, { "epoch": 2.169453734671126, "grad_norm": 1.9630372419253144, "learning_rate": 3.7576666197705024e-06, "loss": 0.3267, "step": 29190 }, { "epoch": 2.169528056484578, "grad_norm": 2.333765109206897, "learning_rate": 3.7570397993341425e-06, "loss": 0.3217, "step": 29191 }, { "epoch": 2.1696023782980305, "grad_norm": 3.108551485345488, "learning_rate": 3.7564130190896386e-06, "loss": 0.4133, "step": 29192 }, { "epoch": 2.1696767001114825, "grad_norm": 2.207557710497254, "learning_rate": 3.7557862790410248e-06, "loss": 0.3085, "step": 29193 }, { "epoch": 2.169751021924935, "grad_norm": 2.9906681020263184, "learning_rate": 3.755159579192331e-06, "loss": 0.2961, "step": 29194 }, { "epoch": 2.1698253437383874, "grad_norm": 2.327892824239727, "learning_rate": 3.7545329195476e-06, "loss": 0.2398, "step": 29195 }, { "epoch": 2.1698996655518394, "grad_norm": 2.3320838148953436, "learning_rate": 3.7539063001108577e-06, "loss": 0.2771, "step": 29196 }, { "epoch": 2.169973987365292, "grad_norm": 1.657833581588845, "learning_rate": 3.753279720886146e-06, "loss": 0.2487, "step": 29197 }, { "epoch": 2.170048309178744, "grad_norm": 3.298683949593628, "learning_rate": 3.752653181877496e-06, "loss": 0.3361, "step": 29198 }, { "epoch": 2.1701226309921964, "grad_norm": 2.5191658127468717, "learning_rate": 3.7520266830889407e-06, "loss": 0.2949, "step": 29199 }, { "epoch": 2.1701969528056484, "grad_norm": 3.103622154088052, "learning_rate": 3.751400224524513e-06, "loss": 0.4032, "step": 29200 }, { "epoch": 2.170271274619101, "grad_norm": 2.740560387145229, "learning_rate": 3.7507738061882427e-06, "loss": 0.3518, "step": 29201 }, { "epoch": 2.170345596432553, "grad_norm": 2.558231252835349, "learning_rate": 3.750147428084171e-06, "loss": 0.3621, "step": 29202 }, { "epoch": 2.1704199182460053, "grad_norm": 2.2096499908973986, "learning_rate": 3.749521090216325e-06, "loss": 0.2205, "step": 29203 }, { "epoch": 2.1704942400594573, "grad_norm": 2.8069256176717254, "learning_rate": 3.748894792588734e-06, "loss": 0.3238, "step": 29204 }, { "epoch": 2.17056856187291, "grad_norm": 2.6692438392739475, "learning_rate": 3.748268535205438e-06, "loss": 0.3587, "step": 29205 }, { "epoch": 2.170642883686362, "grad_norm": 1.7213489615117246, "learning_rate": 3.747642318070461e-06, "loss": 0.188, "step": 29206 }, { "epoch": 2.1707172054998143, "grad_norm": 4.627333458242919, "learning_rate": 3.7470161411878426e-06, "loss": 0.3365, "step": 29207 }, { "epoch": 2.1707915273132663, "grad_norm": 2.248628719666933, "learning_rate": 3.74639000456161e-06, "loss": 0.2494, "step": 29208 }, { "epoch": 2.1708658491267188, "grad_norm": 2.0839669642882592, "learning_rate": 3.7457639081957897e-06, "loss": 0.249, "step": 29209 }, { "epoch": 2.1709401709401708, "grad_norm": 2.426900437740873, "learning_rate": 3.745137852094424e-06, "loss": 0.3519, "step": 29210 }, { "epoch": 2.1710144927536232, "grad_norm": 1.8530775434788258, "learning_rate": 3.7445118362615287e-06, "loss": 0.2261, "step": 29211 }, { "epoch": 2.1710888145670753, "grad_norm": 2.1597133788869507, "learning_rate": 3.743885860701146e-06, "loss": 0.2826, "step": 29212 }, { "epoch": 2.1711631363805277, "grad_norm": 1.8048205757802327, "learning_rate": 3.7432599254172976e-06, "loss": 0.285, "step": 29213 }, { "epoch": 2.1712374581939797, "grad_norm": 1.6586168770812801, "learning_rate": 3.7426340304140208e-06, "loss": 0.1533, "step": 29214 }, { "epoch": 2.171311780007432, "grad_norm": 1.9839012820777908, "learning_rate": 3.742008175695342e-06, "loss": 0.2478, "step": 29215 }, { "epoch": 2.171386101820884, "grad_norm": 2.239489988762828, "learning_rate": 3.7413823612652854e-06, "loss": 0.3625, "step": 29216 }, { "epoch": 2.1714604236343367, "grad_norm": 2.5683598568354906, "learning_rate": 3.7407565871278884e-06, "loss": 0.2849, "step": 29217 }, { "epoch": 2.171534745447789, "grad_norm": 2.8567289493607206, "learning_rate": 3.7401308532871725e-06, "loss": 0.3445, "step": 29218 }, { "epoch": 2.171609067261241, "grad_norm": 2.537098351858819, "learning_rate": 3.739505159747172e-06, "loss": 0.3191, "step": 29219 }, { "epoch": 2.1716833890746936, "grad_norm": 1.9839190424212045, "learning_rate": 3.738879506511913e-06, "loss": 0.3187, "step": 29220 }, { "epoch": 2.1717577108881456, "grad_norm": 3.042735840560015, "learning_rate": 3.738253893585423e-06, "loss": 0.3987, "step": 29221 }, { "epoch": 2.171832032701598, "grad_norm": 2.5211371160847853, "learning_rate": 3.73762832097173e-06, "loss": 0.3122, "step": 29222 }, { "epoch": 2.17190635451505, "grad_norm": 2.157991395787814, "learning_rate": 3.737002788674856e-06, "loss": 0.3341, "step": 29223 }, { "epoch": 2.1719806763285026, "grad_norm": 1.8088369190609297, "learning_rate": 3.7363772966988375e-06, "loss": 0.2021, "step": 29224 }, { "epoch": 2.1720549981419546, "grad_norm": 1.8374358654122507, "learning_rate": 3.735751845047697e-06, "loss": 0.2577, "step": 29225 }, { "epoch": 2.172129319955407, "grad_norm": 1.9845220173611953, "learning_rate": 3.735126433725458e-06, "loss": 0.2558, "step": 29226 }, { "epoch": 2.172203641768859, "grad_norm": 1.9825947948884213, "learning_rate": 3.734501062736153e-06, "loss": 0.2438, "step": 29227 }, { "epoch": 2.1722779635823115, "grad_norm": 2.1929697846464764, "learning_rate": 3.7338757320838016e-06, "loss": 0.276, "step": 29228 }, { "epoch": 2.1723522853957635, "grad_norm": 1.8894630620340995, "learning_rate": 3.7332504417724356e-06, "loss": 0.2168, "step": 29229 }, { "epoch": 2.172426607209216, "grad_norm": 2.0146335379930957, "learning_rate": 3.73262519180608e-06, "loss": 0.2676, "step": 29230 }, { "epoch": 2.172500929022668, "grad_norm": 2.1018491788254305, "learning_rate": 3.731999982188753e-06, "loss": 0.2645, "step": 29231 }, { "epoch": 2.1725752508361205, "grad_norm": 2.276062590419872, "learning_rate": 3.73137481292449e-06, "loss": 0.302, "step": 29232 }, { "epoch": 2.1726495726495725, "grad_norm": 2.1811374584563534, "learning_rate": 3.7307496840173095e-06, "loss": 0.2114, "step": 29233 }, { "epoch": 2.172723894463025, "grad_norm": 2.256350612786522, "learning_rate": 3.730124595471237e-06, "loss": 0.2995, "step": 29234 }, { "epoch": 2.172798216276477, "grad_norm": 2.3588690299473827, "learning_rate": 3.729499547290295e-06, "loss": 0.2225, "step": 29235 }, { "epoch": 2.1728725380899294, "grad_norm": 2.361919329179587, "learning_rate": 3.728874539478512e-06, "loss": 0.3566, "step": 29236 }, { "epoch": 2.172946859903382, "grad_norm": 4.974091695017322, "learning_rate": 3.728249572039909e-06, "loss": 0.3336, "step": 29237 }, { "epoch": 2.173021181716834, "grad_norm": 2.6248420964901276, "learning_rate": 3.727624644978507e-06, "loss": 0.3721, "step": 29238 }, { "epoch": 2.173095503530286, "grad_norm": 2.1542395829074397, "learning_rate": 3.7269997582983354e-06, "loss": 0.2616, "step": 29239 }, { "epoch": 2.1731698253437384, "grad_norm": 1.7389923740385838, "learning_rate": 3.7263749120034145e-06, "loss": 0.2568, "step": 29240 }, { "epoch": 2.173244147157191, "grad_norm": 5.809516757502644, "learning_rate": 3.725750106097762e-06, "loss": 0.324, "step": 29241 }, { "epoch": 2.173318468970643, "grad_norm": 2.319788871532494, "learning_rate": 3.7251253405854083e-06, "loss": 0.3089, "step": 29242 }, { "epoch": 2.1733927907840953, "grad_norm": 2.636535177599007, "learning_rate": 3.7245006154703687e-06, "loss": 0.3086, "step": 29243 }, { "epoch": 2.1734671125975473, "grad_norm": 2.152442619458245, "learning_rate": 3.723875930756675e-06, "loss": 0.3101, "step": 29244 }, { "epoch": 2.173541434411, "grad_norm": 2.4129031641657255, "learning_rate": 3.723251286448337e-06, "loss": 0.2672, "step": 29245 }, { "epoch": 2.173615756224452, "grad_norm": 1.9453223394583956, "learning_rate": 3.722626682549384e-06, "loss": 0.2719, "step": 29246 }, { "epoch": 2.1736900780379043, "grad_norm": 1.8688637945235897, "learning_rate": 3.7220021190638344e-06, "loss": 0.2148, "step": 29247 }, { "epoch": 2.1737643998513563, "grad_norm": 2.494717248601897, "learning_rate": 3.7213775959957065e-06, "loss": 0.4211, "step": 29248 }, { "epoch": 2.1738387216648087, "grad_norm": 2.342552687053279, "learning_rate": 3.720753113349027e-06, "loss": 0.2995, "step": 29249 }, { "epoch": 2.1739130434782608, "grad_norm": 2.670752233956601, "learning_rate": 3.7201286711278106e-06, "loss": 0.3031, "step": 29250 }, { "epoch": 2.173987365291713, "grad_norm": 2.371899338664239, "learning_rate": 3.7195042693360826e-06, "loss": 0.2998, "step": 29251 }, { "epoch": 2.1740616871051652, "grad_norm": 2.518280073498374, "learning_rate": 3.7188799079778603e-06, "loss": 0.3252, "step": 29252 }, { "epoch": 2.1741360089186177, "grad_norm": 1.9986981242096944, "learning_rate": 3.718255587057159e-06, "loss": 0.2676, "step": 29253 }, { "epoch": 2.1742103307320697, "grad_norm": 2.720302348919778, "learning_rate": 3.717631306578007e-06, "loss": 0.3201, "step": 29254 }, { "epoch": 2.174284652545522, "grad_norm": 3.2812588162353755, "learning_rate": 3.717007066544418e-06, "loss": 0.3714, "step": 29255 }, { "epoch": 2.174358974358974, "grad_norm": 3.5587771578818552, "learning_rate": 3.7163828669604117e-06, "loss": 0.315, "step": 29256 }, { "epoch": 2.1744332961724266, "grad_norm": 2.7405646451170296, "learning_rate": 3.715758707830004e-06, "loss": 0.3476, "step": 29257 }, { "epoch": 2.1745076179858787, "grad_norm": 2.2020305924319925, "learning_rate": 3.7151345891572176e-06, "loss": 0.3257, "step": 29258 }, { "epoch": 2.174581939799331, "grad_norm": 2.087103754746276, "learning_rate": 3.714510510946069e-06, "loss": 0.288, "step": 29259 }, { "epoch": 2.1746562616127836, "grad_norm": 2.324834286879536, "learning_rate": 3.7138864732005733e-06, "loss": 0.2464, "step": 29260 }, { "epoch": 2.1747305834262356, "grad_norm": 3.0824677928538633, "learning_rate": 3.7132624759247525e-06, "loss": 0.2961, "step": 29261 }, { "epoch": 2.1748049052396876, "grad_norm": 3.377770831098347, "learning_rate": 3.7126385191226223e-06, "loss": 0.3078, "step": 29262 }, { "epoch": 2.17487922705314, "grad_norm": 2.1956401515185657, "learning_rate": 3.712014602798195e-06, "loss": 0.2853, "step": 29263 }, { "epoch": 2.1749535488665925, "grad_norm": 3.1421659077228465, "learning_rate": 3.711390726955495e-06, "loss": 0.3513, "step": 29264 }, { "epoch": 2.1750278706800446, "grad_norm": 1.901968899199208, "learning_rate": 3.7107668915985317e-06, "loss": 0.2065, "step": 29265 }, { "epoch": 2.175102192493497, "grad_norm": 2.6703651616656128, "learning_rate": 3.710143096731329e-06, "loss": 0.3159, "step": 29266 }, { "epoch": 2.175176514306949, "grad_norm": 2.5617468788464786, "learning_rate": 3.709519342357898e-06, "loss": 0.3304, "step": 29267 }, { "epoch": 2.1752508361204015, "grad_norm": 2.571405719861824, "learning_rate": 3.7088956284822552e-06, "loss": 0.309, "step": 29268 }, { "epoch": 2.1753251579338535, "grad_norm": 1.9071308380114995, "learning_rate": 3.7082719551084166e-06, "loss": 0.2171, "step": 29269 }, { "epoch": 2.175399479747306, "grad_norm": 2.2323174865685607, "learning_rate": 3.7076483222403926e-06, "loss": 0.3477, "step": 29270 }, { "epoch": 2.175473801560758, "grad_norm": 2.7936548213114003, "learning_rate": 3.707024729882206e-06, "loss": 0.3747, "step": 29271 }, { "epoch": 2.1755481233742104, "grad_norm": 1.9140768068732867, "learning_rate": 3.706401178037864e-06, "loss": 0.2777, "step": 29272 }, { "epoch": 2.1756224451876625, "grad_norm": 1.9810107002070647, "learning_rate": 3.7057776667113876e-06, "loss": 0.3247, "step": 29273 }, { "epoch": 2.175696767001115, "grad_norm": 2.2039241962467027, "learning_rate": 3.7051541959067882e-06, "loss": 0.2608, "step": 29274 }, { "epoch": 2.175771088814567, "grad_norm": 2.181356392660956, "learning_rate": 3.704530765628076e-06, "loss": 0.254, "step": 29275 }, { "epoch": 2.1758454106280194, "grad_norm": 2.6097209926095397, "learning_rate": 3.7039073758792723e-06, "loss": 0.327, "step": 29276 }, { "epoch": 2.1759197324414714, "grad_norm": 2.300787395791733, "learning_rate": 3.703284026664382e-06, "loss": 0.2845, "step": 29277 }, { "epoch": 2.175994054254924, "grad_norm": 1.9709137924232243, "learning_rate": 3.7026607179874298e-06, "loss": 0.2664, "step": 29278 }, { "epoch": 2.176068376068376, "grad_norm": 2.692392401209762, "learning_rate": 3.7020374498524147e-06, "loss": 0.3771, "step": 29279 }, { "epoch": 2.1761426978818283, "grad_norm": 2.0287209108862654, "learning_rate": 3.7014142222633586e-06, "loss": 0.2357, "step": 29280 }, { "epoch": 2.1762170196952804, "grad_norm": 2.253583078769877, "learning_rate": 3.7007910352242706e-06, "loss": 0.3437, "step": 29281 }, { "epoch": 2.176291341508733, "grad_norm": 2.581603394150719, "learning_rate": 3.70016788873916e-06, "loss": 0.3168, "step": 29282 }, { "epoch": 2.1763656633221853, "grad_norm": 2.661109547253022, "learning_rate": 3.6995447828120445e-06, "loss": 0.3276, "step": 29283 }, { "epoch": 2.1764399851356373, "grad_norm": 2.231776317187567, "learning_rate": 3.6989217174469337e-06, "loss": 0.2598, "step": 29284 }, { "epoch": 2.1765143069490898, "grad_norm": 2.3405140881631623, "learning_rate": 3.698298692647834e-06, "loss": 0.2744, "step": 29285 }, { "epoch": 2.1765886287625418, "grad_norm": 2.425491306875491, "learning_rate": 3.6976757084187644e-06, "loss": 0.2769, "step": 29286 }, { "epoch": 2.1766629505759942, "grad_norm": 2.2324050264761164, "learning_rate": 3.6970527647637267e-06, "loss": 0.2583, "step": 29287 }, { "epoch": 2.1767372723894463, "grad_norm": 1.8466546569516131, "learning_rate": 3.696429861686741e-06, "loss": 0.2492, "step": 29288 }, { "epoch": 2.1768115942028987, "grad_norm": 1.7305116906461462, "learning_rate": 3.6958069991918122e-06, "loss": 0.1978, "step": 29289 }, { "epoch": 2.1768859160163507, "grad_norm": 2.2614833380656387, "learning_rate": 3.6951841772829512e-06, "loss": 0.2565, "step": 29290 }, { "epoch": 2.176960237829803, "grad_norm": 2.502740767586034, "learning_rate": 3.694561395964167e-06, "loss": 0.3568, "step": 29291 }, { "epoch": 2.177034559643255, "grad_norm": 2.3347596077841364, "learning_rate": 3.6939386552394664e-06, "loss": 0.2747, "step": 29292 }, { "epoch": 2.1771088814567077, "grad_norm": 2.0832388869799257, "learning_rate": 3.693315955112865e-06, "loss": 0.2725, "step": 29293 }, { "epoch": 2.1771832032701597, "grad_norm": 2.0469716214111533, "learning_rate": 3.6926932955883645e-06, "loss": 0.3065, "step": 29294 }, { "epoch": 2.177257525083612, "grad_norm": 2.0815149927057153, "learning_rate": 3.69207067666998e-06, "loss": 0.2626, "step": 29295 }, { "epoch": 2.177331846897064, "grad_norm": 1.9165590225628217, "learning_rate": 3.6914480983617183e-06, "loss": 0.2346, "step": 29296 }, { "epoch": 2.1774061687105166, "grad_norm": 2.267028493533882, "learning_rate": 3.6908255606675825e-06, "loss": 0.2918, "step": 29297 }, { "epoch": 2.1774804905239686, "grad_norm": 1.847931060502012, "learning_rate": 3.6902030635915873e-06, "loss": 0.2175, "step": 29298 }, { "epoch": 2.177554812337421, "grad_norm": 2.4106258920002914, "learning_rate": 3.689580607137735e-06, "loss": 0.2954, "step": 29299 }, { "epoch": 2.177629134150873, "grad_norm": 2.1263548350481796, "learning_rate": 3.6889581913100414e-06, "loss": 0.2991, "step": 29300 }, { "epoch": 2.1777034559643256, "grad_norm": 3.1531481000806987, "learning_rate": 3.6883358161125015e-06, "loss": 0.3462, "step": 29301 }, { "epoch": 2.1777777777777776, "grad_norm": 2.5235751727411766, "learning_rate": 3.687713481549131e-06, "loss": 0.3397, "step": 29302 }, { "epoch": 2.17785209959123, "grad_norm": 2.2343821749336525, "learning_rate": 3.6870911876239335e-06, "loss": 0.331, "step": 29303 }, { "epoch": 2.177926421404682, "grad_norm": 1.8043871990509643, "learning_rate": 3.686468934340912e-06, "loss": 0.2474, "step": 29304 }, { "epoch": 2.1780007432181345, "grad_norm": 1.8181156427830247, "learning_rate": 3.6858467217040795e-06, "loss": 0.2273, "step": 29305 }, { "epoch": 2.178075065031587, "grad_norm": 2.66126317402176, "learning_rate": 3.685224549717439e-06, "loss": 0.3157, "step": 29306 }, { "epoch": 2.178149386845039, "grad_norm": 2.1178348349498317, "learning_rate": 3.6846024183849904e-06, "loss": 0.2796, "step": 29307 }, { "epoch": 2.1782237086584915, "grad_norm": 3.424594350533398, "learning_rate": 3.6839803277107476e-06, "loss": 0.3559, "step": 29308 }, { "epoch": 2.1782980304719435, "grad_norm": 2.196062444204799, "learning_rate": 3.6833582776987085e-06, "loss": 0.2998, "step": 29309 }, { "epoch": 2.178372352285396, "grad_norm": 2.0992113921404645, "learning_rate": 3.6827362683528843e-06, "loss": 0.2017, "step": 29310 }, { "epoch": 2.178446674098848, "grad_norm": 2.914573924378015, "learning_rate": 3.6821142996772763e-06, "loss": 0.3851, "step": 29311 }, { "epoch": 2.1785209959123004, "grad_norm": 2.1064260872290688, "learning_rate": 3.681492371675889e-06, "loss": 0.321, "step": 29312 }, { "epoch": 2.1785953177257524, "grad_norm": 2.271187543966535, "learning_rate": 3.6808704843527243e-06, "loss": 0.2733, "step": 29313 }, { "epoch": 2.178669639539205, "grad_norm": 2.65537451146344, "learning_rate": 3.6802486377117864e-06, "loss": 0.3492, "step": 29314 }, { "epoch": 2.178743961352657, "grad_norm": 2.4751419192098085, "learning_rate": 3.6796268317570818e-06, "loss": 0.312, "step": 29315 }, { "epoch": 2.1788182831661094, "grad_norm": 2.6186882949686203, "learning_rate": 3.679005066492609e-06, "loss": 0.3242, "step": 29316 }, { "epoch": 2.1788926049795614, "grad_norm": 2.4829789859083977, "learning_rate": 3.6783833419223768e-06, "loss": 0.324, "step": 29317 }, { "epoch": 2.178966926793014, "grad_norm": 1.661712407893218, "learning_rate": 3.677761658050384e-06, "loss": 0.2158, "step": 29318 }, { "epoch": 2.179041248606466, "grad_norm": 2.0135720947953755, "learning_rate": 3.6771400148806314e-06, "loss": 0.2829, "step": 29319 }, { "epoch": 2.1791155704199183, "grad_norm": 3.0754422616575012, "learning_rate": 3.676518412417126e-06, "loss": 0.3651, "step": 29320 }, { "epoch": 2.1791898922333703, "grad_norm": 2.0197981083625836, "learning_rate": 3.675896850663864e-06, "loss": 0.2408, "step": 29321 }, { "epoch": 2.179264214046823, "grad_norm": 2.0901536549523363, "learning_rate": 3.6752753296248524e-06, "loss": 0.2376, "step": 29322 }, { "epoch": 2.179338535860275, "grad_norm": 2.2947335499363186, "learning_rate": 3.6746538493040907e-06, "loss": 0.2692, "step": 29323 }, { "epoch": 2.1794128576737273, "grad_norm": 2.898004846733186, "learning_rate": 3.674032409705579e-06, "loss": 0.3507, "step": 29324 }, { "epoch": 2.1794871794871793, "grad_norm": 2.6109886605803543, "learning_rate": 3.6734110108333197e-06, "loss": 0.3258, "step": 29325 }, { "epoch": 2.1795615013006318, "grad_norm": 2.302015623587508, "learning_rate": 3.6727896526913076e-06, "loss": 0.2884, "step": 29326 }, { "epoch": 2.1796358231140838, "grad_norm": 2.039296859718668, "learning_rate": 3.6721683352835504e-06, "loss": 0.2297, "step": 29327 }, { "epoch": 2.1797101449275362, "grad_norm": 2.131177786351338, "learning_rate": 3.6715470586140457e-06, "loss": 0.2585, "step": 29328 }, { "epoch": 2.1797844667409887, "grad_norm": 1.9488422526255018, "learning_rate": 3.6709258226867893e-06, "loss": 0.2575, "step": 29329 }, { "epoch": 2.1798587885544407, "grad_norm": 2.1965244239889494, "learning_rate": 3.6703046275057873e-06, "loss": 0.258, "step": 29330 }, { "epoch": 2.179933110367893, "grad_norm": 2.461170375002707, "learning_rate": 3.6696834730750317e-06, "loss": 0.3206, "step": 29331 }, { "epoch": 2.180007432181345, "grad_norm": 2.87100474195691, "learning_rate": 3.6690623593985295e-06, "loss": 0.3274, "step": 29332 }, { "epoch": 2.1800817539947976, "grad_norm": 1.828221793585259, "learning_rate": 3.6684412864802765e-06, "loss": 0.2227, "step": 29333 }, { "epoch": 2.1801560758082497, "grad_norm": 2.0360849817348043, "learning_rate": 3.667820254324268e-06, "loss": 0.2296, "step": 29334 }, { "epoch": 2.180230397621702, "grad_norm": 2.302495781443274, "learning_rate": 3.6671992629345055e-06, "loss": 0.2572, "step": 29335 }, { "epoch": 2.180304719435154, "grad_norm": 2.2279067275837914, "learning_rate": 3.6665783123149813e-06, "loss": 0.3188, "step": 29336 }, { "epoch": 2.1803790412486066, "grad_norm": 2.293121873339255, "learning_rate": 3.665957402469701e-06, "loss": 0.3096, "step": 29337 }, { "epoch": 2.1804533630620586, "grad_norm": 2.445050419893262, "learning_rate": 3.6653365334026546e-06, "loss": 0.3359, "step": 29338 }, { "epoch": 2.180527684875511, "grad_norm": 2.458953398307878, "learning_rate": 3.664715705117847e-06, "loss": 0.2956, "step": 29339 }, { "epoch": 2.180602006688963, "grad_norm": 3.058228972593772, "learning_rate": 3.6640949176192707e-06, "loss": 0.3896, "step": 29340 }, { "epoch": 2.1806763285024156, "grad_norm": 2.371591630417556, "learning_rate": 3.663474170910919e-06, "loss": 0.3034, "step": 29341 }, { "epoch": 2.1807506503158676, "grad_norm": 2.49139194907152, "learning_rate": 3.662853464996795e-06, "loss": 0.3258, "step": 29342 }, { "epoch": 2.18082497212932, "grad_norm": 2.3660380096609006, "learning_rate": 3.6622327998808884e-06, "loss": 0.2743, "step": 29343 }, { "epoch": 2.180899293942772, "grad_norm": 2.3453456332974865, "learning_rate": 3.6616121755672017e-06, "loss": 0.2485, "step": 29344 }, { "epoch": 2.1809736157562245, "grad_norm": 2.4475058740333417, "learning_rate": 3.660991592059727e-06, "loss": 0.274, "step": 29345 }, { "epoch": 2.1810479375696765, "grad_norm": 2.621580313616112, "learning_rate": 3.66037104936246e-06, "loss": 0.2097, "step": 29346 }, { "epoch": 2.181122259383129, "grad_norm": 2.850929856412415, "learning_rate": 3.659750547479394e-06, "loss": 0.3495, "step": 29347 }, { "epoch": 2.181196581196581, "grad_norm": 2.7979415779102954, "learning_rate": 3.659130086414523e-06, "loss": 0.2639, "step": 29348 }, { "epoch": 2.1812709030100335, "grad_norm": 2.033263859811152, "learning_rate": 3.6585096661718455e-06, "loss": 0.2169, "step": 29349 }, { "epoch": 2.1813452248234855, "grad_norm": 2.2700982923516944, "learning_rate": 3.6578892867553552e-06, "loss": 0.2368, "step": 29350 }, { "epoch": 2.181419546636938, "grad_norm": 2.5473958443026588, "learning_rate": 3.6572689481690395e-06, "loss": 0.3267, "step": 29351 }, { "epoch": 2.1814938684503904, "grad_norm": 1.8343377645346994, "learning_rate": 3.656648650416902e-06, "loss": 0.246, "step": 29352 }, { "epoch": 2.1815681902638424, "grad_norm": 2.6400382565197646, "learning_rate": 3.6560283935029273e-06, "loss": 0.3401, "step": 29353 }, { "epoch": 2.181642512077295, "grad_norm": 2.2568939510830273, "learning_rate": 3.6554081774311157e-06, "loss": 0.3172, "step": 29354 }, { "epoch": 2.181716833890747, "grad_norm": 3.419262042262711, "learning_rate": 3.6547880022054585e-06, "loss": 0.3908, "step": 29355 }, { "epoch": 2.1817911557041993, "grad_norm": 2.5751684741896343, "learning_rate": 3.654167867829942e-06, "loss": 0.3261, "step": 29356 }, { "epoch": 2.1818654775176514, "grad_norm": 2.239229277871002, "learning_rate": 3.6535477743085713e-06, "loss": 0.3251, "step": 29357 }, { "epoch": 2.181939799331104, "grad_norm": 1.9461722678651283, "learning_rate": 3.652927721645323e-06, "loss": 0.2409, "step": 29358 }, { "epoch": 2.182014121144556, "grad_norm": 1.7168345899487751, "learning_rate": 3.6523077098442005e-06, "loss": 0.1929, "step": 29359 }, { "epoch": 2.1820884429580083, "grad_norm": 2.146911466611965, "learning_rate": 3.6516877389091885e-06, "loss": 0.2451, "step": 29360 }, { "epoch": 2.1821627647714603, "grad_norm": 2.1905439386888057, "learning_rate": 3.651067808844284e-06, "loss": 0.3105, "step": 29361 }, { "epoch": 2.1822370865849128, "grad_norm": 2.3623054807567505, "learning_rate": 3.650447919653477e-06, "loss": 0.3177, "step": 29362 }, { "epoch": 2.182311408398365, "grad_norm": 2.382667415659799, "learning_rate": 3.649828071340752e-06, "loss": 0.2536, "step": 29363 }, { "epoch": 2.1823857302118173, "grad_norm": 2.3846770019022037, "learning_rate": 3.649208263910109e-06, "loss": 0.3444, "step": 29364 }, { "epoch": 2.1824600520252693, "grad_norm": 1.9133918651108972, "learning_rate": 3.6485884973655326e-06, "loss": 0.2131, "step": 29365 }, { "epoch": 2.1825343738387217, "grad_norm": 2.4637523672276345, "learning_rate": 3.6479687717110113e-06, "loss": 0.3393, "step": 29366 }, { "epoch": 2.1826086956521737, "grad_norm": 2.8166568294921075, "learning_rate": 3.64734908695054e-06, "loss": 0.3698, "step": 29367 }, { "epoch": 2.182683017465626, "grad_norm": 2.462268830420431, "learning_rate": 3.646729443088106e-06, "loss": 0.266, "step": 29368 }, { "epoch": 2.1827573392790782, "grad_norm": 2.446481638101747, "learning_rate": 3.6461098401276985e-06, "loss": 0.4049, "step": 29369 }, { "epoch": 2.1828316610925307, "grad_norm": 2.15863836387922, "learning_rate": 3.6454902780733024e-06, "loss": 0.3146, "step": 29370 }, { "epoch": 2.1829059829059827, "grad_norm": 2.106132569667638, "learning_rate": 3.644870756928914e-06, "loss": 0.2901, "step": 29371 }, { "epoch": 2.182980304719435, "grad_norm": 2.2539218892611332, "learning_rate": 3.6442512766985183e-06, "loss": 0.2982, "step": 29372 }, { "epoch": 2.183054626532887, "grad_norm": 2.049194786909357, "learning_rate": 3.643631837386099e-06, "loss": 0.2827, "step": 29373 }, { "epoch": 2.1831289483463396, "grad_norm": 2.12689971386904, "learning_rate": 3.643012438995652e-06, "loss": 0.245, "step": 29374 }, { "epoch": 2.183203270159792, "grad_norm": 2.0729857272224392, "learning_rate": 3.642393081531157e-06, "loss": 0.2429, "step": 29375 }, { "epoch": 2.183277591973244, "grad_norm": 2.430225797539287, "learning_rate": 3.641773764996609e-06, "loss": 0.3087, "step": 29376 }, { "epoch": 2.1833519137866966, "grad_norm": 1.9517675423541592, "learning_rate": 3.6411544893959917e-06, "loss": 0.2669, "step": 29377 }, { "epoch": 2.1834262356001486, "grad_norm": 2.451965823305288, "learning_rate": 3.6405352547332894e-06, "loss": 0.2911, "step": 29378 }, { "epoch": 2.183500557413601, "grad_norm": 2.49679819757204, "learning_rate": 3.6399160610124927e-06, "loss": 0.4113, "step": 29379 }, { "epoch": 2.183574879227053, "grad_norm": 2.777283490311076, "learning_rate": 3.6392969082375883e-06, "loss": 0.2818, "step": 29380 }, { "epoch": 2.1836492010405055, "grad_norm": 2.2755153590340713, "learning_rate": 3.6386777964125596e-06, "loss": 0.3174, "step": 29381 }, { "epoch": 2.1837235228539575, "grad_norm": 2.4815213932095954, "learning_rate": 3.638058725541389e-06, "loss": 0.2979, "step": 29382 }, { "epoch": 2.18379784466741, "grad_norm": 1.9530899655973464, "learning_rate": 3.637439695628071e-06, "loss": 0.2135, "step": 29383 }, { "epoch": 2.183872166480862, "grad_norm": 2.6913073612007294, "learning_rate": 3.636820706676585e-06, "loss": 0.3534, "step": 29384 }, { "epoch": 2.1839464882943145, "grad_norm": 2.2105651818826204, "learning_rate": 3.6362017586909136e-06, "loss": 0.2352, "step": 29385 }, { "epoch": 2.1840208101077665, "grad_norm": 2.3182283544305204, "learning_rate": 3.6355828516750502e-06, "loss": 0.3506, "step": 29386 }, { "epoch": 2.184095131921219, "grad_norm": 2.495594295581304, "learning_rate": 3.634963985632972e-06, "loss": 0.2598, "step": 29387 }, { "epoch": 2.184169453734671, "grad_norm": 2.224925540271345, "learning_rate": 3.634345160568663e-06, "loss": 0.264, "step": 29388 }, { "epoch": 2.1842437755481234, "grad_norm": 2.8484669984865385, "learning_rate": 3.633726376486113e-06, "loss": 0.3469, "step": 29389 }, { "epoch": 2.1843180973615755, "grad_norm": 1.8589574410194276, "learning_rate": 3.633107633389299e-06, "loss": 0.2343, "step": 29390 }, { "epoch": 2.184392419175028, "grad_norm": 3.03149691779337, "learning_rate": 3.632488931282214e-06, "loss": 0.2842, "step": 29391 }, { "epoch": 2.18446674098848, "grad_norm": 2.4528650452931764, "learning_rate": 3.631870270168829e-06, "loss": 0.3541, "step": 29392 }, { "epoch": 2.1845410628019324, "grad_norm": 1.963047993503239, "learning_rate": 3.6312516500531357e-06, "loss": 0.2957, "step": 29393 }, { "epoch": 2.184615384615385, "grad_norm": 2.0975646859953687, "learning_rate": 3.6306330709391134e-06, "loss": 0.3269, "step": 29394 }, { "epoch": 2.184689706428837, "grad_norm": 2.1373336384128225, "learning_rate": 3.630014532830742e-06, "loss": 0.2672, "step": 29395 }, { "epoch": 2.184764028242289, "grad_norm": 1.631504832150053, "learning_rate": 3.62939603573201e-06, "loss": 0.2099, "step": 29396 }, { "epoch": 2.1848383500557413, "grad_norm": 1.916588607490266, "learning_rate": 3.6287775796468917e-06, "loss": 0.2228, "step": 29397 }, { "epoch": 2.184912671869194, "grad_norm": 3.064490714103622, "learning_rate": 3.628159164579377e-06, "loss": 0.3307, "step": 29398 }, { "epoch": 2.184986993682646, "grad_norm": 2.3781562716663984, "learning_rate": 3.6275407905334427e-06, "loss": 0.2945, "step": 29399 }, { "epoch": 2.1850613154960983, "grad_norm": 2.4271737200577865, "learning_rate": 3.6269224575130656e-06, "loss": 0.3237, "step": 29400 }, { "epoch": 2.1851356373095503, "grad_norm": 2.5558973587984886, "learning_rate": 3.6263041655222366e-06, "loss": 0.3109, "step": 29401 }, { "epoch": 2.1852099591230028, "grad_norm": 2.6898077441427968, "learning_rate": 3.6256859145649293e-06, "loss": 0.2912, "step": 29402 }, { "epoch": 2.1852842809364548, "grad_norm": 2.7229599466909593, "learning_rate": 3.6250677046451257e-06, "loss": 0.2747, "step": 29403 }, { "epoch": 2.1853586027499072, "grad_norm": 2.4540563574187093, "learning_rate": 3.6244495357668018e-06, "loss": 0.3327, "step": 29404 }, { "epoch": 2.1854329245633592, "grad_norm": 2.739641320296483, "learning_rate": 3.623831407933944e-06, "loss": 0.3224, "step": 29405 }, { "epoch": 2.1855072463768117, "grad_norm": 2.294671021209743, "learning_rate": 3.6232133211505294e-06, "loss": 0.3282, "step": 29406 }, { "epoch": 2.1855815681902637, "grad_norm": 2.286187540696425, "learning_rate": 3.622595275420533e-06, "loss": 0.2488, "step": 29407 }, { "epoch": 2.185655890003716, "grad_norm": 1.742129521831653, "learning_rate": 3.6219772707479404e-06, "loss": 0.2439, "step": 29408 }, { "epoch": 2.185730211817168, "grad_norm": 2.7399097288108205, "learning_rate": 3.621359307136728e-06, "loss": 0.2392, "step": 29409 }, { "epoch": 2.1858045336306207, "grad_norm": 2.752815054491878, "learning_rate": 3.620741384590869e-06, "loss": 0.3211, "step": 29410 }, { "epoch": 2.1858788554440727, "grad_norm": 2.919369662930962, "learning_rate": 3.62012350311435e-06, "loss": 0.3036, "step": 29411 }, { "epoch": 2.185953177257525, "grad_norm": 1.8713142645870244, "learning_rate": 3.619505662711141e-06, "loss": 0.2453, "step": 29412 }, { "epoch": 2.186027499070977, "grad_norm": 2.743571269905679, "learning_rate": 3.618887863385232e-06, "loss": 0.3044, "step": 29413 }, { "epoch": 2.1861018208844296, "grad_norm": 2.2147079485218475, "learning_rate": 3.6182701051405834e-06, "loss": 0.2784, "step": 29414 }, { "epoch": 2.1861761426978816, "grad_norm": 2.3746965988867563, "learning_rate": 3.617652387981184e-06, "loss": 0.3001, "step": 29415 }, { "epoch": 2.186250464511334, "grad_norm": 1.9169932746095062, "learning_rate": 3.617034711911008e-06, "loss": 0.2395, "step": 29416 }, { "epoch": 2.1863247863247866, "grad_norm": 1.9752141740606313, "learning_rate": 3.616417076934028e-06, "loss": 0.2139, "step": 29417 }, { "epoch": 2.1863991081382386, "grad_norm": 2.4444119087290015, "learning_rate": 3.6157994830542263e-06, "loss": 0.2512, "step": 29418 }, { "epoch": 2.186473429951691, "grad_norm": 2.4634078795003704, "learning_rate": 3.615181930275573e-06, "loss": 0.4223, "step": 29419 }, { "epoch": 2.186547751765143, "grad_norm": 2.2215031737225273, "learning_rate": 3.614564418602051e-06, "loss": 0.3467, "step": 29420 }, { "epoch": 2.1866220735785955, "grad_norm": 1.9535988990981537, "learning_rate": 3.6139469480376323e-06, "loss": 0.2377, "step": 29421 }, { "epoch": 2.1866963953920475, "grad_norm": 2.377624455076514, "learning_rate": 3.613329518586288e-06, "loss": 0.2926, "step": 29422 }, { "epoch": 2.1867707172055, "grad_norm": 3.17003400462076, "learning_rate": 3.6127121302520007e-06, "loss": 0.3234, "step": 29423 }, { "epoch": 2.186845039018952, "grad_norm": 1.9357452594277358, "learning_rate": 3.6120947830387364e-06, "loss": 0.231, "step": 29424 }, { "epoch": 2.1869193608324045, "grad_norm": 2.556352419379934, "learning_rate": 3.6114774769504825e-06, "loss": 0.3038, "step": 29425 }, { "epoch": 2.1869936826458565, "grad_norm": 2.3794689250402796, "learning_rate": 3.6108602119911986e-06, "loss": 0.2968, "step": 29426 }, { "epoch": 2.187068004459309, "grad_norm": 2.487891753982069, "learning_rate": 3.610242988164868e-06, "loss": 0.3535, "step": 29427 }, { "epoch": 2.187142326272761, "grad_norm": 2.2729992286436755, "learning_rate": 3.6096258054754617e-06, "loss": 0.3237, "step": 29428 }, { "epoch": 2.1872166480862134, "grad_norm": 2.5418590206733893, "learning_rate": 3.6090086639269495e-06, "loss": 0.3439, "step": 29429 }, { "epoch": 2.1872909698996654, "grad_norm": 2.086204129643475, "learning_rate": 3.60839156352331e-06, "loss": 0.2451, "step": 29430 }, { "epoch": 2.187365291713118, "grad_norm": 2.2496626211028055, "learning_rate": 3.6077745042685152e-06, "loss": 0.2821, "step": 29431 }, { "epoch": 2.18743961352657, "grad_norm": 2.0204983194704487, "learning_rate": 3.6071574861665336e-06, "loss": 0.2909, "step": 29432 }, { "epoch": 2.1875139353400224, "grad_norm": 1.8488195425642502, "learning_rate": 3.6065405092213433e-06, "loss": 0.1932, "step": 29433 }, { "epoch": 2.1875882571534744, "grad_norm": 1.6625668939099216, "learning_rate": 3.60592357343691e-06, "loss": 0.1981, "step": 29434 }, { "epoch": 2.187662578966927, "grad_norm": 2.028699949372158, "learning_rate": 3.6053066788172118e-06, "loss": 0.2747, "step": 29435 }, { "epoch": 2.187736900780379, "grad_norm": 2.4281865286138946, "learning_rate": 3.6046898253662187e-06, "loss": 0.3088, "step": 29436 }, { "epoch": 2.1878112225938313, "grad_norm": 1.634395123186517, "learning_rate": 3.6040730130878998e-06, "loss": 0.1766, "step": 29437 }, { "epoch": 2.1878855444072833, "grad_norm": 2.1439047444257553, "learning_rate": 3.6034562419862274e-06, "loss": 0.2972, "step": 29438 }, { "epoch": 2.187959866220736, "grad_norm": 2.384332782600579, "learning_rate": 3.602839512065167e-06, "loss": 0.2176, "step": 29439 }, { "epoch": 2.1880341880341883, "grad_norm": 2.148381030982888, "learning_rate": 3.6022228233286995e-06, "loss": 0.2733, "step": 29440 }, { "epoch": 2.1881085098476403, "grad_norm": 2.3960763237636575, "learning_rate": 3.601606175780784e-06, "loss": 0.3197, "step": 29441 }, { "epoch": 2.1881828316610927, "grad_norm": 2.6570161664261405, "learning_rate": 3.6009895694254004e-06, "loss": 0.3342, "step": 29442 }, { "epoch": 2.1882571534745447, "grad_norm": 2.1716864670136777, "learning_rate": 3.600373004266514e-06, "loss": 0.3199, "step": 29443 }, { "epoch": 2.188331475287997, "grad_norm": 1.9108001182976477, "learning_rate": 3.599756480308091e-06, "loss": 0.2594, "step": 29444 }, { "epoch": 2.1884057971014492, "grad_norm": 2.553566816951909, "learning_rate": 3.599139997554106e-06, "loss": 0.2911, "step": 29445 }, { "epoch": 2.1884801189149017, "grad_norm": 2.128967860100341, "learning_rate": 3.598523556008523e-06, "loss": 0.2254, "step": 29446 }, { "epoch": 2.1885544407283537, "grad_norm": 1.7796343787715558, "learning_rate": 3.597907155675321e-06, "loss": 0.261, "step": 29447 }, { "epoch": 2.188628762541806, "grad_norm": 2.671788648292182, "learning_rate": 3.5972907965584526e-06, "loss": 0.2658, "step": 29448 }, { "epoch": 2.188703084355258, "grad_norm": 1.9027402035965202, "learning_rate": 3.5966744786618967e-06, "loss": 0.2056, "step": 29449 }, { "epoch": 2.1887774061687106, "grad_norm": 2.6388626998995948, "learning_rate": 3.5960582019896183e-06, "loss": 0.3546, "step": 29450 }, { "epoch": 2.1888517279821627, "grad_norm": 2.1523048715308164, "learning_rate": 3.595441966545583e-06, "loss": 0.2403, "step": 29451 }, { "epoch": 2.188926049795615, "grad_norm": 2.361213414099966, "learning_rate": 3.594825772333762e-06, "loss": 0.2311, "step": 29452 }, { "epoch": 2.189000371609067, "grad_norm": 2.152340751620823, "learning_rate": 3.594209619358121e-06, "loss": 0.3031, "step": 29453 }, { "epoch": 2.1890746934225196, "grad_norm": 2.5004301064178223, "learning_rate": 3.5935935076226226e-06, "loss": 0.2755, "step": 29454 }, { "epoch": 2.1891490152359716, "grad_norm": 2.213354922839265, "learning_rate": 3.5929774371312397e-06, "loss": 0.2227, "step": 29455 }, { "epoch": 2.189223337049424, "grad_norm": 2.9470684492700374, "learning_rate": 3.592361407887933e-06, "loss": 0.3354, "step": 29456 }, { "epoch": 2.189297658862876, "grad_norm": 2.7445888422977927, "learning_rate": 3.591745419896674e-06, "loss": 0.2835, "step": 29457 }, { "epoch": 2.1893719806763285, "grad_norm": 2.9898573638282007, "learning_rate": 3.5911294731614253e-06, "loss": 0.2946, "step": 29458 }, { "epoch": 2.1894463024897806, "grad_norm": 2.3881959122521557, "learning_rate": 3.5905135676861534e-06, "loss": 0.3384, "step": 29459 }, { "epoch": 2.189520624303233, "grad_norm": 2.3124098007272935, "learning_rate": 3.5898977034748216e-06, "loss": 0.2907, "step": 29460 }, { "epoch": 2.189594946116685, "grad_norm": 2.7532943590241015, "learning_rate": 3.5892818805313933e-06, "loss": 0.3324, "step": 29461 }, { "epoch": 2.1896692679301375, "grad_norm": 2.0659840577967072, "learning_rate": 3.5886660988598376e-06, "loss": 0.3238, "step": 29462 }, { "epoch": 2.18974358974359, "grad_norm": 1.9995318450167827, "learning_rate": 3.5880503584641145e-06, "loss": 0.3155, "step": 29463 }, { "epoch": 2.189817911557042, "grad_norm": 2.0133142836569995, "learning_rate": 3.5874346593481947e-06, "loss": 0.2187, "step": 29464 }, { "epoch": 2.1898922333704944, "grad_norm": 1.9586112020218107, "learning_rate": 3.5868190015160365e-06, "loss": 0.2301, "step": 29465 }, { "epoch": 2.1899665551839465, "grad_norm": 2.3457352170005286, "learning_rate": 3.5862033849716015e-06, "loss": 0.2417, "step": 29466 }, { "epoch": 2.190040876997399, "grad_norm": 2.7756487674836174, "learning_rate": 3.58558780971886e-06, "loss": 0.3045, "step": 29467 }, { "epoch": 2.190115198810851, "grad_norm": 2.429086061180528, "learning_rate": 3.584972275761769e-06, "loss": 0.329, "step": 29468 }, { "epoch": 2.1901895206243034, "grad_norm": 2.5342542318765555, "learning_rate": 3.584356783104296e-06, "loss": 0.2909, "step": 29469 }, { "epoch": 2.1902638424377554, "grad_norm": 3.4133802782171534, "learning_rate": 3.5837413317504045e-06, "loss": 0.361, "step": 29470 }, { "epoch": 2.190338164251208, "grad_norm": 2.1984964752295535, "learning_rate": 3.583125921704046e-06, "loss": 0.2248, "step": 29471 }, { "epoch": 2.19041248606466, "grad_norm": 2.5874885154189435, "learning_rate": 3.582510552969194e-06, "loss": 0.3628, "step": 29472 }, { "epoch": 2.1904868078781123, "grad_norm": 2.403686228386356, "learning_rate": 3.581895225549803e-06, "loss": 0.2954, "step": 29473 }, { "epoch": 2.1905611296915644, "grad_norm": 2.4994646394852973, "learning_rate": 3.58127993944984e-06, "loss": 0.3794, "step": 29474 }, { "epoch": 2.190635451505017, "grad_norm": 2.7261055783238115, "learning_rate": 3.580664694673265e-06, "loss": 0.3805, "step": 29475 }, { "epoch": 2.190709773318469, "grad_norm": 2.0013615296936456, "learning_rate": 3.5800494912240335e-06, "loss": 0.251, "step": 29476 }, { "epoch": 2.1907840951319213, "grad_norm": 2.0570860368646198, "learning_rate": 3.579434329106113e-06, "loss": 0.2707, "step": 29477 }, { "epoch": 2.1908584169453733, "grad_norm": 2.2029723615113324, "learning_rate": 3.5788192083234583e-06, "loss": 0.2854, "step": 29478 }, { "epoch": 2.1909327387588258, "grad_norm": 2.7219278663845947, "learning_rate": 3.5782041288800364e-06, "loss": 0.3569, "step": 29479 }, { "epoch": 2.191007060572278, "grad_norm": 2.065655291665066, "learning_rate": 3.577589090779803e-06, "loss": 0.2954, "step": 29480 }, { "epoch": 2.1910813823857302, "grad_norm": 2.159901149288105, "learning_rate": 3.576974094026717e-06, "loss": 0.2761, "step": 29481 }, { "epoch": 2.1911557041991823, "grad_norm": 2.3750047107467904, "learning_rate": 3.576359138624739e-06, "loss": 0.3273, "step": 29482 }, { "epoch": 2.1912300260126347, "grad_norm": 1.8376686539751814, "learning_rate": 3.575744224577824e-06, "loss": 0.2359, "step": 29483 }, { "epoch": 2.1913043478260867, "grad_norm": 1.9588951822447038, "learning_rate": 3.575129351889938e-06, "loss": 0.1936, "step": 29484 }, { "epoch": 2.191378669639539, "grad_norm": 2.60710477091698, "learning_rate": 3.5745145205650324e-06, "loss": 0.4032, "step": 29485 }, { "epoch": 2.1914529914529917, "grad_norm": 2.1926695251783976, "learning_rate": 3.573899730607072e-06, "loss": 0.227, "step": 29486 }, { "epoch": 2.1915273132664437, "grad_norm": 2.3387555124249753, "learning_rate": 3.573284982020012e-06, "loss": 0.2611, "step": 29487 }, { "epoch": 2.191601635079896, "grad_norm": 2.548554151452514, "learning_rate": 3.5726702748078057e-06, "loss": 0.2951, "step": 29488 }, { "epoch": 2.191675956893348, "grad_norm": 2.680359772238057, "learning_rate": 3.5720556089744186e-06, "loss": 0.2976, "step": 29489 }, { "epoch": 2.1917502787068006, "grad_norm": 2.6364217837564605, "learning_rate": 3.571440984523804e-06, "loss": 0.401, "step": 29490 }, { "epoch": 2.1918246005202526, "grad_norm": 1.6969553389197725, "learning_rate": 3.5708264014599157e-06, "loss": 0.2486, "step": 29491 }, { "epoch": 2.191898922333705, "grad_norm": 2.1488239857734235, "learning_rate": 3.5702118597867162e-06, "loss": 0.2242, "step": 29492 }, { "epoch": 2.191973244147157, "grad_norm": 2.269205207521347, "learning_rate": 3.56959735950816e-06, "loss": 0.3062, "step": 29493 }, { "epoch": 2.1920475659606096, "grad_norm": 2.515279887640938, "learning_rate": 3.568982900628202e-06, "loss": 0.2319, "step": 29494 }, { "epoch": 2.1921218877740616, "grad_norm": 2.438771522536136, "learning_rate": 3.568368483150795e-06, "loss": 0.342, "step": 29495 }, { "epoch": 2.192196209587514, "grad_norm": 2.632594834736077, "learning_rate": 3.5677541070799015e-06, "loss": 0.3435, "step": 29496 }, { "epoch": 2.192270531400966, "grad_norm": 2.394358273566195, "learning_rate": 3.5671397724194723e-06, "loss": 0.305, "step": 29497 }, { "epoch": 2.1923448532144185, "grad_norm": 1.8807912086676166, "learning_rate": 3.5665254791734616e-06, "loss": 0.257, "step": 29498 }, { "epoch": 2.1924191750278705, "grad_norm": 2.718631145395494, "learning_rate": 3.5659112273458285e-06, "loss": 0.2957, "step": 29499 }, { "epoch": 2.192493496841323, "grad_norm": 2.615647849138816, "learning_rate": 3.5652970169405223e-06, "loss": 0.357, "step": 29500 }, { "epoch": 2.192567818654775, "grad_norm": 2.2851123076673616, "learning_rate": 3.5646828479615028e-06, "loss": 0.2672, "step": 29501 }, { "epoch": 2.1926421404682275, "grad_norm": 2.2699660214820394, "learning_rate": 3.564068720412721e-06, "loss": 0.3436, "step": 29502 }, { "epoch": 2.1927164622816795, "grad_norm": 2.112958666261264, "learning_rate": 3.563454634298128e-06, "loss": 0.3073, "step": 29503 }, { "epoch": 2.192790784095132, "grad_norm": 2.136765694695083, "learning_rate": 3.5628405896216868e-06, "loss": 0.2488, "step": 29504 }, { "epoch": 2.192865105908584, "grad_norm": 2.108541684996301, "learning_rate": 3.562226586387337e-06, "loss": 0.2931, "step": 29505 }, { "epoch": 2.1929394277220364, "grad_norm": 2.1784241079131084, "learning_rate": 3.5616126245990424e-06, "loss": 0.2509, "step": 29506 }, { "epoch": 2.1930137495354884, "grad_norm": 1.8608082296317536, "learning_rate": 3.5609987042607475e-06, "loss": 0.2716, "step": 29507 }, { "epoch": 2.193088071348941, "grad_norm": 1.9106700311676834, "learning_rate": 3.5603848253764127e-06, "loss": 0.2778, "step": 29508 }, { "epoch": 2.1931623931623934, "grad_norm": 2.4607354994086426, "learning_rate": 3.5597709879499852e-06, "loss": 0.2588, "step": 29509 }, { "epoch": 2.1932367149758454, "grad_norm": 2.554394017652298, "learning_rate": 3.559157191985416e-06, "loss": 0.2778, "step": 29510 }, { "epoch": 2.193311036789298, "grad_norm": 2.349886650180572, "learning_rate": 3.5585434374866613e-06, "loss": 0.2924, "step": 29511 }, { "epoch": 2.19338535860275, "grad_norm": 3.1815806703866993, "learning_rate": 3.5579297244576694e-06, "loss": 0.2792, "step": 29512 }, { "epoch": 2.1934596804162023, "grad_norm": 1.8833422172308307, "learning_rate": 3.5573160529023887e-06, "loss": 0.2729, "step": 29513 }, { "epoch": 2.1935340022296543, "grad_norm": 2.453306667193097, "learning_rate": 3.5567024228247758e-06, "loss": 0.3605, "step": 29514 }, { "epoch": 2.193608324043107, "grad_norm": 2.5535041109585546, "learning_rate": 3.556088834228779e-06, "loss": 0.2941, "step": 29515 }, { "epoch": 2.193682645856559, "grad_norm": 2.157109073972396, "learning_rate": 3.5554752871183486e-06, "loss": 0.2971, "step": 29516 }, { "epoch": 2.1937569676700113, "grad_norm": 2.183054259189265, "learning_rate": 3.5548617814974296e-06, "loss": 0.2452, "step": 29517 }, { "epoch": 2.1938312894834633, "grad_norm": 1.8388082608363194, "learning_rate": 3.5542483173699794e-06, "loss": 0.2631, "step": 29518 }, { "epoch": 2.1939056112969157, "grad_norm": 2.368973669337878, "learning_rate": 3.5536348947399446e-06, "loss": 0.2862, "step": 29519 }, { "epoch": 2.1939799331103678, "grad_norm": 2.279354617258573, "learning_rate": 3.55302151361127e-06, "loss": 0.268, "step": 29520 }, { "epoch": 2.1940542549238202, "grad_norm": 2.3772331884987974, "learning_rate": 3.5524081739879123e-06, "loss": 0.3706, "step": 29521 }, { "epoch": 2.1941285767372722, "grad_norm": 2.504263263367848, "learning_rate": 3.551794875873812e-06, "loss": 0.3696, "step": 29522 }, { "epoch": 2.1942028985507247, "grad_norm": 2.231332447145439, "learning_rate": 3.5511816192729254e-06, "loss": 0.3334, "step": 29523 }, { "epoch": 2.1942772203641767, "grad_norm": 1.8972533236364324, "learning_rate": 3.5505684041891976e-06, "loss": 0.2687, "step": 29524 }, { "epoch": 2.194351542177629, "grad_norm": 2.3874051554252738, "learning_rate": 3.5499552306265718e-06, "loss": 0.3364, "step": 29525 }, { "epoch": 2.194425863991081, "grad_norm": 2.3275336344886193, "learning_rate": 3.549342098589006e-06, "loss": 0.3441, "step": 29526 }, { "epoch": 2.1945001858045337, "grad_norm": 2.892622051776141, "learning_rate": 3.548729008080435e-06, "loss": 0.3189, "step": 29527 }, { "epoch": 2.194574507617986, "grad_norm": 2.0044693933453948, "learning_rate": 3.548115959104815e-06, "loss": 0.3116, "step": 29528 }, { "epoch": 2.194648829431438, "grad_norm": 2.1286660131395387, "learning_rate": 3.5475029516660865e-06, "loss": 0.3093, "step": 29529 }, { "epoch": 2.19472315124489, "grad_norm": 2.510966314904399, "learning_rate": 3.546889985768203e-06, "loss": 0.3722, "step": 29530 }, { "epoch": 2.1947974730583426, "grad_norm": 2.2826452534915256, "learning_rate": 3.5462770614151054e-06, "loss": 0.3262, "step": 29531 }, { "epoch": 2.194871794871795, "grad_norm": 2.6866985151236022, "learning_rate": 3.545664178610739e-06, "loss": 0.2889, "step": 29532 }, { "epoch": 2.194946116685247, "grad_norm": 2.448102706471812, "learning_rate": 3.5450513373590544e-06, "loss": 0.347, "step": 29533 }, { "epoch": 2.1950204384986995, "grad_norm": 2.0463016105610805, "learning_rate": 3.5444385376639957e-06, "loss": 0.2222, "step": 29534 }, { "epoch": 2.1950947603121516, "grad_norm": 2.2990669280721825, "learning_rate": 3.5438257795295018e-06, "loss": 0.2234, "step": 29535 }, { "epoch": 2.195169082125604, "grad_norm": 2.0628484489665855, "learning_rate": 3.543213062959526e-06, "loss": 0.2055, "step": 29536 }, { "epoch": 2.195243403939056, "grad_norm": 2.0338934333659884, "learning_rate": 3.5426003879580074e-06, "loss": 0.2535, "step": 29537 }, { "epoch": 2.1953177257525085, "grad_norm": 2.8980280592563465, "learning_rate": 3.5419877545288984e-06, "loss": 0.2989, "step": 29538 }, { "epoch": 2.1953920475659605, "grad_norm": 2.156686403312377, "learning_rate": 3.5413751626761305e-06, "loss": 0.2276, "step": 29539 }, { "epoch": 2.195466369379413, "grad_norm": 2.528032113530114, "learning_rate": 3.5407626124036577e-06, "loss": 0.3453, "step": 29540 }, { "epoch": 2.195540691192865, "grad_norm": 1.8584876748424344, "learning_rate": 3.5401501037154205e-06, "loss": 0.2112, "step": 29541 }, { "epoch": 2.1956150130063175, "grad_norm": 2.3154827183409745, "learning_rate": 3.539537636615358e-06, "loss": 0.3175, "step": 29542 }, { "epoch": 2.1956893348197695, "grad_norm": 1.7445682824968096, "learning_rate": 3.53892521110742e-06, "loss": 0.2599, "step": 29543 }, { "epoch": 2.195763656633222, "grad_norm": 2.4764845231005643, "learning_rate": 3.5383128271955423e-06, "loss": 0.2703, "step": 29544 }, { "epoch": 2.195837978446674, "grad_norm": 2.5614858257622966, "learning_rate": 3.537700484883676e-06, "loss": 0.2548, "step": 29545 }, { "epoch": 2.1959123002601264, "grad_norm": 2.750043341568215, "learning_rate": 3.5370881841757575e-06, "loss": 0.3386, "step": 29546 }, { "epoch": 2.1959866220735784, "grad_norm": 1.957462885775919, "learning_rate": 3.5364759250757275e-06, "loss": 0.2218, "step": 29547 }, { "epoch": 2.196060943887031, "grad_norm": 2.024362841957102, "learning_rate": 3.5358637075875335e-06, "loss": 0.3021, "step": 29548 }, { "epoch": 2.196135265700483, "grad_norm": 2.4847879705176648, "learning_rate": 3.535251531715114e-06, "loss": 0.2832, "step": 29549 }, { "epoch": 2.1962095875139354, "grad_norm": 2.8330278756877942, "learning_rate": 3.534639397462408e-06, "loss": 0.4, "step": 29550 }, { "epoch": 2.196283909327388, "grad_norm": 2.523844142955352, "learning_rate": 3.5340273048333562e-06, "loss": 0.3286, "step": 29551 }, { "epoch": 2.19635823114084, "grad_norm": 2.3336168508107518, "learning_rate": 3.5334152538319033e-06, "loss": 0.2695, "step": 29552 }, { "epoch": 2.196432552954292, "grad_norm": 2.2227931259236655, "learning_rate": 3.5328032444619885e-06, "loss": 0.2397, "step": 29553 }, { "epoch": 2.1965068747677443, "grad_norm": 1.920096248653661, "learning_rate": 3.5321912767275466e-06, "loss": 0.2848, "step": 29554 }, { "epoch": 2.1965811965811968, "grad_norm": 2.568216819168171, "learning_rate": 3.5315793506325257e-06, "loss": 0.3683, "step": 29555 }, { "epoch": 2.196655518394649, "grad_norm": 2.5942522001641737, "learning_rate": 3.530967466180861e-06, "loss": 0.2721, "step": 29556 }, { "epoch": 2.1967298402081012, "grad_norm": 2.4123626262929196, "learning_rate": 3.530355623376489e-06, "loss": 0.3063, "step": 29557 }, { "epoch": 2.1968041620215533, "grad_norm": 2.9601452504364425, "learning_rate": 3.529743822223356e-06, "loss": 0.3163, "step": 29558 }, { "epoch": 2.1968784838350057, "grad_norm": 2.6845513716797917, "learning_rate": 3.529132062725392e-06, "loss": 0.3496, "step": 29559 }, { "epoch": 2.1969528056484577, "grad_norm": 2.266574567073514, "learning_rate": 3.528520344886547e-06, "loss": 0.3023, "step": 29560 }, { "epoch": 2.19702712746191, "grad_norm": 3.4532979787001987, "learning_rate": 3.5279086687107456e-06, "loss": 0.2888, "step": 29561 }, { "epoch": 2.197101449275362, "grad_norm": 2.0151020524520282, "learning_rate": 3.5272970342019353e-06, "loss": 0.256, "step": 29562 }, { "epoch": 2.1971757710888147, "grad_norm": 2.3964649021612456, "learning_rate": 3.526685441364052e-06, "loss": 0.3497, "step": 29563 }, { "epoch": 2.1972500929022667, "grad_norm": 2.109206691564806, "learning_rate": 3.526073890201027e-06, "loss": 0.2844, "step": 29564 }, { "epoch": 2.197324414715719, "grad_norm": 1.9312376567341865, "learning_rate": 3.5254623807168066e-06, "loss": 0.3057, "step": 29565 }, { "epoch": 2.197398736529171, "grad_norm": 2.509144950547217, "learning_rate": 3.524850912915321e-06, "loss": 0.2519, "step": 29566 }, { "epoch": 2.1974730583426236, "grad_norm": 2.3661691870396018, "learning_rate": 3.5242394868005113e-06, "loss": 0.3022, "step": 29567 }, { "epoch": 2.1975473801560756, "grad_norm": 2.3662370707762337, "learning_rate": 3.523628102376312e-06, "loss": 0.3189, "step": 29568 }, { "epoch": 2.197621701969528, "grad_norm": 2.1663252364147665, "learning_rate": 3.5230167596466558e-06, "loss": 0.3294, "step": 29569 }, { "epoch": 2.19769602378298, "grad_norm": 3.641990551128293, "learning_rate": 3.5224054586154843e-06, "loss": 0.2553, "step": 29570 }, { "epoch": 2.1977703455964326, "grad_norm": 1.9965371442586939, "learning_rate": 3.5217941992867276e-06, "loss": 0.2733, "step": 29571 }, { "epoch": 2.1978446674098846, "grad_norm": 2.1282860854877446, "learning_rate": 3.5211829816643307e-06, "loss": 0.2778, "step": 29572 }, { "epoch": 2.197918989223337, "grad_norm": 2.5124218783654215, "learning_rate": 3.5205718057522143e-06, "loss": 0.272, "step": 29573 }, { "epoch": 2.1979933110367895, "grad_norm": 2.3638919325451218, "learning_rate": 3.5199606715543245e-06, "loss": 0.3574, "step": 29574 }, { "epoch": 2.1980676328502415, "grad_norm": 2.1155563553977923, "learning_rate": 3.5193495790745913e-06, "loss": 0.2125, "step": 29575 }, { "epoch": 2.198141954663694, "grad_norm": 2.9338559456809685, "learning_rate": 3.518738528316946e-06, "loss": 0.2821, "step": 29576 }, { "epoch": 2.198216276477146, "grad_norm": 1.9049325840507418, "learning_rate": 3.518127519285329e-06, "loss": 0.2265, "step": 29577 }, { "epoch": 2.1982905982905985, "grad_norm": 2.0529594107531146, "learning_rate": 3.5175165519836706e-06, "loss": 0.2556, "step": 29578 }, { "epoch": 2.1983649201040505, "grad_norm": 1.7930205533310477, "learning_rate": 3.516905626415901e-06, "loss": 0.265, "step": 29579 }, { "epoch": 2.198439241917503, "grad_norm": 2.412569039932927, "learning_rate": 3.5162947425859608e-06, "loss": 0.2801, "step": 29580 }, { "epoch": 2.198513563730955, "grad_norm": 2.0847375093201315, "learning_rate": 3.515683900497774e-06, "loss": 0.2792, "step": 29581 }, { "epoch": 2.1985878855444074, "grad_norm": 2.2049341745360787, "learning_rate": 3.5150731001552817e-06, "loss": 0.2003, "step": 29582 }, { "epoch": 2.1986622073578594, "grad_norm": 2.5593650778257855, "learning_rate": 3.5144623415624126e-06, "loss": 0.3594, "step": 29583 }, { "epoch": 2.198736529171312, "grad_norm": 2.1523183032604223, "learning_rate": 3.513851624723097e-06, "loss": 0.3059, "step": 29584 }, { "epoch": 2.198810850984764, "grad_norm": 2.026068572529711, "learning_rate": 3.513240949641269e-06, "loss": 0.2657, "step": 29585 }, { "epoch": 2.1988851727982164, "grad_norm": 2.190915029514822, "learning_rate": 3.5126303163208564e-06, "loss": 0.3651, "step": 29586 }, { "epoch": 2.1989594946116684, "grad_norm": 2.315303251595347, "learning_rate": 3.512019724765796e-06, "loss": 0.3056, "step": 29587 }, { "epoch": 2.199033816425121, "grad_norm": 2.0484736745702525, "learning_rate": 3.511409174980013e-06, "loss": 0.2831, "step": 29588 }, { "epoch": 2.199108138238573, "grad_norm": 1.728286781994131, "learning_rate": 3.510798666967444e-06, "loss": 0.2341, "step": 29589 }, { "epoch": 2.1991824600520253, "grad_norm": 2.6970145608317155, "learning_rate": 3.5101882007320163e-06, "loss": 0.3668, "step": 29590 }, { "epoch": 2.1992567818654774, "grad_norm": 3.155799154405321, "learning_rate": 3.509577776277657e-06, "loss": 0.3063, "step": 29591 }, { "epoch": 2.19933110367893, "grad_norm": 2.561644785177197, "learning_rate": 3.5089673936083035e-06, "loss": 0.2449, "step": 29592 }, { "epoch": 2.199405425492382, "grad_norm": 2.231534428048506, "learning_rate": 3.5083570527278776e-06, "loss": 0.2867, "step": 29593 }, { "epoch": 2.1994797473058343, "grad_norm": 2.29435822780015, "learning_rate": 3.5077467536403175e-06, "loss": 0.2697, "step": 29594 }, { "epoch": 2.1995540691192863, "grad_norm": 2.9731644337950422, "learning_rate": 3.5071364963495414e-06, "loss": 0.3016, "step": 29595 }, { "epoch": 2.1996283909327388, "grad_norm": 2.132503353546037, "learning_rate": 3.5065262808594867e-06, "loss": 0.2502, "step": 29596 }, { "epoch": 2.1997027127461912, "grad_norm": 2.791152695540267, "learning_rate": 3.5059161071740787e-06, "loss": 0.3661, "step": 29597 }, { "epoch": 2.1997770345596432, "grad_norm": 2.6217036064043158, "learning_rate": 3.505305975297243e-06, "loss": 0.3121, "step": 29598 }, { "epoch": 2.1998513563730957, "grad_norm": 2.2802201107783246, "learning_rate": 3.5046958852329136e-06, "loss": 0.2653, "step": 29599 }, { "epoch": 2.1999256781865477, "grad_norm": 2.795761950657962, "learning_rate": 3.504085836985015e-06, "loss": 0.3739, "step": 29600 }, { "epoch": 2.2, "grad_norm": 1.8735986129727134, "learning_rate": 3.503475830557471e-06, "loss": 0.2602, "step": 29601 }, { "epoch": 2.200074321813452, "grad_norm": 2.2988805112279103, "learning_rate": 3.5028658659542158e-06, "loss": 0.3206, "step": 29602 }, { "epoch": 2.2001486436269047, "grad_norm": 2.5542107043140834, "learning_rate": 3.50225594317917e-06, "loss": 0.3482, "step": 29603 }, { "epoch": 2.2002229654403567, "grad_norm": 1.9212627865133427, "learning_rate": 3.501646062236267e-06, "loss": 0.2148, "step": 29604 }, { "epoch": 2.200297287253809, "grad_norm": 2.2650294356977554, "learning_rate": 3.5010362231294294e-06, "loss": 0.2409, "step": 29605 }, { "epoch": 2.200371609067261, "grad_norm": 1.820228816681238, "learning_rate": 3.5004264258625832e-06, "loss": 0.2497, "step": 29606 }, { "epoch": 2.2004459308807136, "grad_norm": 1.9684015881556962, "learning_rate": 3.499816670439654e-06, "loss": 0.2173, "step": 29607 }, { "epoch": 2.2005202526941656, "grad_norm": 2.4895054659892932, "learning_rate": 3.499206956864565e-06, "loss": 0.295, "step": 29608 }, { "epoch": 2.200594574507618, "grad_norm": 3.0394408376559228, "learning_rate": 3.4985972851412475e-06, "loss": 0.3299, "step": 29609 }, { "epoch": 2.20066889632107, "grad_norm": 2.1985526289549524, "learning_rate": 3.4979876552736205e-06, "loss": 0.2726, "step": 29610 }, { "epoch": 2.2007432181345226, "grad_norm": 3.147173215412385, "learning_rate": 3.497378067265614e-06, "loss": 0.379, "step": 29611 }, { "epoch": 2.2008175399479746, "grad_norm": 2.2652007668692864, "learning_rate": 3.496768521121151e-06, "loss": 0.2972, "step": 29612 }, { "epoch": 2.200891861761427, "grad_norm": 2.653672833282138, "learning_rate": 3.4961590168441506e-06, "loss": 0.36, "step": 29613 }, { "epoch": 2.200966183574879, "grad_norm": 1.5799087167028911, "learning_rate": 3.495549554438545e-06, "loss": 0.1661, "step": 29614 }, { "epoch": 2.2010405053883315, "grad_norm": 1.7505348895099366, "learning_rate": 3.4949401339082535e-06, "loss": 0.2275, "step": 29615 }, { "epoch": 2.2011148272017835, "grad_norm": 2.415842827090476, "learning_rate": 3.494330755257197e-06, "loss": 0.3207, "step": 29616 }, { "epoch": 2.201189149015236, "grad_norm": 3.3864010704500056, "learning_rate": 3.4937214184893077e-06, "loss": 0.37, "step": 29617 }, { "epoch": 2.201263470828688, "grad_norm": 2.6430340116084525, "learning_rate": 3.4931121236084963e-06, "loss": 0.2173, "step": 29618 }, { "epoch": 2.2013377926421405, "grad_norm": 2.2881930474181242, "learning_rate": 3.492502870618695e-06, "loss": 0.3075, "step": 29619 }, { "epoch": 2.201412114455593, "grad_norm": 2.3878605916871316, "learning_rate": 3.4918936595238174e-06, "loss": 0.3692, "step": 29620 }, { "epoch": 2.201486436269045, "grad_norm": 1.8608522496102033, "learning_rate": 3.491284490327795e-06, "loss": 0.233, "step": 29621 }, { "epoch": 2.2015607580824974, "grad_norm": 2.49749363490679, "learning_rate": 3.490675363034546e-06, "loss": 0.3009, "step": 29622 }, { "epoch": 2.2016350798959494, "grad_norm": 2.9911371046867288, "learning_rate": 3.4900662776479876e-06, "loss": 0.3189, "step": 29623 }, { "epoch": 2.201709401709402, "grad_norm": 2.5022794243540067, "learning_rate": 3.489457234172048e-06, "loss": 0.3459, "step": 29624 }, { "epoch": 2.201783723522854, "grad_norm": 2.538589173742525, "learning_rate": 3.488848232610642e-06, "loss": 0.3054, "step": 29625 }, { "epoch": 2.2018580453363064, "grad_norm": 2.1037793577387003, "learning_rate": 3.4882392729676962e-06, "loss": 0.2503, "step": 29626 }, { "epoch": 2.2019323671497584, "grad_norm": 2.6267446043893816, "learning_rate": 3.487630355247128e-06, "loss": 0.3394, "step": 29627 }, { "epoch": 2.202006688963211, "grad_norm": 2.3529846389144, "learning_rate": 3.487021479452858e-06, "loss": 0.3271, "step": 29628 }, { "epoch": 2.202081010776663, "grad_norm": 1.7869484129410642, "learning_rate": 3.4864126455888046e-06, "loss": 0.2509, "step": 29629 }, { "epoch": 2.2021553325901153, "grad_norm": 2.1661079645846524, "learning_rate": 3.4858038536588866e-06, "loss": 0.2429, "step": 29630 }, { "epoch": 2.2022296544035673, "grad_norm": 3.147961845472232, "learning_rate": 3.4851951036670285e-06, "loss": 0.238, "step": 29631 }, { "epoch": 2.20230397621702, "grad_norm": 2.4548317297937454, "learning_rate": 3.4845863956171422e-06, "loss": 0.3288, "step": 29632 }, { "epoch": 2.202378298030472, "grad_norm": 2.4350574263752613, "learning_rate": 3.483977729513155e-06, "loss": 0.3143, "step": 29633 }, { "epoch": 2.2024526198439243, "grad_norm": 2.616029187432438, "learning_rate": 3.4833691053589802e-06, "loss": 0.2369, "step": 29634 }, { "epoch": 2.2025269416573763, "grad_norm": 2.594953295651126, "learning_rate": 3.4827605231585336e-06, "loss": 0.3596, "step": 29635 }, { "epoch": 2.2026012634708287, "grad_norm": 2.3882993821007545, "learning_rate": 3.4821519829157413e-06, "loss": 0.3057, "step": 29636 }, { "epoch": 2.2026755852842808, "grad_norm": 2.3883025352530876, "learning_rate": 3.481543484634515e-06, "loss": 0.3178, "step": 29637 }, { "epoch": 2.202749907097733, "grad_norm": 2.0767426918951224, "learning_rate": 3.4809350283187703e-06, "loss": 0.3082, "step": 29638 }, { "epoch": 2.2028242289111852, "grad_norm": 2.963163342769805, "learning_rate": 3.4803266139724347e-06, "loss": 0.286, "step": 29639 }, { "epoch": 2.2028985507246377, "grad_norm": 2.047353799177815, "learning_rate": 3.4797182415994112e-06, "loss": 0.2217, "step": 29640 }, { "epoch": 2.2029728725380897, "grad_norm": 2.703325169137384, "learning_rate": 3.4791099112036275e-06, "loss": 0.3943, "step": 29641 }, { "epoch": 2.203047194351542, "grad_norm": 1.963040414366966, "learning_rate": 3.4785016227889913e-06, "loss": 0.2457, "step": 29642 }, { "epoch": 2.2031215161649946, "grad_norm": 2.7010096190227957, "learning_rate": 3.477893376359427e-06, "loss": 0.2676, "step": 29643 }, { "epoch": 2.2031958379784466, "grad_norm": 3.0122508624357636, "learning_rate": 3.477285171918847e-06, "loss": 0.3263, "step": 29644 }, { "epoch": 2.203270159791899, "grad_norm": 1.6446223663325086, "learning_rate": 3.4766770094711634e-06, "loss": 0.1933, "step": 29645 }, { "epoch": 2.203344481605351, "grad_norm": 2.4397313082528678, "learning_rate": 3.4760688890202976e-06, "loss": 0.2207, "step": 29646 }, { "epoch": 2.2034188034188036, "grad_norm": 2.908403220189448, "learning_rate": 3.4754608105701594e-06, "loss": 0.3628, "step": 29647 }, { "epoch": 2.2034931252322556, "grad_norm": 2.0680569650284744, "learning_rate": 3.4748527741246675e-06, "loss": 0.2968, "step": 29648 }, { "epoch": 2.203567447045708, "grad_norm": 2.5589602366245043, "learning_rate": 3.4742447796877366e-06, "loss": 0.3951, "step": 29649 }, { "epoch": 2.20364176885916, "grad_norm": 2.3396153316011867, "learning_rate": 3.4736368272632747e-06, "loss": 0.3584, "step": 29650 }, { "epoch": 2.2037160906726125, "grad_norm": 3.0186309557595763, "learning_rate": 3.473028916855208e-06, "loss": 0.3825, "step": 29651 }, { "epoch": 2.2037904124860646, "grad_norm": 2.6712848601180346, "learning_rate": 3.472421048467435e-06, "loss": 0.3099, "step": 29652 }, { "epoch": 2.203864734299517, "grad_norm": 1.6143169072756907, "learning_rate": 3.4718132221038804e-06, "loss": 0.1934, "step": 29653 }, { "epoch": 2.203939056112969, "grad_norm": 2.8942657321228444, "learning_rate": 3.47120543776845e-06, "loss": 0.3193, "step": 29654 }, { "epoch": 2.2040133779264215, "grad_norm": 1.6415760441671583, "learning_rate": 3.4705976954650635e-06, "loss": 0.2453, "step": 29655 }, { "epoch": 2.2040876997398735, "grad_norm": 2.0534190933085306, "learning_rate": 3.46998999519763e-06, "loss": 0.2555, "step": 29656 }, { "epoch": 2.204162021553326, "grad_norm": 2.2226323778692807, "learning_rate": 3.4693823369700595e-06, "loss": 0.2683, "step": 29657 }, { "epoch": 2.204236343366778, "grad_norm": 2.433001579609456, "learning_rate": 3.4687747207862697e-06, "loss": 0.3747, "step": 29658 }, { "epoch": 2.2043106651802304, "grad_norm": 2.5021115928118673, "learning_rate": 3.4681671466501687e-06, "loss": 0.3226, "step": 29659 }, { "epoch": 2.2043849869936825, "grad_norm": 2.276648426841214, "learning_rate": 3.4675596145656663e-06, "loss": 0.3322, "step": 29660 }, { "epoch": 2.204459308807135, "grad_norm": 2.2191996184404243, "learning_rate": 3.4669521245366788e-06, "loss": 0.206, "step": 29661 }, { "epoch": 2.204533630620587, "grad_norm": 2.4945799693786914, "learning_rate": 3.466344676567115e-06, "loss": 0.369, "step": 29662 }, { "epoch": 2.2046079524340394, "grad_norm": 2.1380663863343528, "learning_rate": 3.4657372706608848e-06, "loss": 0.2838, "step": 29663 }, { "epoch": 2.2046822742474914, "grad_norm": 2.501204647177752, "learning_rate": 3.465129906821896e-06, "loss": 0.3396, "step": 29664 }, { "epoch": 2.204756596060944, "grad_norm": 2.535965567410393, "learning_rate": 3.4645225850540654e-06, "loss": 0.3194, "step": 29665 }, { "epoch": 2.2048309178743963, "grad_norm": 3.258989930072359, "learning_rate": 3.4639153053612984e-06, "loss": 0.2671, "step": 29666 }, { "epoch": 2.2049052396878484, "grad_norm": 4.701311139905235, "learning_rate": 3.463308067747503e-06, "loss": 0.2418, "step": 29667 }, { "epoch": 2.204979561501301, "grad_norm": 2.2233011984811952, "learning_rate": 3.4627008722165945e-06, "loss": 0.2692, "step": 29668 }, { "epoch": 2.205053883314753, "grad_norm": 2.4513264770128376, "learning_rate": 3.4620937187724747e-06, "loss": 0.3433, "step": 29669 }, { "epoch": 2.2051282051282053, "grad_norm": 2.2889841420446015, "learning_rate": 3.4614866074190603e-06, "loss": 0.2446, "step": 29670 }, { "epoch": 2.2052025269416573, "grad_norm": 1.8880737875078144, "learning_rate": 3.4608795381602543e-06, "loss": 0.27, "step": 29671 }, { "epoch": 2.2052768487551098, "grad_norm": 2.1570653844460996, "learning_rate": 3.4602725109999645e-06, "loss": 0.3189, "step": 29672 }, { "epoch": 2.205351170568562, "grad_norm": 2.175249535960496, "learning_rate": 3.4596655259421076e-06, "loss": 0.2623, "step": 29673 }, { "epoch": 2.2054254923820142, "grad_norm": 2.263588603659943, "learning_rate": 3.4590585829905774e-06, "loss": 0.2752, "step": 29674 }, { "epoch": 2.2054998141954663, "grad_norm": 2.2700811546367787, "learning_rate": 3.4584516821492932e-06, "loss": 0.2771, "step": 29675 }, { "epoch": 2.2055741360089187, "grad_norm": 2.765511849595323, "learning_rate": 3.457844823422153e-06, "loss": 0.32, "step": 29676 }, { "epoch": 2.2056484578223707, "grad_norm": 2.799609726445428, "learning_rate": 3.457238006813073e-06, "loss": 0.3353, "step": 29677 }, { "epoch": 2.205722779635823, "grad_norm": 1.697492509787612, "learning_rate": 3.4566312323259542e-06, "loss": 0.233, "step": 29678 }, { "epoch": 2.205797101449275, "grad_norm": 2.815355170501909, "learning_rate": 3.4560244999647007e-06, "loss": 0.3048, "step": 29679 }, { "epoch": 2.2058714232627277, "grad_norm": 2.6397259644424262, "learning_rate": 3.4554178097332257e-06, "loss": 0.3569, "step": 29680 }, { "epoch": 2.2059457450761797, "grad_norm": 2.0502331582843105, "learning_rate": 3.4548111616354307e-06, "loss": 0.2792, "step": 29681 }, { "epoch": 2.206020066889632, "grad_norm": 2.2226099891286077, "learning_rate": 3.454204555675219e-06, "loss": 0.2288, "step": 29682 }, { "epoch": 2.206094388703084, "grad_norm": 2.0819624973762885, "learning_rate": 3.4535979918565022e-06, "loss": 0.2714, "step": 29683 }, { "epoch": 2.2061687105165366, "grad_norm": 2.2655095253943904, "learning_rate": 3.452991470183178e-06, "loss": 0.2569, "step": 29684 }, { "epoch": 2.206243032329989, "grad_norm": 2.5396261090678665, "learning_rate": 3.452384990659162e-06, "loss": 0.3431, "step": 29685 }, { "epoch": 2.206317354143441, "grad_norm": 2.833443809148875, "learning_rate": 3.4517785532883454e-06, "loss": 0.4054, "step": 29686 }, { "epoch": 2.206391675956893, "grad_norm": 1.8938659865545038, "learning_rate": 3.4511721580746415e-06, "loss": 0.1928, "step": 29687 }, { "epoch": 2.2064659977703456, "grad_norm": 2.2204311064611524, "learning_rate": 3.4505658050219517e-06, "loss": 0.3132, "step": 29688 }, { "epoch": 2.206540319583798, "grad_norm": 2.3105633605449394, "learning_rate": 3.449959494134176e-06, "loss": 0.2603, "step": 29689 }, { "epoch": 2.20661464139725, "grad_norm": 2.434446607682523, "learning_rate": 3.449353225415224e-06, "loss": 0.3405, "step": 29690 }, { "epoch": 2.2066889632107025, "grad_norm": 2.6515371460227497, "learning_rate": 3.4487469988689937e-06, "loss": 0.2955, "step": 29691 }, { "epoch": 2.2067632850241545, "grad_norm": 2.134719511705749, "learning_rate": 3.4481408144993934e-06, "loss": 0.3038, "step": 29692 }, { "epoch": 2.206837606837607, "grad_norm": 2.007393166204669, "learning_rate": 3.447534672310322e-06, "loss": 0.2455, "step": 29693 }, { "epoch": 2.206911928651059, "grad_norm": 2.4667742668676955, "learning_rate": 3.446928572305679e-06, "loss": 0.3447, "step": 29694 }, { "epoch": 2.2069862504645115, "grad_norm": 3.057966149368317, "learning_rate": 3.446322514489374e-06, "loss": 0.3749, "step": 29695 }, { "epoch": 2.2070605722779635, "grad_norm": 2.527477287151028, "learning_rate": 3.4457164988653045e-06, "loss": 0.3548, "step": 29696 }, { "epoch": 2.207134894091416, "grad_norm": 2.8125591184620142, "learning_rate": 3.4451105254373716e-06, "loss": 0.3476, "step": 29697 }, { "epoch": 2.207209215904868, "grad_norm": 1.9204945817446215, "learning_rate": 3.4445045942094735e-06, "loss": 0.2504, "step": 29698 }, { "epoch": 2.2072835377183204, "grad_norm": 2.392452571797859, "learning_rate": 3.443898705185519e-06, "loss": 0.3143, "step": 29699 }, { "epoch": 2.2073578595317724, "grad_norm": 1.9081486691092628, "learning_rate": 3.4432928583694036e-06, "loss": 0.206, "step": 29700 }, { "epoch": 2.207432181345225, "grad_norm": 2.366429818244167, "learning_rate": 3.4426870537650258e-06, "loss": 0.262, "step": 29701 }, { "epoch": 2.207506503158677, "grad_norm": 1.9154083699691429, "learning_rate": 3.4420812913762915e-06, "loss": 0.2489, "step": 29702 }, { "epoch": 2.2075808249721294, "grad_norm": 2.804635378470971, "learning_rate": 3.441475571207097e-06, "loss": 0.317, "step": 29703 }, { "epoch": 2.2076551467855814, "grad_norm": 2.489827580578484, "learning_rate": 3.4408698932613405e-06, "loss": 0.3255, "step": 29704 }, { "epoch": 2.207729468599034, "grad_norm": 1.8308926097228182, "learning_rate": 3.440264257542926e-06, "loss": 0.2711, "step": 29705 }, { "epoch": 2.207803790412486, "grad_norm": 1.7652396789402585, "learning_rate": 3.439658664055746e-06, "loss": 0.2558, "step": 29706 }, { "epoch": 2.2078781122259383, "grad_norm": 3.1570377597103136, "learning_rate": 3.4390531128037097e-06, "loss": 0.3276, "step": 29707 }, { "epoch": 2.207952434039391, "grad_norm": 2.820615648765942, "learning_rate": 3.438447603790703e-06, "loss": 0.3219, "step": 29708 }, { "epoch": 2.208026755852843, "grad_norm": 2.223680101908619, "learning_rate": 3.437842137020634e-06, "loss": 0.3168, "step": 29709 }, { "epoch": 2.2081010776662953, "grad_norm": 2.193039330512779, "learning_rate": 3.437236712497396e-06, "loss": 0.2875, "step": 29710 }, { "epoch": 2.2081753994797473, "grad_norm": 2.100566404195267, "learning_rate": 3.4366313302248845e-06, "loss": 0.2822, "step": 29711 }, { "epoch": 2.2082497212931997, "grad_norm": 2.0213196012008807, "learning_rate": 3.4360259902070024e-06, "loss": 0.2805, "step": 29712 }, { "epoch": 2.2083240431066518, "grad_norm": 2.230150080465399, "learning_rate": 3.4354206924476428e-06, "loss": 0.2661, "step": 29713 }, { "epoch": 2.208398364920104, "grad_norm": 2.0717195225745355, "learning_rate": 3.4348154369507058e-06, "loss": 0.2754, "step": 29714 }, { "epoch": 2.2084726867335562, "grad_norm": 2.293395618057664, "learning_rate": 3.434210223720088e-06, "loss": 0.2505, "step": 29715 }, { "epoch": 2.2085470085470087, "grad_norm": 2.59811790942877, "learning_rate": 3.43360505275968e-06, "loss": 0.3573, "step": 29716 }, { "epoch": 2.2086213303604607, "grad_norm": 2.1222445121826503, "learning_rate": 3.4329999240733845e-06, "loss": 0.2325, "step": 29717 }, { "epoch": 2.208695652173913, "grad_norm": 2.5274154531914563, "learning_rate": 3.432394837665095e-06, "loss": 0.3504, "step": 29718 }, { "epoch": 2.208769973987365, "grad_norm": 2.045141498626556, "learning_rate": 3.431789793538708e-06, "loss": 0.2234, "step": 29719 }, { "epoch": 2.2088442958008176, "grad_norm": 2.4589352080157094, "learning_rate": 3.4311847916981134e-06, "loss": 0.2748, "step": 29720 }, { "epoch": 2.2089186176142697, "grad_norm": 2.1426939505906937, "learning_rate": 3.430579832147214e-06, "loss": 0.2755, "step": 29721 }, { "epoch": 2.208992939427722, "grad_norm": 2.5813294761077836, "learning_rate": 3.4299749148898996e-06, "loss": 0.2735, "step": 29722 }, { "epoch": 2.209067261241174, "grad_norm": 2.9039419881220008, "learning_rate": 3.4293700399300633e-06, "loss": 0.3176, "step": 29723 }, { "epoch": 2.2091415830546266, "grad_norm": 2.516204360773739, "learning_rate": 3.4287652072716047e-06, "loss": 0.2454, "step": 29724 }, { "epoch": 2.2092159048680786, "grad_norm": 1.6425808644245992, "learning_rate": 3.428160416918416e-06, "loss": 0.2129, "step": 29725 }, { "epoch": 2.209290226681531, "grad_norm": 2.1108563057744196, "learning_rate": 3.427555668874385e-06, "loss": 0.2526, "step": 29726 }, { "epoch": 2.209364548494983, "grad_norm": 2.9923882345531854, "learning_rate": 3.426950963143413e-06, "loss": 0.3588, "step": 29727 }, { "epoch": 2.2094388703084356, "grad_norm": 5.010097267304817, "learning_rate": 3.426346299729386e-06, "loss": 0.3322, "step": 29728 }, { "epoch": 2.2095131921218876, "grad_norm": 2.423882829528333, "learning_rate": 3.425741678636205e-06, "loss": 0.3919, "step": 29729 }, { "epoch": 2.20958751393534, "grad_norm": 2.2182836709344262, "learning_rate": 3.425137099867757e-06, "loss": 0.2972, "step": 29730 }, { "epoch": 2.2096618357487925, "grad_norm": 1.9939560848529387, "learning_rate": 3.424532563427936e-06, "loss": 0.28, "step": 29731 }, { "epoch": 2.2097361575622445, "grad_norm": 1.935800087599817, "learning_rate": 3.423928069320632e-06, "loss": 0.2337, "step": 29732 }, { "epoch": 2.209810479375697, "grad_norm": 1.7311540079742038, "learning_rate": 3.4233236175497353e-06, "loss": 0.2042, "step": 29733 }, { "epoch": 2.209884801189149, "grad_norm": 2.843307293404125, "learning_rate": 3.422719208119143e-06, "loss": 0.3229, "step": 29734 }, { "epoch": 2.2099591230026014, "grad_norm": 2.1911337347062325, "learning_rate": 3.4221148410327397e-06, "loss": 0.3015, "step": 29735 }, { "epoch": 2.2100334448160535, "grad_norm": 2.150572749791391, "learning_rate": 3.421510516294424e-06, "loss": 0.2453, "step": 29736 }, { "epoch": 2.210107766629506, "grad_norm": 2.3865596400820808, "learning_rate": 3.4209062339080814e-06, "loss": 0.3621, "step": 29737 }, { "epoch": 2.210182088442958, "grad_norm": 2.3882711771523972, "learning_rate": 3.4203019938775994e-06, "loss": 0.3298, "step": 29738 }, { "epoch": 2.2102564102564104, "grad_norm": 2.333951589360308, "learning_rate": 3.4196977962068756e-06, "loss": 0.2533, "step": 29739 }, { "epoch": 2.2103307320698624, "grad_norm": 2.576931447425266, "learning_rate": 3.419093640899793e-06, "loss": 0.2893, "step": 29740 }, { "epoch": 2.210405053883315, "grad_norm": 3.2450953363680957, "learning_rate": 3.4184895279602505e-06, "loss": 0.4004, "step": 29741 }, { "epoch": 2.210479375696767, "grad_norm": 2.0286285909263055, "learning_rate": 3.417885457392127e-06, "loss": 0.2987, "step": 29742 }, { "epoch": 2.2105536975102194, "grad_norm": 2.2932991814775985, "learning_rate": 3.417281429199313e-06, "loss": 0.3778, "step": 29743 }, { "epoch": 2.2106280193236714, "grad_norm": 2.38339839729009, "learning_rate": 3.416677443385703e-06, "loss": 0.3547, "step": 29744 }, { "epoch": 2.210702341137124, "grad_norm": 2.2097415979930934, "learning_rate": 3.416073499955178e-06, "loss": 0.2957, "step": 29745 }, { "epoch": 2.210776662950576, "grad_norm": 2.3298199528435535, "learning_rate": 3.415469598911634e-06, "loss": 0.2664, "step": 29746 }, { "epoch": 2.2108509847640283, "grad_norm": 2.1365020076506673, "learning_rate": 3.414865740258955e-06, "loss": 0.2597, "step": 29747 }, { "epoch": 2.2109253065774803, "grad_norm": 2.13082929108243, "learning_rate": 3.4142619240010245e-06, "loss": 0.2445, "step": 29748 }, { "epoch": 2.210999628390933, "grad_norm": 2.3124119244260357, "learning_rate": 3.4136581501417387e-06, "loss": 0.319, "step": 29749 }, { "epoch": 2.211073950204385, "grad_norm": 2.141478834533029, "learning_rate": 3.4130544186849757e-06, "loss": 0.3492, "step": 29750 }, { "epoch": 2.2111482720178373, "grad_norm": 2.5161189916382125, "learning_rate": 3.41245072963463e-06, "loss": 0.3527, "step": 29751 }, { "epoch": 2.2112225938312893, "grad_norm": 2.36830779073478, "learning_rate": 3.411847082994585e-06, "loss": 0.2677, "step": 29752 }, { "epoch": 2.2112969156447417, "grad_norm": 1.8525689535258156, "learning_rate": 3.4112434787687265e-06, "loss": 0.2006, "step": 29753 }, { "epoch": 2.211371237458194, "grad_norm": 2.5700042111687376, "learning_rate": 3.4106399169609406e-06, "loss": 0.348, "step": 29754 }, { "epoch": 2.211445559271646, "grad_norm": 2.8935470438838244, "learning_rate": 3.4100363975751093e-06, "loss": 0.3704, "step": 29755 }, { "epoch": 2.2115198810850987, "grad_norm": 2.923293845108369, "learning_rate": 3.4094329206151254e-06, "loss": 0.3202, "step": 29756 }, { "epoch": 2.2115942028985507, "grad_norm": 2.8446346238905265, "learning_rate": 3.408829486084868e-06, "loss": 0.3228, "step": 29757 }, { "epoch": 2.211668524712003, "grad_norm": 3.3696354365430694, "learning_rate": 3.4082260939882273e-06, "loss": 0.3672, "step": 29758 }, { "epoch": 2.211742846525455, "grad_norm": 2.361825418155822, "learning_rate": 3.4076227443290843e-06, "loss": 0.3262, "step": 29759 }, { "epoch": 2.2118171683389076, "grad_norm": 2.258142739418014, "learning_rate": 3.4070194371113207e-06, "loss": 0.2658, "step": 29760 }, { "epoch": 2.2118914901523596, "grad_norm": 2.1016277883887926, "learning_rate": 3.406416172338828e-06, "loss": 0.3026, "step": 29761 }, { "epoch": 2.211965811965812, "grad_norm": 2.03176304214481, "learning_rate": 3.4058129500154844e-06, "loss": 0.2115, "step": 29762 }, { "epoch": 2.212040133779264, "grad_norm": 2.010552570682154, "learning_rate": 3.4052097701451727e-06, "loss": 0.2192, "step": 29763 }, { "epoch": 2.2121144555927166, "grad_norm": 3.1128367478725765, "learning_rate": 3.404606632731785e-06, "loss": 0.3654, "step": 29764 }, { "epoch": 2.2121887774061686, "grad_norm": 3.1312404319358262, "learning_rate": 3.4040035377791902e-06, "loss": 0.4382, "step": 29765 }, { "epoch": 2.212263099219621, "grad_norm": 1.860878547726397, "learning_rate": 3.4034004852912826e-06, "loss": 0.2973, "step": 29766 }, { "epoch": 2.212337421033073, "grad_norm": 2.021538000526151, "learning_rate": 3.402797475271937e-06, "loss": 0.2471, "step": 29767 }, { "epoch": 2.2124117428465255, "grad_norm": 2.7379024729777033, "learning_rate": 3.402194507725042e-06, "loss": 0.3413, "step": 29768 }, { "epoch": 2.2124860646599775, "grad_norm": 2.2597778303963754, "learning_rate": 3.4015915826544764e-06, "loss": 0.2581, "step": 29769 }, { "epoch": 2.21256038647343, "grad_norm": 2.095786817323764, "learning_rate": 3.4009887000641185e-06, "loss": 0.2248, "step": 29770 }, { "epoch": 2.212634708286882, "grad_norm": 2.36472285259332, "learning_rate": 3.4003858599578555e-06, "loss": 0.2477, "step": 29771 }, { "epoch": 2.2127090301003345, "grad_norm": 1.9816477890815223, "learning_rate": 3.3997830623395634e-06, "loss": 0.2629, "step": 29772 }, { "epoch": 2.2127833519137865, "grad_norm": 2.5479601932286657, "learning_rate": 3.399180307213129e-06, "loss": 0.3479, "step": 29773 }, { "epoch": 2.212857673727239, "grad_norm": 2.807560293767338, "learning_rate": 3.3985775945824294e-06, "loss": 0.39, "step": 29774 }, { "epoch": 2.212931995540691, "grad_norm": 2.4329690548033045, "learning_rate": 3.3979749244513436e-06, "loss": 0.3112, "step": 29775 }, { "epoch": 2.2130063173541434, "grad_norm": 3.53308223695489, "learning_rate": 3.3973722968237545e-06, "loss": 0.3966, "step": 29776 }, { "epoch": 2.213080639167596, "grad_norm": 2.8950481751317723, "learning_rate": 3.3967697117035347e-06, "loss": 0.2784, "step": 29777 }, { "epoch": 2.213154960981048, "grad_norm": 2.3517207679734606, "learning_rate": 3.396167169094573e-06, "loss": 0.2178, "step": 29778 }, { "epoch": 2.2132292827945004, "grad_norm": 2.7832834896156773, "learning_rate": 3.3955646690007414e-06, "loss": 0.3609, "step": 29779 }, { "epoch": 2.2133036046079524, "grad_norm": 2.5620631999218046, "learning_rate": 3.394962211425925e-06, "loss": 0.2721, "step": 29780 }, { "epoch": 2.213377926421405, "grad_norm": 2.273412968471533, "learning_rate": 3.3943597963739993e-06, "loss": 0.2724, "step": 29781 }, { "epoch": 2.213452248234857, "grad_norm": 2.480936142135601, "learning_rate": 3.393757423848838e-06, "loss": 0.3311, "step": 29782 }, { "epoch": 2.2135265700483093, "grad_norm": 2.2356346595079155, "learning_rate": 3.393155093854328e-06, "loss": 0.2631, "step": 29783 }, { "epoch": 2.2136008918617613, "grad_norm": 2.5680448691944946, "learning_rate": 3.3925528063943426e-06, "loss": 0.3274, "step": 29784 }, { "epoch": 2.213675213675214, "grad_norm": 2.5208435721544826, "learning_rate": 3.3919505614727566e-06, "loss": 0.295, "step": 29785 }, { "epoch": 2.213749535488666, "grad_norm": 1.782900210379364, "learning_rate": 3.391348359093456e-06, "loss": 0.2718, "step": 29786 }, { "epoch": 2.2138238573021183, "grad_norm": 2.5144665269548105, "learning_rate": 3.390746199260305e-06, "loss": 0.291, "step": 29787 }, { "epoch": 2.2138981791155703, "grad_norm": 2.2922888367931313, "learning_rate": 3.3901440819771902e-06, "loss": 0.3324, "step": 29788 }, { "epoch": 2.2139725009290228, "grad_norm": 2.3101397906154038, "learning_rate": 3.3895420072479824e-06, "loss": 0.3651, "step": 29789 }, { "epoch": 2.2140468227424748, "grad_norm": 2.3123005875500224, "learning_rate": 3.388939975076563e-06, "loss": 0.3261, "step": 29790 }, { "epoch": 2.2141211445559272, "grad_norm": 2.2303887474517814, "learning_rate": 3.3883379854668052e-06, "loss": 0.1999, "step": 29791 }, { "epoch": 2.2141954663693793, "grad_norm": 1.9130327374581935, "learning_rate": 3.38773603842258e-06, "loss": 0.2219, "step": 29792 }, { "epoch": 2.2142697881828317, "grad_norm": 2.684807576836919, "learning_rate": 3.3871341339477716e-06, "loss": 0.3922, "step": 29793 }, { "epoch": 2.2143441099962837, "grad_norm": 2.3881317378667712, "learning_rate": 3.3865322720462466e-06, "loss": 0.3386, "step": 29794 }, { "epoch": 2.214418431809736, "grad_norm": 2.4310944126173784, "learning_rate": 3.385930452721887e-06, "loss": 0.3864, "step": 29795 }, { "epoch": 2.214492753623188, "grad_norm": 2.168975378712818, "learning_rate": 3.3853286759785653e-06, "loss": 0.2307, "step": 29796 }, { "epoch": 2.2145670754366407, "grad_norm": 2.992391340463989, "learning_rate": 3.38472694182015e-06, "loss": 0.3184, "step": 29797 }, { "epoch": 2.2146413972500927, "grad_norm": 1.632885528298274, "learning_rate": 3.3841252502505263e-06, "loss": 0.1966, "step": 29798 }, { "epoch": 2.214715719063545, "grad_norm": 2.7222730786489446, "learning_rate": 3.3835236012735538e-06, "loss": 0.2864, "step": 29799 }, { "epoch": 2.2147900408769976, "grad_norm": 2.7949092727423976, "learning_rate": 3.382921994893116e-06, "loss": 0.2879, "step": 29800 }, { "epoch": 2.2148643626904496, "grad_norm": 2.580410512750779, "learning_rate": 3.382320431113081e-06, "loss": 0.344, "step": 29801 }, { "epoch": 2.214938684503902, "grad_norm": 2.1088122637200777, "learning_rate": 3.381718909937326e-06, "loss": 0.2944, "step": 29802 }, { "epoch": 2.215013006317354, "grad_norm": 2.1674797001458397, "learning_rate": 3.3811174313697213e-06, "loss": 0.2989, "step": 29803 }, { "epoch": 2.2150873281308066, "grad_norm": 2.27567208009127, "learning_rate": 3.3805159954141366e-06, "loss": 0.2696, "step": 29804 }, { "epoch": 2.2151616499442586, "grad_norm": 2.018974042277379, "learning_rate": 3.379914602074449e-06, "loss": 0.2801, "step": 29805 }, { "epoch": 2.215235971757711, "grad_norm": 2.566793666968628, "learning_rate": 3.379313251354528e-06, "loss": 0.2773, "step": 29806 }, { "epoch": 2.215310293571163, "grad_norm": 2.028614510779867, "learning_rate": 3.3787119432582415e-06, "loss": 0.3112, "step": 29807 }, { "epoch": 2.2153846153846155, "grad_norm": 2.743292838726069, "learning_rate": 3.3781106777894677e-06, "loss": 0.34, "step": 29808 }, { "epoch": 2.2154589371980675, "grad_norm": 1.8182922196443012, "learning_rate": 3.3775094549520736e-06, "loss": 0.2524, "step": 29809 }, { "epoch": 2.21553325901152, "grad_norm": 2.069362211756635, "learning_rate": 3.3769082747499295e-06, "loss": 0.2451, "step": 29810 }, { "epoch": 2.215607580824972, "grad_norm": 2.0596936543413187, "learning_rate": 3.3763071371869026e-06, "loss": 0.223, "step": 29811 }, { "epoch": 2.2156819026384245, "grad_norm": 2.0550526414229426, "learning_rate": 3.3757060422668707e-06, "loss": 0.2629, "step": 29812 }, { "epoch": 2.2157562244518765, "grad_norm": 2.1462615763595108, "learning_rate": 3.3751049899937004e-06, "loss": 0.2713, "step": 29813 }, { "epoch": 2.215830546265329, "grad_norm": 2.17500856697249, "learning_rate": 3.3745039803712555e-06, "loss": 0.3572, "step": 29814 }, { "epoch": 2.215904868078781, "grad_norm": 2.1726907282673316, "learning_rate": 3.3739030134034145e-06, "loss": 0.3295, "step": 29815 }, { "epoch": 2.2159791898922334, "grad_norm": 2.35848082690626, "learning_rate": 3.3733020890940383e-06, "loss": 0.2772, "step": 29816 }, { "epoch": 2.2160535117056854, "grad_norm": 3.0419708715231657, "learning_rate": 3.372701207447003e-06, "loss": 0.3615, "step": 29817 }, { "epoch": 2.216127833519138, "grad_norm": 2.534515573122179, "learning_rate": 3.3721003684661734e-06, "loss": 0.3951, "step": 29818 }, { "epoch": 2.21620215533259, "grad_norm": 2.3492256956273905, "learning_rate": 3.3714995721554144e-06, "loss": 0.3575, "step": 29819 }, { "epoch": 2.2162764771460424, "grad_norm": 2.306266693611153, "learning_rate": 3.3708988185186043e-06, "loss": 0.2414, "step": 29820 }, { "epoch": 2.2163507989594944, "grad_norm": 4.7012822492920625, "learning_rate": 3.3702981075595963e-06, "loss": 0.1931, "step": 29821 }, { "epoch": 2.216425120772947, "grad_norm": 2.053640641962391, "learning_rate": 3.369697439282269e-06, "loss": 0.2565, "step": 29822 }, { "epoch": 2.2164994425863993, "grad_norm": 2.3792006984509477, "learning_rate": 3.369096813690481e-06, "loss": 0.2906, "step": 29823 }, { "epoch": 2.2165737643998513, "grad_norm": 2.4471602878425256, "learning_rate": 3.3684962307881088e-06, "loss": 0.3076, "step": 29824 }, { "epoch": 2.216648086213304, "grad_norm": 2.0523728525453415, "learning_rate": 3.367895690579013e-06, "loss": 0.2579, "step": 29825 }, { "epoch": 2.216722408026756, "grad_norm": 1.8607839611022163, "learning_rate": 3.3672951930670572e-06, "loss": 0.253, "step": 29826 }, { "epoch": 2.2167967298402083, "grad_norm": 2.4336641256103504, "learning_rate": 3.366694738256113e-06, "loss": 0.3379, "step": 29827 }, { "epoch": 2.2168710516536603, "grad_norm": 2.3111972351960324, "learning_rate": 3.3660943261500458e-06, "loss": 0.3175, "step": 29828 }, { "epoch": 2.2169453734671127, "grad_norm": 2.3068747258041142, "learning_rate": 3.3654939567527145e-06, "loss": 0.3854, "step": 29829 }, { "epoch": 2.2170196952805648, "grad_norm": 2.276613758137051, "learning_rate": 3.3648936300679936e-06, "loss": 0.3423, "step": 29830 }, { "epoch": 2.217094017094017, "grad_norm": 2.5098989802878147, "learning_rate": 3.364293346099742e-06, "loss": 0.317, "step": 29831 }, { "epoch": 2.2171683389074692, "grad_norm": 2.1926088210968255, "learning_rate": 3.3636931048518264e-06, "loss": 0.2711, "step": 29832 }, { "epoch": 2.2172426607209217, "grad_norm": 2.0429895867699917, "learning_rate": 3.3630929063281058e-06, "loss": 0.1902, "step": 29833 }, { "epoch": 2.2173169825343737, "grad_norm": 1.659042110452009, "learning_rate": 3.3624927505324523e-06, "loss": 0.2848, "step": 29834 }, { "epoch": 2.217391304347826, "grad_norm": 2.0386271578380684, "learning_rate": 3.3618926374687267e-06, "loss": 0.2367, "step": 29835 }, { "epoch": 2.217465626161278, "grad_norm": 1.910400665017042, "learning_rate": 3.361292567140788e-06, "loss": 0.2896, "step": 29836 }, { "epoch": 2.2175399479747306, "grad_norm": 2.291169377229678, "learning_rate": 3.360692539552507e-06, "loss": 0.3401, "step": 29837 }, { "epoch": 2.2176142697881827, "grad_norm": 2.039550619367461, "learning_rate": 3.36009255470774e-06, "loss": 0.2635, "step": 29838 }, { "epoch": 2.217688591601635, "grad_norm": 2.18749359641876, "learning_rate": 3.3594926126103555e-06, "loss": 0.2596, "step": 29839 }, { "epoch": 2.217762913415087, "grad_norm": 2.2497741394914734, "learning_rate": 3.3588927132642123e-06, "loss": 0.2326, "step": 29840 }, { "epoch": 2.2178372352285396, "grad_norm": 2.1225892262509265, "learning_rate": 3.358292856673171e-06, "loss": 0.2413, "step": 29841 }, { "epoch": 2.217911557041992, "grad_norm": 2.671340068161466, "learning_rate": 3.3576930428410982e-06, "loss": 0.2205, "step": 29842 }, { "epoch": 2.217985878855444, "grad_norm": 2.6170307559764994, "learning_rate": 3.357093271771853e-06, "loss": 0.1979, "step": 29843 }, { "epoch": 2.218060200668896, "grad_norm": 2.041846222524159, "learning_rate": 3.356493543469297e-06, "loss": 0.2441, "step": 29844 }, { "epoch": 2.2181345224823485, "grad_norm": 2.20262250123766, "learning_rate": 3.355893857937288e-06, "loss": 0.3168, "step": 29845 }, { "epoch": 2.218208844295801, "grad_norm": 2.4634789315717187, "learning_rate": 3.355294215179692e-06, "loss": 0.334, "step": 29846 }, { "epoch": 2.218283166109253, "grad_norm": 2.5643777986051437, "learning_rate": 3.354694615200368e-06, "loss": 0.3144, "step": 29847 }, { "epoch": 2.2183574879227055, "grad_norm": 2.130645157013882, "learning_rate": 3.3540950580031707e-06, "loss": 0.2454, "step": 29848 }, { "epoch": 2.2184318097361575, "grad_norm": 2.2326573128390734, "learning_rate": 3.353495543591969e-06, "loss": 0.2616, "step": 29849 }, { "epoch": 2.21850613154961, "grad_norm": 2.033171115992519, "learning_rate": 3.352896071970617e-06, "loss": 0.2463, "step": 29850 }, { "epoch": 2.218580453363062, "grad_norm": 2.1687997572934505, "learning_rate": 3.3522966431429727e-06, "loss": 0.2874, "step": 29851 }, { "epoch": 2.2186547751765144, "grad_norm": 2.121008045916159, "learning_rate": 3.3516972571129016e-06, "loss": 0.274, "step": 29852 }, { "epoch": 2.2187290969899665, "grad_norm": 2.0079586139689254, "learning_rate": 3.351097913884255e-06, "loss": 0.2744, "step": 29853 }, { "epoch": 2.218803418803419, "grad_norm": 1.6842702266690082, "learning_rate": 3.3504986134609006e-06, "loss": 0.2142, "step": 29854 }, { "epoch": 2.218877740616871, "grad_norm": 2.228135151953428, "learning_rate": 3.3498993558466854e-06, "loss": 0.1411, "step": 29855 }, { "epoch": 2.2189520624303234, "grad_norm": 2.5081940826933034, "learning_rate": 3.349300141045476e-06, "loss": 0.3699, "step": 29856 }, { "epoch": 2.2190263842437754, "grad_norm": 2.5599031187278602, "learning_rate": 3.3487009690611272e-06, "loss": 0.3442, "step": 29857 }, { "epoch": 2.219100706057228, "grad_norm": 1.920090475515817, "learning_rate": 3.348101839897493e-06, "loss": 0.2774, "step": 29858 }, { "epoch": 2.21917502787068, "grad_norm": 2.123457328240714, "learning_rate": 3.347502753558438e-06, "loss": 0.2268, "step": 29859 }, { "epoch": 2.2192493496841323, "grad_norm": 2.0239663230355993, "learning_rate": 3.346903710047812e-06, "loss": 0.2749, "step": 29860 }, { "epoch": 2.2193236714975844, "grad_norm": 2.204112275035308, "learning_rate": 3.3463047093694767e-06, "loss": 0.2572, "step": 29861 }, { "epoch": 2.219397993311037, "grad_norm": 1.9216174265134198, "learning_rate": 3.345705751527287e-06, "loss": 0.1968, "step": 29862 }, { "epoch": 2.219472315124489, "grad_norm": 2.204715901010796, "learning_rate": 3.3451068365250962e-06, "loss": 0.243, "step": 29863 }, { "epoch": 2.2195466369379413, "grad_norm": 1.921339929252967, "learning_rate": 3.3445079643667644e-06, "loss": 0.3003, "step": 29864 }, { "epoch": 2.2196209587513938, "grad_norm": 2.5697367921004055, "learning_rate": 3.343909135056145e-06, "loss": 0.31, "step": 29865 }, { "epoch": 2.2196952805648458, "grad_norm": 1.9043931941677132, "learning_rate": 3.343310348597093e-06, "loss": 0.2043, "step": 29866 }, { "epoch": 2.2197696023782982, "grad_norm": 2.0418157570778623, "learning_rate": 3.342711604993465e-06, "loss": 0.2756, "step": 29867 }, { "epoch": 2.2198439241917503, "grad_norm": 2.490406628514423, "learning_rate": 3.34211290424911e-06, "loss": 0.3155, "step": 29868 }, { "epoch": 2.2199182460052027, "grad_norm": 2.7018544461237406, "learning_rate": 3.3415142463678896e-06, "loss": 0.3858, "step": 29869 }, { "epoch": 2.2199925678186547, "grad_norm": 1.748815930094434, "learning_rate": 3.3409156313536516e-06, "loss": 0.2532, "step": 29870 }, { "epoch": 2.220066889632107, "grad_norm": 2.4635888937004, "learning_rate": 3.3403170592102575e-06, "loss": 0.3456, "step": 29871 }, { "epoch": 2.220141211445559, "grad_norm": 1.8608879890430428, "learning_rate": 3.3397185299415556e-06, "loss": 0.2707, "step": 29872 }, { "epoch": 2.2202155332590117, "grad_norm": 2.1549198986435005, "learning_rate": 3.339120043551398e-06, "loss": 0.2939, "step": 29873 }, { "epoch": 2.2202898550724637, "grad_norm": 2.197143317242756, "learning_rate": 3.338521600043643e-06, "loss": 0.2731, "step": 29874 }, { "epoch": 2.220364176885916, "grad_norm": 2.2127192517118983, "learning_rate": 3.337923199422136e-06, "loss": 0.2836, "step": 29875 }, { "epoch": 2.220438498699368, "grad_norm": 2.306851015136893, "learning_rate": 3.337324841690739e-06, "loss": 0.2647, "step": 29876 }, { "epoch": 2.2205128205128206, "grad_norm": 2.807430122612653, "learning_rate": 3.336726526853297e-06, "loss": 0.2777, "step": 29877 }, { "epoch": 2.2205871423262726, "grad_norm": 2.2078372319176545, "learning_rate": 3.336128254913665e-06, "loss": 0.3342, "step": 29878 }, { "epoch": 2.220661464139725, "grad_norm": 2.160660924787507, "learning_rate": 3.3355300258756928e-06, "loss": 0.2588, "step": 29879 }, { "epoch": 2.220735785953177, "grad_norm": 2.183685369772001, "learning_rate": 3.3349318397432295e-06, "loss": 0.2508, "step": 29880 }, { "epoch": 2.2208101077666296, "grad_norm": 3.2731053993260604, "learning_rate": 3.3343336965201335e-06, "loss": 0.3491, "step": 29881 }, { "epoch": 2.2208844295800816, "grad_norm": 1.9281539525535918, "learning_rate": 3.333735596210247e-06, "loss": 0.2049, "step": 29882 }, { "epoch": 2.220958751393534, "grad_norm": 2.3032903858976854, "learning_rate": 3.333137538817428e-06, "loss": 0.2777, "step": 29883 }, { "epoch": 2.221033073206986, "grad_norm": 2.4651286152770804, "learning_rate": 3.332539524345524e-06, "loss": 0.236, "step": 29884 }, { "epoch": 2.2211073950204385, "grad_norm": 2.3921692546024707, "learning_rate": 3.3319415527983813e-06, "loss": 0.3454, "step": 29885 }, { "epoch": 2.2211817168338905, "grad_norm": 2.451019599089921, "learning_rate": 3.3313436241798556e-06, "loss": 0.2553, "step": 29886 }, { "epoch": 2.221256038647343, "grad_norm": 2.328235913019867, "learning_rate": 3.3307457384937934e-06, "loss": 0.1975, "step": 29887 }, { "epoch": 2.2213303604607955, "grad_norm": 2.161871927896929, "learning_rate": 3.3301478957440446e-06, "loss": 0.2587, "step": 29888 }, { "epoch": 2.2214046822742475, "grad_norm": 2.4257726489184432, "learning_rate": 3.3295500959344584e-06, "loss": 0.3574, "step": 29889 }, { "epoch": 2.2214790040877, "grad_norm": 2.3945476766672495, "learning_rate": 3.3289523390688784e-06, "loss": 0.3166, "step": 29890 }, { "epoch": 2.221553325901152, "grad_norm": 2.54007013061715, "learning_rate": 3.3283546251511613e-06, "loss": 0.3331, "step": 29891 }, { "epoch": 2.2216276477146044, "grad_norm": 2.659069906399493, "learning_rate": 3.327756954185146e-06, "loss": 0.2919, "step": 29892 }, { "epoch": 2.2217019695280564, "grad_norm": 1.9300656428004366, "learning_rate": 3.3271593261746905e-06, "loss": 0.2103, "step": 29893 }, { "epoch": 2.221776291341509, "grad_norm": 2.2769638456489223, "learning_rate": 3.326561741123635e-06, "loss": 0.2809, "step": 29894 }, { "epoch": 2.221850613154961, "grad_norm": 2.479287393818521, "learning_rate": 3.3259641990358272e-06, "loss": 0.3198, "step": 29895 }, { "epoch": 2.2219249349684134, "grad_norm": 2.610522030609768, "learning_rate": 3.325366699915118e-06, "loss": 0.2963, "step": 29896 }, { "epoch": 2.2219992567818654, "grad_norm": 2.690558767634733, "learning_rate": 3.3247692437653487e-06, "loss": 0.3828, "step": 29897 }, { "epoch": 2.222073578595318, "grad_norm": 2.7001578537496678, "learning_rate": 3.3241718305903713e-06, "loss": 0.3568, "step": 29898 }, { "epoch": 2.22214790040877, "grad_norm": 3.0685499685009088, "learning_rate": 3.32357446039403e-06, "loss": 0.3749, "step": 29899 }, { "epoch": 2.2222222222222223, "grad_norm": 1.9576070413224789, "learning_rate": 3.3229771331801696e-06, "loss": 0.2604, "step": 29900 }, { "epoch": 2.2222965440356743, "grad_norm": 2.512489159172283, "learning_rate": 3.322379848952636e-06, "loss": 0.2407, "step": 29901 }, { "epoch": 2.222370865849127, "grad_norm": 1.8286371359940996, "learning_rate": 3.3217826077152717e-06, "loss": 0.2138, "step": 29902 }, { "epoch": 2.222445187662579, "grad_norm": 2.4327880381120264, "learning_rate": 3.3211854094719265e-06, "loss": 0.2473, "step": 29903 }, { "epoch": 2.2225195094760313, "grad_norm": 2.3548907544648814, "learning_rate": 3.320588254226441e-06, "loss": 0.2987, "step": 29904 }, { "epoch": 2.2225938312894833, "grad_norm": 1.9554380227934454, "learning_rate": 3.319991141982665e-06, "loss": 0.2615, "step": 29905 }, { "epoch": 2.2226681531029358, "grad_norm": 2.1812259052310563, "learning_rate": 3.3193940727444386e-06, "loss": 0.2838, "step": 29906 }, { "epoch": 2.2227424749163878, "grad_norm": 2.2321897359430185, "learning_rate": 3.3187970465156037e-06, "loss": 0.3338, "step": 29907 }, { "epoch": 2.2228167967298402, "grad_norm": 2.930319830477563, "learning_rate": 3.31820006330001e-06, "loss": 0.2713, "step": 29908 }, { "epoch": 2.2228911185432922, "grad_norm": 2.0672794641254346, "learning_rate": 3.3176031231014972e-06, "loss": 0.251, "step": 29909 }, { "epoch": 2.2229654403567447, "grad_norm": 1.627350651917109, "learning_rate": 3.3170062259239056e-06, "loss": 0.2112, "step": 29910 }, { "epoch": 2.223039762170197, "grad_norm": 2.2876201342780345, "learning_rate": 3.3164093717710887e-06, "loss": 0.295, "step": 29911 }, { "epoch": 2.223114083983649, "grad_norm": 2.518702413263366, "learning_rate": 3.315812560646874e-06, "loss": 0.3654, "step": 29912 }, { "epoch": 2.2231884057971016, "grad_norm": 2.8099196459885167, "learning_rate": 3.315215792555114e-06, "loss": 0.3116, "step": 29913 }, { "epoch": 2.2232627276105537, "grad_norm": 2.2363054692825948, "learning_rate": 3.314619067499646e-06, "loss": 0.2605, "step": 29914 }, { "epoch": 2.223337049424006, "grad_norm": 2.046271964538636, "learning_rate": 3.314022385484316e-06, "loss": 0.276, "step": 29915 }, { "epoch": 2.223411371237458, "grad_norm": 2.35046449970247, "learning_rate": 3.3134257465129626e-06, "loss": 0.3083, "step": 29916 }, { "epoch": 2.2234856930509106, "grad_norm": 2.195273381223589, "learning_rate": 3.312829150589425e-06, "loss": 0.249, "step": 29917 }, { "epoch": 2.2235600148643626, "grad_norm": 2.9718161721284875, "learning_rate": 3.3122325977175498e-06, "loss": 0.2907, "step": 29918 }, { "epoch": 2.223634336677815, "grad_norm": 2.603956962591786, "learning_rate": 3.31163608790117e-06, "loss": 0.4098, "step": 29919 }, { "epoch": 2.223708658491267, "grad_norm": 2.6684942506444207, "learning_rate": 3.311039621144133e-06, "loss": 0.3549, "step": 29920 }, { "epoch": 2.2237829803047195, "grad_norm": 2.1938644112230707, "learning_rate": 3.310443197450277e-06, "loss": 0.2598, "step": 29921 }, { "epoch": 2.2238573021181716, "grad_norm": 2.4145338314556444, "learning_rate": 3.3098468168234397e-06, "loss": 0.3083, "step": 29922 }, { "epoch": 2.223931623931624, "grad_norm": 2.7147215539702545, "learning_rate": 3.309250479267462e-06, "loss": 0.3517, "step": 29923 }, { "epoch": 2.224005945745076, "grad_norm": 2.7432588088422363, "learning_rate": 3.3086541847861787e-06, "loss": 0.3737, "step": 29924 }, { "epoch": 2.2240802675585285, "grad_norm": 2.1603180610352104, "learning_rate": 3.308057933383436e-06, "loss": 0.2475, "step": 29925 }, { "epoch": 2.2241545893719805, "grad_norm": 2.1097672288214957, "learning_rate": 3.307461725063066e-06, "loss": 0.2089, "step": 29926 }, { "epoch": 2.224228911185433, "grad_norm": 2.2350526400986914, "learning_rate": 3.3068655598289134e-06, "loss": 0.317, "step": 29927 }, { "epoch": 2.224303232998885, "grad_norm": 1.9620562394159513, "learning_rate": 3.306269437684814e-06, "loss": 0.2132, "step": 29928 }, { "epoch": 2.2243775548123375, "grad_norm": 2.32653400526616, "learning_rate": 3.3056733586346e-06, "loss": 0.2657, "step": 29929 }, { "epoch": 2.2244518766257895, "grad_norm": 2.1493922274283705, "learning_rate": 3.3050773226821165e-06, "loss": 0.2527, "step": 29930 }, { "epoch": 2.224526198439242, "grad_norm": 2.4967039713849877, "learning_rate": 3.3044813298311996e-06, "loss": 0.2814, "step": 29931 }, { "epoch": 2.224600520252694, "grad_norm": 1.906400975727318, "learning_rate": 3.3038853800856797e-06, "loss": 0.2549, "step": 29932 }, { "epoch": 2.2246748420661464, "grad_norm": 2.4384218678864187, "learning_rate": 3.303289473449406e-06, "loss": 0.2627, "step": 29933 }, { "epoch": 2.224749163879599, "grad_norm": 2.303600876043038, "learning_rate": 3.3026936099261998e-06, "loss": 0.2988, "step": 29934 }, { "epoch": 2.224823485693051, "grad_norm": 1.9422502639506136, "learning_rate": 3.302097789519908e-06, "loss": 0.2656, "step": 29935 }, { "epoch": 2.2248978075065033, "grad_norm": 2.3737916839571076, "learning_rate": 3.3015020122343598e-06, "loss": 0.3205, "step": 29936 }, { "epoch": 2.2249721293199554, "grad_norm": 2.542263602271377, "learning_rate": 3.300906278073397e-06, "loss": 0.358, "step": 29937 }, { "epoch": 2.225046451133408, "grad_norm": 2.315390158826719, "learning_rate": 3.3003105870408523e-06, "loss": 0.2787, "step": 29938 }, { "epoch": 2.22512077294686, "grad_norm": 1.9597214987557834, "learning_rate": 3.299714939140558e-06, "loss": 0.2506, "step": 29939 }, { "epoch": 2.2251950947603123, "grad_norm": 1.933050019175703, "learning_rate": 3.299119334376354e-06, "loss": 0.2284, "step": 29940 }, { "epoch": 2.2252694165737643, "grad_norm": 2.011510756987925, "learning_rate": 3.298523772752068e-06, "loss": 0.295, "step": 29941 }, { "epoch": 2.2253437383872168, "grad_norm": 1.8915248825367053, "learning_rate": 3.297928254271543e-06, "loss": 0.1945, "step": 29942 }, { "epoch": 2.225418060200669, "grad_norm": 3.2405329360904767, "learning_rate": 3.297332778938607e-06, "loss": 0.2376, "step": 29943 }, { "epoch": 2.2254923820141213, "grad_norm": 2.7139895908357596, "learning_rate": 3.2967373467570953e-06, "loss": 0.2116, "step": 29944 }, { "epoch": 2.2255667038275733, "grad_norm": 2.6075466483381877, "learning_rate": 3.2961419577308405e-06, "loss": 0.3084, "step": 29945 }, { "epoch": 2.2256410256410257, "grad_norm": 2.3415146335700237, "learning_rate": 3.2955466118636727e-06, "loss": 0.2606, "step": 29946 }, { "epoch": 2.2257153474544777, "grad_norm": 2.0334306991880737, "learning_rate": 3.2949513091594307e-06, "loss": 0.2565, "step": 29947 }, { "epoch": 2.22578966926793, "grad_norm": 2.1115101503753206, "learning_rate": 3.294356049621942e-06, "loss": 0.2807, "step": 29948 }, { "epoch": 2.225863991081382, "grad_norm": 3.449659703822216, "learning_rate": 3.293760833255044e-06, "loss": 0.2209, "step": 29949 }, { "epoch": 2.2259383128948347, "grad_norm": 2.68033422263956, "learning_rate": 3.293165660062566e-06, "loss": 0.2515, "step": 29950 }, { "epoch": 2.2260126347082867, "grad_norm": 2.546972174598339, "learning_rate": 3.2925705300483356e-06, "loss": 0.3834, "step": 29951 }, { "epoch": 2.226086956521739, "grad_norm": 2.3428466335843936, "learning_rate": 3.291975443216192e-06, "loss": 0.3098, "step": 29952 }, { "epoch": 2.226161278335191, "grad_norm": 1.9706799908066546, "learning_rate": 3.2913803995699622e-06, "loss": 0.2036, "step": 29953 }, { "epoch": 2.2262356001486436, "grad_norm": 2.3979294945815526, "learning_rate": 3.290785399113473e-06, "loss": 0.2489, "step": 29954 }, { "epoch": 2.2263099219620957, "grad_norm": 2.3250156003118034, "learning_rate": 3.2901904418505636e-06, "loss": 0.2238, "step": 29955 }, { "epoch": 2.226384243775548, "grad_norm": 2.760673047725575, "learning_rate": 3.28959552778506e-06, "loss": 0.3256, "step": 29956 }, { "epoch": 2.2264585655890006, "grad_norm": 2.9580381038700216, "learning_rate": 3.289000656920792e-06, "loss": 0.2646, "step": 29957 }, { "epoch": 2.2265328874024526, "grad_norm": 2.3048379587193093, "learning_rate": 3.288405829261587e-06, "loss": 0.3076, "step": 29958 }, { "epoch": 2.226607209215905, "grad_norm": 1.9776618665741634, "learning_rate": 3.28781104481128e-06, "loss": 0.2354, "step": 29959 }, { "epoch": 2.226681531029357, "grad_norm": 2.1703569240771485, "learning_rate": 3.287216303573697e-06, "loss": 0.2921, "step": 29960 }, { "epoch": 2.2267558528428095, "grad_norm": 1.8587779842126497, "learning_rate": 3.2866216055526635e-06, "loss": 0.3209, "step": 29961 }, { "epoch": 2.2268301746562615, "grad_norm": 2.728848883463265, "learning_rate": 3.286026950752015e-06, "loss": 0.2853, "step": 29962 }, { "epoch": 2.226904496469714, "grad_norm": 3.8921861359365604, "learning_rate": 3.285432339175574e-06, "loss": 0.2922, "step": 29963 }, { "epoch": 2.226978818283166, "grad_norm": 2.484873051937951, "learning_rate": 3.2848377708271737e-06, "loss": 0.3303, "step": 29964 }, { "epoch": 2.2270531400966185, "grad_norm": 2.570230352929403, "learning_rate": 3.2842432457106398e-06, "loss": 0.2936, "step": 29965 }, { "epoch": 2.2271274619100705, "grad_norm": 2.2420711839669862, "learning_rate": 3.2836487638297964e-06, "loss": 0.2785, "step": 29966 }, { "epoch": 2.227201783723523, "grad_norm": 2.3036071030000422, "learning_rate": 3.2830543251884807e-06, "loss": 0.2926, "step": 29967 }, { "epoch": 2.227276105536975, "grad_norm": 2.2300355073689433, "learning_rate": 3.282459929790506e-06, "loss": 0.2864, "step": 29968 }, { "epoch": 2.2273504273504274, "grad_norm": 2.834929341473108, "learning_rate": 3.281865577639708e-06, "loss": 0.3249, "step": 29969 }, { "epoch": 2.2274247491638794, "grad_norm": 2.593725419037624, "learning_rate": 3.2812712687399085e-06, "loss": 0.3659, "step": 29970 }, { "epoch": 2.227499070977332, "grad_norm": 2.7508935328798914, "learning_rate": 3.2806770030949387e-06, "loss": 0.3583, "step": 29971 }, { "epoch": 2.227573392790784, "grad_norm": 2.4269376133183878, "learning_rate": 3.280082780708622e-06, "loss": 0.3177, "step": 29972 }, { "epoch": 2.2276477146042364, "grad_norm": 2.384947808939865, "learning_rate": 3.2794886015847803e-06, "loss": 0.3049, "step": 29973 }, { "epoch": 2.2277220364176884, "grad_norm": 2.8847147570474, "learning_rate": 3.2788944657272458e-06, "loss": 0.3578, "step": 29974 }, { "epoch": 2.227796358231141, "grad_norm": 3.2819450187162675, "learning_rate": 3.27830037313984e-06, "loss": 0.3361, "step": 29975 }, { "epoch": 2.2278706800445933, "grad_norm": 2.1722556146724035, "learning_rate": 3.277706323826384e-06, "loss": 0.2995, "step": 29976 }, { "epoch": 2.2279450018580453, "grad_norm": 2.6352035245798566, "learning_rate": 3.2771123177907095e-06, "loss": 0.3125, "step": 29977 }, { "epoch": 2.2280193236714974, "grad_norm": 2.3568026795576302, "learning_rate": 3.2765183550366364e-06, "loss": 0.2524, "step": 29978 }, { "epoch": 2.22809364548495, "grad_norm": 2.5348631312162175, "learning_rate": 3.27592443556799e-06, "loss": 0.3282, "step": 29979 }, { "epoch": 2.2281679672984023, "grad_norm": 2.0953795759659286, "learning_rate": 3.2753305593885897e-06, "loss": 0.2772, "step": 29980 }, { "epoch": 2.2282422891118543, "grad_norm": 1.9801476315001538, "learning_rate": 3.2747367265022658e-06, "loss": 0.2246, "step": 29981 }, { "epoch": 2.2283166109253068, "grad_norm": 2.1400545699916753, "learning_rate": 3.2741429369128373e-06, "loss": 0.3262, "step": 29982 }, { "epoch": 2.2283909327387588, "grad_norm": 3.07249126146178, "learning_rate": 3.2735491906241244e-06, "loss": 0.3078, "step": 29983 }, { "epoch": 2.2284652545522112, "grad_norm": 2.6814281068707184, "learning_rate": 3.2729554876399574e-06, "loss": 0.373, "step": 29984 }, { "epoch": 2.2285395763656632, "grad_norm": 2.2243393624884127, "learning_rate": 3.2723618279641487e-06, "loss": 0.3037, "step": 29985 }, { "epoch": 2.2286138981791157, "grad_norm": 1.849482531015078, "learning_rate": 3.27176821160053e-06, "loss": 0.2642, "step": 29986 }, { "epoch": 2.2286882199925677, "grad_norm": 2.2951411255231893, "learning_rate": 3.2711746385529188e-06, "loss": 0.3739, "step": 29987 }, { "epoch": 2.22876254180602, "grad_norm": 2.358029588373521, "learning_rate": 3.2705811088251314e-06, "loss": 0.3436, "step": 29988 }, { "epoch": 2.228836863619472, "grad_norm": 2.312648210466765, "learning_rate": 3.269987622420998e-06, "loss": 0.2446, "step": 29989 }, { "epoch": 2.2289111854329247, "grad_norm": 2.443811541899978, "learning_rate": 3.269394179344335e-06, "loss": 0.3121, "step": 29990 }, { "epoch": 2.2289855072463767, "grad_norm": 2.2017687832263353, "learning_rate": 3.2688007795989627e-06, "loss": 0.3248, "step": 29991 }, { "epoch": 2.229059829059829, "grad_norm": 1.9981626364075398, "learning_rate": 3.268207423188702e-06, "loss": 0.257, "step": 29992 }, { "epoch": 2.229134150873281, "grad_norm": 2.1516502790688765, "learning_rate": 3.2676141101173695e-06, "loss": 0.2647, "step": 29993 }, { "epoch": 2.2292084726867336, "grad_norm": 2.879883009377002, "learning_rate": 3.2670208403887915e-06, "loss": 0.373, "step": 29994 }, { "epoch": 2.2292827945001856, "grad_norm": 1.9045757286294414, "learning_rate": 3.26642761400678e-06, "loss": 0.2292, "step": 29995 }, { "epoch": 2.229357116313638, "grad_norm": 1.917276364944793, "learning_rate": 3.2658344309751622e-06, "loss": 0.2364, "step": 29996 }, { "epoch": 2.22943143812709, "grad_norm": 2.2004510917081923, "learning_rate": 3.2652412912977516e-06, "loss": 0.3772, "step": 29997 }, { "epoch": 2.2295057599405426, "grad_norm": 2.19392577216229, "learning_rate": 3.264648194978366e-06, "loss": 0.2296, "step": 29998 }, { "epoch": 2.229580081753995, "grad_norm": 2.258635605536761, "learning_rate": 3.264055142020829e-06, "loss": 0.2803, "step": 29999 }, { "epoch": 2.229654403567447, "grad_norm": 2.450242183076039, "learning_rate": 3.263462132428952e-06, "loss": 0.3601, "step": 30000 }, { "epoch": 2.2297287253808995, "grad_norm": 2.054698515568185, "learning_rate": 3.2628691662065624e-06, "loss": 0.2495, "step": 30001 }, { "epoch": 2.2298030471943515, "grad_norm": 6.051732558631388, "learning_rate": 3.2622762433574653e-06, "loss": 0.4276, "step": 30002 }, { "epoch": 2.229877369007804, "grad_norm": 3.362324300716297, "learning_rate": 3.2616833638854873e-06, "loss": 0.2516, "step": 30003 }, { "epoch": 2.229951690821256, "grad_norm": 2.6579264313343356, "learning_rate": 3.261090527794443e-06, "loss": 0.2837, "step": 30004 }, { "epoch": 2.2300260126347085, "grad_norm": 1.7484738856800717, "learning_rate": 3.260497735088144e-06, "loss": 0.2315, "step": 30005 }, { "epoch": 2.2301003344481605, "grad_norm": 2.9544517364258427, "learning_rate": 3.2599049857704147e-06, "loss": 0.4071, "step": 30006 }, { "epoch": 2.230174656261613, "grad_norm": 2.6633928032913174, "learning_rate": 3.2593122798450636e-06, "loss": 0.3282, "step": 30007 }, { "epoch": 2.230248978075065, "grad_norm": 2.3034622539596232, "learning_rate": 3.2587196173159142e-06, "loss": 0.3224, "step": 30008 }, { "epoch": 2.2303232998885174, "grad_norm": 2.2256839446334373, "learning_rate": 3.258126998186778e-06, "loss": 0.3019, "step": 30009 }, { "epoch": 2.2303976217019694, "grad_norm": 2.3608415149702853, "learning_rate": 3.257534422461467e-06, "loss": 0.3096, "step": 30010 }, { "epoch": 2.230471943515422, "grad_norm": 2.866460292186946, "learning_rate": 3.2569418901438023e-06, "loss": 0.3283, "step": 30011 }, { "epoch": 2.230546265328874, "grad_norm": 2.1558413232291915, "learning_rate": 3.256349401237596e-06, "loss": 0.2965, "step": 30012 }, { "epoch": 2.2306205871423264, "grad_norm": 2.3147173020852216, "learning_rate": 3.255756955746663e-06, "loss": 0.298, "step": 30013 }, { "epoch": 2.2306949089557784, "grad_norm": 1.8208028940783558, "learning_rate": 3.2551645536748166e-06, "loss": 0.2272, "step": 30014 }, { "epoch": 2.230769230769231, "grad_norm": 2.673868951114744, "learning_rate": 3.254572195025867e-06, "loss": 0.356, "step": 30015 }, { "epoch": 2.230843552582683, "grad_norm": 2.3978775456181247, "learning_rate": 3.253979879803636e-06, "loss": 0.2862, "step": 30016 }, { "epoch": 2.2309178743961353, "grad_norm": 2.3238544601284765, "learning_rate": 3.2533876080119275e-06, "loss": 0.3024, "step": 30017 }, { "epoch": 2.2309921962095873, "grad_norm": 2.3174611217495733, "learning_rate": 3.252795379654564e-06, "loss": 0.4184, "step": 30018 }, { "epoch": 2.23106651802304, "grad_norm": 2.2460094364270584, "learning_rate": 3.2522031947353537e-06, "loss": 0.2542, "step": 30019 }, { "epoch": 2.231140839836492, "grad_norm": 2.479355957502527, "learning_rate": 3.251611053258106e-06, "loss": 0.3147, "step": 30020 }, { "epoch": 2.2312151616499443, "grad_norm": 1.848729705884481, "learning_rate": 3.251018955226639e-06, "loss": 0.2199, "step": 30021 }, { "epoch": 2.2312894834633967, "grad_norm": 1.5343040801426715, "learning_rate": 3.2504269006447585e-06, "loss": 0.1483, "step": 30022 }, { "epoch": 2.2313638052768487, "grad_norm": 2.005608710914019, "learning_rate": 3.249834889516287e-06, "loss": 0.2923, "step": 30023 }, { "epoch": 2.231438127090301, "grad_norm": 2.0806261484931587, "learning_rate": 3.2492429218450207e-06, "loss": 0.2485, "step": 30024 }, { "epoch": 2.231512448903753, "grad_norm": 2.3311179334387555, "learning_rate": 3.2486509976347812e-06, "loss": 0.301, "step": 30025 }, { "epoch": 2.2315867707172057, "grad_norm": 1.936171679295153, "learning_rate": 3.248059116889376e-06, "loss": 0.2032, "step": 30026 }, { "epoch": 2.2316610925306577, "grad_norm": 2.8398409911206888, "learning_rate": 3.247467279612614e-06, "loss": 0.3492, "step": 30027 }, { "epoch": 2.23173541434411, "grad_norm": 2.191520786601254, "learning_rate": 3.2468754858083097e-06, "loss": 0.2605, "step": 30028 }, { "epoch": 2.231809736157562, "grad_norm": 2.4476663584594176, "learning_rate": 3.246283735480267e-06, "loss": 0.3171, "step": 30029 }, { "epoch": 2.2318840579710146, "grad_norm": 2.1180817665045244, "learning_rate": 3.2456920286323033e-06, "loss": 0.3123, "step": 30030 }, { "epoch": 2.2319583797844667, "grad_norm": 2.086107502735698, "learning_rate": 3.245100365268223e-06, "loss": 0.249, "step": 30031 }, { "epoch": 2.232032701597919, "grad_norm": 2.5817358021687413, "learning_rate": 3.2445087453918335e-06, "loss": 0.2251, "step": 30032 }, { "epoch": 2.232107023411371, "grad_norm": 2.4563608551374685, "learning_rate": 3.2439171690069494e-06, "loss": 0.2711, "step": 30033 }, { "epoch": 2.2321813452248236, "grad_norm": 2.0534624495484746, "learning_rate": 3.243325636117376e-06, "loss": 0.2261, "step": 30034 }, { "epoch": 2.2322556670382756, "grad_norm": 2.441218131580819, "learning_rate": 3.2427341467269214e-06, "loss": 0.3245, "step": 30035 }, { "epoch": 2.232329988851728, "grad_norm": 2.5267485460125636, "learning_rate": 3.2421427008393937e-06, "loss": 0.3282, "step": 30036 }, { "epoch": 2.23240431066518, "grad_norm": 2.5018057720190003, "learning_rate": 3.2415512984585974e-06, "loss": 0.3066, "step": 30037 }, { "epoch": 2.2324786324786325, "grad_norm": 2.6632997117984125, "learning_rate": 3.240959939588346e-06, "loss": 0.3384, "step": 30038 }, { "epoch": 2.2325529542920846, "grad_norm": 2.0297174093094332, "learning_rate": 3.2403686242324415e-06, "loss": 0.2373, "step": 30039 }, { "epoch": 2.232627276105537, "grad_norm": 1.9061728504248179, "learning_rate": 3.2397773523946952e-06, "loss": 0.204, "step": 30040 }, { "epoch": 2.232701597918989, "grad_norm": 2.819516928481236, "learning_rate": 3.2391861240789125e-06, "loss": 0.3277, "step": 30041 }, { "epoch": 2.2327759197324415, "grad_norm": 2.1155773251505865, "learning_rate": 3.2385949392888947e-06, "loss": 0.2691, "step": 30042 }, { "epoch": 2.2328502415458935, "grad_norm": 2.263668841711181, "learning_rate": 3.2380037980284563e-06, "loss": 0.2711, "step": 30043 }, { "epoch": 2.232924563359346, "grad_norm": 2.2316760907653452, "learning_rate": 3.2374127003013943e-06, "loss": 0.282, "step": 30044 }, { "epoch": 2.2329988851727984, "grad_norm": 2.205787861570378, "learning_rate": 3.236821646111522e-06, "loss": 0.2304, "step": 30045 }, { "epoch": 2.2330732069862504, "grad_norm": 2.563074299305473, "learning_rate": 3.2362306354626417e-06, "loss": 0.3631, "step": 30046 }, { "epoch": 2.233147528799703, "grad_norm": 2.3872274352581617, "learning_rate": 3.2356396683585566e-06, "loss": 0.2722, "step": 30047 }, { "epoch": 2.233221850613155, "grad_norm": 2.024165856878487, "learning_rate": 3.2350487448030734e-06, "loss": 0.2729, "step": 30048 }, { "epoch": 2.2332961724266074, "grad_norm": 2.1402871410459827, "learning_rate": 3.23445786479999e-06, "loss": 0.2458, "step": 30049 }, { "epoch": 2.2333704942400594, "grad_norm": 2.1529775140188443, "learning_rate": 3.2338670283531216e-06, "loss": 0.2594, "step": 30050 }, { "epoch": 2.233444816053512, "grad_norm": 2.317451912776543, "learning_rate": 3.233276235466262e-06, "loss": 0.2893, "step": 30051 }, { "epoch": 2.233519137866964, "grad_norm": 2.1397555885167407, "learning_rate": 3.232685486143221e-06, "loss": 0.292, "step": 30052 }, { "epoch": 2.2335934596804163, "grad_norm": 2.810693930826302, "learning_rate": 3.232094780387801e-06, "loss": 0.3311, "step": 30053 }, { "epoch": 2.2336677814938684, "grad_norm": 2.3671835034790605, "learning_rate": 3.2315041182037997e-06, "loss": 0.2923, "step": 30054 }, { "epoch": 2.233742103307321, "grad_norm": 2.295525678463119, "learning_rate": 3.2309134995950277e-06, "loss": 0.2882, "step": 30055 }, { "epoch": 2.233816425120773, "grad_norm": 2.5177778413353065, "learning_rate": 3.2303229245652835e-06, "loss": 0.2115, "step": 30056 }, { "epoch": 2.2338907469342253, "grad_norm": 1.964499341105394, "learning_rate": 3.2297323931183676e-06, "loss": 0.2294, "step": 30057 }, { "epoch": 2.2339650687476773, "grad_norm": 2.167755029167514, "learning_rate": 3.2291419052580854e-06, "loss": 0.2632, "step": 30058 }, { "epoch": 2.2340393905611298, "grad_norm": 2.4608216997588053, "learning_rate": 3.2285514609882316e-06, "loss": 0.3844, "step": 30059 }, { "epoch": 2.234113712374582, "grad_norm": 2.0191098939019474, "learning_rate": 3.2279610603126153e-06, "loss": 0.2386, "step": 30060 }, { "epoch": 2.2341880341880342, "grad_norm": 2.031623799710746, "learning_rate": 3.227370703235031e-06, "loss": 0.2544, "step": 30061 }, { "epoch": 2.2342623560014863, "grad_norm": 2.564535055031909, "learning_rate": 3.2267803897592863e-06, "loss": 0.2107, "step": 30062 }, { "epoch": 2.2343366778149387, "grad_norm": 2.1883718744828813, "learning_rate": 3.2261901198891787e-06, "loss": 0.2886, "step": 30063 }, { "epoch": 2.2344109996283907, "grad_norm": 2.101037076788412, "learning_rate": 3.225599893628504e-06, "loss": 0.2671, "step": 30064 }, { "epoch": 2.234485321441843, "grad_norm": 2.5252641334727755, "learning_rate": 3.2250097109810686e-06, "loss": 0.3113, "step": 30065 }, { "epoch": 2.234559643255295, "grad_norm": 2.054389052814553, "learning_rate": 3.2244195719506653e-06, "loss": 0.2509, "step": 30066 }, { "epoch": 2.2346339650687477, "grad_norm": 2.4491011854605187, "learning_rate": 3.2238294765411017e-06, "loss": 0.2467, "step": 30067 }, { "epoch": 2.2347082868822, "grad_norm": 2.050976715110784, "learning_rate": 3.2232394247561715e-06, "loss": 0.2411, "step": 30068 }, { "epoch": 2.234782608695652, "grad_norm": 2.8378355310785475, "learning_rate": 3.2226494165996747e-06, "loss": 0.3472, "step": 30069 }, { "epoch": 2.2348569305091046, "grad_norm": 2.18385672131958, "learning_rate": 3.22205945207541e-06, "loss": 0.2591, "step": 30070 }, { "epoch": 2.2349312523225566, "grad_norm": 2.4924276015390774, "learning_rate": 3.2214695311871702e-06, "loss": 0.2735, "step": 30071 }, { "epoch": 2.235005574136009, "grad_norm": 2.3977030107961, "learning_rate": 3.220879653938762e-06, "loss": 0.3182, "step": 30072 }, { "epoch": 2.235079895949461, "grad_norm": 2.2389514235070678, "learning_rate": 3.220289820333975e-06, "loss": 0.2728, "step": 30073 }, { "epoch": 2.2351542177629136, "grad_norm": 1.995498217902006, "learning_rate": 3.2197000303766147e-06, "loss": 0.2218, "step": 30074 }, { "epoch": 2.2352285395763656, "grad_norm": 1.9998960725830266, "learning_rate": 3.2191102840704726e-06, "loss": 0.3304, "step": 30075 }, { "epoch": 2.235302861389818, "grad_norm": 2.4364413256413604, "learning_rate": 3.218520581419344e-06, "loss": 0.3068, "step": 30076 }, { "epoch": 2.23537718320327, "grad_norm": 2.4252785272306747, "learning_rate": 3.2179309224270307e-06, "loss": 0.2594, "step": 30077 }, { "epoch": 2.2354515050167225, "grad_norm": 2.32555116202363, "learning_rate": 3.2173413070973267e-06, "loss": 0.2181, "step": 30078 }, { "epoch": 2.2355258268301745, "grad_norm": 2.6429001001462056, "learning_rate": 3.2167517354340237e-06, "loss": 0.3068, "step": 30079 }, { "epoch": 2.235600148643627, "grad_norm": 2.649766741600602, "learning_rate": 3.216162207440928e-06, "loss": 0.2733, "step": 30080 }, { "epoch": 2.235674470457079, "grad_norm": 2.378186877732279, "learning_rate": 3.2155727231218224e-06, "loss": 0.3498, "step": 30081 }, { "epoch": 2.2357487922705315, "grad_norm": 2.440860138208496, "learning_rate": 3.2149832824805094e-06, "loss": 0.3407, "step": 30082 }, { "epoch": 2.2358231140839835, "grad_norm": 2.364768237559618, "learning_rate": 3.214393885520779e-06, "loss": 0.3034, "step": 30083 }, { "epoch": 2.235897435897436, "grad_norm": 2.414667015714741, "learning_rate": 3.2138045322464328e-06, "loss": 0.2417, "step": 30084 }, { "epoch": 2.235971757710888, "grad_norm": 2.506794360900861, "learning_rate": 3.21321522266126e-06, "loss": 0.3112, "step": 30085 }, { "epoch": 2.2360460795243404, "grad_norm": 2.139498497224132, "learning_rate": 3.212625956769053e-06, "loss": 0.3051, "step": 30086 }, { "epoch": 2.2361204013377924, "grad_norm": 2.9434548013784636, "learning_rate": 3.2120367345736103e-06, "loss": 0.335, "step": 30087 }, { "epoch": 2.236194723151245, "grad_norm": 2.0253818236518253, "learning_rate": 3.2114475560787207e-06, "loss": 0.2403, "step": 30088 }, { "epoch": 2.236269044964697, "grad_norm": 2.5143900733322235, "learning_rate": 3.2108584212881812e-06, "loss": 0.3338, "step": 30089 }, { "epoch": 2.2363433667781494, "grad_norm": 1.9408603407353333, "learning_rate": 3.210269330205784e-06, "loss": 0.2382, "step": 30090 }, { "epoch": 2.236417688591602, "grad_norm": 2.354418826444576, "learning_rate": 3.2096802828353204e-06, "loss": 0.3099, "step": 30091 }, { "epoch": 2.236492010405054, "grad_norm": 2.5657552653974065, "learning_rate": 3.209091279180583e-06, "loss": 0.3035, "step": 30092 }, { "epoch": 2.2365663322185063, "grad_norm": 3.003913881183328, "learning_rate": 3.2085023192453604e-06, "loss": 0.2775, "step": 30093 }, { "epoch": 2.2366406540319583, "grad_norm": 2.393417302626167, "learning_rate": 3.20791340303345e-06, "loss": 0.2549, "step": 30094 }, { "epoch": 2.236714975845411, "grad_norm": 2.0195108942362796, "learning_rate": 3.2073245305486377e-06, "loss": 0.2361, "step": 30095 }, { "epoch": 2.236789297658863, "grad_norm": 2.1967180215791537, "learning_rate": 3.206735701794721e-06, "loss": 0.3437, "step": 30096 }, { "epoch": 2.2368636194723153, "grad_norm": 2.917759326812061, "learning_rate": 3.206146916775488e-06, "loss": 0.3459, "step": 30097 }, { "epoch": 2.2369379412857673, "grad_norm": 2.6140017120781582, "learning_rate": 3.205558175494725e-06, "loss": 0.3179, "step": 30098 }, { "epoch": 2.2370122630992197, "grad_norm": 2.2851019788899616, "learning_rate": 3.204969477956229e-06, "loss": 0.2405, "step": 30099 }, { "epoch": 2.2370865849126718, "grad_norm": 1.6882006907959533, "learning_rate": 3.204380824163786e-06, "loss": 0.2188, "step": 30100 }, { "epoch": 2.237160906726124, "grad_norm": 2.4943656822297484, "learning_rate": 3.203792214121185e-06, "loss": 0.3073, "step": 30101 }, { "epoch": 2.2372352285395762, "grad_norm": 2.815994760085158, "learning_rate": 3.203203647832219e-06, "loss": 0.3207, "step": 30102 }, { "epoch": 2.2373095503530287, "grad_norm": 2.302966259092429, "learning_rate": 3.2026151253006765e-06, "loss": 0.2752, "step": 30103 }, { "epoch": 2.2373838721664807, "grad_norm": 2.2109897830232237, "learning_rate": 3.202026646530345e-06, "loss": 0.3505, "step": 30104 }, { "epoch": 2.237458193979933, "grad_norm": 2.036975996889364, "learning_rate": 3.2014382115250086e-06, "loss": 0.2519, "step": 30105 }, { "epoch": 2.237532515793385, "grad_norm": 2.1570808969321775, "learning_rate": 3.2008498202884653e-06, "loss": 0.2631, "step": 30106 }, { "epoch": 2.2376068376068377, "grad_norm": 2.190872418715527, "learning_rate": 3.2002614728244973e-06, "loss": 0.1975, "step": 30107 }, { "epoch": 2.2376811594202897, "grad_norm": 1.9823924143870142, "learning_rate": 3.19967316913689e-06, "loss": 0.1808, "step": 30108 }, { "epoch": 2.237755481233742, "grad_norm": 2.1967492504168438, "learning_rate": 3.1990849092294364e-06, "loss": 0.2314, "step": 30109 }, { "epoch": 2.237829803047194, "grad_norm": 2.5649737866230775, "learning_rate": 3.198496693105919e-06, "loss": 0.3603, "step": 30110 }, { "epoch": 2.2379041248606466, "grad_norm": 2.5276300319621825, "learning_rate": 3.19790852077013e-06, "loss": 0.3422, "step": 30111 }, { "epoch": 2.2379784466740986, "grad_norm": 2.6060251761807054, "learning_rate": 3.197320392225852e-06, "loss": 0.3402, "step": 30112 }, { "epoch": 2.238052768487551, "grad_norm": 2.548973350666995, "learning_rate": 3.196732307476871e-06, "loss": 0.3117, "step": 30113 }, { "epoch": 2.2381270903010035, "grad_norm": 2.492657054698534, "learning_rate": 3.1961442665269795e-06, "loss": 0.3529, "step": 30114 }, { "epoch": 2.2382014121144556, "grad_norm": 2.2250144038412243, "learning_rate": 3.1955562693799523e-06, "loss": 0.3112, "step": 30115 }, { "epoch": 2.238275733927908, "grad_norm": 2.223969344196068, "learning_rate": 3.1949683160395837e-06, "loss": 0.2491, "step": 30116 }, { "epoch": 2.23835005574136, "grad_norm": 2.0891937597741763, "learning_rate": 3.1943804065096527e-06, "loss": 0.2525, "step": 30117 }, { "epoch": 2.2384243775548125, "grad_norm": 2.339459214658591, "learning_rate": 3.193792540793951e-06, "loss": 0.3269, "step": 30118 }, { "epoch": 2.2384986993682645, "grad_norm": 2.078551459744453, "learning_rate": 3.19320471889626e-06, "loss": 0.3116, "step": 30119 }, { "epoch": 2.238573021181717, "grad_norm": 2.487502325971242, "learning_rate": 3.19261694082036e-06, "loss": 0.322, "step": 30120 }, { "epoch": 2.238647342995169, "grad_norm": 2.6255034223207097, "learning_rate": 3.192029206570042e-06, "loss": 0.3288, "step": 30121 }, { "epoch": 2.2387216648086214, "grad_norm": 2.5617431386396454, "learning_rate": 3.1914415161490874e-06, "loss": 0.3872, "step": 30122 }, { "epoch": 2.2387959866220735, "grad_norm": 2.719183526020718, "learning_rate": 3.1908538695612754e-06, "loss": 0.2993, "step": 30123 }, { "epoch": 2.238870308435526, "grad_norm": 2.9158361130180115, "learning_rate": 3.1902662668103967e-06, "loss": 0.3829, "step": 30124 }, { "epoch": 2.238944630248978, "grad_norm": 2.33610468012682, "learning_rate": 3.18967870790023e-06, "loss": 0.3259, "step": 30125 }, { "epoch": 2.2390189520624304, "grad_norm": 2.2678981593208123, "learning_rate": 3.189091192834558e-06, "loss": 0.26, "step": 30126 }, { "epoch": 2.2390932738758824, "grad_norm": 2.6207343804318928, "learning_rate": 3.1885037216171612e-06, "loss": 0.35, "step": 30127 }, { "epoch": 2.239167595689335, "grad_norm": 2.1554687541635276, "learning_rate": 3.187916294251827e-06, "loss": 0.2489, "step": 30128 }, { "epoch": 2.239241917502787, "grad_norm": 2.075030998285869, "learning_rate": 3.1873289107423346e-06, "loss": 0.2599, "step": 30129 }, { "epoch": 2.2393162393162394, "grad_norm": 2.7809736667060494, "learning_rate": 3.186741571092461e-06, "loss": 0.2849, "step": 30130 }, { "epoch": 2.2393905611296914, "grad_norm": 2.3401610287902015, "learning_rate": 3.186154275305995e-06, "loss": 0.2205, "step": 30131 }, { "epoch": 2.239464882943144, "grad_norm": 2.0869637134472825, "learning_rate": 3.1855670233867107e-06, "loss": 0.2637, "step": 30132 }, { "epoch": 2.2395392047565963, "grad_norm": 2.018275558519705, "learning_rate": 3.184979815338397e-06, "loss": 0.2472, "step": 30133 }, { "epoch": 2.2396135265700483, "grad_norm": 2.519006223827649, "learning_rate": 3.184392651164828e-06, "loss": 0.2656, "step": 30134 }, { "epoch": 2.2396878483835003, "grad_norm": 2.2974332868766214, "learning_rate": 3.1838055308697837e-06, "loss": 0.285, "step": 30135 }, { "epoch": 2.239762170196953, "grad_norm": 2.449745556046732, "learning_rate": 3.183218454457051e-06, "loss": 0.2799, "step": 30136 }, { "epoch": 2.2398364920104052, "grad_norm": 2.2045222176700436, "learning_rate": 3.182631421930399e-06, "loss": 0.2093, "step": 30137 }, { "epoch": 2.2399108138238573, "grad_norm": 2.2565878468719913, "learning_rate": 3.182044433293614e-06, "loss": 0.2557, "step": 30138 }, { "epoch": 2.2399851356373097, "grad_norm": 8.403026293223474, "learning_rate": 3.1814574885504735e-06, "loss": 0.2589, "step": 30139 }, { "epoch": 2.2400594574507617, "grad_norm": 2.7344032048129354, "learning_rate": 3.180870587704753e-06, "loss": 0.2843, "step": 30140 }, { "epoch": 2.240133779264214, "grad_norm": 2.270112310453719, "learning_rate": 3.1802837307602374e-06, "loss": 0.2522, "step": 30141 }, { "epoch": 2.240208101077666, "grad_norm": 2.591802065717393, "learning_rate": 3.1796969177206984e-06, "loss": 0.2852, "step": 30142 }, { "epoch": 2.2402824228911187, "grad_norm": 2.0985253486389834, "learning_rate": 3.179110148589919e-06, "loss": 0.247, "step": 30143 }, { "epoch": 2.2403567447045707, "grad_norm": 2.32348760801184, "learning_rate": 3.1785234233716743e-06, "loss": 0.2222, "step": 30144 }, { "epoch": 2.240431066518023, "grad_norm": 2.161927890911798, "learning_rate": 3.1779367420697394e-06, "loss": 0.2579, "step": 30145 }, { "epoch": 2.240505388331475, "grad_norm": 2.1770655279508917, "learning_rate": 3.1773501046878963e-06, "loss": 0.2776, "step": 30146 }, { "epoch": 2.2405797101449276, "grad_norm": 2.457703974284935, "learning_rate": 3.1767635112299166e-06, "loss": 0.2786, "step": 30147 }, { "epoch": 2.2406540319583796, "grad_norm": 1.886379007439442, "learning_rate": 3.1761769616995865e-06, "loss": 0.2378, "step": 30148 }, { "epoch": 2.240728353771832, "grad_norm": 2.456221092324531, "learning_rate": 3.175590456100668e-06, "loss": 0.2754, "step": 30149 }, { "epoch": 2.240802675585284, "grad_norm": 2.784588783523889, "learning_rate": 3.175003994436947e-06, "loss": 0.3505, "step": 30150 }, { "epoch": 2.2408769973987366, "grad_norm": 1.992613922976743, "learning_rate": 3.1744175767121953e-06, "loss": 0.1903, "step": 30151 }, { "epoch": 2.2409513192121886, "grad_norm": 2.387719836500507, "learning_rate": 3.1738312029301878e-06, "loss": 0.3759, "step": 30152 }, { "epoch": 2.241025641025641, "grad_norm": 2.1568798881517086, "learning_rate": 3.173244873094703e-06, "loss": 0.2765, "step": 30153 }, { "epoch": 2.241099962839093, "grad_norm": 2.6198347549201504, "learning_rate": 3.1726585872095093e-06, "loss": 0.3052, "step": 30154 }, { "epoch": 2.2411742846525455, "grad_norm": 2.2734158301363796, "learning_rate": 3.172072345278391e-06, "loss": 0.316, "step": 30155 }, { "epoch": 2.241248606465998, "grad_norm": 2.8109558455721353, "learning_rate": 3.171486147305115e-06, "loss": 0.3478, "step": 30156 }, { "epoch": 2.24132292827945, "grad_norm": 2.634171621973378, "learning_rate": 3.1708999932934536e-06, "loss": 0.3158, "step": 30157 }, { "epoch": 2.2413972500929025, "grad_norm": 2.046914274538936, "learning_rate": 3.170313883247187e-06, "loss": 0.2226, "step": 30158 }, { "epoch": 2.2414715719063545, "grad_norm": 2.4660445021955173, "learning_rate": 3.1697278171700862e-06, "loss": 0.3642, "step": 30159 }, { "epoch": 2.241545893719807, "grad_norm": 2.6725896499457944, "learning_rate": 3.169141795065922e-06, "loss": 0.371, "step": 30160 }, { "epoch": 2.241620215533259, "grad_norm": 2.8433071669409293, "learning_rate": 3.1685558169384687e-06, "loss": 0.3136, "step": 30161 }, { "epoch": 2.2416945373467114, "grad_norm": 2.282519271890996, "learning_rate": 3.167969882791496e-06, "loss": 0.2587, "step": 30162 }, { "epoch": 2.2417688591601634, "grad_norm": 2.067403477593128, "learning_rate": 3.167383992628782e-06, "loss": 0.2627, "step": 30163 }, { "epoch": 2.241843180973616, "grad_norm": 1.6720498542695543, "learning_rate": 3.166798146454092e-06, "loss": 0.1614, "step": 30164 }, { "epoch": 2.241917502787068, "grad_norm": 3.012004832850709, "learning_rate": 3.1662123442712044e-06, "loss": 0.306, "step": 30165 }, { "epoch": 2.2419918246005204, "grad_norm": 2.2874025174344426, "learning_rate": 3.1656265860838862e-06, "loss": 0.2957, "step": 30166 }, { "epoch": 2.2420661464139724, "grad_norm": 2.4414708143122335, "learning_rate": 3.165040871895907e-06, "loss": 0.3024, "step": 30167 }, { "epoch": 2.242140468227425, "grad_norm": 2.1188917389473327, "learning_rate": 3.164455201711043e-06, "loss": 0.2432, "step": 30168 }, { "epoch": 2.242214790040877, "grad_norm": 2.4324079484478074, "learning_rate": 3.1638695755330583e-06, "loss": 0.2784, "step": 30169 }, { "epoch": 2.2422891118543293, "grad_norm": 2.231769776130163, "learning_rate": 3.1632839933657333e-06, "loss": 0.2477, "step": 30170 }, { "epoch": 2.2423634336677813, "grad_norm": 2.336648167563645, "learning_rate": 3.162698455212825e-06, "loss": 0.3328, "step": 30171 }, { "epoch": 2.242437755481234, "grad_norm": 5.596600573682689, "learning_rate": 3.162112961078112e-06, "loss": 0.3014, "step": 30172 }, { "epoch": 2.242512077294686, "grad_norm": 3.0223616476264956, "learning_rate": 3.161527510965362e-06, "loss": 0.2896, "step": 30173 }, { "epoch": 2.2425863991081383, "grad_norm": 2.818027006267692, "learning_rate": 3.1609421048783384e-06, "loss": 0.4123, "step": 30174 }, { "epoch": 2.2426607209215903, "grad_norm": 2.193357959974737, "learning_rate": 3.1603567428208192e-06, "loss": 0.2557, "step": 30175 }, { "epoch": 2.2427350427350428, "grad_norm": 2.2520435298800843, "learning_rate": 3.1597714247965637e-06, "loss": 0.2594, "step": 30176 }, { "epoch": 2.2428093645484948, "grad_norm": 2.288250076570835, "learning_rate": 3.159186150809349e-06, "loss": 0.2585, "step": 30177 }, { "epoch": 2.2428836863619472, "grad_norm": 1.7201727961719768, "learning_rate": 3.15860092086294e-06, "loss": 0.2066, "step": 30178 }, { "epoch": 2.2429580081753997, "grad_norm": 2.2351143135134524, "learning_rate": 3.1580157349610984e-06, "loss": 0.3864, "step": 30179 }, { "epoch": 2.2430323299888517, "grad_norm": 2.4466368811498866, "learning_rate": 3.1574305931076e-06, "loss": 0.2471, "step": 30180 }, { "epoch": 2.243106651802304, "grad_norm": 2.3381535114599434, "learning_rate": 3.1568454953062087e-06, "loss": 0.2939, "step": 30181 }, { "epoch": 2.243180973615756, "grad_norm": 2.282379583636955, "learning_rate": 3.156260441560691e-06, "loss": 0.2616, "step": 30182 }, { "epoch": 2.2432552954292087, "grad_norm": 2.061098889074104, "learning_rate": 3.1556754318748127e-06, "loss": 0.2729, "step": 30183 }, { "epoch": 2.2433296172426607, "grad_norm": 2.1857961710689358, "learning_rate": 3.1550904662523383e-06, "loss": 0.2546, "step": 30184 }, { "epoch": 2.243403939056113, "grad_norm": 2.4378957408971194, "learning_rate": 3.1545055446970406e-06, "loss": 0.3133, "step": 30185 }, { "epoch": 2.243478260869565, "grad_norm": 2.5942306786910576, "learning_rate": 3.1539206672126765e-06, "loss": 0.3049, "step": 30186 }, { "epoch": 2.2435525826830176, "grad_norm": 2.364552555253722, "learning_rate": 3.1533358338030196e-06, "loss": 0.3091, "step": 30187 }, { "epoch": 2.2436269044964696, "grad_norm": 1.9807658367675525, "learning_rate": 3.1527510444718312e-06, "loss": 0.2338, "step": 30188 }, { "epoch": 2.243701226309922, "grad_norm": 2.8455610313388835, "learning_rate": 3.152166299222873e-06, "loss": 0.3311, "step": 30189 }, { "epoch": 2.243775548123374, "grad_norm": 2.2486890136508992, "learning_rate": 3.151581598059916e-06, "loss": 0.3153, "step": 30190 }, { "epoch": 2.2438498699368266, "grad_norm": 2.48443439584314, "learning_rate": 3.150996940986718e-06, "loss": 0.2359, "step": 30191 }, { "epoch": 2.2439241917502786, "grad_norm": 2.2369174674238494, "learning_rate": 3.1504123280070496e-06, "loss": 0.2031, "step": 30192 }, { "epoch": 2.243998513563731, "grad_norm": 2.4947192594362635, "learning_rate": 3.1498277591246717e-06, "loss": 0.2301, "step": 30193 }, { "epoch": 2.244072835377183, "grad_norm": 2.3460435626849865, "learning_rate": 3.1492432343433475e-06, "loss": 0.2291, "step": 30194 }, { "epoch": 2.2441471571906355, "grad_norm": 2.1503577940368084, "learning_rate": 3.1486587536668388e-06, "loss": 0.2899, "step": 30195 }, { "epoch": 2.2442214790040875, "grad_norm": 2.039449308835622, "learning_rate": 3.148074317098907e-06, "loss": 0.3038, "step": 30196 }, { "epoch": 2.24429580081754, "grad_norm": 1.9102512231305369, "learning_rate": 3.147489924643321e-06, "loss": 0.3153, "step": 30197 }, { "epoch": 2.244370122630992, "grad_norm": 1.9149174261621549, "learning_rate": 3.1469055763038347e-06, "loss": 0.2699, "step": 30198 }, { "epoch": 2.2444444444444445, "grad_norm": 2.3753553911000713, "learning_rate": 3.1463212720842196e-06, "loss": 0.3161, "step": 30199 }, { "epoch": 2.2445187662578965, "grad_norm": 2.5201439801074548, "learning_rate": 3.145737011988231e-06, "loss": 0.2656, "step": 30200 }, { "epoch": 2.244593088071349, "grad_norm": 2.00984421214555, "learning_rate": 3.1451527960196294e-06, "loss": 0.2773, "step": 30201 }, { "epoch": 2.2446674098848014, "grad_norm": 3.3831307192925477, "learning_rate": 3.1445686241821816e-06, "loss": 0.4116, "step": 30202 }, { "epoch": 2.2447417316982534, "grad_norm": 2.6684666145030156, "learning_rate": 3.143984496479645e-06, "loss": 0.3217, "step": 30203 }, { "epoch": 2.244816053511706, "grad_norm": 2.218541826443697, "learning_rate": 3.1434004129157803e-06, "loss": 0.2953, "step": 30204 }, { "epoch": 2.244890375325158, "grad_norm": 1.673095321273324, "learning_rate": 3.1428163734943485e-06, "loss": 0.192, "step": 30205 }, { "epoch": 2.2449646971386104, "grad_norm": 2.348920552979315, "learning_rate": 3.1422323782191055e-06, "loss": 0.3112, "step": 30206 }, { "epoch": 2.2450390189520624, "grad_norm": 2.4918938066522207, "learning_rate": 3.1416484270938184e-06, "loss": 0.2598, "step": 30207 }, { "epoch": 2.245113340765515, "grad_norm": 2.066209515979434, "learning_rate": 3.14106452012224e-06, "loss": 0.1918, "step": 30208 }, { "epoch": 2.245187662578967, "grad_norm": 1.6786230888753788, "learning_rate": 3.1404806573081348e-06, "loss": 0.1909, "step": 30209 }, { "epoch": 2.2452619843924193, "grad_norm": 2.6554665124858112, "learning_rate": 3.1398968386552587e-06, "loss": 0.2789, "step": 30210 }, { "epoch": 2.2453363062058713, "grad_norm": 2.0995411229407677, "learning_rate": 3.1393130641673687e-06, "loss": 0.1965, "step": 30211 }, { "epoch": 2.245410628019324, "grad_norm": 2.4235577641958277, "learning_rate": 3.1387293338482274e-06, "loss": 0.288, "step": 30212 }, { "epoch": 2.245484949832776, "grad_norm": 1.9109991474272674, "learning_rate": 3.1381456477015882e-06, "loss": 0.2318, "step": 30213 }, { "epoch": 2.2455592716462283, "grad_norm": 2.047785968108026, "learning_rate": 3.137562005731214e-06, "loss": 0.2464, "step": 30214 }, { "epoch": 2.2456335934596803, "grad_norm": 2.5478198248892654, "learning_rate": 3.1369784079408604e-06, "loss": 0.3144, "step": 30215 }, { "epoch": 2.2457079152731327, "grad_norm": 2.395532837665203, "learning_rate": 3.1363948543342827e-06, "loss": 0.2858, "step": 30216 }, { "epoch": 2.2457822370865848, "grad_norm": 2.466840108777202, "learning_rate": 3.1358113449152396e-06, "loss": 0.3252, "step": 30217 }, { "epoch": 2.245856558900037, "grad_norm": 2.0531011206991994, "learning_rate": 3.1352278796874825e-06, "loss": 0.2929, "step": 30218 }, { "epoch": 2.2459308807134892, "grad_norm": 2.1343675687705423, "learning_rate": 3.1346444586547765e-06, "loss": 0.2134, "step": 30219 }, { "epoch": 2.2460052025269417, "grad_norm": 1.958852150809189, "learning_rate": 3.1340610818208693e-06, "loss": 0.2818, "step": 30220 }, { "epoch": 2.2460795243403937, "grad_norm": 3.0791099293967394, "learning_rate": 3.133477749189525e-06, "loss": 0.3234, "step": 30221 }, { "epoch": 2.246153846153846, "grad_norm": 2.4380630032841646, "learning_rate": 3.132894460764494e-06, "loss": 0.318, "step": 30222 }, { "epoch": 2.246228167967298, "grad_norm": 2.3508775854635653, "learning_rate": 3.1323112165495284e-06, "loss": 0.3342, "step": 30223 }, { "epoch": 2.2463024897807506, "grad_norm": 2.298123261165079, "learning_rate": 3.13172801654839e-06, "loss": 0.3099, "step": 30224 }, { "epoch": 2.246376811594203, "grad_norm": 2.2454280395031785, "learning_rate": 3.131144860764831e-06, "loss": 0.273, "step": 30225 }, { "epoch": 2.246451133407655, "grad_norm": 2.582340044645913, "learning_rate": 3.130561749202602e-06, "loss": 0.4196, "step": 30226 }, { "epoch": 2.2465254552211076, "grad_norm": 2.36200300320545, "learning_rate": 3.1299786818654654e-06, "loss": 0.2798, "step": 30227 }, { "epoch": 2.2465997770345596, "grad_norm": 2.7520850871060434, "learning_rate": 3.1293956587571638e-06, "loss": 0.3574, "step": 30228 }, { "epoch": 2.246674098848012, "grad_norm": 2.5494687663395488, "learning_rate": 3.12881267988146e-06, "loss": 0.3449, "step": 30229 }, { "epoch": 2.246748420661464, "grad_norm": 2.0314522789752516, "learning_rate": 3.1282297452420996e-06, "loss": 0.2868, "step": 30230 }, { "epoch": 2.2468227424749165, "grad_norm": 2.7610086263742466, "learning_rate": 3.127646854842843e-06, "loss": 0.3888, "step": 30231 }, { "epoch": 2.2468970642883686, "grad_norm": 2.418169169858663, "learning_rate": 3.1270640086874392e-06, "loss": 0.3097, "step": 30232 }, { "epoch": 2.246971386101821, "grad_norm": 2.495789057934703, "learning_rate": 3.1264812067796378e-06, "loss": 0.3222, "step": 30233 }, { "epoch": 2.247045707915273, "grad_norm": 1.826281154899435, "learning_rate": 3.125898449123197e-06, "loss": 0.2862, "step": 30234 }, { "epoch": 2.2471200297287255, "grad_norm": 2.437761677317525, "learning_rate": 3.125315735721862e-06, "loss": 0.2756, "step": 30235 }, { "epoch": 2.2471943515421775, "grad_norm": 2.3372797984561564, "learning_rate": 3.124733066579392e-06, "loss": 0.3026, "step": 30236 }, { "epoch": 2.24726867335563, "grad_norm": 2.493148802330228, "learning_rate": 3.1241504416995317e-06, "loss": 0.2492, "step": 30237 }, { "epoch": 2.247342995169082, "grad_norm": 2.509866140316541, "learning_rate": 3.1235678610860355e-06, "loss": 0.3177, "step": 30238 }, { "epoch": 2.2474173169825344, "grad_norm": 2.3448438135795375, "learning_rate": 3.1229853247426524e-06, "loss": 0.2113, "step": 30239 }, { "epoch": 2.2474916387959865, "grad_norm": 3.139355967309906, "learning_rate": 3.122402832673129e-06, "loss": 0.3266, "step": 30240 }, { "epoch": 2.247565960609439, "grad_norm": 2.248470554328824, "learning_rate": 3.1218203848812236e-06, "loss": 0.2302, "step": 30241 }, { "epoch": 2.247640282422891, "grad_norm": 2.1471717832920323, "learning_rate": 3.121237981370677e-06, "loss": 0.3023, "step": 30242 }, { "epoch": 2.2477146042363434, "grad_norm": 3.0196054006478157, "learning_rate": 3.1206556221452477e-06, "loss": 0.363, "step": 30243 }, { "epoch": 2.2477889260497954, "grad_norm": 2.294013544954151, "learning_rate": 3.120073307208681e-06, "loss": 0.3148, "step": 30244 }, { "epoch": 2.247863247863248, "grad_norm": 2.1717783498164165, "learning_rate": 3.119491036564721e-06, "loss": 0.2135, "step": 30245 }, { "epoch": 2.2479375696767, "grad_norm": 2.3339792291153043, "learning_rate": 3.118908810217125e-06, "loss": 0.2556, "step": 30246 }, { "epoch": 2.2480118914901523, "grad_norm": 2.8428874180632326, "learning_rate": 3.1183266281696356e-06, "loss": 0.2344, "step": 30247 }, { "epoch": 2.248086213303605, "grad_norm": 2.4338829043180685, "learning_rate": 3.117744490426e-06, "loss": 0.3303, "step": 30248 }, { "epoch": 2.248160535117057, "grad_norm": 2.0552963620566813, "learning_rate": 3.1171623969899744e-06, "loss": 0.2215, "step": 30249 }, { "epoch": 2.2482348569305093, "grad_norm": 1.9700779631749836, "learning_rate": 3.1165803478652945e-06, "loss": 0.217, "step": 30250 }, { "epoch": 2.2483091787439613, "grad_norm": 2.8016653792810136, "learning_rate": 3.115998343055716e-06, "loss": 0.3316, "step": 30251 }, { "epoch": 2.2483835005574138, "grad_norm": 1.6997296227081626, "learning_rate": 3.1154163825649796e-06, "loss": 0.2082, "step": 30252 }, { "epoch": 2.2484578223708658, "grad_norm": 2.659502757047747, "learning_rate": 3.114834466396839e-06, "loss": 0.3151, "step": 30253 }, { "epoch": 2.2485321441843182, "grad_norm": 3.3329217293984197, "learning_rate": 3.1142525945550363e-06, "loss": 0.2292, "step": 30254 }, { "epoch": 2.2486064659977703, "grad_norm": 2.611030380722532, "learning_rate": 3.113670767043314e-06, "loss": 0.3213, "step": 30255 }, { "epoch": 2.2486807878112227, "grad_norm": 2.6645164674025548, "learning_rate": 3.1130889838654265e-06, "loss": 0.3632, "step": 30256 }, { "epoch": 2.2487551096246747, "grad_norm": 2.0041051055151975, "learning_rate": 3.1125072450251113e-06, "loss": 0.2515, "step": 30257 }, { "epoch": 2.248829431438127, "grad_norm": 2.487013963987216, "learning_rate": 3.1119255505261205e-06, "loss": 0.2786, "step": 30258 }, { "epoch": 2.248903753251579, "grad_norm": 2.160730378574438, "learning_rate": 3.1113439003721958e-06, "loss": 0.2981, "step": 30259 }, { "epoch": 2.2489780750650317, "grad_norm": 2.357935683427215, "learning_rate": 3.1107622945670777e-06, "loss": 0.2919, "step": 30260 }, { "epoch": 2.2490523968784837, "grad_norm": 2.3886768139552053, "learning_rate": 3.1101807331145216e-06, "loss": 0.2475, "step": 30261 }, { "epoch": 2.249126718691936, "grad_norm": 2.2175804281776856, "learning_rate": 3.1095992160182573e-06, "loss": 0.263, "step": 30262 }, { "epoch": 2.249201040505388, "grad_norm": 3.3227838354677153, "learning_rate": 3.109017743282039e-06, "loss": 0.3546, "step": 30263 }, { "epoch": 2.2492753623188406, "grad_norm": 2.01894299592755, "learning_rate": 3.1084363149096075e-06, "loss": 0.2263, "step": 30264 }, { "epoch": 2.2493496841322926, "grad_norm": 2.574971090882045, "learning_rate": 3.1078549309047025e-06, "loss": 0.3563, "step": 30265 }, { "epoch": 2.249424005945745, "grad_norm": 2.333281379188578, "learning_rate": 3.1072735912710716e-06, "loss": 0.2939, "step": 30266 }, { "epoch": 2.2494983277591976, "grad_norm": 2.3409887488412706, "learning_rate": 3.1066922960124533e-06, "loss": 0.3282, "step": 30267 }, { "epoch": 2.2495726495726496, "grad_norm": 1.9935719969480552, "learning_rate": 3.106111045132596e-06, "loss": 0.2495, "step": 30268 }, { "epoch": 2.2496469713861016, "grad_norm": 2.0151135144473824, "learning_rate": 3.1055298386352385e-06, "loss": 0.2571, "step": 30269 }, { "epoch": 2.249721293199554, "grad_norm": 2.852925147968323, "learning_rate": 3.104948676524118e-06, "loss": 0.2768, "step": 30270 }, { "epoch": 2.2497956150130065, "grad_norm": 2.0958400094740766, "learning_rate": 3.104367558802983e-06, "loss": 0.177, "step": 30271 }, { "epoch": 2.2498699368264585, "grad_norm": 2.407699930608118, "learning_rate": 3.103786485475573e-06, "loss": 0.2715, "step": 30272 }, { "epoch": 2.249944258639911, "grad_norm": 3.1420361039483082, "learning_rate": 3.103205456545627e-06, "loss": 0.3102, "step": 30273 }, { "epoch": 2.250018580453363, "grad_norm": 2.5425144911914357, "learning_rate": 3.102624472016883e-06, "loss": 0.2858, "step": 30274 }, { "epoch": 2.2500929022668155, "grad_norm": 2.511320516663189, "learning_rate": 3.1020435318930884e-06, "loss": 0.3149, "step": 30275 }, { "epoch": 2.2501672240802675, "grad_norm": 2.518829453553083, "learning_rate": 3.1014626361779797e-06, "loss": 0.3144, "step": 30276 }, { "epoch": 2.25024154589372, "grad_norm": 1.8530915529637395, "learning_rate": 3.1008817848752936e-06, "loss": 0.1475, "step": 30277 }, { "epoch": 2.250315867707172, "grad_norm": 2.5664630938155066, "learning_rate": 3.100300977988775e-06, "loss": 0.3265, "step": 30278 }, { "epoch": 2.2503901895206244, "grad_norm": 2.2164433639398577, "learning_rate": 3.0997202155221584e-06, "loss": 0.2535, "step": 30279 }, { "epoch": 2.2504645113340764, "grad_norm": 3.5473298148866426, "learning_rate": 3.099139497479188e-06, "loss": 0.249, "step": 30280 }, { "epoch": 2.250538833147529, "grad_norm": 2.2914457053707924, "learning_rate": 3.098558823863599e-06, "loss": 0.2998, "step": 30281 }, { "epoch": 2.250613154960981, "grad_norm": 2.279987079835385, "learning_rate": 3.097978194679128e-06, "loss": 0.307, "step": 30282 }, { "epoch": 2.2506874767744334, "grad_norm": 2.0905435935381997, "learning_rate": 3.09739760992952e-06, "loss": 0.2457, "step": 30283 }, { "epoch": 2.2507617985878854, "grad_norm": 2.4047805402938764, "learning_rate": 3.0968170696185027e-06, "loss": 0.2952, "step": 30284 }, { "epoch": 2.250836120401338, "grad_norm": 4.103568127180926, "learning_rate": 3.0962365737498225e-06, "loss": 0.2231, "step": 30285 }, { "epoch": 2.25091044221479, "grad_norm": 1.8830112861785668, "learning_rate": 3.095656122327212e-06, "loss": 0.268, "step": 30286 }, { "epoch": 2.2509847640282423, "grad_norm": 2.0143809806714885, "learning_rate": 3.0950757153544065e-06, "loss": 0.2315, "step": 30287 }, { "epoch": 2.2510590858416943, "grad_norm": 2.248411656335882, "learning_rate": 3.0944953528351485e-06, "loss": 0.247, "step": 30288 }, { "epoch": 2.251133407655147, "grad_norm": 2.7245492793063457, "learning_rate": 3.0939150347731685e-06, "loss": 0.2291, "step": 30289 }, { "epoch": 2.2512077294685993, "grad_norm": 2.3220023960645872, "learning_rate": 3.0933347611722076e-06, "loss": 0.2706, "step": 30290 }, { "epoch": 2.2512820512820513, "grad_norm": 2.1431558046398296, "learning_rate": 3.0927545320359986e-06, "loss": 0.2675, "step": 30291 }, { "epoch": 2.2513563730955033, "grad_norm": 2.2126184679573266, "learning_rate": 3.092174347368275e-06, "loss": 0.2651, "step": 30292 }, { "epoch": 2.2514306949089558, "grad_norm": 2.257925208485797, "learning_rate": 3.091594207172778e-06, "loss": 0.2653, "step": 30293 }, { "epoch": 2.251505016722408, "grad_norm": 2.1855909382103555, "learning_rate": 3.0910141114532357e-06, "loss": 0.2461, "step": 30294 }, { "epoch": 2.2515793385358602, "grad_norm": 1.9951699553539815, "learning_rate": 3.090434060213392e-06, "loss": 0.2144, "step": 30295 }, { "epoch": 2.2516536603493127, "grad_norm": 2.027846441740422, "learning_rate": 3.08985405345697e-06, "loss": 0.2551, "step": 30296 }, { "epoch": 2.2517279821627647, "grad_norm": 2.2919218025161667, "learning_rate": 3.0892740911877105e-06, "loss": 0.3134, "step": 30297 }, { "epoch": 2.251802303976217, "grad_norm": 2.396263462158265, "learning_rate": 3.088694173409348e-06, "loss": 0.2676, "step": 30298 }, { "epoch": 2.251876625789669, "grad_norm": 1.9704657902491551, "learning_rate": 3.0881143001256085e-06, "loss": 0.2225, "step": 30299 }, { "epoch": 2.2519509476031216, "grad_norm": 3.044793961820985, "learning_rate": 3.0875344713402356e-06, "loss": 0.3647, "step": 30300 }, { "epoch": 2.2520252694165737, "grad_norm": 2.2223435430241723, "learning_rate": 3.0869546870569523e-06, "loss": 0.3419, "step": 30301 }, { "epoch": 2.252099591230026, "grad_norm": 2.459250848094327, "learning_rate": 3.0863749472795e-06, "loss": 0.3329, "step": 30302 }, { "epoch": 2.252173913043478, "grad_norm": 2.629646536955566, "learning_rate": 3.0857952520116076e-06, "loss": 0.3446, "step": 30303 }, { "epoch": 2.2522482348569306, "grad_norm": 2.4077090737651177, "learning_rate": 3.0852156012570024e-06, "loss": 0.239, "step": 30304 }, { "epoch": 2.2523225566703826, "grad_norm": 2.3126442382601384, "learning_rate": 3.0846359950194236e-06, "loss": 0.2927, "step": 30305 }, { "epoch": 2.252396878483835, "grad_norm": 3.8467298726848447, "learning_rate": 3.0840564333026e-06, "loss": 0.2994, "step": 30306 }, { "epoch": 2.252471200297287, "grad_norm": 2.449681364129361, "learning_rate": 3.083476916110262e-06, "loss": 0.2502, "step": 30307 }, { "epoch": 2.2525455221107396, "grad_norm": 2.2742976111619075, "learning_rate": 3.082897443446141e-06, "loss": 0.3081, "step": 30308 }, { "epoch": 2.2526198439241916, "grad_norm": 2.067570126872636, "learning_rate": 3.0823180153139633e-06, "loss": 0.2975, "step": 30309 }, { "epoch": 2.252694165737644, "grad_norm": 2.3202591582846015, "learning_rate": 3.081738631717468e-06, "loss": 0.3367, "step": 30310 }, { "epoch": 2.252768487551096, "grad_norm": 2.411024678506123, "learning_rate": 3.081159292660376e-06, "loss": 0.3101, "step": 30311 }, { "epoch": 2.2528428093645485, "grad_norm": 2.440926020506049, "learning_rate": 3.0805799981464247e-06, "loss": 0.4178, "step": 30312 }, { "epoch": 2.252917131178001, "grad_norm": 2.431291154111626, "learning_rate": 3.0800007481793405e-06, "loss": 0.2757, "step": 30313 }, { "epoch": 2.252991452991453, "grad_norm": 2.140815632818226, "learning_rate": 3.0794215427628494e-06, "loss": 0.3021, "step": 30314 }, { "epoch": 2.253065774804905, "grad_norm": 2.240805933997617, "learning_rate": 3.0788423819006874e-06, "loss": 0.2718, "step": 30315 }, { "epoch": 2.2531400966183575, "grad_norm": 2.7322879072896833, "learning_rate": 3.078263265596575e-06, "loss": 0.3547, "step": 30316 }, { "epoch": 2.25321441843181, "grad_norm": 2.2733335709070364, "learning_rate": 3.07768419385425e-06, "loss": 0.308, "step": 30317 }, { "epoch": 2.253288740245262, "grad_norm": 2.333582973823247, "learning_rate": 3.07710516667743e-06, "loss": 0.3518, "step": 30318 }, { "epoch": 2.2533630620587144, "grad_norm": 2.372170777353287, "learning_rate": 3.07652618406985e-06, "loss": 0.246, "step": 30319 }, { "epoch": 2.2534373838721664, "grad_norm": 2.3760239448503517, "learning_rate": 3.075947246035236e-06, "loss": 0.314, "step": 30320 }, { "epoch": 2.253511705685619, "grad_norm": 2.2884180624884216, "learning_rate": 3.0753683525773103e-06, "loss": 0.2626, "step": 30321 }, { "epoch": 2.253586027499071, "grad_norm": 2.4639841341466653, "learning_rate": 3.0747895036998075e-06, "loss": 0.3461, "step": 30322 }, { "epoch": 2.2536603493125233, "grad_norm": 2.157974624794296, "learning_rate": 3.074210699406448e-06, "loss": 0.223, "step": 30323 }, { "epoch": 2.2537346711259754, "grad_norm": 1.5976651899182903, "learning_rate": 3.073631939700963e-06, "loss": 0.1886, "step": 30324 }, { "epoch": 2.253808992939428, "grad_norm": 2.3421991664495154, "learning_rate": 3.073053224587077e-06, "loss": 0.308, "step": 30325 }, { "epoch": 2.25388331475288, "grad_norm": 2.1244881006955505, "learning_rate": 3.0724745540685106e-06, "loss": 0.2692, "step": 30326 }, { "epoch": 2.2539576365663323, "grad_norm": 2.2007127277503344, "learning_rate": 3.0718959281489967e-06, "loss": 0.3208, "step": 30327 }, { "epoch": 2.2540319583797843, "grad_norm": 2.330989944518235, "learning_rate": 3.0713173468322587e-06, "loss": 0.259, "step": 30328 }, { "epoch": 2.2541062801932368, "grad_norm": 2.322836500972847, "learning_rate": 3.0707388101220194e-06, "loss": 0.2706, "step": 30329 }, { "epoch": 2.254180602006689, "grad_norm": 2.4387991888950906, "learning_rate": 3.070160318022004e-06, "loss": 0.2724, "step": 30330 }, { "epoch": 2.2542549238201413, "grad_norm": 2.65294417200673, "learning_rate": 3.0695818705359337e-06, "loss": 0.3165, "step": 30331 }, { "epoch": 2.2543292456335933, "grad_norm": 2.7927897266023596, "learning_rate": 3.0690034676675396e-06, "loss": 0.3093, "step": 30332 }, { "epoch": 2.2544035674470457, "grad_norm": 2.254165917703854, "learning_rate": 3.0684251094205375e-06, "loss": 0.2472, "step": 30333 }, { "epoch": 2.2544778892604977, "grad_norm": 1.9577841955532451, "learning_rate": 3.067846795798658e-06, "loss": 0.2854, "step": 30334 }, { "epoch": 2.25455221107395, "grad_norm": 2.301641670913982, "learning_rate": 3.0672685268056213e-06, "loss": 0.2782, "step": 30335 }, { "epoch": 2.2546265328874027, "grad_norm": 1.8929473057191317, "learning_rate": 3.0666903024451476e-06, "loss": 0.203, "step": 30336 }, { "epoch": 2.2547008547008547, "grad_norm": 1.9334103157916664, "learning_rate": 3.0661121227209647e-06, "loss": 0.3018, "step": 30337 }, { "epoch": 2.2547751765143067, "grad_norm": 2.074467169684675, "learning_rate": 3.0655339876367895e-06, "loss": 0.3372, "step": 30338 }, { "epoch": 2.254849498327759, "grad_norm": 2.2053106456242237, "learning_rate": 3.064955897196349e-06, "loss": 0.236, "step": 30339 }, { "epoch": 2.2549238201412116, "grad_norm": 2.6291400267974288, "learning_rate": 3.0643778514033627e-06, "loss": 0.2841, "step": 30340 }, { "epoch": 2.2549981419546636, "grad_norm": 3.708468883952336, "learning_rate": 3.063799850261553e-06, "loss": 0.2835, "step": 30341 }, { "epoch": 2.255072463768116, "grad_norm": 2.676597031153034, "learning_rate": 3.063221893774638e-06, "loss": 0.2969, "step": 30342 }, { "epoch": 2.255146785581568, "grad_norm": 2.1228430997032066, "learning_rate": 3.0626439819463394e-06, "loss": 0.2114, "step": 30343 }, { "epoch": 2.2552211073950206, "grad_norm": 2.0628103450225255, "learning_rate": 3.0620661147803808e-06, "loss": 0.2897, "step": 30344 }, { "epoch": 2.2552954292084726, "grad_norm": 2.3151948869248127, "learning_rate": 3.0614882922804777e-06, "loss": 0.3089, "step": 30345 }, { "epoch": 2.255369751021925, "grad_norm": 2.093727532859312, "learning_rate": 3.060910514450356e-06, "loss": 0.3532, "step": 30346 }, { "epoch": 2.255444072835377, "grad_norm": 2.2070854755723213, "learning_rate": 3.060332781293733e-06, "loss": 0.2514, "step": 30347 }, { "epoch": 2.2555183946488295, "grad_norm": 2.239979554548347, "learning_rate": 3.059755092814324e-06, "loss": 0.2807, "step": 30348 }, { "epoch": 2.2555927164622815, "grad_norm": 2.450169776863072, "learning_rate": 3.0591774490158545e-06, "loss": 0.29, "step": 30349 }, { "epoch": 2.255667038275734, "grad_norm": 2.212590737525481, "learning_rate": 3.058599849902041e-06, "loss": 0.2927, "step": 30350 }, { "epoch": 2.255741360089186, "grad_norm": 1.7920391264704825, "learning_rate": 3.0580222954766003e-06, "loss": 0.1993, "step": 30351 }, { "epoch": 2.2558156819026385, "grad_norm": 2.3160397434801343, "learning_rate": 3.057444785743253e-06, "loss": 0.369, "step": 30352 }, { "epoch": 2.2558900037160905, "grad_norm": 1.9829435737636036, "learning_rate": 3.0568673207057122e-06, "loss": 0.2916, "step": 30353 }, { "epoch": 2.255964325529543, "grad_norm": 3.919446425016343, "learning_rate": 3.0562899003677026e-06, "loss": 0.3832, "step": 30354 }, { "epoch": 2.2560386473429954, "grad_norm": 1.9998776220026742, "learning_rate": 3.0557125247329354e-06, "loss": 0.2447, "step": 30355 }, { "epoch": 2.2561129691564474, "grad_norm": 2.6926866031209595, "learning_rate": 3.0551351938051343e-06, "loss": 0.2112, "step": 30356 }, { "epoch": 2.2561872909698995, "grad_norm": 2.265682463809007, "learning_rate": 3.054557907588012e-06, "loss": 0.2446, "step": 30357 }, { "epoch": 2.256261612783352, "grad_norm": 2.901069851381906, "learning_rate": 3.053980666085282e-06, "loss": 0.3371, "step": 30358 }, { "epoch": 2.2563359345968044, "grad_norm": 2.1550529126472844, "learning_rate": 3.0534034693006675e-06, "loss": 0.2325, "step": 30359 }, { "epoch": 2.2564102564102564, "grad_norm": 2.1844220233795575, "learning_rate": 3.052826317237878e-06, "loss": 0.1961, "step": 30360 }, { "epoch": 2.256484578223709, "grad_norm": 2.3221871329205057, "learning_rate": 3.052249209900635e-06, "loss": 0.2445, "step": 30361 }, { "epoch": 2.256558900037161, "grad_norm": 2.103779752543328, "learning_rate": 3.0516721472926514e-06, "loss": 0.2642, "step": 30362 }, { "epoch": 2.2566332218506133, "grad_norm": 3.103044824974816, "learning_rate": 3.0510951294176418e-06, "loss": 0.2867, "step": 30363 }, { "epoch": 2.2567075436640653, "grad_norm": 1.9549169271635143, "learning_rate": 3.050518156279322e-06, "loss": 0.1851, "step": 30364 }, { "epoch": 2.256781865477518, "grad_norm": 2.1270376397952995, "learning_rate": 3.0499412278814013e-06, "loss": 0.2842, "step": 30365 }, { "epoch": 2.25685618729097, "grad_norm": 3.7043085959398745, "learning_rate": 3.0493643442276022e-06, "loss": 0.3093, "step": 30366 }, { "epoch": 2.2569305091044223, "grad_norm": 2.6759138679157175, "learning_rate": 3.048787505321631e-06, "loss": 0.2682, "step": 30367 }, { "epoch": 2.2570048309178743, "grad_norm": 2.3020536857702756, "learning_rate": 3.048210711167209e-06, "loss": 0.2911, "step": 30368 }, { "epoch": 2.2570791527313268, "grad_norm": 1.9997702520645033, "learning_rate": 3.047633961768046e-06, "loss": 0.2638, "step": 30369 }, { "epoch": 2.2571534745447788, "grad_norm": 2.773392777601374, "learning_rate": 3.047057257127851e-06, "loss": 0.3733, "step": 30370 }, { "epoch": 2.2572277963582312, "grad_norm": 2.5420610762718474, "learning_rate": 3.046480597250344e-06, "loss": 0.3362, "step": 30371 }, { "epoch": 2.2573021181716832, "grad_norm": 2.4837712359553374, "learning_rate": 3.0459039821392346e-06, "loss": 0.2857, "step": 30372 }, { "epoch": 2.2573764399851357, "grad_norm": 2.425525054807345, "learning_rate": 3.04532741179823e-06, "loss": 0.297, "step": 30373 }, { "epoch": 2.2574507617985877, "grad_norm": 3.4384203039864283, "learning_rate": 3.044750886231055e-06, "loss": 0.3067, "step": 30374 }, { "epoch": 2.25752508361204, "grad_norm": 2.5420771592806988, "learning_rate": 3.0441744054414048e-06, "loss": 0.3201, "step": 30375 }, { "epoch": 2.257599405425492, "grad_norm": 2.1596861397370053, "learning_rate": 3.043597969433003e-06, "loss": 0.2554, "step": 30376 }, { "epoch": 2.2576737272389447, "grad_norm": 2.226855693547686, "learning_rate": 3.043021578209553e-06, "loss": 0.2608, "step": 30377 }, { "epoch": 2.257748049052397, "grad_norm": 2.4079083699585984, "learning_rate": 3.0424452317747723e-06, "loss": 0.3329, "step": 30378 }, { "epoch": 2.257822370865849, "grad_norm": 3.037799676376373, "learning_rate": 3.041868930132369e-06, "loss": 0.2385, "step": 30379 }, { "epoch": 2.257896692679301, "grad_norm": 3.008584445815473, "learning_rate": 3.041292673286048e-06, "loss": 0.2687, "step": 30380 }, { "epoch": 2.2579710144927536, "grad_norm": 2.584842273361778, "learning_rate": 3.040716461239528e-06, "loss": 0.3173, "step": 30381 }, { "epoch": 2.258045336306206, "grad_norm": 2.1174699252029345, "learning_rate": 3.040140293996511e-06, "loss": 0.2734, "step": 30382 }, { "epoch": 2.258119658119658, "grad_norm": 4.5770416583121944, "learning_rate": 3.0395641715607115e-06, "loss": 0.3089, "step": 30383 }, { "epoch": 2.2581939799331106, "grad_norm": 2.5089609618719484, "learning_rate": 3.0389880939358384e-06, "loss": 0.2721, "step": 30384 }, { "epoch": 2.2582683017465626, "grad_norm": 2.1371639669301494, "learning_rate": 3.0384120611255972e-06, "loss": 0.2078, "step": 30385 }, { "epoch": 2.258342623560015, "grad_norm": 2.2805553980326705, "learning_rate": 3.0378360731336997e-06, "loss": 0.2712, "step": 30386 }, { "epoch": 2.258416945373467, "grad_norm": 2.6301271985475516, "learning_rate": 3.0372601299638483e-06, "loss": 0.364, "step": 30387 }, { "epoch": 2.2584912671869195, "grad_norm": 2.4880071954499536, "learning_rate": 3.0366842316197576e-06, "loss": 0.3644, "step": 30388 }, { "epoch": 2.2585655890003715, "grad_norm": 2.351995075095303, "learning_rate": 3.0361083781051326e-06, "loss": 0.2649, "step": 30389 }, { "epoch": 2.258639910813824, "grad_norm": 1.9383044117736328, "learning_rate": 3.035532569423678e-06, "loss": 0.2208, "step": 30390 }, { "epoch": 2.258714232627276, "grad_norm": 2.700694731338296, "learning_rate": 3.0349568055791068e-06, "loss": 0.2996, "step": 30391 }, { "epoch": 2.2587885544407285, "grad_norm": 2.1622961244549743, "learning_rate": 3.034381086575118e-06, "loss": 0.2649, "step": 30392 }, { "epoch": 2.2588628762541805, "grad_norm": 2.030589845953995, "learning_rate": 3.033805412415426e-06, "loss": 0.2401, "step": 30393 }, { "epoch": 2.258937198067633, "grad_norm": 2.547355011502024, "learning_rate": 3.0332297831037338e-06, "loss": 0.3486, "step": 30394 }, { "epoch": 2.259011519881085, "grad_norm": 2.0037116335045804, "learning_rate": 3.0326541986437428e-06, "loss": 0.2476, "step": 30395 }, { "epoch": 2.2590858416945374, "grad_norm": 2.2717181468717715, "learning_rate": 3.0320786590391693e-06, "loss": 0.2631, "step": 30396 }, { "epoch": 2.2591601635079894, "grad_norm": 2.0695888497120873, "learning_rate": 3.0315031642937054e-06, "loss": 0.3054, "step": 30397 }, { "epoch": 2.259234485321442, "grad_norm": 1.9627122372547174, "learning_rate": 3.030927714411066e-06, "loss": 0.2135, "step": 30398 }, { "epoch": 2.259308807134894, "grad_norm": 2.2853079390187245, "learning_rate": 3.0303523093949494e-06, "loss": 0.291, "step": 30399 }, { "epoch": 2.2593831289483464, "grad_norm": 2.65842045011412, "learning_rate": 3.0297769492490657e-06, "loss": 0.3394, "step": 30400 }, { "epoch": 2.259457450761799, "grad_norm": 2.5508360633630978, "learning_rate": 3.0292016339771156e-06, "loss": 0.3031, "step": 30401 }, { "epoch": 2.259531772575251, "grad_norm": 2.4002692986158065, "learning_rate": 3.0286263635828016e-06, "loss": 0.2848, "step": 30402 }, { "epoch": 2.259606094388703, "grad_norm": 2.164064913437146, "learning_rate": 3.028051138069833e-06, "loss": 0.2397, "step": 30403 }, { "epoch": 2.2596804162021553, "grad_norm": 5.433591778147472, "learning_rate": 3.0274759574419053e-06, "loss": 0.2514, "step": 30404 }, { "epoch": 2.2597547380156078, "grad_norm": 2.0415020736337106, "learning_rate": 3.0269008217027283e-06, "loss": 0.2872, "step": 30405 }, { "epoch": 2.25982905982906, "grad_norm": 2.59766970281468, "learning_rate": 3.0263257308560035e-06, "loss": 0.3602, "step": 30406 }, { "epoch": 2.2599033816425123, "grad_norm": 2.058570827173967, "learning_rate": 3.0257506849054275e-06, "loss": 0.2557, "step": 30407 }, { "epoch": 2.2599777034559643, "grad_norm": 2.306183269370663, "learning_rate": 3.0251756838547133e-06, "loss": 0.2733, "step": 30408 }, { "epoch": 2.2600520252694167, "grad_norm": 2.569540288406852, "learning_rate": 3.0246007277075495e-06, "loss": 0.2818, "step": 30409 }, { "epoch": 2.2601263470828687, "grad_norm": 2.1492239030398856, "learning_rate": 3.0240258164676484e-06, "loss": 0.2988, "step": 30410 }, { "epoch": 2.260200668896321, "grad_norm": 2.664268098230559, "learning_rate": 3.0234509501387064e-06, "loss": 0.3455, "step": 30411 }, { "epoch": 2.2602749907097732, "grad_norm": 1.992814483690609, "learning_rate": 3.0228761287244225e-06, "loss": 0.1831, "step": 30412 }, { "epoch": 2.2603493125232257, "grad_norm": 2.315838186867857, "learning_rate": 3.022301352228503e-06, "loss": 0.305, "step": 30413 }, { "epoch": 2.2604236343366777, "grad_norm": 1.6922884548297754, "learning_rate": 3.0217266206546423e-06, "loss": 0.1932, "step": 30414 }, { "epoch": 2.26049795615013, "grad_norm": 2.393819777873526, "learning_rate": 3.0211519340065466e-06, "loss": 0.2902, "step": 30415 }, { "epoch": 2.260572277963582, "grad_norm": 2.6560405829084948, "learning_rate": 3.0205772922879117e-06, "loss": 0.3262, "step": 30416 }, { "epoch": 2.2606465997770346, "grad_norm": 2.001909273609865, "learning_rate": 3.0200026955024363e-06, "loss": 0.2541, "step": 30417 }, { "epoch": 2.2607209215904867, "grad_norm": 2.9692888483725284, "learning_rate": 3.0194281436538243e-06, "loss": 0.3628, "step": 30418 }, { "epoch": 2.260795243403939, "grad_norm": 2.224910639800984, "learning_rate": 3.018853636745771e-06, "loss": 0.244, "step": 30419 }, { "epoch": 2.260869565217391, "grad_norm": 2.169889473204407, "learning_rate": 3.018279174781976e-06, "loss": 0.277, "step": 30420 }, { "epoch": 2.2609438870308436, "grad_norm": 2.240836946802825, "learning_rate": 3.0177047577661347e-06, "loss": 0.2702, "step": 30421 }, { "epoch": 2.2610182088442956, "grad_norm": 2.235245953969105, "learning_rate": 3.017130385701951e-06, "loss": 0.2258, "step": 30422 }, { "epoch": 2.261092530657748, "grad_norm": 2.3164704083002023, "learning_rate": 3.0165560585931197e-06, "loss": 0.297, "step": 30423 }, { "epoch": 2.2611668524712005, "grad_norm": 2.294879888101739, "learning_rate": 3.015981776443334e-06, "loss": 0.3473, "step": 30424 }, { "epoch": 2.2612411742846525, "grad_norm": 2.212520320978096, "learning_rate": 3.0154075392562998e-06, "loss": 0.3253, "step": 30425 }, { "epoch": 2.2613154960981046, "grad_norm": 2.0513777083723963, "learning_rate": 3.014833347035705e-06, "loss": 0.2752, "step": 30426 }, { "epoch": 2.261389817911557, "grad_norm": 2.578199257090688, "learning_rate": 3.014259199785254e-06, "loss": 0.2976, "step": 30427 }, { "epoch": 2.2614641397250095, "grad_norm": 2.5237533373142873, "learning_rate": 3.013685097508641e-06, "loss": 0.3389, "step": 30428 }, { "epoch": 2.2615384615384615, "grad_norm": 2.0214573069483577, "learning_rate": 3.0131110402095566e-06, "loss": 0.2721, "step": 30429 }, { "epoch": 2.261612783351914, "grad_norm": 1.8713573056799122, "learning_rate": 3.0125370278917077e-06, "loss": 0.2274, "step": 30430 }, { "epoch": 2.261687105165366, "grad_norm": 2.2645880492202357, "learning_rate": 3.0119630605587757e-06, "loss": 0.3256, "step": 30431 }, { "epoch": 2.2617614269788184, "grad_norm": 2.49824698284328, "learning_rate": 3.0113891382144665e-06, "loss": 0.4014, "step": 30432 }, { "epoch": 2.2618357487922705, "grad_norm": 2.3009451562414776, "learning_rate": 3.0108152608624717e-06, "loss": 0.2427, "step": 30433 }, { "epoch": 2.261910070605723, "grad_norm": 2.322024845001815, "learning_rate": 3.010241428506482e-06, "loss": 0.3815, "step": 30434 }, { "epoch": 2.261984392419175, "grad_norm": 3.041202197318035, "learning_rate": 3.009667641150199e-06, "loss": 0.304, "step": 30435 }, { "epoch": 2.2620587142326274, "grad_norm": 2.2019924049815662, "learning_rate": 3.0090938987973094e-06, "loss": 0.2803, "step": 30436 }, { "epoch": 2.2621330360460794, "grad_norm": 2.316071793017319, "learning_rate": 3.0085202014515147e-06, "loss": 0.3372, "step": 30437 }, { "epoch": 2.262207357859532, "grad_norm": 2.4684681274647975, "learning_rate": 3.007946549116503e-06, "loss": 0.3285, "step": 30438 }, { "epoch": 2.262281679672984, "grad_norm": 2.688825866623542, "learning_rate": 3.0073729417959664e-06, "loss": 0.2684, "step": 30439 }, { "epoch": 2.2623560014864363, "grad_norm": 2.2085007917423716, "learning_rate": 3.0067993794936022e-06, "loss": 0.3228, "step": 30440 }, { "epoch": 2.2624303232998884, "grad_norm": 2.570587729820063, "learning_rate": 3.0062258622131013e-06, "loss": 0.3158, "step": 30441 }, { "epoch": 2.262504645113341, "grad_norm": 2.5823259222456776, "learning_rate": 3.0056523899581546e-06, "loss": 0.2975, "step": 30442 }, { "epoch": 2.262578966926793, "grad_norm": 2.7533275703686257, "learning_rate": 3.0050789627324526e-06, "loss": 0.4035, "step": 30443 }, { "epoch": 2.2626532887402453, "grad_norm": 2.1976973454205297, "learning_rate": 3.0045055805396927e-06, "loss": 0.2291, "step": 30444 }, { "epoch": 2.2627276105536973, "grad_norm": 1.7309157477011174, "learning_rate": 3.0039322433835614e-06, "loss": 0.187, "step": 30445 }, { "epoch": 2.2628019323671498, "grad_norm": 2.696150147699914, "learning_rate": 3.00335895126775e-06, "loss": 0.3638, "step": 30446 }, { "epoch": 2.2628762541806022, "grad_norm": 1.8547030723313986, "learning_rate": 3.002785704195953e-06, "loss": 0.2472, "step": 30447 }, { "epoch": 2.2629505759940542, "grad_norm": 2.272268621301383, "learning_rate": 3.002212502171855e-06, "loss": 0.2782, "step": 30448 }, { "epoch": 2.2630248978075063, "grad_norm": 2.7531207654109484, "learning_rate": 3.0016393451991545e-06, "loss": 0.2712, "step": 30449 }, { "epoch": 2.2630992196209587, "grad_norm": 2.529606581738374, "learning_rate": 3.0010662332815353e-06, "loss": 0.295, "step": 30450 }, { "epoch": 2.263173541434411, "grad_norm": 3.113844654417672, "learning_rate": 3.0004931664226854e-06, "loss": 0.3924, "step": 30451 }, { "epoch": 2.263247863247863, "grad_norm": 2.283834321671045, "learning_rate": 2.9999201446263005e-06, "loss": 0.2337, "step": 30452 }, { "epoch": 2.2633221850613157, "grad_norm": 2.3485528048139375, "learning_rate": 2.999347167896067e-06, "loss": 0.2841, "step": 30453 }, { "epoch": 2.2633965068747677, "grad_norm": 2.605592607423546, "learning_rate": 2.998774236235673e-06, "loss": 0.3177, "step": 30454 }, { "epoch": 2.26347082868822, "grad_norm": 1.9540073783040062, "learning_rate": 2.998201349648807e-06, "loss": 0.2636, "step": 30455 }, { "epoch": 2.263545150501672, "grad_norm": 2.5034454195834765, "learning_rate": 2.997628508139155e-06, "loss": 0.2813, "step": 30456 }, { "epoch": 2.2636194723151246, "grad_norm": 1.9044428727293499, "learning_rate": 2.9970557117104104e-06, "loss": 0.2106, "step": 30457 }, { "epoch": 2.2636937941285766, "grad_norm": 2.1126754197285265, "learning_rate": 2.9964829603662537e-06, "loss": 0.281, "step": 30458 }, { "epoch": 2.263768115942029, "grad_norm": 1.8663361170483348, "learning_rate": 2.9959102541103804e-06, "loss": 0.2094, "step": 30459 }, { "epoch": 2.263842437755481, "grad_norm": 2.242500255466642, "learning_rate": 2.9953375929464724e-06, "loss": 0.2911, "step": 30460 }, { "epoch": 2.2639167595689336, "grad_norm": 2.2088409986066253, "learning_rate": 2.994764976878215e-06, "loss": 0.3073, "step": 30461 }, { "epoch": 2.2639910813823856, "grad_norm": 2.746008193530992, "learning_rate": 2.994192405909301e-06, "loss": 0.3061, "step": 30462 }, { "epoch": 2.264065403195838, "grad_norm": 1.8652077556772162, "learning_rate": 2.993619880043409e-06, "loss": 0.2662, "step": 30463 }, { "epoch": 2.26413972500929, "grad_norm": 2.0460241731988202, "learning_rate": 2.9930473992842347e-06, "loss": 0.2528, "step": 30464 }, { "epoch": 2.2642140468227425, "grad_norm": 2.3797161100926356, "learning_rate": 2.992474963635451e-06, "loss": 0.3399, "step": 30465 }, { "epoch": 2.2642883686361945, "grad_norm": 1.6652294070534688, "learning_rate": 2.9919025731007546e-06, "loss": 0.2487, "step": 30466 }, { "epoch": 2.264362690449647, "grad_norm": 2.5104041906184875, "learning_rate": 2.9913302276838242e-06, "loss": 0.3284, "step": 30467 }, { "epoch": 2.264437012263099, "grad_norm": 2.3311568964102225, "learning_rate": 2.9907579273883436e-06, "loss": 0.2719, "step": 30468 }, { "epoch": 2.2645113340765515, "grad_norm": 2.0353165076817694, "learning_rate": 2.9901856722180033e-06, "loss": 0.208, "step": 30469 }, { "epoch": 2.264585655890004, "grad_norm": 2.7155094933440824, "learning_rate": 2.9896134621764806e-06, "loss": 0.3907, "step": 30470 }, { "epoch": 2.264659977703456, "grad_norm": 1.6296890215494988, "learning_rate": 2.9890412972674653e-06, "loss": 0.188, "step": 30471 }, { "epoch": 2.264734299516908, "grad_norm": 1.9228091438958697, "learning_rate": 2.9884691774946395e-06, "loss": 0.2155, "step": 30472 }, { "epoch": 2.2648086213303604, "grad_norm": 2.2322880230650877, "learning_rate": 2.987897102861681e-06, "loss": 0.2628, "step": 30473 }, { "epoch": 2.264882943143813, "grad_norm": 1.8876399767759837, "learning_rate": 2.9873250733722815e-06, "loss": 0.2351, "step": 30474 }, { "epoch": 2.264957264957265, "grad_norm": 2.097357615032186, "learning_rate": 2.986753089030118e-06, "loss": 0.2841, "step": 30475 }, { "epoch": 2.2650315867707174, "grad_norm": 2.3619016485599658, "learning_rate": 2.9861811498388748e-06, "loss": 0.3308, "step": 30476 }, { "epoch": 2.2651059085841694, "grad_norm": 2.528519177008055, "learning_rate": 2.985609255802233e-06, "loss": 0.347, "step": 30477 }, { "epoch": 2.265180230397622, "grad_norm": 2.579615375741848, "learning_rate": 2.985037406923873e-06, "loss": 0.3466, "step": 30478 }, { "epoch": 2.265254552211074, "grad_norm": 2.6784637478312217, "learning_rate": 2.9844656032074805e-06, "loss": 0.3526, "step": 30479 }, { "epoch": 2.2653288740245263, "grad_norm": 2.2254262304071433, "learning_rate": 2.983893844656731e-06, "loss": 0.1975, "step": 30480 }, { "epoch": 2.2654031958379783, "grad_norm": 2.0522893308787253, "learning_rate": 2.9833221312753126e-06, "loss": 0.2635, "step": 30481 }, { "epoch": 2.265477517651431, "grad_norm": 2.827455856745631, "learning_rate": 2.9827504630669012e-06, "loss": 0.3253, "step": 30482 }, { "epoch": 2.265551839464883, "grad_norm": 2.837876415951846, "learning_rate": 2.982178840035176e-06, "loss": 0.3066, "step": 30483 }, { "epoch": 2.2656261612783353, "grad_norm": 1.9857574689640367, "learning_rate": 2.981607262183822e-06, "loss": 0.2736, "step": 30484 }, { "epoch": 2.2657004830917873, "grad_norm": 2.783615853461817, "learning_rate": 2.9810357295165137e-06, "loss": 0.2929, "step": 30485 }, { "epoch": 2.2657748049052397, "grad_norm": 2.574564841697615, "learning_rate": 2.9804642420369355e-06, "loss": 0.2772, "step": 30486 }, { "epoch": 2.2658491267186918, "grad_norm": 1.8923915910710458, "learning_rate": 2.9798927997487647e-06, "loss": 0.2158, "step": 30487 }, { "epoch": 2.2659234485321442, "grad_norm": 2.077450525423226, "learning_rate": 2.9793214026556805e-06, "loss": 0.2632, "step": 30488 }, { "epoch": 2.2659977703455962, "grad_norm": 2.5447980187815733, "learning_rate": 2.97875005076136e-06, "loss": 0.3121, "step": 30489 }, { "epoch": 2.2660720921590487, "grad_norm": 2.6903343576437533, "learning_rate": 2.9781787440694797e-06, "loss": 0.2579, "step": 30490 }, { "epoch": 2.2661464139725007, "grad_norm": 2.4995710054896807, "learning_rate": 2.977607482583723e-06, "loss": 0.2993, "step": 30491 }, { "epoch": 2.266220735785953, "grad_norm": 3.2813499709179, "learning_rate": 2.9770362663077623e-06, "loss": 0.2984, "step": 30492 }, { "epoch": 2.2662950575994056, "grad_norm": 2.577560180053257, "learning_rate": 2.97646509524528e-06, "loss": 0.3479, "step": 30493 }, { "epoch": 2.2663693794128577, "grad_norm": 2.1512126885095566, "learning_rate": 2.9758939693999522e-06, "loss": 0.3275, "step": 30494 }, { "epoch": 2.26644370122631, "grad_norm": 1.7969180053980285, "learning_rate": 2.9753228887754503e-06, "loss": 0.1829, "step": 30495 }, { "epoch": 2.266518023039762, "grad_norm": 1.9895509826082727, "learning_rate": 2.9747518533754592e-06, "loss": 0.2444, "step": 30496 }, { "epoch": 2.2665923448532146, "grad_norm": 2.611532695903253, "learning_rate": 2.974180863203651e-06, "loss": 0.2931, "step": 30497 }, { "epoch": 2.2666666666666666, "grad_norm": 2.530972662560512, "learning_rate": 2.9736099182637012e-06, "loss": 0.2799, "step": 30498 }, { "epoch": 2.266740988480119, "grad_norm": 2.3109146277864827, "learning_rate": 2.973039018559286e-06, "loss": 0.2797, "step": 30499 }, { "epoch": 2.266815310293571, "grad_norm": 2.257234530335624, "learning_rate": 2.9724681640940787e-06, "loss": 0.3288, "step": 30500 }, { "epoch": 2.2668896321070235, "grad_norm": 2.5783814960945852, "learning_rate": 2.97189735487176e-06, "loss": 0.2948, "step": 30501 }, { "epoch": 2.2669639539204756, "grad_norm": 2.177516675192724, "learning_rate": 2.9713265908959977e-06, "loss": 0.2868, "step": 30502 }, { "epoch": 2.267038275733928, "grad_norm": 1.9315777013384854, "learning_rate": 2.9707558721704734e-06, "loss": 0.1703, "step": 30503 }, { "epoch": 2.26711259754738, "grad_norm": 2.6996909160772833, "learning_rate": 2.9701851986988583e-06, "loss": 0.3209, "step": 30504 }, { "epoch": 2.2671869193608325, "grad_norm": 2.9336508812793416, "learning_rate": 2.9696145704848235e-06, "loss": 0.3697, "step": 30505 }, { "epoch": 2.2672612411742845, "grad_norm": 2.968431855990914, "learning_rate": 2.9690439875320475e-06, "loss": 0.3348, "step": 30506 }, { "epoch": 2.267335562987737, "grad_norm": 2.845011998396509, "learning_rate": 2.968473449844198e-06, "loss": 0.3413, "step": 30507 }, { "epoch": 2.267409884801189, "grad_norm": 2.1272022424370545, "learning_rate": 2.9679029574249565e-06, "loss": 0.2478, "step": 30508 }, { "epoch": 2.2674842066146415, "grad_norm": 2.2662912966742033, "learning_rate": 2.9673325102779894e-06, "loss": 0.2962, "step": 30509 }, { "epoch": 2.2675585284280935, "grad_norm": 2.1770870079304103, "learning_rate": 2.966762108406971e-06, "loss": 0.2359, "step": 30510 }, { "epoch": 2.267632850241546, "grad_norm": 2.714725444265943, "learning_rate": 2.9661917518155736e-06, "loss": 0.3143, "step": 30511 }, { "epoch": 2.2677071720549984, "grad_norm": 2.4323060749519136, "learning_rate": 2.9656214405074645e-06, "loss": 0.3264, "step": 30512 }, { "epoch": 2.2677814938684504, "grad_norm": 2.2341376240522037, "learning_rate": 2.9650511744863243e-06, "loss": 0.2626, "step": 30513 }, { "epoch": 2.2678558156819024, "grad_norm": 2.9379966770526913, "learning_rate": 2.9644809537558185e-06, "loss": 0.2953, "step": 30514 }, { "epoch": 2.267930137495355, "grad_norm": 2.279276900913386, "learning_rate": 2.9639107783196165e-06, "loss": 0.2707, "step": 30515 }, { "epoch": 2.2680044593088073, "grad_norm": 2.395209762564999, "learning_rate": 2.963340648181395e-06, "loss": 0.2943, "step": 30516 }, { "epoch": 2.2680787811222594, "grad_norm": 1.9526287743161062, "learning_rate": 2.9627705633448177e-06, "loss": 0.2284, "step": 30517 }, { "epoch": 2.268153102935712, "grad_norm": 1.8466552670609797, "learning_rate": 2.962200523813562e-06, "loss": 0.2416, "step": 30518 }, { "epoch": 2.268227424749164, "grad_norm": 2.6333680528944137, "learning_rate": 2.961630529591294e-06, "loss": 0.3052, "step": 30519 }, { "epoch": 2.2683017465626163, "grad_norm": 2.339860066701978, "learning_rate": 2.9610605806816807e-06, "loss": 0.3008, "step": 30520 }, { "epoch": 2.2683760683760683, "grad_norm": 2.7254925153257394, "learning_rate": 2.9604906770884e-06, "loss": 0.3579, "step": 30521 }, { "epoch": 2.2684503901895208, "grad_norm": 2.5157019347887153, "learning_rate": 2.9599208188151084e-06, "loss": 0.3166, "step": 30522 }, { "epoch": 2.268524712002973, "grad_norm": 2.2756465554060603, "learning_rate": 2.959351005865485e-06, "loss": 0.2719, "step": 30523 }, { "epoch": 2.2685990338164252, "grad_norm": 2.2166840491967488, "learning_rate": 2.9587812382431915e-06, "loss": 0.2333, "step": 30524 }, { "epoch": 2.2686733556298773, "grad_norm": 2.630341963341157, "learning_rate": 2.958211515951902e-06, "loss": 0.2516, "step": 30525 }, { "epoch": 2.2687476774433297, "grad_norm": 2.357962177248054, "learning_rate": 2.957641838995282e-06, "loss": 0.3385, "step": 30526 }, { "epoch": 2.2688219992567817, "grad_norm": 2.426126139458966, "learning_rate": 2.957072207376994e-06, "loss": 0.3479, "step": 30527 }, { "epoch": 2.268896321070234, "grad_norm": 1.9995770416980978, "learning_rate": 2.956502621100714e-06, "loss": 0.2509, "step": 30528 }, { "epoch": 2.268970642883686, "grad_norm": 2.2329489993461955, "learning_rate": 2.9559330801701012e-06, "loss": 0.3451, "step": 30529 }, { "epoch": 2.2690449646971387, "grad_norm": 2.5288704787523395, "learning_rate": 2.9553635845888282e-06, "loss": 0.3102, "step": 30530 }, { "epoch": 2.2691192865105907, "grad_norm": 2.427275536465506, "learning_rate": 2.95479413436056e-06, "loss": 0.3115, "step": 30531 }, { "epoch": 2.269193608324043, "grad_norm": 2.1764740019808393, "learning_rate": 2.95422472948896e-06, "loss": 0.2157, "step": 30532 }, { "epoch": 2.269267930137495, "grad_norm": 2.864663483306242, "learning_rate": 2.953655369977697e-06, "loss": 0.3186, "step": 30533 }, { "epoch": 2.2693422519509476, "grad_norm": 2.1676868744665247, "learning_rate": 2.953086055830432e-06, "loss": 0.2537, "step": 30534 }, { "epoch": 2.2694165737644, "grad_norm": 3.001831762605315, "learning_rate": 2.9525167870508365e-06, "loss": 0.3674, "step": 30535 }, { "epoch": 2.269490895577852, "grad_norm": 1.7330940144331324, "learning_rate": 2.9519475636425722e-06, "loss": 0.1913, "step": 30536 }, { "epoch": 2.269565217391304, "grad_norm": 2.270707850501956, "learning_rate": 2.9513783856092992e-06, "loss": 0.2927, "step": 30537 }, { "epoch": 2.2696395392047566, "grad_norm": 2.38564882486482, "learning_rate": 2.9508092529546916e-06, "loss": 0.3196, "step": 30538 }, { "epoch": 2.269713861018209, "grad_norm": 2.4563954075367587, "learning_rate": 2.950240165682404e-06, "loss": 0.3342, "step": 30539 }, { "epoch": 2.269788182831661, "grad_norm": 2.561928465488748, "learning_rate": 2.9496711237961075e-06, "loss": 0.2884, "step": 30540 }, { "epoch": 2.2698625046451135, "grad_norm": 2.3253650647717232, "learning_rate": 2.9491021272994626e-06, "loss": 0.2904, "step": 30541 }, { "epoch": 2.2699368264585655, "grad_norm": 2.048671203629501, "learning_rate": 2.94853317619613e-06, "loss": 0.2548, "step": 30542 }, { "epoch": 2.270011148272018, "grad_norm": 2.6039211381029146, "learning_rate": 2.9479642704897805e-06, "loss": 0.3349, "step": 30543 }, { "epoch": 2.27008547008547, "grad_norm": 2.3381663178836694, "learning_rate": 2.9473954101840654e-06, "loss": 0.2513, "step": 30544 }, { "epoch": 2.2701597918989225, "grad_norm": 4.373773474397799, "learning_rate": 2.946826595282655e-06, "loss": 0.2638, "step": 30545 }, { "epoch": 2.2702341137123745, "grad_norm": 2.980212871470007, "learning_rate": 2.946257825789206e-06, "loss": 0.4046, "step": 30546 }, { "epoch": 2.270308435525827, "grad_norm": 2.518507258151711, "learning_rate": 2.945689101707386e-06, "loss": 0.38, "step": 30547 }, { "epoch": 2.270382757339279, "grad_norm": 2.7031656569707856, "learning_rate": 2.9451204230408546e-06, "loss": 0.3351, "step": 30548 }, { "epoch": 2.2704570791527314, "grad_norm": 2.800511258267245, "learning_rate": 2.9445517897932684e-06, "loss": 0.3052, "step": 30549 }, { "epoch": 2.2705314009661834, "grad_norm": 2.6538599217831855, "learning_rate": 2.9439832019682947e-06, "loss": 0.212, "step": 30550 }, { "epoch": 2.270605722779636, "grad_norm": 2.0250591717907582, "learning_rate": 2.9434146595695877e-06, "loss": 0.2708, "step": 30551 }, { "epoch": 2.270680044593088, "grad_norm": 2.9303278411193823, "learning_rate": 2.9428461626008144e-06, "loss": 0.2549, "step": 30552 }, { "epoch": 2.2707543664065404, "grad_norm": 2.380516676938021, "learning_rate": 2.942277711065631e-06, "loss": 0.3027, "step": 30553 }, { "epoch": 2.2708286882199924, "grad_norm": 1.9168947647296861, "learning_rate": 2.941709304967697e-06, "loss": 0.2415, "step": 30554 }, { "epoch": 2.270903010033445, "grad_norm": 2.1719043046055417, "learning_rate": 2.941140944310673e-06, "loss": 0.2732, "step": 30555 }, { "epoch": 2.270977331846897, "grad_norm": 2.332909628193245, "learning_rate": 2.940572629098215e-06, "loss": 0.2717, "step": 30556 }, { "epoch": 2.2710516536603493, "grad_norm": 2.4229878165107346, "learning_rate": 2.9400043593339856e-06, "loss": 0.407, "step": 30557 }, { "epoch": 2.271125975473802, "grad_norm": 2.327140280653314, "learning_rate": 2.9394361350216426e-06, "loss": 0.2928, "step": 30558 }, { "epoch": 2.271200297287254, "grad_norm": 2.1795170229996264, "learning_rate": 2.9388679561648404e-06, "loss": 0.3194, "step": 30559 }, { "epoch": 2.271274619100706, "grad_norm": 2.0546332124549815, "learning_rate": 2.938299822767243e-06, "loss": 0.2783, "step": 30560 }, { "epoch": 2.2713489409141583, "grad_norm": 2.096854948535795, "learning_rate": 2.9377317348325016e-06, "loss": 0.2771, "step": 30561 }, { "epoch": 2.2714232627276107, "grad_norm": 2.3187625427712533, "learning_rate": 2.937163692364281e-06, "loss": 0.3158, "step": 30562 }, { "epoch": 2.2714975845410628, "grad_norm": 2.5378734677470285, "learning_rate": 2.9365956953662335e-06, "loss": 0.33, "step": 30563 }, { "epoch": 2.2715719063545152, "grad_norm": 1.923659524608966, "learning_rate": 2.9360277438420125e-06, "loss": 0.2583, "step": 30564 }, { "epoch": 2.2716462281679672, "grad_norm": 2.7627006312196594, "learning_rate": 2.9354598377952813e-06, "loss": 0.3675, "step": 30565 }, { "epoch": 2.2717205499814197, "grad_norm": 2.0910022812762237, "learning_rate": 2.9348919772296947e-06, "loss": 0.2298, "step": 30566 }, { "epoch": 2.2717948717948717, "grad_norm": 2.543596863569244, "learning_rate": 2.934324162148906e-06, "loss": 0.3066, "step": 30567 }, { "epoch": 2.271869193608324, "grad_norm": 1.9736747235374776, "learning_rate": 2.9337563925565684e-06, "loss": 0.2807, "step": 30568 }, { "epoch": 2.271943515421776, "grad_norm": 2.313500377152971, "learning_rate": 2.9331886684563438e-06, "loss": 0.252, "step": 30569 }, { "epoch": 2.2720178372352287, "grad_norm": 2.138915947263827, "learning_rate": 2.9326209898518842e-06, "loss": 0.2494, "step": 30570 }, { "epoch": 2.2720921590486807, "grad_norm": 2.629891650979209, "learning_rate": 2.93205335674684e-06, "loss": 0.3126, "step": 30571 }, { "epoch": 2.272166480862133, "grad_norm": 3.166703890537538, "learning_rate": 2.931485769144874e-06, "loss": 0.2462, "step": 30572 }, { "epoch": 2.272240802675585, "grad_norm": 2.176635696260556, "learning_rate": 2.930918227049632e-06, "loss": 0.2617, "step": 30573 }, { "epoch": 2.2723151244890376, "grad_norm": 1.8591234482871666, "learning_rate": 2.9303507304647747e-06, "loss": 0.2091, "step": 30574 }, { "epoch": 2.2723894463024896, "grad_norm": 2.16529371149947, "learning_rate": 2.929783279393953e-06, "loss": 0.2095, "step": 30575 }, { "epoch": 2.272463768115942, "grad_norm": 2.8979107718479398, "learning_rate": 2.929215873840816e-06, "loss": 0.3175, "step": 30576 }, { "epoch": 2.272538089929394, "grad_norm": 2.5860094658569994, "learning_rate": 2.928648513809027e-06, "loss": 0.3685, "step": 30577 }, { "epoch": 2.2726124117428466, "grad_norm": 2.1967975136663687, "learning_rate": 2.9280811993022252e-06, "loss": 0.2829, "step": 30578 }, { "epoch": 2.2726867335562986, "grad_norm": 2.789334464219721, "learning_rate": 2.9275139303240742e-06, "loss": 0.3117, "step": 30579 }, { "epoch": 2.272761055369751, "grad_norm": 2.586448482874066, "learning_rate": 2.92694670687822e-06, "loss": 0.3303, "step": 30580 }, { "epoch": 2.2728353771832035, "grad_norm": 2.3767046436141284, "learning_rate": 2.926379528968314e-06, "loss": 0.316, "step": 30581 }, { "epoch": 2.2729096989966555, "grad_norm": 2.4424388043407492, "learning_rate": 2.9258123965980113e-06, "loss": 0.2548, "step": 30582 }, { "epoch": 2.2729840208101075, "grad_norm": 2.0210745783151864, "learning_rate": 2.9252453097709587e-06, "loss": 0.2185, "step": 30583 }, { "epoch": 2.27305834262356, "grad_norm": 2.1179435957517834, "learning_rate": 2.9246782684908137e-06, "loss": 0.3081, "step": 30584 }, { "epoch": 2.2731326644370125, "grad_norm": 2.233749166544381, "learning_rate": 2.924111272761222e-06, "loss": 0.3273, "step": 30585 }, { "epoch": 2.2732069862504645, "grad_norm": 2.0291608946828243, "learning_rate": 2.923544322585832e-06, "loss": 0.2629, "step": 30586 }, { "epoch": 2.273281308063917, "grad_norm": 3.0211240459066913, "learning_rate": 2.922977417968299e-06, "loss": 0.2921, "step": 30587 }, { "epoch": 2.273355629877369, "grad_norm": 1.9857048351620705, "learning_rate": 2.9224105589122707e-06, "loss": 0.2569, "step": 30588 }, { "epoch": 2.2734299516908214, "grad_norm": 2.303872866802963, "learning_rate": 2.9218437454213967e-06, "loss": 0.2847, "step": 30589 }, { "epoch": 2.2735042735042734, "grad_norm": 2.1322263521898233, "learning_rate": 2.921276977499321e-06, "loss": 0.2581, "step": 30590 }, { "epoch": 2.273578595317726, "grad_norm": 2.039904540135307, "learning_rate": 2.9207102551497015e-06, "loss": 0.2513, "step": 30591 }, { "epoch": 2.273652917131178, "grad_norm": 2.19453909236001, "learning_rate": 2.92014357837618e-06, "loss": 0.2471, "step": 30592 }, { "epoch": 2.2737272389446304, "grad_norm": 2.2048391809803487, "learning_rate": 2.9195769471824055e-06, "loss": 0.2803, "step": 30593 }, { "epoch": 2.2738015607580824, "grad_norm": 2.4560376169284974, "learning_rate": 2.91901036157203e-06, "loss": 0.2945, "step": 30594 }, { "epoch": 2.273875882571535, "grad_norm": 3.1094544503206527, "learning_rate": 2.9184438215486943e-06, "loss": 0.34, "step": 30595 }, { "epoch": 2.273950204384987, "grad_norm": 2.0976221724163935, "learning_rate": 2.9178773271160533e-06, "loss": 0.3289, "step": 30596 }, { "epoch": 2.2740245261984393, "grad_norm": 2.489771080902302, "learning_rate": 2.9173108782777517e-06, "loss": 0.2833, "step": 30597 }, { "epoch": 2.2740988480118913, "grad_norm": 2.306555864891196, "learning_rate": 2.916744475037431e-06, "loss": 0.3282, "step": 30598 }, { "epoch": 2.274173169825344, "grad_norm": 3.001097943454471, "learning_rate": 2.916178117398745e-06, "loss": 0.3122, "step": 30599 }, { "epoch": 2.274247491638796, "grad_norm": 2.177809736643106, "learning_rate": 2.915611805365337e-06, "loss": 0.253, "step": 30600 }, { "epoch": 2.2743218134522483, "grad_norm": 3.443504075237083, "learning_rate": 2.9150455389408527e-06, "loss": 0.3792, "step": 30601 }, { "epoch": 2.2743961352657003, "grad_norm": 2.0752160451823087, "learning_rate": 2.9144793181289366e-06, "loss": 0.2311, "step": 30602 }, { "epoch": 2.2744704570791527, "grad_norm": 1.678148663405301, "learning_rate": 2.9139131429332336e-06, "loss": 0.2405, "step": 30603 }, { "epoch": 2.274544778892605, "grad_norm": 2.0336537716965033, "learning_rate": 2.9133470133573915e-06, "loss": 0.3139, "step": 30604 }, { "epoch": 2.274619100706057, "grad_norm": 2.1855345180383865, "learning_rate": 2.912780929405051e-06, "loss": 0.3104, "step": 30605 }, { "epoch": 2.2746934225195092, "grad_norm": 1.8949327178728193, "learning_rate": 2.912214891079863e-06, "loss": 0.2392, "step": 30606 }, { "epoch": 2.2747677443329617, "grad_norm": 2.360674596866939, "learning_rate": 2.9116488983854673e-06, "loss": 0.2886, "step": 30607 }, { "epoch": 2.274842066146414, "grad_norm": 2.918356579601575, "learning_rate": 2.9110829513255055e-06, "loss": 0.2876, "step": 30608 }, { "epoch": 2.274916387959866, "grad_norm": 2.45313278503161, "learning_rate": 2.910517049903626e-06, "loss": 0.1827, "step": 30609 }, { "epoch": 2.2749907097733186, "grad_norm": 2.328604906954015, "learning_rate": 2.909951194123468e-06, "loss": 0.3046, "step": 30610 }, { "epoch": 2.2750650315867706, "grad_norm": 2.015349778349302, "learning_rate": 2.909385383988682e-06, "loss": 0.2537, "step": 30611 }, { "epoch": 2.275139353400223, "grad_norm": 2.4851161671437088, "learning_rate": 2.9088196195028984e-06, "loss": 0.3285, "step": 30612 }, { "epoch": 2.275213675213675, "grad_norm": 2.9826168871941383, "learning_rate": 2.9082539006697695e-06, "loss": 0.3468, "step": 30613 }, { "epoch": 2.2752879970271276, "grad_norm": 3.0394295065562655, "learning_rate": 2.907688227492934e-06, "loss": 0.3799, "step": 30614 }, { "epoch": 2.2753623188405796, "grad_norm": 2.0725384797070956, "learning_rate": 2.907122599976031e-06, "loss": 0.2076, "step": 30615 }, { "epoch": 2.275436640654032, "grad_norm": 2.279922618188633, "learning_rate": 2.9065570181227075e-06, "loss": 0.291, "step": 30616 }, { "epoch": 2.275510962467484, "grad_norm": 2.428812934447085, "learning_rate": 2.9059914819365988e-06, "loss": 0.3453, "step": 30617 }, { "epoch": 2.2755852842809365, "grad_norm": 2.524616349237371, "learning_rate": 2.9054259914213524e-06, "loss": 0.2839, "step": 30618 }, { "epoch": 2.2756596060943886, "grad_norm": 2.547556153347893, "learning_rate": 2.9048605465806044e-06, "loss": 0.3228, "step": 30619 }, { "epoch": 2.275733927907841, "grad_norm": 2.320323343302919, "learning_rate": 2.9042951474179936e-06, "loss": 0.2466, "step": 30620 }, { "epoch": 2.275808249721293, "grad_norm": 2.4640848590083997, "learning_rate": 2.9037297939371655e-06, "loss": 0.381, "step": 30621 }, { "epoch": 2.2758825715347455, "grad_norm": 2.3887420709260643, "learning_rate": 2.903164486141756e-06, "loss": 0.2896, "step": 30622 }, { "epoch": 2.2759568933481975, "grad_norm": 2.2850621254452377, "learning_rate": 2.902599224035406e-06, "loss": 0.2536, "step": 30623 }, { "epoch": 2.27603121516165, "grad_norm": 2.431382787681421, "learning_rate": 2.9020340076217545e-06, "loss": 0.2734, "step": 30624 }, { "epoch": 2.276105536975102, "grad_norm": 3.0956177901039457, "learning_rate": 2.9014688369044365e-06, "loss": 0.3863, "step": 30625 }, { "epoch": 2.2761798587885544, "grad_norm": 2.722389865647797, "learning_rate": 2.9009037118870964e-06, "loss": 0.3061, "step": 30626 }, { "epoch": 2.276254180602007, "grad_norm": 2.1092467279700227, "learning_rate": 2.9003386325733663e-06, "loss": 0.2709, "step": 30627 }, { "epoch": 2.276328502415459, "grad_norm": 2.7983853589293677, "learning_rate": 2.899773598966892e-06, "loss": 0.288, "step": 30628 }, { "epoch": 2.276402824228911, "grad_norm": 1.7427941333779216, "learning_rate": 2.899208611071306e-06, "loss": 0.258, "step": 30629 }, { "epoch": 2.2764771460423634, "grad_norm": 2.59449146982309, "learning_rate": 2.898643668890243e-06, "loss": 0.3265, "step": 30630 }, { "epoch": 2.276551467855816, "grad_norm": 2.6870936247409207, "learning_rate": 2.8980787724273475e-06, "loss": 0.3857, "step": 30631 }, { "epoch": 2.276625789669268, "grad_norm": 2.385500209753148, "learning_rate": 2.897513921686249e-06, "loss": 0.3274, "step": 30632 }, { "epoch": 2.2767001114827203, "grad_norm": 2.630089452785231, "learning_rate": 2.8969491166705943e-06, "loss": 0.2549, "step": 30633 }, { "epoch": 2.2767744332961724, "grad_norm": 2.309551765729759, "learning_rate": 2.8963843573840056e-06, "loss": 0.355, "step": 30634 }, { "epoch": 2.276848755109625, "grad_norm": 2.1459836820099785, "learning_rate": 2.895819643830129e-06, "loss": 0.2822, "step": 30635 }, { "epoch": 2.276923076923077, "grad_norm": 1.9576911643745403, "learning_rate": 2.895254976012597e-06, "loss": 0.2264, "step": 30636 }, { "epoch": 2.2769973987365293, "grad_norm": 2.0608367420063307, "learning_rate": 2.8946903539350414e-06, "loss": 0.2506, "step": 30637 }, { "epoch": 2.2770717205499813, "grad_norm": 2.461067565236351, "learning_rate": 2.8941257776011043e-06, "loss": 0.3626, "step": 30638 }, { "epoch": 2.2771460423634338, "grad_norm": 2.5221304830308684, "learning_rate": 2.893561247014414e-06, "loss": 0.2749, "step": 30639 }, { "epoch": 2.277220364176886, "grad_norm": 2.4722675757667623, "learning_rate": 2.892996762178609e-06, "loss": 0.3407, "step": 30640 }, { "epoch": 2.2772946859903382, "grad_norm": 2.4429456382790393, "learning_rate": 2.8924323230973237e-06, "loss": 0.2609, "step": 30641 }, { "epoch": 2.2773690078037903, "grad_norm": 1.8632230484620775, "learning_rate": 2.891867929774187e-06, "loss": 0.2384, "step": 30642 }, { "epoch": 2.2774433296172427, "grad_norm": 2.032858069623384, "learning_rate": 2.891303582212839e-06, "loss": 0.302, "step": 30643 }, { "epoch": 2.2775176514306947, "grad_norm": 2.715451597178897, "learning_rate": 2.8907392804169087e-06, "loss": 0.3792, "step": 30644 }, { "epoch": 2.277591973244147, "grad_norm": 2.1779541510186324, "learning_rate": 2.890175024390032e-06, "loss": 0.2885, "step": 30645 }, { "epoch": 2.2776662950575997, "grad_norm": 1.9373544619141012, "learning_rate": 2.889610814135837e-06, "loss": 0.2513, "step": 30646 }, { "epoch": 2.2777406168710517, "grad_norm": 2.4564889723094328, "learning_rate": 2.8890466496579573e-06, "loss": 0.3271, "step": 30647 }, { "epoch": 2.2778149386845037, "grad_norm": 2.500430669443352, "learning_rate": 2.8884825309600296e-06, "loss": 0.3313, "step": 30648 }, { "epoch": 2.277889260497956, "grad_norm": 2.266209246763226, "learning_rate": 2.8879184580456777e-06, "loss": 0.2952, "step": 30649 }, { "epoch": 2.2779635823114086, "grad_norm": 1.8620126627069908, "learning_rate": 2.8873544309185418e-06, "loss": 0.2263, "step": 30650 }, { "epoch": 2.2780379041248606, "grad_norm": 2.6188611056658875, "learning_rate": 2.8867904495822496e-06, "loss": 0.2671, "step": 30651 }, { "epoch": 2.278112225938313, "grad_norm": 2.3676994434749843, "learning_rate": 2.886226514040428e-06, "loss": 0.2711, "step": 30652 }, { "epoch": 2.278186547751765, "grad_norm": 2.536307046175138, "learning_rate": 2.8856626242967132e-06, "loss": 0.2871, "step": 30653 }, { "epoch": 2.2782608695652176, "grad_norm": 2.294102535930193, "learning_rate": 2.88509878035473e-06, "loss": 0.2769, "step": 30654 }, { "epoch": 2.2783351913786696, "grad_norm": 2.1240875219453303, "learning_rate": 2.8845349822181166e-06, "loss": 0.2731, "step": 30655 }, { "epoch": 2.278409513192122, "grad_norm": 2.1971555232718964, "learning_rate": 2.8839712298904963e-06, "loss": 0.2479, "step": 30656 }, { "epoch": 2.278483835005574, "grad_norm": 2.176449904745008, "learning_rate": 2.883407523375501e-06, "loss": 0.3025, "step": 30657 }, { "epoch": 2.2785581568190265, "grad_norm": 2.47019588678353, "learning_rate": 2.8828438626767575e-06, "loss": 0.2799, "step": 30658 }, { "epoch": 2.2786324786324785, "grad_norm": 2.6883939673045454, "learning_rate": 2.882280247797894e-06, "loss": 0.368, "step": 30659 }, { "epoch": 2.278706800445931, "grad_norm": 2.2460412483731877, "learning_rate": 2.8817166787425434e-06, "loss": 0.28, "step": 30660 }, { "epoch": 2.278781122259383, "grad_norm": 2.2055846416472495, "learning_rate": 2.881153155514331e-06, "loss": 0.2184, "step": 30661 }, { "epoch": 2.2788554440728355, "grad_norm": 1.9260060029463975, "learning_rate": 2.8805896781168842e-06, "loss": 0.2068, "step": 30662 }, { "epoch": 2.2789297658862875, "grad_norm": 2.630946948092282, "learning_rate": 2.880026246553833e-06, "loss": 0.3138, "step": 30663 }, { "epoch": 2.27900408769974, "grad_norm": 1.977336874659764, "learning_rate": 2.8794628608288e-06, "loss": 0.1878, "step": 30664 }, { "epoch": 2.279078409513192, "grad_norm": 2.6025080832068164, "learning_rate": 2.8788995209454197e-06, "loss": 0.3316, "step": 30665 }, { "epoch": 2.2791527313266444, "grad_norm": 2.304685215063286, "learning_rate": 2.8783362269073145e-06, "loss": 0.3248, "step": 30666 }, { "epoch": 2.2792270531400964, "grad_norm": 2.673390203679807, "learning_rate": 2.877772978718112e-06, "loss": 0.3286, "step": 30667 }, { "epoch": 2.279301374953549, "grad_norm": 2.046987698673313, "learning_rate": 2.8772097763814355e-06, "loss": 0.2614, "step": 30668 }, { "epoch": 2.2793756967670014, "grad_norm": 2.1173835423743395, "learning_rate": 2.8766466199009114e-06, "loss": 0.3033, "step": 30669 }, { "epoch": 2.2794500185804534, "grad_norm": 2.1360788317803188, "learning_rate": 2.8760835092801698e-06, "loss": 0.2707, "step": 30670 }, { "epoch": 2.2795243403939054, "grad_norm": 1.9771416231225474, "learning_rate": 2.8755204445228293e-06, "loss": 0.2159, "step": 30671 }, { "epoch": 2.279598662207358, "grad_norm": 2.4758286516713026, "learning_rate": 2.874957425632522e-06, "loss": 0.2545, "step": 30672 }, { "epoch": 2.2796729840208103, "grad_norm": 2.2417342591392195, "learning_rate": 2.8743944526128687e-06, "loss": 0.2778, "step": 30673 }, { "epoch": 2.2797473058342623, "grad_norm": 2.9871716454806165, "learning_rate": 2.873831525467491e-06, "loss": 0.302, "step": 30674 }, { "epoch": 2.279821627647715, "grad_norm": 2.413696199287946, "learning_rate": 2.873268644200019e-06, "loss": 0.311, "step": 30675 }, { "epoch": 2.279895949461167, "grad_norm": 2.9617992485717615, "learning_rate": 2.8727058088140714e-06, "loss": 0.3049, "step": 30676 }, { "epoch": 2.2799702712746193, "grad_norm": 1.9744863649287963, "learning_rate": 2.872143019313277e-06, "loss": 0.2118, "step": 30677 }, { "epoch": 2.2800445930880713, "grad_norm": 2.6104705972443187, "learning_rate": 2.8715802757012555e-06, "loss": 0.2386, "step": 30678 }, { "epoch": 2.2801189149015237, "grad_norm": 2.8766060900312906, "learning_rate": 2.8710175779816305e-06, "loss": 0.3813, "step": 30679 }, { "epoch": 2.2801932367149758, "grad_norm": 2.414627700846877, "learning_rate": 2.870454926158025e-06, "loss": 0.3073, "step": 30680 }, { "epoch": 2.280267558528428, "grad_norm": 2.1569492227186786, "learning_rate": 2.869892320234058e-06, "loss": 0.2201, "step": 30681 }, { "epoch": 2.2803418803418802, "grad_norm": 2.422177334247449, "learning_rate": 2.8693297602133563e-06, "loss": 0.3096, "step": 30682 }, { "epoch": 2.2804162021553327, "grad_norm": 2.7285110189643986, "learning_rate": 2.86876724609954e-06, "loss": 0.3223, "step": 30683 }, { "epoch": 2.2804905239687847, "grad_norm": 2.3788473549427405, "learning_rate": 2.8682047778962263e-06, "loss": 0.3071, "step": 30684 }, { "epoch": 2.280564845782237, "grad_norm": 2.2235655050637577, "learning_rate": 2.8676423556070443e-06, "loss": 0.2954, "step": 30685 }, { "epoch": 2.280639167595689, "grad_norm": 1.9009276242434188, "learning_rate": 2.8670799792356072e-06, "loss": 0.2311, "step": 30686 }, { "epoch": 2.2807134894091416, "grad_norm": 2.775870162863484, "learning_rate": 2.8665176487855427e-06, "loss": 0.3091, "step": 30687 }, { "epoch": 2.2807878112225937, "grad_norm": 2.690197848434596, "learning_rate": 2.8659553642604665e-06, "loss": 0.27, "step": 30688 }, { "epoch": 2.280862133036046, "grad_norm": 1.7700620464640364, "learning_rate": 2.8653931256639977e-06, "loss": 0.2154, "step": 30689 }, { "epoch": 2.280936454849498, "grad_norm": 2.55126063397191, "learning_rate": 2.8648309329997624e-06, "loss": 0.3318, "step": 30690 }, { "epoch": 2.2810107766629506, "grad_norm": 2.521281065805738, "learning_rate": 2.86426878627137e-06, "loss": 0.35, "step": 30691 }, { "epoch": 2.281085098476403, "grad_norm": 2.588530631363995, "learning_rate": 2.863706685482448e-06, "loss": 0.2913, "step": 30692 }, { "epoch": 2.281159420289855, "grad_norm": 2.656314897653476, "learning_rate": 2.8631446306366097e-06, "loss": 0.3141, "step": 30693 }, { "epoch": 2.281233742103307, "grad_norm": 2.365351165009501, "learning_rate": 2.8625826217374776e-06, "loss": 0.4073, "step": 30694 }, { "epoch": 2.2813080639167596, "grad_norm": 2.3729322983171954, "learning_rate": 2.862020658788669e-06, "loss": 0.3323, "step": 30695 }, { "epoch": 2.281382385730212, "grad_norm": 2.164200845464486, "learning_rate": 2.861458741793798e-06, "loss": 0.2296, "step": 30696 }, { "epoch": 2.281456707543664, "grad_norm": 2.300908737417508, "learning_rate": 2.860896870756489e-06, "loss": 0.2615, "step": 30697 }, { "epoch": 2.2815310293571165, "grad_norm": 2.908267334909114, "learning_rate": 2.860335045680351e-06, "loss": 0.3342, "step": 30698 }, { "epoch": 2.2816053511705685, "grad_norm": 2.7050794136258762, "learning_rate": 2.859773266569009e-06, "loss": 0.3372, "step": 30699 }, { "epoch": 2.281679672984021, "grad_norm": 2.4005975460814124, "learning_rate": 2.8592115334260774e-06, "loss": 0.2511, "step": 30700 }, { "epoch": 2.281753994797473, "grad_norm": 2.8845293941564454, "learning_rate": 2.8586498462551706e-06, "loss": 0.4304, "step": 30701 }, { "epoch": 2.2818283166109254, "grad_norm": 1.8833016644588672, "learning_rate": 2.8580882050599044e-06, "loss": 0.2183, "step": 30702 }, { "epoch": 2.2819026384243775, "grad_norm": 2.3161656532029324, "learning_rate": 2.8575266098438947e-06, "loss": 0.2657, "step": 30703 }, { "epoch": 2.28197696023783, "grad_norm": 2.145963563792652, "learning_rate": 2.85696506061076e-06, "loss": 0.2434, "step": 30704 }, { "epoch": 2.282051282051282, "grad_norm": 2.000098338119541, "learning_rate": 2.8564035573641136e-06, "loss": 0.2493, "step": 30705 }, { "epoch": 2.2821256038647344, "grad_norm": 2.3335538348944342, "learning_rate": 2.8558421001075678e-06, "loss": 0.2983, "step": 30706 }, { "epoch": 2.2821999256781864, "grad_norm": 1.6643398778158907, "learning_rate": 2.855280688844743e-06, "loss": 0.198, "step": 30707 }, { "epoch": 2.282274247491639, "grad_norm": 2.13619886941191, "learning_rate": 2.854719323579247e-06, "loss": 0.2704, "step": 30708 }, { "epoch": 2.282348569305091, "grad_norm": 2.6498011464986746, "learning_rate": 2.8541580043147e-06, "loss": 0.3691, "step": 30709 }, { "epoch": 2.2824228911185434, "grad_norm": 2.463185363655418, "learning_rate": 2.8535967310547143e-06, "loss": 0.2658, "step": 30710 }, { "epoch": 2.2824972129319954, "grad_norm": 2.5770508579713596, "learning_rate": 2.8530355038028976e-06, "loss": 0.3094, "step": 30711 }, { "epoch": 2.282571534745448, "grad_norm": 1.7772692180928955, "learning_rate": 2.8524743225628704e-06, "loss": 0.301, "step": 30712 }, { "epoch": 2.2826458565589, "grad_norm": 2.1606971187342623, "learning_rate": 2.8519131873382434e-06, "loss": 0.2165, "step": 30713 }, { "epoch": 2.2827201783723523, "grad_norm": 2.769548389571226, "learning_rate": 2.8513520981326293e-06, "loss": 0.3105, "step": 30714 }, { "epoch": 2.2827945001858048, "grad_norm": 2.1223165750065633, "learning_rate": 2.8507910549496356e-06, "loss": 0.2495, "step": 30715 }, { "epoch": 2.282868821999257, "grad_norm": 1.9401671400860263, "learning_rate": 2.85023005779288e-06, "loss": 0.2463, "step": 30716 }, { "epoch": 2.282943143812709, "grad_norm": 2.2749641289446383, "learning_rate": 2.8496691066659733e-06, "loss": 0.2471, "step": 30717 }, { "epoch": 2.2830174656261613, "grad_norm": 4.992043787749747, "learning_rate": 2.8491082015725235e-06, "loss": 0.3655, "step": 30718 }, { "epoch": 2.2830917874396137, "grad_norm": 1.8186125438022258, "learning_rate": 2.848547342516146e-06, "loss": 0.2509, "step": 30719 }, { "epoch": 2.2831661092530657, "grad_norm": 2.6148357619488807, "learning_rate": 2.8479865295004472e-06, "loss": 0.335, "step": 30720 }, { "epoch": 2.283240431066518, "grad_norm": 3.004362397318032, "learning_rate": 2.8474257625290438e-06, "loss": 0.361, "step": 30721 }, { "epoch": 2.28331475287997, "grad_norm": 2.438585423478377, "learning_rate": 2.846865041605541e-06, "loss": 0.2802, "step": 30722 }, { "epoch": 2.2833890746934227, "grad_norm": 1.9880718450508874, "learning_rate": 2.8463043667335468e-06, "loss": 0.2983, "step": 30723 }, { "epoch": 2.2834633965068747, "grad_norm": 2.3824553949636575, "learning_rate": 2.845743737916681e-06, "loss": 0.2606, "step": 30724 }, { "epoch": 2.283537718320327, "grad_norm": 2.2746750057275205, "learning_rate": 2.8451831551585386e-06, "loss": 0.2828, "step": 30725 }, { "epoch": 2.283612040133779, "grad_norm": 2.264836464890242, "learning_rate": 2.84462261846274e-06, "loss": 0.2766, "step": 30726 }, { "epoch": 2.2836863619472316, "grad_norm": 2.1471576960020453, "learning_rate": 2.8440621278328883e-06, "loss": 0.2641, "step": 30727 }, { "epoch": 2.2837606837606836, "grad_norm": 2.680617589768704, "learning_rate": 2.8435016832725915e-06, "loss": 0.3025, "step": 30728 }, { "epoch": 2.283835005574136, "grad_norm": 2.3943542108601314, "learning_rate": 2.842941284785462e-06, "loss": 0.3109, "step": 30729 }, { "epoch": 2.283909327387588, "grad_norm": 2.5201868238040825, "learning_rate": 2.842380932375103e-06, "loss": 0.2197, "step": 30730 }, { "epoch": 2.2839836492010406, "grad_norm": 2.5367433629073033, "learning_rate": 2.8418206260451262e-06, "loss": 0.2847, "step": 30731 }, { "epoch": 2.2840579710144926, "grad_norm": 1.8291384876668346, "learning_rate": 2.8412603657991365e-06, "loss": 0.1834, "step": 30732 }, { "epoch": 2.284132292827945, "grad_norm": 2.1853017739681326, "learning_rate": 2.8407001516407393e-06, "loss": 0.2902, "step": 30733 }, { "epoch": 2.284206614641397, "grad_norm": 2.83207363926743, "learning_rate": 2.8401399835735455e-06, "loss": 0.3688, "step": 30734 }, { "epoch": 2.2842809364548495, "grad_norm": 2.6203751457909368, "learning_rate": 2.8395798616011596e-06, "loss": 0.3298, "step": 30735 }, { "epoch": 2.2843552582683015, "grad_norm": 1.838752114744399, "learning_rate": 2.839019785727186e-06, "loss": 0.2185, "step": 30736 }, { "epoch": 2.284429580081754, "grad_norm": 2.3497379034549155, "learning_rate": 2.8384597559552285e-06, "loss": 0.2694, "step": 30737 }, { "epoch": 2.2845039018952065, "grad_norm": 1.9437300436775944, "learning_rate": 2.837899772288899e-06, "loss": 0.2025, "step": 30738 }, { "epoch": 2.2845782237086585, "grad_norm": 2.1406462215736752, "learning_rate": 2.8373398347317994e-06, "loss": 0.2578, "step": 30739 }, { "epoch": 2.2846525455221105, "grad_norm": 2.371628155883447, "learning_rate": 2.836779943287531e-06, "loss": 0.3287, "step": 30740 }, { "epoch": 2.284726867335563, "grad_norm": 2.6546508172263095, "learning_rate": 2.8362200979597044e-06, "loss": 0.3274, "step": 30741 }, { "epoch": 2.2848011891490154, "grad_norm": 2.6784777569991802, "learning_rate": 2.835660298751918e-06, "loss": 0.3352, "step": 30742 }, { "epoch": 2.2848755109624674, "grad_norm": 1.9820934553418, "learning_rate": 2.8351005456677817e-06, "loss": 0.2521, "step": 30743 }, { "epoch": 2.28494983277592, "grad_norm": 2.456361495307497, "learning_rate": 2.8345408387108976e-06, "loss": 0.2866, "step": 30744 }, { "epoch": 2.285024154589372, "grad_norm": 2.5620924058225256, "learning_rate": 2.8339811778848637e-06, "loss": 0.3304, "step": 30745 }, { "epoch": 2.2850984764028244, "grad_norm": 2.4118568328734056, "learning_rate": 2.8334215631932927e-06, "loss": 0.2742, "step": 30746 }, { "epoch": 2.2851727982162764, "grad_norm": 2.339622590340228, "learning_rate": 2.832861994639777e-06, "loss": 0.2582, "step": 30747 }, { "epoch": 2.285247120029729, "grad_norm": 2.1780396678357152, "learning_rate": 2.832302472227927e-06, "loss": 0.2437, "step": 30748 }, { "epoch": 2.285321441843181, "grad_norm": 1.9431611698548574, "learning_rate": 2.8317429959613406e-06, "loss": 0.2251, "step": 30749 }, { "epoch": 2.2853957636566333, "grad_norm": 2.0983398961297572, "learning_rate": 2.8311835658436182e-06, "loss": 0.2829, "step": 30750 }, { "epoch": 2.2854700854700853, "grad_norm": 2.288983619424404, "learning_rate": 2.8306241818783677e-06, "loss": 0.2633, "step": 30751 }, { "epoch": 2.285544407283538, "grad_norm": 2.4853465945809083, "learning_rate": 2.8300648440691837e-06, "loss": 0.2355, "step": 30752 }, { "epoch": 2.28561872909699, "grad_norm": 2.4036932723329514, "learning_rate": 2.829505552419672e-06, "loss": 0.3274, "step": 30753 }, { "epoch": 2.2856930509104423, "grad_norm": 2.74449472296566, "learning_rate": 2.828946306933432e-06, "loss": 0.3069, "step": 30754 }, { "epoch": 2.2857673727238943, "grad_norm": 2.8007028315039686, "learning_rate": 2.828387107614061e-06, "loss": 0.3439, "step": 30755 }, { "epoch": 2.2858416945373468, "grad_norm": 2.3676329667397824, "learning_rate": 2.8278279544651653e-06, "loss": 0.2406, "step": 30756 }, { "epoch": 2.2859160163507988, "grad_norm": 2.1315168399908386, "learning_rate": 2.8272688474903374e-06, "loss": 0.2636, "step": 30757 }, { "epoch": 2.2859903381642512, "grad_norm": 2.8432339633084274, "learning_rate": 2.826709786693187e-06, "loss": 0.3513, "step": 30758 }, { "epoch": 2.2860646599777033, "grad_norm": 2.2074382066568248, "learning_rate": 2.8261507720772997e-06, "loss": 0.3024, "step": 30759 }, { "epoch": 2.2861389817911557, "grad_norm": 1.9042948696725004, "learning_rate": 2.825591803646285e-06, "loss": 0.2407, "step": 30760 }, { "epoch": 2.286213303604608, "grad_norm": 2.962612598344515, "learning_rate": 2.8250328814037385e-06, "loss": 0.3642, "step": 30761 }, { "epoch": 2.28628762541806, "grad_norm": 2.6285647399488026, "learning_rate": 2.824474005353255e-06, "loss": 0.352, "step": 30762 }, { "epoch": 2.286361947231512, "grad_norm": 2.3318863727092602, "learning_rate": 2.8239151754984372e-06, "loss": 0.2155, "step": 30763 }, { "epoch": 2.2864362690449647, "grad_norm": 2.369503893999203, "learning_rate": 2.8233563918428798e-06, "loss": 0.2905, "step": 30764 }, { "epoch": 2.286510590858417, "grad_norm": 2.457850163983929, "learning_rate": 2.8227976543901837e-06, "loss": 0.3689, "step": 30765 }, { "epoch": 2.286584912671869, "grad_norm": 2.4297602452064253, "learning_rate": 2.8222389631439438e-06, "loss": 0.3416, "step": 30766 }, { "epoch": 2.2866592344853216, "grad_norm": 2.740637330400406, "learning_rate": 2.8216803181077544e-06, "loss": 0.2685, "step": 30767 }, { "epoch": 2.2867335562987736, "grad_norm": 1.4700425698757984, "learning_rate": 2.821121719285218e-06, "loss": 0.1651, "step": 30768 }, { "epoch": 2.286807878112226, "grad_norm": 2.8202456378249305, "learning_rate": 2.820563166679927e-06, "loss": 0.3866, "step": 30769 }, { "epoch": 2.286882199925678, "grad_norm": 7.336911527358779, "learning_rate": 2.8200046602954778e-06, "loss": 0.4093, "step": 30770 }, { "epoch": 2.2869565217391306, "grad_norm": 1.884020456563782, "learning_rate": 2.819446200135466e-06, "loss": 0.2296, "step": 30771 }, { "epoch": 2.2870308435525826, "grad_norm": 2.1470385749414556, "learning_rate": 2.8188877862034846e-06, "loss": 0.2686, "step": 30772 }, { "epoch": 2.287105165366035, "grad_norm": 2.0181391367629726, "learning_rate": 2.818329418503134e-06, "loss": 0.2795, "step": 30773 }, { "epoch": 2.287179487179487, "grad_norm": 2.3841387265618885, "learning_rate": 2.817771097038002e-06, "loss": 0.2732, "step": 30774 }, { "epoch": 2.2872538089929395, "grad_norm": 2.687639112824879, "learning_rate": 2.81721282181169e-06, "loss": 0.3928, "step": 30775 }, { "epoch": 2.2873281308063915, "grad_norm": 2.575425186746746, "learning_rate": 2.81665459282779e-06, "loss": 0.2633, "step": 30776 }, { "epoch": 2.287402452619844, "grad_norm": 1.9553170841908274, "learning_rate": 2.8160964100898926e-06, "loss": 0.2628, "step": 30777 }, { "epoch": 2.287476774433296, "grad_norm": 2.232174619138354, "learning_rate": 2.8155382736015956e-06, "loss": 0.2892, "step": 30778 }, { "epoch": 2.2875510962467485, "grad_norm": 2.581140685799361, "learning_rate": 2.814980183366487e-06, "loss": 0.3284, "step": 30779 }, { "epoch": 2.2876254180602005, "grad_norm": 2.3482645061948055, "learning_rate": 2.814422139388171e-06, "loss": 0.2903, "step": 30780 }, { "epoch": 2.287699739873653, "grad_norm": 2.532235618635355, "learning_rate": 2.813864141670225e-06, "loss": 0.4018, "step": 30781 }, { "epoch": 2.287774061687105, "grad_norm": 3.511390509376448, "learning_rate": 2.8133061902162518e-06, "loss": 0.2751, "step": 30782 }, { "epoch": 2.2878483835005574, "grad_norm": 2.0224789436353334, "learning_rate": 2.8127482850298405e-06, "loss": 0.2314, "step": 30783 }, { "epoch": 2.28792270531401, "grad_norm": 2.1283558499068853, "learning_rate": 2.8121904261145804e-06, "loss": 0.2506, "step": 30784 }, { "epoch": 2.287997027127462, "grad_norm": 2.076958014267561, "learning_rate": 2.8116326134740678e-06, "loss": 0.2336, "step": 30785 }, { "epoch": 2.2880713489409144, "grad_norm": 2.039597069807912, "learning_rate": 2.811074847111892e-06, "loss": 0.2677, "step": 30786 }, { "epoch": 2.2881456707543664, "grad_norm": 2.657760789132655, "learning_rate": 2.81051712703164e-06, "loss": 0.2995, "step": 30787 }, { "epoch": 2.288219992567819, "grad_norm": 2.2581940521672, "learning_rate": 2.809959453236909e-06, "loss": 0.2726, "step": 30788 }, { "epoch": 2.288294314381271, "grad_norm": 1.3747931503977833, "learning_rate": 2.809401825731283e-06, "loss": 0.1362, "step": 30789 }, { "epoch": 2.2883686361947233, "grad_norm": 2.716314983477568, "learning_rate": 2.808844244518357e-06, "loss": 0.3921, "step": 30790 }, { "epoch": 2.2884429580081753, "grad_norm": 2.560611764977194, "learning_rate": 2.8082867096017197e-06, "loss": 0.3402, "step": 30791 }, { "epoch": 2.288517279821628, "grad_norm": 2.040444171125186, "learning_rate": 2.8077292209849595e-06, "loss": 0.2118, "step": 30792 }, { "epoch": 2.28859160163508, "grad_norm": 2.507654553296002, "learning_rate": 2.8071717786716644e-06, "loss": 0.2842, "step": 30793 }, { "epoch": 2.2886659234485323, "grad_norm": 2.1098358361503857, "learning_rate": 2.8066143826654213e-06, "loss": 0.3026, "step": 30794 }, { "epoch": 2.2887402452619843, "grad_norm": 2.3241953898902725, "learning_rate": 2.8060570329698257e-06, "loss": 0.2832, "step": 30795 }, { "epoch": 2.2888145670754367, "grad_norm": 2.1887576637393, "learning_rate": 2.8054997295884578e-06, "loss": 0.2328, "step": 30796 }, { "epoch": 2.2888888888888888, "grad_norm": 3.1591020184647833, "learning_rate": 2.8049424725249118e-06, "loss": 0.366, "step": 30797 }, { "epoch": 2.288963210702341, "grad_norm": 3.090540853457208, "learning_rate": 2.8043852617827736e-06, "loss": 0.3713, "step": 30798 }, { "epoch": 2.2890375325157932, "grad_norm": 2.406755520670009, "learning_rate": 2.803828097365627e-06, "loss": 0.3006, "step": 30799 }, { "epoch": 2.2891118543292457, "grad_norm": 2.069836434785609, "learning_rate": 2.8032709792770643e-06, "loss": 0.2391, "step": 30800 }, { "epoch": 2.2891861761426977, "grad_norm": 2.4101654257013734, "learning_rate": 2.8027139075206665e-06, "loss": 0.3209, "step": 30801 }, { "epoch": 2.28926049795615, "grad_norm": 1.7547112348529428, "learning_rate": 2.8021568821000267e-06, "loss": 0.1774, "step": 30802 }, { "epoch": 2.2893348197696026, "grad_norm": 1.8428298291373852, "learning_rate": 2.801599903018727e-06, "loss": 0.21, "step": 30803 }, { "epoch": 2.2894091415830546, "grad_norm": 2.0036650037341333, "learning_rate": 2.8010429702803533e-06, "loss": 0.2393, "step": 30804 }, { "epoch": 2.2894834633965067, "grad_norm": 2.0369251698792916, "learning_rate": 2.8004860838884915e-06, "loss": 0.1828, "step": 30805 }, { "epoch": 2.289557785209959, "grad_norm": 2.438110677551737, "learning_rate": 2.799929243846725e-06, "loss": 0.2535, "step": 30806 }, { "epoch": 2.2896321070234116, "grad_norm": 2.185996343371194, "learning_rate": 2.7993724501586417e-06, "loss": 0.1831, "step": 30807 }, { "epoch": 2.2897064288368636, "grad_norm": 2.5486806503026416, "learning_rate": 2.7988157028278263e-06, "loss": 0.3427, "step": 30808 }, { "epoch": 2.289780750650316, "grad_norm": 2.5155876688430565, "learning_rate": 2.7982590018578582e-06, "loss": 0.2947, "step": 30809 }, { "epoch": 2.289855072463768, "grad_norm": 1.7228206908375134, "learning_rate": 2.797702347252328e-06, "loss": 0.1997, "step": 30810 }, { "epoch": 2.2899293942772205, "grad_norm": 2.0152129836073494, "learning_rate": 2.7971457390148128e-06, "loss": 0.2369, "step": 30811 }, { "epoch": 2.2900037160906725, "grad_norm": 2.4573074907931947, "learning_rate": 2.796589177148904e-06, "loss": 0.3174, "step": 30812 }, { "epoch": 2.290078037904125, "grad_norm": 2.2387174548614013, "learning_rate": 2.796032661658179e-06, "loss": 0.2614, "step": 30813 }, { "epoch": 2.290152359717577, "grad_norm": 2.162151567271218, "learning_rate": 2.7954761925462225e-06, "loss": 0.2533, "step": 30814 }, { "epoch": 2.2902266815310295, "grad_norm": 2.8113969140370543, "learning_rate": 2.7949197698166166e-06, "loss": 0.2839, "step": 30815 }, { "epoch": 2.2903010033444815, "grad_norm": 1.9382936725597255, "learning_rate": 2.7943633934729395e-06, "loss": 0.2676, "step": 30816 }, { "epoch": 2.290375325157934, "grad_norm": 2.2371436728429916, "learning_rate": 2.7938070635187807e-06, "loss": 0.2901, "step": 30817 }, { "epoch": 2.290449646971386, "grad_norm": 2.119783991857204, "learning_rate": 2.7932507799577146e-06, "loss": 0.2829, "step": 30818 }, { "epoch": 2.2905239687848384, "grad_norm": 2.9617194895808776, "learning_rate": 2.7926945427933305e-06, "loss": 0.2543, "step": 30819 }, { "epoch": 2.2905982905982905, "grad_norm": 2.3847995557918336, "learning_rate": 2.7921383520292033e-06, "loss": 0.3109, "step": 30820 }, { "epoch": 2.290672612411743, "grad_norm": 2.6465380847679127, "learning_rate": 2.791582207668914e-06, "loss": 0.3598, "step": 30821 }, { "epoch": 2.290746934225195, "grad_norm": 1.944581581931298, "learning_rate": 2.7910261097160463e-06, "loss": 0.228, "step": 30822 }, { "epoch": 2.2908212560386474, "grad_norm": 2.00118516784151, "learning_rate": 2.790470058174175e-06, "loss": 0.245, "step": 30823 }, { "epoch": 2.2908955778520994, "grad_norm": 2.0272642272150203, "learning_rate": 2.7899140530468873e-06, "loss": 0.2667, "step": 30824 }, { "epoch": 2.290969899665552, "grad_norm": 2.122163303390835, "learning_rate": 2.7893580943377584e-06, "loss": 0.2784, "step": 30825 }, { "epoch": 2.2910442214790043, "grad_norm": 2.223556854131666, "learning_rate": 2.7888021820503688e-06, "loss": 0.2848, "step": 30826 }, { "epoch": 2.2911185432924563, "grad_norm": 2.8794725359410234, "learning_rate": 2.7882463161882956e-06, "loss": 0.3079, "step": 30827 }, { "epoch": 2.2911928651059084, "grad_norm": 2.0388261789079625, "learning_rate": 2.7876904967551153e-06, "loss": 0.2345, "step": 30828 }, { "epoch": 2.291267186919361, "grad_norm": 1.7751366327793818, "learning_rate": 2.787134723754413e-06, "loss": 0.2363, "step": 30829 }, { "epoch": 2.2913415087328133, "grad_norm": 2.228778266614872, "learning_rate": 2.7865789971897626e-06, "loss": 0.2467, "step": 30830 }, { "epoch": 2.2914158305462653, "grad_norm": 2.4725706810746035, "learning_rate": 2.78602331706474e-06, "loss": 0.3403, "step": 30831 }, { "epoch": 2.2914901523597178, "grad_norm": 2.262388228510962, "learning_rate": 2.7854676833829275e-06, "loss": 0.296, "step": 30832 }, { "epoch": 2.2915644741731698, "grad_norm": 6.611099928237598, "learning_rate": 2.784912096147897e-06, "loss": 0.2179, "step": 30833 }, { "epoch": 2.2916387959866222, "grad_norm": 2.6383319582857014, "learning_rate": 2.78435655536323e-06, "loss": 0.3375, "step": 30834 }, { "epoch": 2.2917131178000743, "grad_norm": 2.214058912518164, "learning_rate": 2.7838010610325026e-06, "loss": 0.2932, "step": 30835 }, { "epoch": 2.2917874396135267, "grad_norm": 2.714999392110794, "learning_rate": 2.7832456131592855e-06, "loss": 0.2727, "step": 30836 }, { "epoch": 2.2918617614269787, "grad_norm": 1.9390749441683, "learning_rate": 2.7826902117471654e-06, "loss": 0.2428, "step": 30837 }, { "epoch": 2.291936083240431, "grad_norm": 2.147638909157938, "learning_rate": 2.7821348567997052e-06, "loss": 0.2862, "step": 30838 }, { "epoch": 2.292010405053883, "grad_norm": 2.5563329631928147, "learning_rate": 2.781579548320489e-06, "loss": 0.2076, "step": 30839 }, { "epoch": 2.2920847268673357, "grad_norm": 4.290966544191698, "learning_rate": 2.7810242863130853e-06, "loss": 0.3287, "step": 30840 }, { "epoch": 2.2921590486807877, "grad_norm": 2.1622143755956365, "learning_rate": 2.7804690707810767e-06, "loss": 0.2364, "step": 30841 }, { "epoch": 2.29223337049424, "grad_norm": 2.0929713787022277, "learning_rate": 2.779913901728034e-06, "loss": 0.2731, "step": 30842 }, { "epoch": 2.292307692307692, "grad_norm": 2.2020530210663276, "learning_rate": 2.779358779157527e-06, "loss": 0.2074, "step": 30843 }, { "epoch": 2.2923820141211446, "grad_norm": 2.154980001114037, "learning_rate": 2.7788037030731374e-06, "loss": 0.2399, "step": 30844 }, { "epoch": 2.2924563359345966, "grad_norm": 2.348526797790797, "learning_rate": 2.7782486734784307e-06, "loss": 0.2904, "step": 30845 }, { "epoch": 2.292530657748049, "grad_norm": 1.8337030721155991, "learning_rate": 2.7776936903769892e-06, "loss": 0.2495, "step": 30846 }, { "epoch": 2.292604979561501, "grad_norm": 2.6899565660735436, "learning_rate": 2.777138753772379e-06, "loss": 0.2253, "step": 30847 }, { "epoch": 2.2926793013749536, "grad_norm": 2.4934313687008145, "learning_rate": 2.7765838636681763e-06, "loss": 0.401, "step": 30848 }, { "epoch": 2.292753623188406, "grad_norm": 2.459856939565055, "learning_rate": 2.7760290200679507e-06, "loss": 0.2431, "step": 30849 }, { "epoch": 2.292827945001858, "grad_norm": 2.6132205615625566, "learning_rate": 2.7754742229752727e-06, "loss": 0.4094, "step": 30850 }, { "epoch": 2.29290226681531, "grad_norm": 1.9540829333974354, "learning_rate": 2.77491947239372e-06, "loss": 0.2368, "step": 30851 }, { "epoch": 2.2929765886287625, "grad_norm": 2.341235480109779, "learning_rate": 2.7743647683268617e-06, "loss": 0.2431, "step": 30852 }, { "epoch": 2.293050910442215, "grad_norm": 2.514745021664791, "learning_rate": 2.7738101107782633e-06, "loss": 0.256, "step": 30853 }, { "epoch": 2.293125232255667, "grad_norm": 2.2515401051296178, "learning_rate": 2.773255499751505e-06, "loss": 0.236, "step": 30854 }, { "epoch": 2.2931995540691195, "grad_norm": 2.570834155750439, "learning_rate": 2.772700935250149e-06, "loss": 0.2906, "step": 30855 }, { "epoch": 2.2932738758825715, "grad_norm": 2.537952891344885, "learning_rate": 2.7721464172777724e-06, "loss": 0.2886, "step": 30856 }, { "epoch": 2.293348197696024, "grad_norm": 2.0854622567168133, "learning_rate": 2.7715919458379423e-06, "loss": 0.2732, "step": 30857 }, { "epoch": 2.293422519509476, "grad_norm": 2.455573139034905, "learning_rate": 2.7710375209342256e-06, "loss": 0.2624, "step": 30858 }, { "epoch": 2.2934968413229284, "grad_norm": 2.471986814792431, "learning_rate": 2.7704831425701993e-06, "loss": 0.2928, "step": 30859 }, { "epoch": 2.2935711631363804, "grad_norm": 2.374914654850025, "learning_rate": 2.7699288107494226e-06, "loss": 0.3131, "step": 30860 }, { "epoch": 2.293645484949833, "grad_norm": 2.5907403465379404, "learning_rate": 2.7693745254754713e-06, "loss": 0.2937, "step": 30861 }, { "epoch": 2.293719806763285, "grad_norm": 3.053772128320709, "learning_rate": 2.7688202867519088e-06, "loss": 0.2953, "step": 30862 }, { "epoch": 2.2937941285767374, "grad_norm": 2.459459104459434, "learning_rate": 2.768266094582309e-06, "loss": 0.3249, "step": 30863 }, { "epoch": 2.2938684503901894, "grad_norm": 2.6698649261604963, "learning_rate": 2.7677119489702376e-06, "loss": 0.2857, "step": 30864 }, { "epoch": 2.293942772203642, "grad_norm": 2.4769971581479666, "learning_rate": 2.7671578499192576e-06, "loss": 0.3178, "step": 30865 }, { "epoch": 2.294017094017094, "grad_norm": 2.5150598117776193, "learning_rate": 2.766603797432943e-06, "loss": 0.3155, "step": 30866 }, { "epoch": 2.2940914158305463, "grad_norm": 2.636246178603498, "learning_rate": 2.7660497915148556e-06, "loss": 0.3026, "step": 30867 }, { "epoch": 2.2941657376439983, "grad_norm": 3.0029455251553987, "learning_rate": 2.7654958321685665e-06, "loss": 0.3974, "step": 30868 }, { "epoch": 2.294240059457451, "grad_norm": 2.4346802131957794, "learning_rate": 2.76494191939764e-06, "loss": 0.2529, "step": 30869 }, { "epoch": 2.294314381270903, "grad_norm": 1.924883433637667, "learning_rate": 2.76438805320564e-06, "loss": 0.2738, "step": 30870 }, { "epoch": 2.2943887030843553, "grad_norm": 2.7536373393943374, "learning_rate": 2.763834233596139e-06, "loss": 0.3514, "step": 30871 }, { "epoch": 2.2944630248978077, "grad_norm": 1.9746478208798812, "learning_rate": 2.7632804605726927e-06, "loss": 0.259, "step": 30872 }, { "epoch": 2.2945373467112598, "grad_norm": 2.000328118005577, "learning_rate": 2.762726734138873e-06, "loss": 0.2355, "step": 30873 }, { "epoch": 2.2946116685247118, "grad_norm": 2.473158248483684, "learning_rate": 2.762173054298244e-06, "loss": 0.2667, "step": 30874 }, { "epoch": 2.2946859903381642, "grad_norm": 2.0762869627840725, "learning_rate": 2.761619421054367e-06, "loss": 0.2473, "step": 30875 }, { "epoch": 2.2947603121516167, "grad_norm": 2.39997573439466, "learning_rate": 2.7610658344108098e-06, "loss": 0.2876, "step": 30876 }, { "epoch": 2.2948346339650687, "grad_norm": 2.1436255518912835, "learning_rate": 2.760512294371134e-06, "loss": 0.2604, "step": 30877 }, { "epoch": 2.294908955778521, "grad_norm": 2.4516704611215854, "learning_rate": 2.7599588009389067e-06, "loss": 0.2585, "step": 30878 }, { "epoch": 2.294983277591973, "grad_norm": 2.4365937407295952, "learning_rate": 2.7594053541176892e-06, "loss": 0.2467, "step": 30879 }, { "epoch": 2.2950575994054256, "grad_norm": 2.7260864828639533, "learning_rate": 2.7588519539110403e-06, "loss": 0.3132, "step": 30880 }, { "epoch": 2.2951319212188777, "grad_norm": 1.8977671793864157, "learning_rate": 2.758298600322531e-06, "loss": 0.2528, "step": 30881 }, { "epoch": 2.29520624303233, "grad_norm": 1.844831049844302, "learning_rate": 2.7577452933557193e-06, "loss": 0.2467, "step": 30882 }, { "epoch": 2.295280564845782, "grad_norm": 1.9070423252912716, "learning_rate": 2.7571920330141665e-06, "loss": 0.2359, "step": 30883 }, { "epoch": 2.2953548866592346, "grad_norm": 2.254221771616184, "learning_rate": 2.756638819301434e-06, "loss": 0.3124, "step": 30884 }, { "epoch": 2.2954292084726866, "grad_norm": 2.334223690652995, "learning_rate": 2.7560856522210877e-06, "loss": 0.2881, "step": 30885 }, { "epoch": 2.295503530286139, "grad_norm": 2.2340962118632377, "learning_rate": 2.7555325317766857e-06, "loss": 0.268, "step": 30886 }, { "epoch": 2.295577852099591, "grad_norm": 2.4282750745143975, "learning_rate": 2.754979457971786e-06, "loss": 0.2787, "step": 30887 }, { "epoch": 2.2956521739130435, "grad_norm": 1.7673882933819434, "learning_rate": 2.754426430809957e-06, "loss": 0.2076, "step": 30888 }, { "epoch": 2.2957264957264956, "grad_norm": 2.3828303556138763, "learning_rate": 2.7538734502947506e-06, "loss": 0.2726, "step": 30889 }, { "epoch": 2.295800817539948, "grad_norm": 2.1861104867526557, "learning_rate": 2.753320516429734e-06, "loss": 0.2836, "step": 30890 }, { "epoch": 2.2958751393534, "grad_norm": 2.4321940432208975, "learning_rate": 2.7527676292184637e-06, "loss": 0.3096, "step": 30891 }, { "epoch": 2.2959494611668525, "grad_norm": 2.3408280875873326, "learning_rate": 2.752214788664497e-06, "loss": 0.178, "step": 30892 }, { "epoch": 2.2960237829803045, "grad_norm": 2.016701858125877, "learning_rate": 2.7516619947714016e-06, "loss": 0.2283, "step": 30893 }, { "epoch": 2.296098104793757, "grad_norm": 2.9199467341610204, "learning_rate": 2.7511092475427236e-06, "loss": 0.3385, "step": 30894 }, { "epoch": 2.2961724266072094, "grad_norm": 2.6536294684817365, "learning_rate": 2.7505565469820315e-06, "loss": 0.2948, "step": 30895 }, { "epoch": 2.2962467484206615, "grad_norm": 2.462545745141712, "learning_rate": 2.7500038930928797e-06, "loss": 0.3057, "step": 30896 }, { "epoch": 2.2963210702341135, "grad_norm": 2.4376959217482064, "learning_rate": 2.7494512858788245e-06, "loss": 0.2925, "step": 30897 }, { "epoch": 2.296395392047566, "grad_norm": 2.3431745523506216, "learning_rate": 2.7488987253434274e-06, "loss": 0.36, "step": 30898 }, { "epoch": 2.2964697138610184, "grad_norm": 2.489332168436159, "learning_rate": 2.748346211490243e-06, "loss": 0.2956, "step": 30899 }, { "epoch": 2.2965440356744704, "grad_norm": 2.150981050082104, "learning_rate": 2.747793744322831e-06, "loss": 0.2653, "step": 30900 }, { "epoch": 2.296618357487923, "grad_norm": 3.0399182592061935, "learning_rate": 2.747241323844747e-06, "loss": 0.3492, "step": 30901 }, { "epoch": 2.296692679301375, "grad_norm": 1.9337132925793798, "learning_rate": 2.746688950059544e-06, "loss": 0.2223, "step": 30902 }, { "epoch": 2.2967670011148273, "grad_norm": 1.9426956118669365, "learning_rate": 2.7461366229707844e-06, "loss": 0.2153, "step": 30903 }, { "epoch": 2.2968413229282794, "grad_norm": 2.19251391757935, "learning_rate": 2.7455843425820173e-06, "loss": 0.3193, "step": 30904 }, { "epoch": 2.296915644741732, "grad_norm": 2.1934493967535076, "learning_rate": 2.745032108896808e-06, "loss": 0.2799, "step": 30905 }, { "epoch": 2.296989966555184, "grad_norm": 1.833900407550891, "learning_rate": 2.744479921918699e-06, "loss": 0.2127, "step": 30906 }, { "epoch": 2.2970642883686363, "grad_norm": 2.485512442132537, "learning_rate": 2.7439277816512557e-06, "loss": 0.2853, "step": 30907 }, { "epoch": 2.2971386101820883, "grad_norm": 3.2357851161098603, "learning_rate": 2.743375688098029e-06, "loss": 0.2851, "step": 30908 }, { "epoch": 2.2972129319955408, "grad_norm": 2.287639164699761, "learning_rate": 2.7428236412625695e-06, "loss": 0.3433, "step": 30909 }, { "epoch": 2.297287253808993, "grad_norm": 2.5095908709780304, "learning_rate": 2.7422716411484374e-06, "loss": 0.3513, "step": 30910 }, { "epoch": 2.2973615756224453, "grad_norm": 2.872940095048442, "learning_rate": 2.7417196877591845e-06, "loss": 0.3451, "step": 30911 }, { "epoch": 2.2974358974358973, "grad_norm": 2.3491982865920877, "learning_rate": 2.7411677810983605e-06, "loss": 0.2613, "step": 30912 }, { "epoch": 2.2975102192493497, "grad_norm": 2.34032503434836, "learning_rate": 2.740615921169525e-06, "loss": 0.3088, "step": 30913 }, { "epoch": 2.2975845410628017, "grad_norm": 2.154141257895573, "learning_rate": 2.7400641079762247e-06, "loss": 0.351, "step": 30914 }, { "epoch": 2.297658862876254, "grad_norm": 1.96317093279532, "learning_rate": 2.7395123415220183e-06, "loss": 0.2615, "step": 30915 }, { "epoch": 2.297733184689706, "grad_norm": 1.9602334750125807, "learning_rate": 2.7389606218104536e-06, "loss": 0.2727, "step": 30916 }, { "epoch": 2.2978075065031587, "grad_norm": 2.745375086210681, "learning_rate": 2.7384089488450837e-06, "loss": 0.4182, "step": 30917 }, { "epoch": 2.297881828316611, "grad_norm": 3.8853952513718504, "learning_rate": 2.737857322629461e-06, "loss": 0.3029, "step": 30918 }, { "epoch": 2.297956150130063, "grad_norm": 2.2690896572708033, "learning_rate": 2.7373057431671325e-06, "loss": 0.3689, "step": 30919 }, { "epoch": 2.298030471943515, "grad_norm": 2.433117351157867, "learning_rate": 2.736754210461656e-06, "loss": 0.3322, "step": 30920 }, { "epoch": 2.2981047937569676, "grad_norm": 2.5509533658318637, "learning_rate": 2.736202724516577e-06, "loss": 0.2808, "step": 30921 }, { "epoch": 2.29817911557042, "grad_norm": 1.7915750350209527, "learning_rate": 2.735651285335449e-06, "loss": 0.2955, "step": 30922 }, { "epoch": 2.298253437383872, "grad_norm": 2.1222998163705245, "learning_rate": 2.735099892921822e-06, "loss": 0.3075, "step": 30923 }, { "epoch": 2.2983277591973246, "grad_norm": 2.890564193798564, "learning_rate": 2.734548547279242e-06, "loss": 0.3441, "step": 30924 }, { "epoch": 2.2984020810107766, "grad_norm": 2.195066429029894, "learning_rate": 2.733997248411265e-06, "loss": 0.3045, "step": 30925 }, { "epoch": 2.298476402824229, "grad_norm": 2.2995703422506253, "learning_rate": 2.7334459963214333e-06, "loss": 0.3422, "step": 30926 }, { "epoch": 2.298550724637681, "grad_norm": 2.3394605405308297, "learning_rate": 2.732894791013305e-06, "loss": 0.3344, "step": 30927 }, { "epoch": 2.2986250464511335, "grad_norm": 2.507614577578166, "learning_rate": 2.7323436324904172e-06, "loss": 0.3881, "step": 30928 }, { "epoch": 2.2986993682645855, "grad_norm": 3.077088561874133, "learning_rate": 2.7317925207563265e-06, "loss": 0.3226, "step": 30929 }, { "epoch": 2.298773690078038, "grad_norm": 3.668977676056887, "learning_rate": 2.7312414558145796e-06, "loss": 0.3857, "step": 30930 }, { "epoch": 2.29884801189149, "grad_norm": 2.1064011649984358, "learning_rate": 2.7306904376687193e-06, "loss": 0.2806, "step": 30931 }, { "epoch": 2.2989223337049425, "grad_norm": 1.7612796820084666, "learning_rate": 2.730139466322299e-06, "loss": 0.2135, "step": 30932 }, { "epoch": 2.2989966555183945, "grad_norm": 2.294798925455956, "learning_rate": 2.729588541778865e-06, "loss": 0.2525, "step": 30933 }, { "epoch": 2.299070977331847, "grad_norm": 4.69479382023592, "learning_rate": 2.729037664041959e-06, "loss": 0.3119, "step": 30934 }, { "epoch": 2.299145299145299, "grad_norm": 2.2785250754162627, "learning_rate": 2.728486833115135e-06, "loss": 0.2697, "step": 30935 }, { "epoch": 2.2992196209587514, "grad_norm": 2.405676421677403, "learning_rate": 2.727936049001931e-06, "loss": 0.2983, "step": 30936 }, { "epoch": 2.299293942772204, "grad_norm": 2.0924366830264267, "learning_rate": 2.7273853117059013e-06, "loss": 0.2837, "step": 30937 }, { "epoch": 2.299368264585656, "grad_norm": 1.830295600103376, "learning_rate": 2.7268346212305886e-06, "loss": 0.2631, "step": 30938 }, { "epoch": 2.299442586399108, "grad_norm": 1.8260829034690218, "learning_rate": 2.726283977579536e-06, "loss": 0.2225, "step": 30939 }, { "epoch": 2.2995169082125604, "grad_norm": 2.317033619091156, "learning_rate": 2.725733380756289e-06, "loss": 0.263, "step": 30940 }, { "epoch": 2.299591230026013, "grad_norm": 2.7929910236861604, "learning_rate": 2.7251828307643913e-06, "loss": 0.3321, "step": 30941 }, { "epoch": 2.299665551839465, "grad_norm": 2.3323157421498544, "learning_rate": 2.7246323276073914e-06, "loss": 0.2847, "step": 30942 }, { "epoch": 2.2997398736529173, "grad_norm": 2.4611709429345265, "learning_rate": 2.7240818712888296e-06, "loss": 0.2566, "step": 30943 }, { "epoch": 2.2998141954663693, "grad_norm": 1.9058566643102464, "learning_rate": 2.723531461812252e-06, "loss": 0.2885, "step": 30944 }, { "epoch": 2.299888517279822, "grad_norm": 2.4029274124169464, "learning_rate": 2.7229810991812034e-06, "loss": 0.2874, "step": 30945 }, { "epoch": 2.299962839093274, "grad_norm": 2.32027225231589, "learning_rate": 2.72243078339922e-06, "loss": 0.2603, "step": 30946 }, { "epoch": 2.3000371609067263, "grad_norm": 2.4577975401805436, "learning_rate": 2.7218805144698534e-06, "loss": 0.2858, "step": 30947 }, { "epoch": 2.3001114827201783, "grad_norm": 2.470709231294422, "learning_rate": 2.72133029239664e-06, "loss": 0.3365, "step": 30948 }, { "epoch": 2.3001858045336308, "grad_norm": 1.83094892894951, "learning_rate": 2.7207801171831273e-06, "loss": 0.2039, "step": 30949 }, { "epoch": 2.3002601263470828, "grad_norm": 2.5896107676293365, "learning_rate": 2.720229988832854e-06, "loss": 0.3067, "step": 30950 }, { "epoch": 2.3003344481605352, "grad_norm": 1.8973193275282354, "learning_rate": 2.719679907349363e-06, "loss": 0.221, "step": 30951 }, { "epoch": 2.3004087699739872, "grad_norm": 2.3523810516616477, "learning_rate": 2.7191298727361935e-06, "loss": 0.3266, "step": 30952 }, { "epoch": 2.3004830917874397, "grad_norm": 3.003645135752425, "learning_rate": 2.718579884996887e-06, "loss": 0.2673, "step": 30953 }, { "epoch": 2.3005574136008917, "grad_norm": 2.040822498547019, "learning_rate": 2.7180299441349877e-06, "loss": 0.2935, "step": 30954 }, { "epoch": 2.300631735414344, "grad_norm": 2.1320292917574166, "learning_rate": 2.7174800501540334e-06, "loss": 0.2517, "step": 30955 }, { "epoch": 2.300706057227796, "grad_norm": 2.2736425380350727, "learning_rate": 2.7169302030575616e-06, "loss": 0.3228, "step": 30956 }, { "epoch": 2.3007803790412487, "grad_norm": 1.9534151279547454, "learning_rate": 2.716380402849118e-06, "loss": 0.2589, "step": 30957 }, { "epoch": 2.3008547008547007, "grad_norm": 2.387497065670957, "learning_rate": 2.7158306495322374e-06, "loss": 0.3343, "step": 30958 }, { "epoch": 2.300929022668153, "grad_norm": 2.441491650449758, "learning_rate": 2.715280943110464e-06, "loss": 0.2578, "step": 30959 }, { "epoch": 2.3010033444816056, "grad_norm": 2.5035250914277793, "learning_rate": 2.7147312835873328e-06, "loss": 0.342, "step": 30960 }, { "epoch": 2.3010776662950576, "grad_norm": 2.2524858811336195, "learning_rate": 2.714181670966385e-06, "loss": 0.304, "step": 30961 }, { "epoch": 2.3011519881085096, "grad_norm": 2.4504455984795324, "learning_rate": 2.713632105251157e-06, "loss": 0.2371, "step": 30962 }, { "epoch": 2.301226309921962, "grad_norm": 1.8962738024207189, "learning_rate": 2.713082586445185e-06, "loss": 0.2656, "step": 30963 }, { "epoch": 2.3013006317354145, "grad_norm": 2.1387741000369034, "learning_rate": 2.7125331145520104e-06, "loss": 0.2353, "step": 30964 }, { "epoch": 2.3013749535488666, "grad_norm": 2.2441273579172507, "learning_rate": 2.7119836895751685e-06, "loss": 0.2594, "step": 30965 }, { "epoch": 2.301449275362319, "grad_norm": 2.6918670758024583, "learning_rate": 2.7114343115181996e-06, "loss": 0.2844, "step": 30966 }, { "epoch": 2.301523597175771, "grad_norm": 2.469174244102293, "learning_rate": 2.7108849803846384e-06, "loss": 0.3651, "step": 30967 }, { "epoch": 2.3015979189892235, "grad_norm": 2.0392806822789096, "learning_rate": 2.7103356961780192e-06, "loss": 0.2854, "step": 30968 }, { "epoch": 2.3016722408026755, "grad_norm": 2.318342288656416, "learning_rate": 2.709786458901883e-06, "loss": 0.2828, "step": 30969 }, { "epoch": 2.301746562616128, "grad_norm": 2.0442980176817915, "learning_rate": 2.70923726855976e-06, "loss": 0.2662, "step": 30970 }, { "epoch": 2.30182088442958, "grad_norm": 2.1279598861147644, "learning_rate": 2.708688125155192e-06, "loss": 0.2318, "step": 30971 }, { "epoch": 2.3018952062430325, "grad_norm": 2.1594872178958577, "learning_rate": 2.708139028691712e-06, "loss": 0.3005, "step": 30972 }, { "epoch": 2.3019695280564845, "grad_norm": 3.081622991953968, "learning_rate": 2.7075899791728543e-06, "loss": 0.3712, "step": 30973 }, { "epoch": 2.302043849869937, "grad_norm": 2.348471570457009, "learning_rate": 2.7070409766021545e-06, "loss": 0.3223, "step": 30974 }, { "epoch": 2.302118171683389, "grad_norm": 1.9359808730009622, "learning_rate": 2.7064920209831436e-06, "loss": 0.2139, "step": 30975 }, { "epoch": 2.3021924934968414, "grad_norm": 2.1417849980617354, "learning_rate": 2.7059431123193614e-06, "loss": 0.2654, "step": 30976 }, { "epoch": 2.3022668153102934, "grad_norm": 1.8744083442854296, "learning_rate": 2.7053942506143382e-06, "loss": 0.2192, "step": 30977 }, { "epoch": 2.302341137123746, "grad_norm": 1.9156081555746167, "learning_rate": 2.704845435871606e-06, "loss": 0.2042, "step": 30978 }, { "epoch": 2.302415458937198, "grad_norm": 2.4452300967835705, "learning_rate": 2.7042966680947037e-06, "loss": 0.2997, "step": 30979 }, { "epoch": 2.3024897807506504, "grad_norm": 3.3305564533457104, "learning_rate": 2.7037479472871573e-06, "loss": 0.4657, "step": 30980 }, { "epoch": 2.3025641025641024, "grad_norm": 2.4179321787857275, "learning_rate": 2.7031992734525058e-06, "loss": 0.2392, "step": 30981 }, { "epoch": 2.302638424377555, "grad_norm": 2.3975174886614865, "learning_rate": 2.702650646594279e-06, "loss": 0.3077, "step": 30982 }, { "epoch": 2.3027127461910073, "grad_norm": 2.64734759775482, "learning_rate": 2.702102066716006e-06, "loss": 0.3249, "step": 30983 }, { "epoch": 2.3027870680044593, "grad_norm": 1.9863011254703553, "learning_rate": 2.7015535338212272e-06, "loss": 0.2064, "step": 30984 }, { "epoch": 2.3028613898179113, "grad_norm": 2.219950034875941, "learning_rate": 2.7010050479134618e-06, "loss": 0.3378, "step": 30985 }, { "epoch": 2.302935711631364, "grad_norm": 1.9308127567338493, "learning_rate": 2.7004566089962505e-06, "loss": 0.2018, "step": 30986 }, { "epoch": 2.3030100334448163, "grad_norm": 2.528642822139815, "learning_rate": 2.6999082170731182e-06, "loss": 0.3169, "step": 30987 }, { "epoch": 2.3030843552582683, "grad_norm": 2.010559040918208, "learning_rate": 2.6993598721476e-06, "loss": 0.2697, "step": 30988 }, { "epoch": 2.3031586770717207, "grad_norm": 2.7156361786500316, "learning_rate": 2.6988115742232247e-06, "loss": 0.2981, "step": 30989 }, { "epoch": 2.3032329988851727, "grad_norm": 2.9323060550372957, "learning_rate": 2.6982633233035183e-06, "loss": 0.2679, "step": 30990 }, { "epoch": 2.303307320698625, "grad_norm": 2.449105160007675, "learning_rate": 2.697715119392017e-06, "loss": 0.235, "step": 30991 }, { "epoch": 2.303381642512077, "grad_norm": 2.098458129867324, "learning_rate": 2.6971669624922447e-06, "loss": 0.2566, "step": 30992 }, { "epoch": 2.3034559643255297, "grad_norm": 1.434847101720084, "learning_rate": 2.696618852607734e-06, "loss": 0.1394, "step": 30993 }, { "epoch": 2.3035302861389817, "grad_norm": 2.8234868171457053, "learning_rate": 2.6960707897420137e-06, "loss": 0.2848, "step": 30994 }, { "epoch": 2.303604607952434, "grad_norm": 1.7810559602526672, "learning_rate": 2.6955227738986103e-06, "loss": 0.2376, "step": 30995 }, { "epoch": 2.303678929765886, "grad_norm": 2.060434056948106, "learning_rate": 2.694974805081051e-06, "loss": 0.2348, "step": 30996 }, { "epoch": 2.3037532515793386, "grad_norm": 2.1849923575130994, "learning_rate": 2.694426883292863e-06, "loss": 0.2758, "step": 30997 }, { "epoch": 2.3038275733927907, "grad_norm": 2.0179015398080464, "learning_rate": 2.6938790085375786e-06, "loss": 0.2539, "step": 30998 }, { "epoch": 2.303901895206243, "grad_norm": 1.9169656597308968, "learning_rate": 2.6933311808187223e-06, "loss": 0.214, "step": 30999 }, { "epoch": 2.303976217019695, "grad_norm": 2.0823794000598905, "learning_rate": 2.6927834001398166e-06, "loss": 0.2576, "step": 31000 }, { "epoch": 2.3040505388331476, "grad_norm": 1.9312618283472454, "learning_rate": 2.692235666504397e-06, "loss": 0.2379, "step": 31001 }, { "epoch": 2.3041248606465996, "grad_norm": 2.6372493893879083, "learning_rate": 2.6916879799159803e-06, "loss": 0.215, "step": 31002 }, { "epoch": 2.304199182460052, "grad_norm": 2.653443065618567, "learning_rate": 2.6911403403781013e-06, "loss": 0.3465, "step": 31003 }, { "epoch": 2.304273504273504, "grad_norm": 2.5897829633199603, "learning_rate": 2.6905927478942806e-06, "loss": 0.3042, "step": 31004 }, { "epoch": 2.3043478260869565, "grad_norm": 2.4449784283138163, "learning_rate": 2.6900452024680424e-06, "loss": 0.276, "step": 31005 }, { "epoch": 2.304422147900409, "grad_norm": 2.171009951383825, "learning_rate": 2.689497704102919e-06, "loss": 0.2409, "step": 31006 }, { "epoch": 2.304496469713861, "grad_norm": 2.326705062626033, "learning_rate": 2.6889502528024248e-06, "loss": 0.2509, "step": 31007 }, { "epoch": 2.304570791527313, "grad_norm": 3.944657582703243, "learning_rate": 2.688402848570093e-06, "loss": 0.3151, "step": 31008 }, { "epoch": 2.3046451133407655, "grad_norm": 1.8624143661855252, "learning_rate": 2.68785549140944e-06, "loss": 0.2524, "step": 31009 }, { "epoch": 2.304719435154218, "grad_norm": 2.1644768145051225, "learning_rate": 2.6873081813239977e-06, "loss": 0.2418, "step": 31010 }, { "epoch": 2.30479375696767, "grad_norm": 2.8953102214749125, "learning_rate": 2.686760918317286e-06, "loss": 0.2723, "step": 31011 }, { "epoch": 2.3048680787811224, "grad_norm": 2.5026315797662795, "learning_rate": 2.686213702392825e-06, "loss": 0.2439, "step": 31012 }, { "epoch": 2.3049424005945744, "grad_norm": 2.4622081710447055, "learning_rate": 2.6856665335541434e-06, "loss": 0.33, "step": 31013 }, { "epoch": 2.305016722408027, "grad_norm": 2.2528267375877364, "learning_rate": 2.6851194118047585e-06, "loss": 0.3371, "step": 31014 }, { "epoch": 2.305091044221479, "grad_norm": 2.169376341525499, "learning_rate": 2.684572337148198e-06, "loss": 0.278, "step": 31015 }, { "epoch": 2.3051653660349314, "grad_norm": 1.941724807843526, "learning_rate": 2.684025309587981e-06, "loss": 0.2355, "step": 31016 }, { "epoch": 2.3052396878483834, "grad_norm": 2.0565151580829504, "learning_rate": 2.6834783291276265e-06, "loss": 0.2808, "step": 31017 }, { "epoch": 2.305314009661836, "grad_norm": 1.9512995847124135, "learning_rate": 2.682931395770666e-06, "loss": 0.2485, "step": 31018 }, { "epoch": 2.305388331475288, "grad_norm": 2.4919519161199664, "learning_rate": 2.682384509520606e-06, "loss": 0.3532, "step": 31019 }, { "epoch": 2.3054626532887403, "grad_norm": 2.1824236148181346, "learning_rate": 2.681837670380979e-06, "loss": 0.2549, "step": 31020 }, { "epoch": 2.3055369751021924, "grad_norm": 1.8912320039211832, "learning_rate": 2.6812908783553005e-06, "loss": 0.2276, "step": 31021 }, { "epoch": 2.305611296915645, "grad_norm": 1.7652299000270681, "learning_rate": 2.6807441334470885e-06, "loss": 0.2031, "step": 31022 }, { "epoch": 2.305685618729097, "grad_norm": 2.536966910028906, "learning_rate": 2.6801974356598703e-06, "loss": 0.2694, "step": 31023 }, { "epoch": 2.3057599405425493, "grad_norm": 1.8117595450900055, "learning_rate": 2.6796507849971565e-06, "loss": 0.2477, "step": 31024 }, { "epoch": 2.3058342623560013, "grad_norm": 2.299024630397646, "learning_rate": 2.679104181462475e-06, "loss": 0.2718, "step": 31025 }, { "epoch": 2.3059085841694538, "grad_norm": 1.9956760930810145, "learning_rate": 2.6785576250593414e-06, "loss": 0.3016, "step": 31026 }, { "epoch": 2.305982905982906, "grad_norm": 2.4011760322294626, "learning_rate": 2.6780111157912704e-06, "loss": 0.2644, "step": 31027 }, { "epoch": 2.3060572277963582, "grad_norm": 1.9968325535818783, "learning_rate": 2.6774646536617866e-06, "loss": 0.1709, "step": 31028 }, { "epoch": 2.3061315496098107, "grad_norm": 1.9024013221299023, "learning_rate": 2.6769182386744064e-06, "loss": 0.2249, "step": 31029 }, { "epoch": 2.3062058714232627, "grad_norm": 2.8301965833411993, "learning_rate": 2.676371870832646e-06, "loss": 0.3234, "step": 31030 }, { "epoch": 2.3062801932367147, "grad_norm": 2.570606999801503, "learning_rate": 2.67582555014002e-06, "loss": 0.3563, "step": 31031 }, { "epoch": 2.306354515050167, "grad_norm": 2.0810955075658923, "learning_rate": 2.6752792766000524e-06, "loss": 0.263, "step": 31032 }, { "epoch": 2.3064288368636197, "grad_norm": 1.9754059080744166, "learning_rate": 2.674733050216257e-06, "loss": 0.2183, "step": 31033 }, { "epoch": 2.3065031586770717, "grad_norm": 2.1123599143267855, "learning_rate": 2.674186870992147e-06, "loss": 0.2489, "step": 31034 }, { "epoch": 2.306577480490524, "grad_norm": 2.333080589875858, "learning_rate": 2.673640738931245e-06, "loss": 0.3009, "step": 31035 }, { "epoch": 2.306651802303976, "grad_norm": 2.0947885828507182, "learning_rate": 2.673094654037064e-06, "loss": 0.2408, "step": 31036 }, { "epoch": 2.3067261241174286, "grad_norm": 3.583966752881116, "learning_rate": 2.6725486163131155e-06, "loss": 0.4108, "step": 31037 }, { "epoch": 2.3068004459308806, "grad_norm": 2.3950383252121608, "learning_rate": 2.6720026257629227e-06, "loss": 0.3026, "step": 31038 }, { "epoch": 2.306874767744333, "grad_norm": 2.2193247748857896, "learning_rate": 2.6714566823899934e-06, "loss": 0.261, "step": 31039 }, { "epoch": 2.306949089557785, "grad_norm": 2.4962608429670974, "learning_rate": 2.670910786197852e-06, "loss": 0.3424, "step": 31040 }, { "epoch": 2.3070234113712376, "grad_norm": 2.045736676600891, "learning_rate": 2.670364937190001e-06, "loss": 0.2436, "step": 31041 }, { "epoch": 2.3070977331846896, "grad_norm": 2.285325245140232, "learning_rate": 2.669819135369962e-06, "loss": 0.3053, "step": 31042 }, { "epoch": 2.307172054998142, "grad_norm": 2.430370103227344, "learning_rate": 2.6692733807412487e-06, "loss": 0.2848, "step": 31043 }, { "epoch": 2.307246376811594, "grad_norm": 2.343829828581151, "learning_rate": 2.6687276733073686e-06, "loss": 0.2584, "step": 31044 }, { "epoch": 2.3073206986250465, "grad_norm": 2.2682176033923453, "learning_rate": 2.668182013071843e-06, "loss": 0.2708, "step": 31045 }, { "epoch": 2.3073950204384985, "grad_norm": 1.874724610357751, "learning_rate": 2.667636400038177e-06, "loss": 0.2303, "step": 31046 }, { "epoch": 2.307469342251951, "grad_norm": 2.2889061682032352, "learning_rate": 2.667090834209892e-06, "loss": 0.2388, "step": 31047 }, { "epoch": 2.307543664065403, "grad_norm": 2.135288010704936, "learning_rate": 2.6665453155904954e-06, "loss": 0.2474, "step": 31048 }, { "epoch": 2.3076179858788555, "grad_norm": 2.465597861276252, "learning_rate": 2.665999844183497e-06, "loss": 0.2876, "step": 31049 }, { "epoch": 2.3076923076923075, "grad_norm": 2.247425210456056, "learning_rate": 2.6654544199924127e-06, "loss": 0.2354, "step": 31050 }, { "epoch": 2.30776662950576, "grad_norm": 2.619898551859439, "learning_rate": 2.6649090430207523e-06, "loss": 0.2968, "step": 31051 }, { "epoch": 2.3078409513192124, "grad_norm": 2.929537067618315, "learning_rate": 2.664363713272028e-06, "loss": 0.3142, "step": 31052 }, { "epoch": 2.3079152731326644, "grad_norm": 1.9595341371835635, "learning_rate": 2.663818430749746e-06, "loss": 0.277, "step": 31053 }, { "epoch": 2.3079895949461164, "grad_norm": 2.28422310878922, "learning_rate": 2.663273195457423e-06, "loss": 0.2887, "step": 31054 }, { "epoch": 2.308063916759569, "grad_norm": 2.4643141605929046, "learning_rate": 2.662728007398567e-06, "loss": 0.2642, "step": 31055 }, { "epoch": 2.3081382385730214, "grad_norm": 3.342995754021444, "learning_rate": 2.6621828665766846e-06, "loss": 0.4066, "step": 31056 }, { "epoch": 2.3082125603864734, "grad_norm": 2.296763237624721, "learning_rate": 2.6616377729952903e-06, "loss": 0.2645, "step": 31057 }, { "epoch": 2.308286882199926, "grad_norm": 2.2061134748947646, "learning_rate": 2.6610927266578925e-06, "loss": 0.3029, "step": 31058 }, { "epoch": 2.308361204013378, "grad_norm": 2.357587139832691, "learning_rate": 2.6605477275679947e-06, "loss": 0.2659, "step": 31059 }, { "epoch": 2.3084355258268303, "grad_norm": 1.8793665853171853, "learning_rate": 2.6600027757291136e-06, "loss": 0.1883, "step": 31060 }, { "epoch": 2.3085098476402823, "grad_norm": 1.6339611439899844, "learning_rate": 2.6594578711447506e-06, "loss": 0.1779, "step": 31061 }, { "epoch": 2.308584169453735, "grad_norm": 2.1143577905288797, "learning_rate": 2.6589130138184195e-06, "loss": 0.267, "step": 31062 }, { "epoch": 2.308658491267187, "grad_norm": 2.825880277865694, "learning_rate": 2.658368203753626e-06, "loss": 0.3517, "step": 31063 }, { "epoch": 2.3087328130806393, "grad_norm": 2.1354781673444596, "learning_rate": 2.657823440953876e-06, "loss": 0.2815, "step": 31064 }, { "epoch": 2.3088071348940913, "grad_norm": 2.2197526990676297, "learning_rate": 2.6572787254226794e-06, "loss": 0.2662, "step": 31065 }, { "epoch": 2.3088814567075437, "grad_norm": 2.3040065528067175, "learning_rate": 2.6567340571635368e-06, "loss": 0.3325, "step": 31066 }, { "epoch": 2.3089557785209958, "grad_norm": 1.6373244611583724, "learning_rate": 2.6561894361799635e-06, "loss": 0.2161, "step": 31067 }, { "epoch": 2.309030100334448, "grad_norm": 2.748526869225477, "learning_rate": 2.655644862475458e-06, "loss": 0.3744, "step": 31068 }, { "epoch": 2.3091044221479002, "grad_norm": 2.407986417356322, "learning_rate": 2.655100336053533e-06, "loss": 0.2833, "step": 31069 }, { "epoch": 2.3091787439613527, "grad_norm": 2.256415848685403, "learning_rate": 2.654555856917691e-06, "loss": 0.245, "step": 31070 }, { "epoch": 2.3092530657748047, "grad_norm": 2.3920984528205045, "learning_rate": 2.6540114250714343e-06, "loss": 0.3289, "step": 31071 }, { "epoch": 2.309327387588257, "grad_norm": 2.1959991780396932, "learning_rate": 2.653467040518274e-06, "loss": 0.2929, "step": 31072 }, { "epoch": 2.309401709401709, "grad_norm": 3.0834732902942763, "learning_rate": 2.652922703261709e-06, "loss": 0.3646, "step": 31073 }, { "epoch": 2.3094760312151617, "grad_norm": 3.4626897891706965, "learning_rate": 2.652378413305251e-06, "loss": 0.3376, "step": 31074 }, { "epoch": 2.309550353028614, "grad_norm": 2.4427624470451357, "learning_rate": 2.651834170652395e-06, "loss": 0.3203, "step": 31075 }, { "epoch": 2.309624674842066, "grad_norm": 2.268187238996716, "learning_rate": 2.6512899753066514e-06, "loss": 0.2591, "step": 31076 }, { "epoch": 2.3096989966555186, "grad_norm": 2.5125988095208416, "learning_rate": 2.6507458272715225e-06, "loss": 0.4326, "step": 31077 }, { "epoch": 2.3097733184689706, "grad_norm": 1.9115291855070682, "learning_rate": 2.6502017265505076e-06, "loss": 0.2482, "step": 31078 }, { "epoch": 2.309847640282423, "grad_norm": 2.600696187276397, "learning_rate": 2.6496576731471147e-06, "loss": 0.3186, "step": 31079 }, { "epoch": 2.309921962095875, "grad_norm": 2.3895555930869588, "learning_rate": 2.6491136670648445e-06, "loss": 0.2702, "step": 31080 }, { "epoch": 2.3099962839093275, "grad_norm": 2.0485707300600815, "learning_rate": 2.648569708307197e-06, "loss": 0.2186, "step": 31081 }, { "epoch": 2.3100706057227796, "grad_norm": 2.215107873471207, "learning_rate": 2.648025796877678e-06, "loss": 0.2653, "step": 31082 }, { "epoch": 2.310144927536232, "grad_norm": 1.9791181850677289, "learning_rate": 2.647481932779785e-06, "loss": 0.2445, "step": 31083 }, { "epoch": 2.310219249349684, "grad_norm": 1.7262384317185262, "learning_rate": 2.646938116017026e-06, "loss": 0.1667, "step": 31084 }, { "epoch": 2.3102935711631365, "grad_norm": 2.0302668128582906, "learning_rate": 2.6463943465928966e-06, "loss": 0.2214, "step": 31085 }, { "epoch": 2.3103678929765885, "grad_norm": 2.132772709829226, "learning_rate": 2.6458506245108985e-06, "loss": 0.1717, "step": 31086 }, { "epoch": 2.310442214790041, "grad_norm": 10.701779649894927, "learning_rate": 2.6453069497745333e-06, "loss": 0.3037, "step": 31087 }, { "epoch": 2.310516536603493, "grad_norm": 2.7228188858128273, "learning_rate": 2.644763322387297e-06, "loss": 0.3538, "step": 31088 }, { "epoch": 2.3105908584169454, "grad_norm": 2.0290950549940447, "learning_rate": 2.644219742352696e-06, "loss": 0.2152, "step": 31089 }, { "epoch": 2.3106651802303975, "grad_norm": 2.3827836217624454, "learning_rate": 2.643676209674224e-06, "loss": 0.3499, "step": 31090 }, { "epoch": 2.31073950204385, "grad_norm": 2.456941529747419, "learning_rate": 2.6431327243553852e-06, "loss": 0.3351, "step": 31091 }, { "epoch": 2.310813823857302, "grad_norm": 1.8600962153975544, "learning_rate": 2.642589286399676e-06, "loss": 0.2872, "step": 31092 }, { "epoch": 2.3108881456707544, "grad_norm": 1.9381675764362918, "learning_rate": 2.642045895810593e-06, "loss": 0.2195, "step": 31093 }, { "epoch": 2.310962467484207, "grad_norm": 2.4332751776395054, "learning_rate": 2.6415025525916394e-06, "loss": 0.3153, "step": 31094 }, { "epoch": 2.311036789297659, "grad_norm": 4.420824046105485, "learning_rate": 2.640959256746307e-06, "loss": 0.3917, "step": 31095 }, { "epoch": 2.311111111111111, "grad_norm": 2.341255388328742, "learning_rate": 2.6404160082781015e-06, "loss": 0.2877, "step": 31096 }, { "epoch": 2.3111854329245634, "grad_norm": 2.3217787819053344, "learning_rate": 2.6398728071905143e-06, "loss": 0.2848, "step": 31097 }, { "epoch": 2.311259754738016, "grad_norm": 1.9332212193323426, "learning_rate": 2.639329653487045e-06, "loss": 0.205, "step": 31098 }, { "epoch": 2.311334076551468, "grad_norm": 2.7247504154761524, "learning_rate": 2.6387865471711892e-06, "loss": 0.334, "step": 31099 }, { "epoch": 2.3114083983649203, "grad_norm": 2.801160543480081, "learning_rate": 2.6382434882464415e-06, "loss": 0.2834, "step": 31100 }, { "epoch": 2.3114827201783723, "grad_norm": 2.854751935703914, "learning_rate": 2.6377004767163017e-06, "loss": 0.2844, "step": 31101 }, { "epoch": 2.3115570419918248, "grad_norm": 2.7126616689865286, "learning_rate": 2.637157512584265e-06, "loss": 0.297, "step": 31102 }, { "epoch": 2.311631363805277, "grad_norm": 2.4696638504673154, "learning_rate": 2.6366145958538236e-06, "loss": 0.3095, "step": 31103 }, { "epoch": 2.3117056856187292, "grad_norm": 2.369247049805616, "learning_rate": 2.6360717265284776e-06, "loss": 0.1943, "step": 31104 }, { "epoch": 2.3117800074321813, "grad_norm": 2.410480390015353, "learning_rate": 2.6355289046117173e-06, "loss": 0.2549, "step": 31105 }, { "epoch": 2.3118543292456337, "grad_norm": 3.29791951490595, "learning_rate": 2.634986130107042e-06, "loss": 0.3386, "step": 31106 }, { "epoch": 2.3119286510590857, "grad_norm": 2.8328453795731643, "learning_rate": 2.6344434030179454e-06, "loss": 0.3361, "step": 31107 }, { "epoch": 2.312002972872538, "grad_norm": 2.556086766560169, "learning_rate": 2.6339007233479184e-06, "loss": 0.3312, "step": 31108 }, { "epoch": 2.31207729468599, "grad_norm": 2.386551887676862, "learning_rate": 2.6333580911004574e-06, "loss": 0.269, "step": 31109 }, { "epoch": 2.3121516164994427, "grad_norm": 2.7328557950258063, "learning_rate": 2.6328155062790518e-06, "loss": 0.3665, "step": 31110 }, { "epoch": 2.3122259383128947, "grad_norm": 2.6239166807347507, "learning_rate": 2.6322729688871996e-06, "loss": 0.3244, "step": 31111 }, { "epoch": 2.312300260126347, "grad_norm": 2.270381284174512, "learning_rate": 2.631730478928389e-06, "loss": 0.3324, "step": 31112 }, { "epoch": 2.312374581939799, "grad_norm": 1.9459039054158072, "learning_rate": 2.6311880364061194e-06, "loss": 0.174, "step": 31113 }, { "epoch": 2.3124489037532516, "grad_norm": 2.581147403494113, "learning_rate": 2.630645641323878e-06, "loss": 0.2247, "step": 31114 }, { "epoch": 2.3125232255667036, "grad_norm": 2.538293190545039, "learning_rate": 2.6301032936851544e-06, "loss": 0.3051, "step": 31115 }, { "epoch": 2.312597547380156, "grad_norm": 2.195931070003751, "learning_rate": 2.6295609934934476e-06, "loss": 0.3052, "step": 31116 }, { "epoch": 2.3126718691936086, "grad_norm": 1.885742271211129, "learning_rate": 2.6290187407522406e-06, "loss": 0.1892, "step": 31117 }, { "epoch": 2.3127461910070606, "grad_norm": 1.8620670399987853, "learning_rate": 2.628476535465031e-06, "loss": 0.2399, "step": 31118 }, { "epoch": 2.3128205128205126, "grad_norm": 2.1129759033620137, "learning_rate": 2.6279343776353085e-06, "loss": 0.2711, "step": 31119 }, { "epoch": 2.312894834633965, "grad_norm": 2.026055849536731, "learning_rate": 2.627392267266561e-06, "loss": 0.2519, "step": 31120 }, { "epoch": 2.3129691564474175, "grad_norm": 1.8168051978864326, "learning_rate": 2.6268502043622802e-06, "loss": 0.2333, "step": 31121 }, { "epoch": 2.3130434782608695, "grad_norm": 1.5539090194887386, "learning_rate": 2.6263081889259512e-06, "loss": 0.1941, "step": 31122 }, { "epoch": 2.313117800074322, "grad_norm": 2.710762150290293, "learning_rate": 2.6257662209610713e-06, "loss": 0.2444, "step": 31123 }, { "epoch": 2.313192121887774, "grad_norm": 2.258602278034413, "learning_rate": 2.625224300471125e-06, "loss": 0.2827, "step": 31124 }, { "epoch": 2.3132664437012265, "grad_norm": 2.371413673543576, "learning_rate": 2.6246824274595996e-06, "loss": 0.2765, "step": 31125 }, { "epoch": 2.3133407655146785, "grad_norm": 2.3045011910738387, "learning_rate": 2.62414060192999e-06, "loss": 0.2664, "step": 31126 }, { "epoch": 2.313415087328131, "grad_norm": 2.035380849732575, "learning_rate": 2.6235988238857756e-06, "loss": 0.2217, "step": 31127 }, { "epoch": 2.313489409141583, "grad_norm": 2.2638035521984587, "learning_rate": 2.6230570933304534e-06, "loss": 0.3075, "step": 31128 }, { "epoch": 2.3135637309550354, "grad_norm": 1.9398372823463936, "learning_rate": 2.6225154102675066e-06, "loss": 0.2129, "step": 31129 }, { "epoch": 2.3136380527684874, "grad_norm": 2.3044098260985577, "learning_rate": 2.6219737747004204e-06, "loss": 0.2438, "step": 31130 }, { "epoch": 2.31371237458194, "grad_norm": 2.611050323037686, "learning_rate": 2.6214321866326896e-06, "loss": 0.2746, "step": 31131 }, { "epoch": 2.313786696395392, "grad_norm": 1.6452293621300869, "learning_rate": 2.6208906460677897e-06, "loss": 0.2293, "step": 31132 }, { "epoch": 2.3138610182088444, "grad_norm": 2.656022213718791, "learning_rate": 2.620349153009215e-06, "loss": 0.326, "step": 31133 }, { "epoch": 2.3139353400222964, "grad_norm": 1.842352782971142, "learning_rate": 2.619807707460448e-06, "loss": 0.19, "step": 31134 }, { "epoch": 2.314009661835749, "grad_norm": 2.9713413003190015, "learning_rate": 2.619266309424978e-06, "loss": 0.3693, "step": 31135 }, { "epoch": 2.314083983649201, "grad_norm": 2.3526500902616916, "learning_rate": 2.618724958906289e-06, "loss": 0.3018, "step": 31136 }, { "epoch": 2.3141583054626533, "grad_norm": 1.7310417317433524, "learning_rate": 2.6181836559078633e-06, "loss": 0.2352, "step": 31137 }, { "epoch": 2.3142326272761053, "grad_norm": 2.529180676352962, "learning_rate": 2.6176424004331902e-06, "loss": 0.3231, "step": 31138 }, { "epoch": 2.314306949089558, "grad_norm": 2.2208131700869442, "learning_rate": 2.6171011924857492e-06, "loss": 0.2416, "step": 31139 }, { "epoch": 2.3143812709030103, "grad_norm": 2.397133444107078, "learning_rate": 2.6165600320690323e-06, "loss": 0.3338, "step": 31140 }, { "epoch": 2.3144555927164623, "grad_norm": 2.7011968794682044, "learning_rate": 2.6160189191865172e-06, "loss": 0.3145, "step": 31141 }, { "epoch": 2.3145299145299143, "grad_norm": 2.7626417875852614, "learning_rate": 2.6154778538416892e-06, "loss": 0.3734, "step": 31142 }, { "epoch": 2.3146042363433668, "grad_norm": 2.2021571391370967, "learning_rate": 2.614936836038032e-06, "loss": 0.2629, "step": 31143 }, { "epoch": 2.314678558156819, "grad_norm": 2.4807886131084564, "learning_rate": 2.614395865779026e-06, "loss": 0.3572, "step": 31144 }, { "epoch": 2.3147528799702712, "grad_norm": 2.3166366124874336, "learning_rate": 2.613854943068158e-06, "loss": 0.3014, "step": 31145 }, { "epoch": 2.3148272017837237, "grad_norm": 2.4133966266257505, "learning_rate": 2.61331406790891e-06, "loss": 0.2722, "step": 31146 }, { "epoch": 2.3149015235971757, "grad_norm": 2.3253141098730588, "learning_rate": 2.612773240304758e-06, "loss": 0.2381, "step": 31147 }, { "epoch": 2.314975845410628, "grad_norm": 2.7484226353930623, "learning_rate": 2.612232460259193e-06, "loss": 0.3217, "step": 31148 }, { "epoch": 2.31505016722408, "grad_norm": 3.133752906070998, "learning_rate": 2.6116917277756883e-06, "loss": 0.3854, "step": 31149 }, { "epoch": 2.3151244890375327, "grad_norm": 2.1707542678891563, "learning_rate": 2.6111510428577324e-06, "loss": 0.1832, "step": 31150 }, { "epoch": 2.3151988108509847, "grad_norm": 2.2346601516705387, "learning_rate": 2.610610405508802e-06, "loss": 0.2763, "step": 31151 }, { "epoch": 2.315273132664437, "grad_norm": 1.774101606497377, "learning_rate": 2.6100698157323757e-06, "loss": 0.2047, "step": 31152 }, { "epoch": 2.315347454477889, "grad_norm": 2.7813820112560004, "learning_rate": 2.6095292735319434e-06, "loss": 0.3383, "step": 31153 }, { "epoch": 2.3154217762913416, "grad_norm": 2.379741959905076, "learning_rate": 2.6089887789109713e-06, "loss": 0.2926, "step": 31154 }, { "epoch": 2.3154960981047936, "grad_norm": 2.3227837753832055, "learning_rate": 2.608448331872949e-06, "loss": 0.3146, "step": 31155 }, { "epoch": 2.315570419918246, "grad_norm": 2.5125723012340786, "learning_rate": 2.60790793242135e-06, "loss": 0.3047, "step": 31156 }, { "epoch": 2.315644741731698, "grad_norm": 2.259552378019651, "learning_rate": 2.6073675805596587e-06, "loss": 0.2995, "step": 31157 }, { "epoch": 2.3157190635451506, "grad_norm": 2.244544040503007, "learning_rate": 2.6068272762913514e-06, "loss": 0.2696, "step": 31158 }, { "epoch": 2.3157933853586026, "grad_norm": 2.1756033994080006, "learning_rate": 2.606287019619904e-06, "loss": 0.2305, "step": 31159 }, { "epoch": 2.315867707172055, "grad_norm": 2.339977086738399, "learning_rate": 2.6057468105488003e-06, "loss": 0.2733, "step": 31160 }, { "epoch": 2.315942028985507, "grad_norm": 2.6089726097877866, "learning_rate": 2.605206649081512e-06, "loss": 0.2642, "step": 31161 }, { "epoch": 2.3160163507989595, "grad_norm": 2.4641733289756207, "learning_rate": 2.6046665352215216e-06, "loss": 0.3399, "step": 31162 }, { "epoch": 2.316090672612412, "grad_norm": 2.3211975386878274, "learning_rate": 2.604126468972306e-06, "loss": 0.2765, "step": 31163 }, { "epoch": 2.316164994425864, "grad_norm": 2.4586238953863635, "learning_rate": 2.6035864503373386e-06, "loss": 0.3294, "step": 31164 }, { "epoch": 2.316239316239316, "grad_norm": 1.9805000932896166, "learning_rate": 2.6030464793200993e-06, "loss": 0.2047, "step": 31165 }, { "epoch": 2.3163136380527685, "grad_norm": 2.1685642466655053, "learning_rate": 2.6025065559240594e-06, "loss": 0.2515, "step": 31166 }, { "epoch": 2.316387959866221, "grad_norm": 2.5279997769317877, "learning_rate": 2.601966680152701e-06, "loss": 0.2552, "step": 31167 }, { "epoch": 2.316462281679673, "grad_norm": 2.479501541636243, "learning_rate": 2.6014268520094976e-06, "loss": 0.3046, "step": 31168 }, { "epoch": 2.3165366034931254, "grad_norm": 2.6259966820487253, "learning_rate": 2.6008870714979217e-06, "loss": 0.264, "step": 31169 }, { "epoch": 2.3166109253065774, "grad_norm": 2.5138458155219228, "learning_rate": 2.6003473386214527e-06, "loss": 0.2675, "step": 31170 }, { "epoch": 2.31668524712003, "grad_norm": 2.0279488536433794, "learning_rate": 2.5998076533835613e-06, "loss": 0.2335, "step": 31171 }, { "epoch": 2.316759568933482, "grad_norm": 2.3585192461270905, "learning_rate": 2.5992680157877283e-06, "loss": 0.2892, "step": 31172 }, { "epoch": 2.3168338907469344, "grad_norm": 2.235755077297549, "learning_rate": 2.5987284258374224e-06, "loss": 0.2735, "step": 31173 }, { "epoch": 2.3169082125603864, "grad_norm": 1.9808237967960955, "learning_rate": 2.5981888835361168e-06, "loss": 0.2686, "step": 31174 }, { "epoch": 2.316982534373839, "grad_norm": 2.230379714872618, "learning_rate": 2.597649388887289e-06, "loss": 0.3064, "step": 31175 }, { "epoch": 2.317056856187291, "grad_norm": 2.319114130632418, "learning_rate": 2.5971099418944114e-06, "loss": 0.3178, "step": 31176 }, { "epoch": 2.3171311780007433, "grad_norm": 2.3757900477089224, "learning_rate": 2.5965705425609555e-06, "loss": 0.2013, "step": 31177 }, { "epoch": 2.3172054998141953, "grad_norm": 2.994480392522164, "learning_rate": 2.5960311908903914e-06, "loss": 0.3733, "step": 31178 }, { "epoch": 2.317279821627648, "grad_norm": 2.3694708524143375, "learning_rate": 2.5954918868861968e-06, "loss": 0.3038, "step": 31179 }, { "epoch": 2.3173541434411, "grad_norm": 2.830035729594637, "learning_rate": 2.5949526305518424e-06, "loss": 0.3598, "step": 31180 }, { "epoch": 2.3174284652545523, "grad_norm": 2.90191470766228, "learning_rate": 2.5944134218907945e-06, "loss": 0.2938, "step": 31181 }, { "epoch": 2.3175027870680043, "grad_norm": 2.7010977113137247, "learning_rate": 2.5938742609065324e-06, "loss": 0.2963, "step": 31182 }, { "epoch": 2.3175771088814567, "grad_norm": 2.092096599083538, "learning_rate": 2.5933351476025237e-06, "loss": 0.2653, "step": 31183 }, { "epoch": 2.3176514306949088, "grad_norm": 2.3367421255930454, "learning_rate": 2.592796081982235e-06, "loss": 0.2852, "step": 31184 }, { "epoch": 2.317725752508361, "grad_norm": 2.3793347521060397, "learning_rate": 2.592257064049145e-06, "loss": 0.2864, "step": 31185 }, { "epoch": 2.3178000743218137, "grad_norm": 2.256787115343383, "learning_rate": 2.5917180938067154e-06, "loss": 0.2808, "step": 31186 }, { "epoch": 2.3178743961352657, "grad_norm": 2.9301449546761913, "learning_rate": 2.591179171258428e-06, "loss": 0.3138, "step": 31187 }, { "epoch": 2.3179487179487177, "grad_norm": 2.708079485959215, "learning_rate": 2.590640296407737e-06, "loss": 0.36, "step": 31188 }, { "epoch": 2.31802303976217, "grad_norm": 2.255176949386051, "learning_rate": 2.590101469258124e-06, "loss": 0.2189, "step": 31189 }, { "epoch": 2.3180973615756226, "grad_norm": 2.139786108976256, "learning_rate": 2.5895626898130523e-06, "loss": 0.2271, "step": 31190 }, { "epoch": 2.3181716833890746, "grad_norm": 2.8694741837521094, "learning_rate": 2.5890239580759877e-06, "loss": 0.3803, "step": 31191 }, { "epoch": 2.318246005202527, "grad_norm": 2.275629305580903, "learning_rate": 2.588485274050406e-06, "loss": 0.2494, "step": 31192 }, { "epoch": 2.318320327015979, "grad_norm": 2.1558558405301276, "learning_rate": 2.587946637739769e-06, "loss": 0.2238, "step": 31193 }, { "epoch": 2.3183946488294316, "grad_norm": 2.0539841710366376, "learning_rate": 2.5874080491475496e-06, "loss": 0.2485, "step": 31194 }, { "epoch": 2.3184689706428836, "grad_norm": 3.0380765465183694, "learning_rate": 2.5868695082772132e-06, "loss": 0.2252, "step": 31195 }, { "epoch": 2.318543292456336, "grad_norm": 2.8127738920638734, "learning_rate": 2.5863310151322228e-06, "loss": 0.3041, "step": 31196 }, { "epoch": 2.318617614269788, "grad_norm": 2.1941890263545543, "learning_rate": 2.5857925697160515e-06, "loss": 0.2678, "step": 31197 }, { "epoch": 2.3186919360832405, "grad_norm": 2.696893551940344, "learning_rate": 2.5852541720321634e-06, "loss": 0.3152, "step": 31198 }, { "epoch": 2.3187662578966926, "grad_norm": 2.0788599806900985, "learning_rate": 2.584715822084023e-06, "loss": 0.2577, "step": 31199 }, { "epoch": 2.318840579710145, "grad_norm": 2.368562605536669, "learning_rate": 2.584177519875095e-06, "loss": 0.3329, "step": 31200 }, { "epoch": 2.318914901523597, "grad_norm": 2.395462581784506, "learning_rate": 2.5836392654088515e-06, "loss": 0.2899, "step": 31201 }, { "epoch": 2.3189892233370495, "grad_norm": 2.1862802405122226, "learning_rate": 2.583101058688752e-06, "loss": 0.2885, "step": 31202 }, { "epoch": 2.3190635451505015, "grad_norm": 2.2061041538134916, "learning_rate": 2.5825628997182605e-06, "loss": 0.3371, "step": 31203 }, { "epoch": 2.319137866963954, "grad_norm": 3.0015621552188687, "learning_rate": 2.5820247885008478e-06, "loss": 0.3569, "step": 31204 }, { "epoch": 2.319212188777406, "grad_norm": 2.068178857081441, "learning_rate": 2.5814867250399742e-06, "loss": 0.2766, "step": 31205 }, { "epoch": 2.3192865105908584, "grad_norm": 2.7653383551367203, "learning_rate": 2.5809487093391017e-06, "loss": 0.3074, "step": 31206 }, { "epoch": 2.3193608324043105, "grad_norm": 2.5850043303389114, "learning_rate": 2.580410741401699e-06, "loss": 0.2593, "step": 31207 }, { "epoch": 2.319435154217763, "grad_norm": 2.353514316938076, "learning_rate": 2.579872821231223e-06, "loss": 0.2803, "step": 31208 }, { "epoch": 2.3195094760312154, "grad_norm": 2.0658884146264755, "learning_rate": 2.579334948831145e-06, "loss": 0.2604, "step": 31209 }, { "epoch": 2.3195837978446674, "grad_norm": 2.5556291510617544, "learning_rate": 2.578797124204924e-06, "loss": 0.3207, "step": 31210 }, { "epoch": 2.3196581196581194, "grad_norm": 2.1589771766618933, "learning_rate": 2.57825934735602e-06, "loss": 0.2955, "step": 31211 }, { "epoch": 2.319732441471572, "grad_norm": 2.419529703399757, "learning_rate": 2.5777216182878995e-06, "loss": 0.2491, "step": 31212 }, { "epoch": 2.3198067632850243, "grad_norm": 2.429140595759552, "learning_rate": 2.5771839370040176e-06, "loss": 0.2568, "step": 31213 }, { "epoch": 2.3198810850984763, "grad_norm": 2.2911162423417326, "learning_rate": 2.5766463035078436e-06, "loss": 0.2678, "step": 31214 }, { "epoch": 2.319955406911929, "grad_norm": 2.1553197947105804, "learning_rate": 2.576108717802833e-06, "loss": 0.2439, "step": 31215 }, { "epoch": 2.320029728725381, "grad_norm": 1.8799046954122207, "learning_rate": 2.5755711798924522e-06, "loss": 0.2131, "step": 31216 }, { "epoch": 2.3201040505388333, "grad_norm": 2.1743920152171, "learning_rate": 2.5750336897801585e-06, "loss": 0.1986, "step": 31217 }, { "epoch": 2.3201783723522853, "grad_norm": 2.4892702017887136, "learning_rate": 2.57449624746941e-06, "loss": 0.3222, "step": 31218 }, { "epoch": 2.3202526941657378, "grad_norm": 2.4159991005219066, "learning_rate": 2.573958852963673e-06, "loss": 0.2988, "step": 31219 }, { "epoch": 2.3203270159791898, "grad_norm": 2.323294352025953, "learning_rate": 2.573421506266399e-06, "loss": 0.2996, "step": 31220 }, { "epoch": 2.3204013377926422, "grad_norm": 2.776921324381662, "learning_rate": 2.57288420738106e-06, "loss": 0.3604, "step": 31221 }, { "epoch": 2.3204756596060943, "grad_norm": 2.4668104726210167, "learning_rate": 2.572346956311099e-06, "loss": 0.243, "step": 31222 }, { "epoch": 2.3205499814195467, "grad_norm": 1.8927404940328036, "learning_rate": 2.571809753059987e-06, "loss": 0.2617, "step": 31223 }, { "epoch": 2.3206243032329987, "grad_norm": 2.8339440167782795, "learning_rate": 2.571272597631179e-06, "loss": 0.3333, "step": 31224 }, { "epoch": 2.320698625046451, "grad_norm": 2.765988834221178, "learning_rate": 2.570735490028129e-06, "loss": 0.2824, "step": 31225 }, { "epoch": 2.320772946859903, "grad_norm": 3.523507220237739, "learning_rate": 2.5701984302543013e-06, "loss": 0.3721, "step": 31226 }, { "epoch": 2.3208472686733557, "grad_norm": 2.79676295415544, "learning_rate": 2.5696614183131508e-06, "loss": 0.3432, "step": 31227 }, { "epoch": 2.3209215904868077, "grad_norm": 1.8683758742149932, "learning_rate": 2.5691244542081305e-06, "loss": 0.2127, "step": 31228 }, { "epoch": 2.32099591230026, "grad_norm": 2.451578460503131, "learning_rate": 2.568587537942706e-06, "loss": 0.2944, "step": 31229 }, { "epoch": 2.321070234113712, "grad_norm": 2.510540508915202, "learning_rate": 2.5680506695203257e-06, "loss": 0.2922, "step": 31230 }, { "epoch": 2.3211445559271646, "grad_norm": 2.698092546374146, "learning_rate": 2.5675138489444517e-06, "loss": 0.3561, "step": 31231 }, { "epoch": 2.321218877740617, "grad_norm": 2.7165917593349374, "learning_rate": 2.5669770762185396e-06, "loss": 0.3395, "step": 31232 }, { "epoch": 2.321293199554069, "grad_norm": 2.2209663109552618, "learning_rate": 2.5664403513460422e-06, "loss": 0.2953, "step": 31233 }, { "epoch": 2.3213675213675216, "grad_norm": 2.2032766921500593, "learning_rate": 2.565903674330417e-06, "loss": 0.3338, "step": 31234 }, { "epoch": 2.3214418431809736, "grad_norm": 2.5380505967702667, "learning_rate": 2.5653670451751146e-06, "loss": 0.2218, "step": 31235 }, { "epoch": 2.321516164994426, "grad_norm": 2.3505165673172246, "learning_rate": 2.5648304638835965e-06, "loss": 0.2433, "step": 31236 }, { "epoch": 2.321590486807878, "grad_norm": 2.1639405765533053, "learning_rate": 2.5642939304593108e-06, "loss": 0.2787, "step": 31237 }, { "epoch": 2.3216648086213305, "grad_norm": 2.234075353357982, "learning_rate": 2.563757444905718e-06, "loss": 0.2839, "step": 31238 }, { "epoch": 2.3217391304347825, "grad_norm": 1.8300945794703878, "learning_rate": 2.5632210072262687e-06, "loss": 0.2523, "step": 31239 }, { "epoch": 2.321813452248235, "grad_norm": 2.974491886923099, "learning_rate": 2.562684617424415e-06, "loss": 0.3235, "step": 31240 }, { "epoch": 2.321887774061687, "grad_norm": 2.618017744121031, "learning_rate": 2.562148275503613e-06, "loss": 0.3392, "step": 31241 }, { "epoch": 2.3219620958751395, "grad_norm": 2.4061761131655373, "learning_rate": 2.5616119814673122e-06, "loss": 0.3342, "step": 31242 }, { "epoch": 2.3220364176885915, "grad_norm": 2.3221354345169014, "learning_rate": 2.561075735318974e-06, "loss": 0.2392, "step": 31243 }, { "epoch": 2.322110739502044, "grad_norm": 2.50055269432934, "learning_rate": 2.560539537062038e-06, "loss": 0.2959, "step": 31244 }, { "epoch": 2.322185061315496, "grad_norm": 2.1741985637270584, "learning_rate": 2.560003386699965e-06, "loss": 0.2654, "step": 31245 }, { "epoch": 2.3222593831289484, "grad_norm": 1.8604485869101113, "learning_rate": 2.5594672842362054e-06, "loss": 0.2116, "step": 31246 }, { "epoch": 2.3223337049424004, "grad_norm": 2.765212388868372, "learning_rate": 2.5589312296742055e-06, "loss": 0.3351, "step": 31247 }, { "epoch": 2.322408026755853, "grad_norm": 2.215614994672378, "learning_rate": 2.5583952230174235e-06, "loss": 0.2628, "step": 31248 }, { "epoch": 2.322482348569305, "grad_norm": 2.3011385737604098, "learning_rate": 2.5578592642693066e-06, "loss": 0.3013, "step": 31249 }, { "epoch": 2.3225566703827574, "grad_norm": 2.2238291192445128, "learning_rate": 2.557323353433303e-06, "loss": 0.3328, "step": 31250 }, { "epoch": 2.32263099219621, "grad_norm": 2.503242416642052, "learning_rate": 2.5567874905128674e-06, "loss": 0.2364, "step": 31251 }, { "epoch": 2.322705314009662, "grad_norm": 1.780275976767205, "learning_rate": 2.5562516755114462e-06, "loss": 0.1818, "step": 31252 }, { "epoch": 2.322779635823114, "grad_norm": 2.228310750882815, "learning_rate": 2.555715908432492e-06, "loss": 0.2551, "step": 31253 }, { "epoch": 2.3228539576365663, "grad_norm": 2.3835998639369538, "learning_rate": 2.5551801892794537e-06, "loss": 0.3238, "step": 31254 }, { "epoch": 2.322928279450019, "grad_norm": 2.8519123217571702, "learning_rate": 2.554644518055779e-06, "loss": 0.2634, "step": 31255 }, { "epoch": 2.323002601263471, "grad_norm": 2.431098799437967, "learning_rate": 2.554108894764916e-06, "loss": 0.3077, "step": 31256 }, { "epoch": 2.3230769230769233, "grad_norm": 2.289253238161972, "learning_rate": 2.5535733194103107e-06, "loss": 0.3251, "step": 31257 }, { "epoch": 2.3231512448903753, "grad_norm": 1.9365388846129832, "learning_rate": 2.5530377919954174e-06, "loss": 0.2392, "step": 31258 }, { "epoch": 2.3232255667038277, "grad_norm": 2.598263827337685, "learning_rate": 2.552502312523677e-06, "loss": 0.2994, "step": 31259 }, { "epoch": 2.3232998885172798, "grad_norm": 2.3722705545010787, "learning_rate": 2.5519668809985443e-06, "loss": 0.3043, "step": 31260 }, { "epoch": 2.323374210330732, "grad_norm": 2.4452419331852537, "learning_rate": 2.551431497423461e-06, "loss": 0.2435, "step": 31261 }, { "epoch": 2.3234485321441842, "grad_norm": 2.9049527087979676, "learning_rate": 2.5508961618018733e-06, "loss": 0.3113, "step": 31262 }, { "epoch": 2.3235228539576367, "grad_norm": 1.8434710772139062, "learning_rate": 2.550360874137232e-06, "loss": 0.2296, "step": 31263 }, { "epoch": 2.3235971757710887, "grad_norm": 1.9370161516748494, "learning_rate": 2.5498256344329776e-06, "loss": 0.3015, "step": 31264 }, { "epoch": 2.323671497584541, "grad_norm": 2.5382698998074633, "learning_rate": 2.5492904426925622e-06, "loss": 0.3538, "step": 31265 }, { "epoch": 2.323745819397993, "grad_norm": 2.128886556877627, "learning_rate": 2.5487552989194296e-06, "loss": 0.2698, "step": 31266 }, { "epoch": 2.3238201412114456, "grad_norm": 2.100546173198904, "learning_rate": 2.5482202031170224e-06, "loss": 0.2195, "step": 31267 }, { "epoch": 2.3238944630248977, "grad_norm": 2.4930746518671234, "learning_rate": 2.5476851552887872e-06, "loss": 0.3188, "step": 31268 }, { "epoch": 2.32396878483835, "grad_norm": 2.095861209467103, "learning_rate": 2.547150155438166e-06, "loss": 0.3106, "step": 31269 }, { "epoch": 2.324043106651802, "grad_norm": 2.7976676975125314, "learning_rate": 2.5466152035686074e-06, "loss": 0.3404, "step": 31270 }, { "epoch": 2.3241174284652546, "grad_norm": 2.8986945550987233, "learning_rate": 2.546080299683554e-06, "loss": 0.3443, "step": 31271 }, { "epoch": 2.3241917502787066, "grad_norm": 2.366942644674311, "learning_rate": 2.5455454437864458e-06, "loss": 0.3613, "step": 31272 }, { "epoch": 2.324266072092159, "grad_norm": 3.094714338296391, "learning_rate": 2.5450106358807325e-06, "loss": 0.2591, "step": 31273 }, { "epoch": 2.3243403939056115, "grad_norm": 2.5976596192255474, "learning_rate": 2.5444758759698495e-06, "loss": 0.3302, "step": 31274 }, { "epoch": 2.3244147157190636, "grad_norm": 2.2565051192075454, "learning_rate": 2.5439411640572486e-06, "loss": 0.2207, "step": 31275 }, { "epoch": 2.3244890375325156, "grad_norm": 2.608128102504348, "learning_rate": 2.5434065001463672e-06, "loss": 0.3022, "step": 31276 }, { "epoch": 2.324563359345968, "grad_norm": 2.5294242991078533, "learning_rate": 2.5428718842406475e-06, "loss": 0.2481, "step": 31277 }, { "epoch": 2.3246376811594205, "grad_norm": 1.974667568758653, "learning_rate": 2.5423373163435316e-06, "loss": 0.2408, "step": 31278 }, { "epoch": 2.3247120029728725, "grad_norm": 2.1785264945152725, "learning_rate": 2.541802796458459e-06, "loss": 0.2896, "step": 31279 }, { "epoch": 2.324786324786325, "grad_norm": 2.2581475734300014, "learning_rate": 2.5412683245888747e-06, "loss": 0.2191, "step": 31280 }, { "epoch": 2.324860646599777, "grad_norm": 2.244171889778408, "learning_rate": 2.5407339007382157e-06, "loss": 0.3155, "step": 31281 }, { "epoch": 2.3249349684132294, "grad_norm": 1.7184021535200669, "learning_rate": 2.5401995249099275e-06, "loss": 0.1667, "step": 31282 }, { "epoch": 2.3250092902266815, "grad_norm": 1.7268896523395783, "learning_rate": 2.539665197107448e-06, "loss": 0.2069, "step": 31283 }, { "epoch": 2.325083612040134, "grad_norm": 2.186142844966272, "learning_rate": 2.539130917334215e-06, "loss": 0.265, "step": 31284 }, { "epoch": 2.325157933853586, "grad_norm": 2.5682659890901207, "learning_rate": 2.5385966855936717e-06, "loss": 0.333, "step": 31285 }, { "epoch": 2.3252322556670384, "grad_norm": 2.5147786836888857, "learning_rate": 2.5380625018892526e-06, "loss": 0.2485, "step": 31286 }, { "epoch": 2.3253065774804904, "grad_norm": 2.108667146747127, "learning_rate": 2.537528366224403e-06, "loss": 0.2658, "step": 31287 }, { "epoch": 2.325380899293943, "grad_norm": 2.2640012957997757, "learning_rate": 2.536994278602559e-06, "loss": 0.2717, "step": 31288 }, { "epoch": 2.325455221107395, "grad_norm": 2.42368705485009, "learning_rate": 2.5364602390271585e-06, "loss": 0.2441, "step": 31289 }, { "epoch": 2.3255295429208473, "grad_norm": 2.455430634511004, "learning_rate": 2.5359262475016398e-06, "loss": 0.3187, "step": 31290 }, { "epoch": 2.3256038647342994, "grad_norm": 2.2893825908995384, "learning_rate": 2.535392304029437e-06, "loss": 0.3066, "step": 31291 }, { "epoch": 2.325678186547752, "grad_norm": 2.135333281691748, "learning_rate": 2.534858408613995e-06, "loss": 0.2731, "step": 31292 }, { "epoch": 2.325752508361204, "grad_norm": 2.519673922052044, "learning_rate": 2.5343245612587475e-06, "loss": 0.3437, "step": 31293 }, { "epoch": 2.3258268301746563, "grad_norm": 2.6404396065242803, "learning_rate": 2.5337907619671274e-06, "loss": 0.2469, "step": 31294 }, { "epoch": 2.3259011519881083, "grad_norm": 2.077961603892852, "learning_rate": 2.533257010742578e-06, "loss": 0.2531, "step": 31295 }, { "epoch": 2.3259754738015608, "grad_norm": 1.9944464869531127, "learning_rate": 2.5327233075885294e-06, "loss": 0.2104, "step": 31296 }, { "epoch": 2.3260497956150132, "grad_norm": 2.173136699161056, "learning_rate": 2.5321896525084246e-06, "loss": 0.2715, "step": 31297 }, { "epoch": 2.3261241174284653, "grad_norm": 2.3507956575147944, "learning_rate": 2.5316560455056947e-06, "loss": 0.3035, "step": 31298 }, { "epoch": 2.3261984392419173, "grad_norm": 2.7688799224449383, "learning_rate": 2.5311224865837724e-06, "loss": 0.4087, "step": 31299 }, { "epoch": 2.3262727610553697, "grad_norm": 2.554002496583746, "learning_rate": 2.530588975746102e-06, "loss": 0.3475, "step": 31300 }, { "epoch": 2.326347082868822, "grad_norm": 2.8758844302289255, "learning_rate": 2.530055512996107e-06, "loss": 0.3615, "step": 31301 }, { "epoch": 2.326421404682274, "grad_norm": 2.607357809419718, "learning_rate": 2.5295220983372292e-06, "loss": 0.3084, "step": 31302 }, { "epoch": 2.3264957264957267, "grad_norm": 2.226708934251045, "learning_rate": 2.528988731772898e-06, "loss": 0.264, "step": 31303 }, { "epoch": 2.3265700483091787, "grad_norm": 2.2185764003944697, "learning_rate": 2.528455413306553e-06, "loss": 0.2855, "step": 31304 }, { "epoch": 2.326644370122631, "grad_norm": 1.951020269080644, "learning_rate": 2.5279221429416233e-06, "loss": 0.2655, "step": 31305 }, { "epoch": 2.326718691936083, "grad_norm": 2.349417755101707, "learning_rate": 2.5273889206815407e-06, "loss": 0.2887, "step": 31306 }, { "epoch": 2.3267930137495356, "grad_norm": 2.3049926758311616, "learning_rate": 2.526855746529744e-06, "loss": 0.2312, "step": 31307 }, { "epoch": 2.3268673355629876, "grad_norm": 1.9592589707526693, "learning_rate": 2.526322620489662e-06, "loss": 0.2357, "step": 31308 }, { "epoch": 2.32694165737644, "grad_norm": 2.5943263612709218, "learning_rate": 2.525789542564724e-06, "loss": 0.2958, "step": 31309 }, { "epoch": 2.327015979189892, "grad_norm": 1.6758643146626706, "learning_rate": 2.5252565127583685e-06, "loss": 0.1598, "step": 31310 }, { "epoch": 2.3270903010033446, "grad_norm": 2.839391457363055, "learning_rate": 2.5247235310740227e-06, "loss": 0.2094, "step": 31311 }, { "epoch": 2.3271646228167966, "grad_norm": 1.7849049635419285, "learning_rate": 2.5241905975151193e-06, "loss": 0.2367, "step": 31312 }, { "epoch": 2.327238944630249, "grad_norm": 1.8578774899658885, "learning_rate": 2.523657712085087e-06, "loss": 0.2272, "step": 31313 }, { "epoch": 2.327313266443701, "grad_norm": 2.3541598129229766, "learning_rate": 2.5231248747873606e-06, "loss": 0.2708, "step": 31314 }, { "epoch": 2.3273875882571535, "grad_norm": 2.1420360896485606, "learning_rate": 2.5225920856253684e-06, "loss": 0.2196, "step": 31315 }, { "epoch": 2.3274619100706055, "grad_norm": 2.258599892841695, "learning_rate": 2.5220593446025368e-06, "loss": 0.2914, "step": 31316 }, { "epoch": 2.327536231884058, "grad_norm": 2.6374548168878107, "learning_rate": 2.5215266517223024e-06, "loss": 0.357, "step": 31317 }, { "epoch": 2.32761055369751, "grad_norm": 1.8194929325040674, "learning_rate": 2.520994006988089e-06, "loss": 0.2013, "step": 31318 }, { "epoch": 2.3276848755109625, "grad_norm": 2.293286987471604, "learning_rate": 2.520461410403331e-06, "loss": 0.2898, "step": 31319 }, { "epoch": 2.327759197324415, "grad_norm": 2.4340382646688385, "learning_rate": 2.5199288619714547e-06, "loss": 0.2151, "step": 31320 }, { "epoch": 2.327833519137867, "grad_norm": 3.4938915957326184, "learning_rate": 2.519396361695884e-06, "loss": 0.3558, "step": 31321 }, { "epoch": 2.327907840951319, "grad_norm": 1.8434166988147314, "learning_rate": 2.518863909580055e-06, "loss": 0.2342, "step": 31322 }, { "epoch": 2.3279821627647714, "grad_norm": 3.002647425582856, "learning_rate": 2.5183315056273926e-06, "loss": 0.3707, "step": 31323 }, { "epoch": 2.328056484578224, "grad_norm": 2.6742133294208457, "learning_rate": 2.5177991498413236e-06, "loss": 0.3579, "step": 31324 }, { "epoch": 2.328130806391676, "grad_norm": 2.8784819496725063, "learning_rate": 2.517266842225271e-06, "loss": 0.2766, "step": 31325 }, { "epoch": 2.3282051282051284, "grad_norm": 2.2780788514852857, "learning_rate": 2.5167345827826706e-06, "loss": 0.2674, "step": 31326 }, { "epoch": 2.3282794500185804, "grad_norm": 2.515955006307743, "learning_rate": 2.5162023715169446e-06, "loss": 0.3029, "step": 31327 }, { "epoch": 2.328353771832033, "grad_norm": 2.1279254459985375, "learning_rate": 2.5156702084315165e-06, "loss": 0.2611, "step": 31328 }, { "epoch": 2.328428093645485, "grad_norm": 2.505978718723753, "learning_rate": 2.5151380935298177e-06, "loss": 0.2351, "step": 31329 }, { "epoch": 2.3285024154589373, "grad_norm": 2.316611828369192, "learning_rate": 2.514606026815272e-06, "loss": 0.2611, "step": 31330 }, { "epoch": 2.3285767372723893, "grad_norm": 2.95206821859245, "learning_rate": 2.5140740082913005e-06, "loss": 0.3166, "step": 31331 }, { "epoch": 2.328651059085842, "grad_norm": 2.0637271734212947, "learning_rate": 2.5135420379613363e-06, "loss": 0.2234, "step": 31332 }, { "epoch": 2.328725380899294, "grad_norm": 2.1055391554045055, "learning_rate": 2.5130101158287966e-06, "loss": 0.2483, "step": 31333 }, { "epoch": 2.3287997027127463, "grad_norm": 3.900525930809235, "learning_rate": 2.512478241897115e-06, "loss": 0.3115, "step": 31334 }, { "epoch": 2.3288740245261983, "grad_norm": 1.8991793170679598, "learning_rate": 2.5119464161697038e-06, "loss": 0.2544, "step": 31335 }, { "epoch": 2.3289483463396508, "grad_norm": 2.224501674676531, "learning_rate": 2.511414638649996e-06, "loss": 0.246, "step": 31336 }, { "epoch": 2.3290226681531028, "grad_norm": 2.328174380214929, "learning_rate": 2.5108829093414133e-06, "loss": 0.3096, "step": 31337 }, { "epoch": 2.3290969899665552, "grad_norm": 2.55583592185434, "learning_rate": 2.510351228247374e-06, "loss": 0.3097, "step": 31338 }, { "epoch": 2.3291713117800072, "grad_norm": 2.3206010782950277, "learning_rate": 2.5098195953713076e-06, "loss": 0.345, "step": 31339 }, { "epoch": 2.3292456335934597, "grad_norm": 3.2383413075221483, "learning_rate": 2.5092880107166316e-06, "loss": 0.4055, "step": 31340 }, { "epoch": 2.3293199554069117, "grad_norm": 2.3730384340428863, "learning_rate": 2.508756474286773e-06, "loss": 0.3467, "step": 31341 }, { "epoch": 2.329394277220364, "grad_norm": 2.5349887263480033, "learning_rate": 2.508224986085153e-06, "loss": 0.3708, "step": 31342 }, { "epoch": 2.3294685990338166, "grad_norm": 2.0943695678648573, "learning_rate": 2.5076935461151874e-06, "loss": 0.2575, "step": 31343 }, { "epoch": 2.3295429208472687, "grad_norm": 2.8738791128299224, "learning_rate": 2.5071621543803047e-06, "loss": 0.3588, "step": 31344 }, { "epoch": 2.3296172426607207, "grad_norm": 2.589673933978376, "learning_rate": 2.5066308108839242e-06, "loss": 0.3636, "step": 31345 }, { "epoch": 2.329691564474173, "grad_norm": 1.9823592716162788, "learning_rate": 2.5060995156294654e-06, "loss": 0.3396, "step": 31346 }, { "epoch": 2.3297658862876256, "grad_norm": 2.4537798348330764, "learning_rate": 2.5055682686203465e-06, "loss": 0.3215, "step": 31347 }, { "epoch": 2.3298402081010776, "grad_norm": 2.380902397003755, "learning_rate": 2.5050370698599937e-06, "loss": 0.3176, "step": 31348 }, { "epoch": 2.32991452991453, "grad_norm": 2.6271024238850367, "learning_rate": 2.504505919351822e-06, "loss": 0.3038, "step": 31349 }, { "epoch": 2.329988851727982, "grad_norm": 2.6971035638618033, "learning_rate": 2.5039748170992495e-06, "loss": 0.3091, "step": 31350 }, { "epoch": 2.3300631735414346, "grad_norm": 2.7552057222299955, "learning_rate": 2.5034437631057017e-06, "loss": 0.2897, "step": 31351 }, { "epoch": 2.3301374953548866, "grad_norm": 2.6426060618768528, "learning_rate": 2.5029127573745938e-06, "loss": 0.3368, "step": 31352 }, { "epoch": 2.330211817168339, "grad_norm": 2.188919594262761, "learning_rate": 2.5023817999093425e-06, "loss": 0.3347, "step": 31353 }, { "epoch": 2.330286138981791, "grad_norm": 2.4698838805873558, "learning_rate": 2.50185089071337e-06, "loss": 0.3282, "step": 31354 }, { "epoch": 2.3303604607952435, "grad_norm": 2.3054597205905045, "learning_rate": 2.5013200297900897e-06, "loss": 0.3158, "step": 31355 }, { "epoch": 2.3304347826086955, "grad_norm": 2.3493127709143686, "learning_rate": 2.500789217142927e-06, "loss": 0.3417, "step": 31356 }, { "epoch": 2.330509104422148, "grad_norm": 2.6924367099541016, "learning_rate": 2.5002584527752893e-06, "loss": 0.3592, "step": 31357 }, { "epoch": 2.3305834262356, "grad_norm": 2.027286526226535, "learning_rate": 2.4997277366906014e-06, "loss": 0.2115, "step": 31358 }, { "epoch": 2.3306577480490525, "grad_norm": 2.4413207481196655, "learning_rate": 2.4991970688922773e-06, "loss": 0.2412, "step": 31359 }, { "epoch": 2.3307320698625045, "grad_norm": 2.0652761865948177, "learning_rate": 2.498666449383731e-06, "loss": 0.3172, "step": 31360 }, { "epoch": 2.330806391675957, "grad_norm": 2.440137716452954, "learning_rate": 2.4981358781683827e-06, "loss": 0.3756, "step": 31361 }, { "epoch": 2.330880713489409, "grad_norm": 2.20237877464468, "learning_rate": 2.4976053552496436e-06, "loss": 0.2986, "step": 31362 }, { "epoch": 2.3309550353028614, "grad_norm": 2.34296682462584, "learning_rate": 2.4970748806309355e-06, "loss": 0.2532, "step": 31363 }, { "epoch": 2.3310293571163134, "grad_norm": 2.6375393786464607, "learning_rate": 2.496544454315669e-06, "loss": 0.3199, "step": 31364 }, { "epoch": 2.331103678929766, "grad_norm": 2.1644816276468966, "learning_rate": 2.4960140763072584e-06, "loss": 0.3104, "step": 31365 }, { "epoch": 2.3311780007432183, "grad_norm": 3.4986088075371318, "learning_rate": 2.4954837466091206e-06, "loss": 0.2605, "step": 31366 }, { "epoch": 2.3312523225566704, "grad_norm": 2.5277902824812544, "learning_rate": 2.4949534652246686e-06, "loss": 0.3111, "step": 31367 }, { "epoch": 2.331326644370123, "grad_norm": 1.91616007184878, "learning_rate": 2.4944232321573213e-06, "loss": 0.2114, "step": 31368 }, { "epoch": 2.331400966183575, "grad_norm": 2.5151936003919455, "learning_rate": 2.4938930474104817e-06, "loss": 0.3136, "step": 31369 }, { "epoch": 2.3314752879970273, "grad_norm": 2.7953557869536416, "learning_rate": 2.4933629109875723e-06, "loss": 0.3225, "step": 31370 }, { "epoch": 2.3315496098104793, "grad_norm": 2.1790392881946503, "learning_rate": 2.492832822892004e-06, "loss": 0.3004, "step": 31371 }, { "epoch": 2.331623931623932, "grad_norm": 2.3454942276655135, "learning_rate": 2.4923027831271853e-06, "loss": 0.3281, "step": 31372 }, { "epoch": 2.331698253437384, "grad_norm": 2.2861260429386587, "learning_rate": 2.491772791696534e-06, "loss": 0.3015, "step": 31373 }, { "epoch": 2.3317725752508363, "grad_norm": 2.3024429076783237, "learning_rate": 2.49124284860346e-06, "loss": 0.3465, "step": 31374 }, { "epoch": 2.3318468970642883, "grad_norm": 2.5979061953693527, "learning_rate": 2.490712953851372e-06, "loss": 0.3086, "step": 31375 }, { "epoch": 2.3319212188777407, "grad_norm": 2.468127163379755, "learning_rate": 2.4901831074436866e-06, "loss": 0.279, "step": 31376 }, { "epoch": 2.3319955406911927, "grad_norm": 3.168097663819943, "learning_rate": 2.4896533093838105e-06, "loss": 0.2908, "step": 31377 }, { "epoch": 2.332069862504645, "grad_norm": 2.4220910594864473, "learning_rate": 2.4891235596751607e-06, "loss": 0.342, "step": 31378 }, { "epoch": 2.3321441843180972, "grad_norm": 2.139574651170792, "learning_rate": 2.488593858321142e-06, "loss": 0.2374, "step": 31379 }, { "epoch": 2.3322185061315497, "grad_norm": 2.3384952996101775, "learning_rate": 2.488064205325167e-06, "loss": 0.2757, "step": 31380 }, { "epoch": 2.3322928279450017, "grad_norm": 2.3461841256924907, "learning_rate": 2.4875346006906453e-06, "loss": 0.22, "step": 31381 }, { "epoch": 2.332367149758454, "grad_norm": 2.069309140171234, "learning_rate": 2.4870050444209827e-06, "loss": 0.2512, "step": 31382 }, { "epoch": 2.332441471571906, "grad_norm": 1.9941483063091603, "learning_rate": 2.4864755365195946e-06, "loss": 0.2484, "step": 31383 }, { "epoch": 2.3325157933853586, "grad_norm": 2.673978049922744, "learning_rate": 2.4859460769898834e-06, "loss": 0.3085, "step": 31384 }, { "epoch": 2.332590115198811, "grad_norm": 2.0064183106778017, "learning_rate": 2.485416665835266e-06, "loss": 0.2778, "step": 31385 }, { "epoch": 2.332664437012263, "grad_norm": 1.9535535640059543, "learning_rate": 2.4848873030591457e-06, "loss": 0.2408, "step": 31386 }, { "epoch": 2.332738758825715, "grad_norm": 3.234746177306303, "learning_rate": 2.4843579886649284e-06, "loss": 0.4063, "step": 31387 }, { "epoch": 2.3328130806391676, "grad_norm": 2.4809321013495773, "learning_rate": 2.4838287226560266e-06, "loss": 0.2872, "step": 31388 }, { "epoch": 2.33288740245262, "grad_norm": 2.239636348959457, "learning_rate": 2.483299505035842e-06, "loss": 0.276, "step": 31389 }, { "epoch": 2.332961724266072, "grad_norm": 2.2993343049250665, "learning_rate": 2.4827703358077927e-06, "loss": 0.2665, "step": 31390 }, { "epoch": 2.3330360460795245, "grad_norm": 2.4942369397876236, "learning_rate": 2.4822412149752707e-06, "loss": 0.321, "step": 31391 }, { "epoch": 2.3331103678929765, "grad_norm": 2.089781120044457, "learning_rate": 2.4817121425416933e-06, "loss": 0.2332, "step": 31392 }, { "epoch": 2.333184689706429, "grad_norm": 1.8600823884823645, "learning_rate": 2.4811831185104627e-06, "loss": 0.2023, "step": 31393 }, { "epoch": 2.333259011519881, "grad_norm": 2.299370910143215, "learning_rate": 2.4806541428849827e-06, "loss": 0.3931, "step": 31394 }, { "epoch": 2.3333333333333335, "grad_norm": 2.889593871914271, "learning_rate": 2.4801252156686638e-06, "loss": 0.2921, "step": 31395 }, { "epoch": 2.3334076551467855, "grad_norm": 3.077656626566841, "learning_rate": 2.4795963368649077e-06, "loss": 0.3832, "step": 31396 }, { "epoch": 2.333481976960238, "grad_norm": 3.5053756396286935, "learning_rate": 2.4790675064771186e-06, "loss": 0.3251, "step": 31397 }, { "epoch": 2.33355629877369, "grad_norm": 2.3123588403120428, "learning_rate": 2.4785387245087046e-06, "loss": 0.2673, "step": 31398 }, { "epoch": 2.3336306205871424, "grad_norm": 2.264816347810569, "learning_rate": 2.478009990963066e-06, "loss": 0.2981, "step": 31399 }, { "epoch": 2.3337049424005945, "grad_norm": 2.5938487961332184, "learning_rate": 2.4774813058436108e-06, "loss": 0.3275, "step": 31400 }, { "epoch": 2.333779264214047, "grad_norm": 2.5774272651526404, "learning_rate": 2.4769526691537415e-06, "loss": 0.3125, "step": 31401 }, { "epoch": 2.333853586027499, "grad_norm": 6.467995918695045, "learning_rate": 2.47642408089686e-06, "loss": 0.3845, "step": 31402 }, { "epoch": 2.3339279078409514, "grad_norm": 2.5834019696412245, "learning_rate": 2.4758955410763695e-06, "loss": 0.3323, "step": 31403 }, { "epoch": 2.3340022296544034, "grad_norm": 2.9930586591659876, "learning_rate": 2.475367049695672e-06, "loss": 0.3835, "step": 31404 }, { "epoch": 2.334076551467856, "grad_norm": 2.249025546042493, "learning_rate": 2.474838606758172e-06, "loss": 0.2664, "step": 31405 }, { "epoch": 2.334150873281308, "grad_norm": 2.7046027153997687, "learning_rate": 2.474310212267268e-06, "loss": 0.3016, "step": 31406 }, { "epoch": 2.3342251950947603, "grad_norm": 2.1822540196379974, "learning_rate": 2.473781866226368e-06, "loss": 0.2198, "step": 31407 }, { "epoch": 2.334299516908213, "grad_norm": 2.329654493288313, "learning_rate": 2.4732535686388693e-06, "loss": 0.2401, "step": 31408 }, { "epoch": 2.334373838721665, "grad_norm": 2.630811849497696, "learning_rate": 2.472725319508171e-06, "loss": 0.4052, "step": 31409 }, { "epoch": 2.334448160535117, "grad_norm": 2.583358565115094, "learning_rate": 2.4721971188376793e-06, "loss": 0.3099, "step": 31410 }, { "epoch": 2.3345224823485693, "grad_norm": 2.321783747526799, "learning_rate": 2.471668966630789e-06, "loss": 0.283, "step": 31411 }, { "epoch": 2.3345968041620218, "grad_norm": 2.9342403034692297, "learning_rate": 2.471140862890906e-06, "loss": 0.3462, "step": 31412 }, { "epoch": 2.3346711259754738, "grad_norm": 2.6055146958549447, "learning_rate": 2.470612807621431e-06, "loss": 0.3608, "step": 31413 }, { "epoch": 2.3347454477889262, "grad_norm": 2.8640392293358024, "learning_rate": 2.470084800825754e-06, "loss": 0.3472, "step": 31414 }, { "epoch": 2.3348197696023782, "grad_norm": 1.969400971497222, "learning_rate": 2.4695568425072825e-06, "loss": 0.2173, "step": 31415 }, { "epoch": 2.3348940914158307, "grad_norm": 1.910543311195524, "learning_rate": 2.4690289326694117e-06, "loss": 0.2245, "step": 31416 }, { "epoch": 2.3349684132292827, "grad_norm": 2.931124568648897, "learning_rate": 2.4685010713155434e-06, "loss": 0.3137, "step": 31417 }, { "epoch": 2.335042735042735, "grad_norm": 2.3101919368654626, "learning_rate": 2.467973258449076e-06, "loss": 0.2464, "step": 31418 }, { "epoch": 2.335117056856187, "grad_norm": 2.4405014575558237, "learning_rate": 2.4674454940734017e-06, "loss": 0.2936, "step": 31419 }, { "epoch": 2.3351913786696397, "grad_norm": 2.4370229573753415, "learning_rate": 2.4669177781919253e-06, "loss": 0.3203, "step": 31420 }, { "epoch": 2.3352657004830917, "grad_norm": 2.432916672171546, "learning_rate": 2.466390110808039e-06, "loss": 0.349, "step": 31421 }, { "epoch": 2.335340022296544, "grad_norm": 1.7554547847796467, "learning_rate": 2.465862491925145e-06, "loss": 0.2715, "step": 31422 }, { "epoch": 2.335414344109996, "grad_norm": 2.1358124702491454, "learning_rate": 2.465334921546638e-06, "loss": 0.2245, "step": 31423 }, { "epoch": 2.3354886659234486, "grad_norm": 2.814391493048528, "learning_rate": 2.4648073996759136e-06, "loss": 0.3318, "step": 31424 }, { "epoch": 2.3355629877369006, "grad_norm": 6.558726555271927, "learning_rate": 2.4642799263163673e-06, "loss": 0.3383, "step": 31425 }, { "epoch": 2.335637309550353, "grad_norm": 2.2662056403091904, "learning_rate": 2.4637525014713937e-06, "loss": 0.3299, "step": 31426 }, { "epoch": 2.335711631363805, "grad_norm": 2.1772110488934606, "learning_rate": 2.463225125144394e-06, "loss": 0.2235, "step": 31427 }, { "epoch": 2.3357859531772576, "grad_norm": 1.978184237299047, "learning_rate": 2.4626977973387557e-06, "loss": 0.2554, "step": 31428 }, { "epoch": 2.3358602749907096, "grad_norm": 2.005428513945692, "learning_rate": 2.4621705180578815e-06, "loss": 0.2223, "step": 31429 }, { "epoch": 2.335934596804162, "grad_norm": 1.9340752714405711, "learning_rate": 2.461643287305162e-06, "loss": 0.2444, "step": 31430 }, { "epoch": 2.3360089186176145, "grad_norm": 2.2967537565705323, "learning_rate": 2.461116105083989e-06, "loss": 0.2637, "step": 31431 }, { "epoch": 2.3360832404310665, "grad_norm": 2.4121898131390127, "learning_rate": 2.4605889713977626e-06, "loss": 0.34, "step": 31432 }, { "epoch": 2.3361575622445185, "grad_norm": 2.094091738791109, "learning_rate": 2.460061886249874e-06, "loss": 0.2295, "step": 31433 }, { "epoch": 2.336231884057971, "grad_norm": 2.266153085715142, "learning_rate": 2.459534849643711e-06, "loss": 0.3306, "step": 31434 }, { "epoch": 2.3363062058714235, "grad_norm": 2.4293688525193247, "learning_rate": 2.459007861582676e-06, "loss": 0.2646, "step": 31435 }, { "epoch": 2.3363805276848755, "grad_norm": 2.839657115403194, "learning_rate": 2.4584809220701554e-06, "loss": 0.2806, "step": 31436 }, { "epoch": 2.336454849498328, "grad_norm": 1.8416410495921354, "learning_rate": 2.4579540311095453e-06, "loss": 0.2215, "step": 31437 }, { "epoch": 2.33652917131178, "grad_norm": 2.0470159572984046, "learning_rate": 2.457427188704231e-06, "loss": 0.2459, "step": 31438 }, { "epoch": 2.3366034931252324, "grad_norm": 2.359168804750843, "learning_rate": 2.456900394857613e-06, "loss": 0.1674, "step": 31439 }, { "epoch": 2.3366778149386844, "grad_norm": 2.676150816481748, "learning_rate": 2.4563736495730795e-06, "loss": 0.3885, "step": 31440 }, { "epoch": 2.336752136752137, "grad_norm": 1.8012812079687595, "learning_rate": 2.4558469528540175e-06, "loss": 0.1964, "step": 31441 }, { "epoch": 2.336826458565589, "grad_norm": 2.9477945335446223, "learning_rate": 2.4553203047038234e-06, "loss": 0.3495, "step": 31442 }, { "epoch": 2.3369007803790414, "grad_norm": 2.8038613346340293, "learning_rate": 2.454793705125884e-06, "loss": 0.3309, "step": 31443 }, { "epoch": 2.3369751021924934, "grad_norm": 2.5520683066008227, "learning_rate": 2.454267154123594e-06, "loss": 0.3473, "step": 31444 }, { "epoch": 2.337049424005946, "grad_norm": 2.535655994648072, "learning_rate": 2.4537406517003405e-06, "loss": 0.3346, "step": 31445 }, { "epoch": 2.337123745819398, "grad_norm": 2.09855267341941, "learning_rate": 2.453214197859509e-06, "loss": 0.238, "step": 31446 }, { "epoch": 2.3371980676328503, "grad_norm": 1.7203341813818274, "learning_rate": 2.4526877926045e-06, "loss": 0.2296, "step": 31447 }, { "epoch": 2.3372723894463023, "grad_norm": 2.494346695176989, "learning_rate": 2.452161435938689e-06, "loss": 0.3348, "step": 31448 }, { "epoch": 2.337346711259755, "grad_norm": 3.0567763013333855, "learning_rate": 2.451635127865475e-06, "loss": 0.3252, "step": 31449 }, { "epoch": 2.337421033073207, "grad_norm": 2.16590778462657, "learning_rate": 2.4511088683882377e-06, "loss": 0.2368, "step": 31450 }, { "epoch": 2.3374953548866593, "grad_norm": 2.3941030330607, "learning_rate": 2.450582657510374e-06, "loss": 0.2674, "step": 31451 }, { "epoch": 2.3375696767001113, "grad_norm": 2.2897761570317194, "learning_rate": 2.450056495235268e-06, "loss": 0.3033, "step": 31452 }, { "epoch": 2.3376439985135637, "grad_norm": 2.2605581445908416, "learning_rate": 2.4495303815663017e-06, "loss": 0.3001, "step": 31453 }, { "epoch": 2.337718320327016, "grad_norm": 2.5159448734647127, "learning_rate": 2.449004316506871e-06, "loss": 0.2041, "step": 31454 }, { "epoch": 2.3377926421404682, "grad_norm": 2.051604119957371, "learning_rate": 2.4484783000603596e-06, "loss": 0.2439, "step": 31455 }, { "epoch": 2.3378669639539202, "grad_norm": 3.0629553268232654, "learning_rate": 2.447952332230149e-06, "loss": 0.333, "step": 31456 }, { "epoch": 2.3379412857673727, "grad_norm": 2.0934795898836445, "learning_rate": 2.447426413019632e-06, "loss": 0.2815, "step": 31457 }, { "epoch": 2.338015607580825, "grad_norm": 2.263511644502994, "learning_rate": 2.4469005424321934e-06, "loss": 0.2544, "step": 31458 }, { "epoch": 2.338089929394277, "grad_norm": 2.5039480253793744, "learning_rate": 2.4463747204712165e-06, "loss": 0.2911, "step": 31459 }, { "epoch": 2.3381642512077296, "grad_norm": 1.916382255056513, "learning_rate": 2.4458489471400837e-06, "loss": 0.2212, "step": 31460 }, { "epoch": 2.3382385730211817, "grad_norm": 3.1771366672177614, "learning_rate": 2.4453232224421875e-06, "loss": 0.418, "step": 31461 }, { "epoch": 2.338312894834634, "grad_norm": 2.159707649836068, "learning_rate": 2.444797546380907e-06, "loss": 0.3015, "step": 31462 }, { "epoch": 2.338387216648086, "grad_norm": 1.8278097298543174, "learning_rate": 2.4442719189596265e-06, "loss": 0.208, "step": 31463 }, { "epoch": 2.3384615384615386, "grad_norm": 3.667101426295377, "learning_rate": 2.443746340181733e-06, "loss": 0.3294, "step": 31464 }, { "epoch": 2.3385358602749906, "grad_norm": 2.1413075508914114, "learning_rate": 2.443220810050606e-06, "loss": 0.3156, "step": 31465 }, { "epoch": 2.338610182088443, "grad_norm": 1.8721072747136434, "learning_rate": 2.4426953285696342e-06, "loss": 0.1832, "step": 31466 }, { "epoch": 2.338684503901895, "grad_norm": 2.48229178514971, "learning_rate": 2.442169895742198e-06, "loss": 0.2679, "step": 31467 }, { "epoch": 2.3387588257153475, "grad_norm": 2.8858774247904804, "learning_rate": 2.4416445115716768e-06, "loss": 0.319, "step": 31468 }, { "epoch": 2.3388331475287996, "grad_norm": 2.583822987754149, "learning_rate": 2.4411191760614615e-06, "loss": 0.2871, "step": 31469 }, { "epoch": 2.338907469342252, "grad_norm": 2.7401163599183262, "learning_rate": 2.4405938892149237e-06, "loss": 0.3603, "step": 31470 }, { "epoch": 2.338981791155704, "grad_norm": 2.188007632418526, "learning_rate": 2.440068651035453e-06, "loss": 0.2554, "step": 31471 }, { "epoch": 2.3390561129691565, "grad_norm": 1.6901228910315815, "learning_rate": 2.4395434615264247e-06, "loss": 0.2016, "step": 31472 }, { "epoch": 2.3391304347826085, "grad_norm": 2.559744543003221, "learning_rate": 2.4390183206912266e-06, "loss": 0.3046, "step": 31473 }, { "epoch": 2.339204756596061, "grad_norm": 2.5669369882538855, "learning_rate": 2.4384932285332352e-06, "loss": 0.2438, "step": 31474 }, { "epoch": 2.339279078409513, "grad_norm": 2.7687786966120225, "learning_rate": 2.43796818505583e-06, "loss": 0.3914, "step": 31475 }, { "epoch": 2.3393534002229655, "grad_norm": 2.360045900622299, "learning_rate": 2.4374431902623963e-06, "loss": 0.2993, "step": 31476 }, { "epoch": 2.339427722036418, "grad_norm": 2.498333442340236, "learning_rate": 2.436918244156311e-06, "loss": 0.276, "step": 31477 }, { "epoch": 2.33950204384987, "grad_norm": 2.2815164763780493, "learning_rate": 2.4363933467409504e-06, "loss": 0.2515, "step": 31478 }, { "epoch": 2.339576365663322, "grad_norm": 2.4042092563425697, "learning_rate": 2.4358684980197e-06, "loss": 0.3031, "step": 31479 }, { "epoch": 2.3396506874767744, "grad_norm": 2.3090512284504, "learning_rate": 2.435343697995933e-06, "loss": 0.4017, "step": 31480 }, { "epoch": 2.339725009290227, "grad_norm": 2.482161853757965, "learning_rate": 2.4348189466730366e-06, "loss": 0.4083, "step": 31481 }, { "epoch": 2.339799331103679, "grad_norm": 1.7880227033193412, "learning_rate": 2.434294244054377e-06, "loss": 0.2036, "step": 31482 }, { "epoch": 2.3398736529171313, "grad_norm": 2.431089888796282, "learning_rate": 2.433769590143341e-06, "loss": 0.3306, "step": 31483 }, { "epoch": 2.3399479747305834, "grad_norm": 2.515183451408051, "learning_rate": 2.433244984943305e-06, "loss": 0.2827, "step": 31484 }, { "epoch": 2.340022296544036, "grad_norm": 2.341049979814205, "learning_rate": 2.432720428457642e-06, "loss": 0.3173, "step": 31485 }, { "epoch": 2.340096618357488, "grad_norm": 2.848103361910014, "learning_rate": 2.432195920689735e-06, "loss": 0.3072, "step": 31486 }, { "epoch": 2.3401709401709403, "grad_norm": 2.080610407520355, "learning_rate": 2.431671461642955e-06, "loss": 0.226, "step": 31487 }, { "epoch": 2.3402452619843923, "grad_norm": 2.251209835298406, "learning_rate": 2.4311470513206837e-06, "loss": 0.3387, "step": 31488 }, { "epoch": 2.3403195837978448, "grad_norm": 2.427170900363631, "learning_rate": 2.430622689726296e-06, "loss": 0.2915, "step": 31489 }, { "epoch": 2.340393905611297, "grad_norm": 3.1736266202262153, "learning_rate": 2.4300983768631637e-06, "loss": 0.3888, "step": 31490 }, { "epoch": 2.3404682274247492, "grad_norm": 2.0355513907152405, "learning_rate": 2.4295741127346674e-06, "loss": 0.2704, "step": 31491 }, { "epoch": 2.3405425492382013, "grad_norm": 2.4515557115146023, "learning_rate": 2.42904989734418e-06, "loss": 0.2994, "step": 31492 }, { "epoch": 2.3406168710516537, "grad_norm": 2.2565453508504767, "learning_rate": 2.4285257306950783e-06, "loss": 0.3085, "step": 31493 }, { "epoch": 2.3406911928651057, "grad_norm": 2.054854865770467, "learning_rate": 2.42800161279073e-06, "loss": 0.2353, "step": 31494 }, { "epoch": 2.340765514678558, "grad_norm": 1.7114270805632845, "learning_rate": 2.4274775436345187e-06, "loss": 0.2246, "step": 31495 }, { "epoch": 2.34083983649201, "grad_norm": 2.008613024925825, "learning_rate": 2.4269535232298135e-06, "loss": 0.2748, "step": 31496 }, { "epoch": 2.3409141583054627, "grad_norm": 2.606354962594508, "learning_rate": 2.4264295515799862e-06, "loss": 0.2572, "step": 31497 }, { "epoch": 2.3409884801189147, "grad_norm": 2.257925828092275, "learning_rate": 2.4259056286884155e-06, "loss": 0.2814, "step": 31498 }, { "epoch": 2.341062801932367, "grad_norm": 2.959424969301861, "learning_rate": 2.42538175455847e-06, "loss": 0.3326, "step": 31499 }, { "epoch": 2.3411371237458196, "grad_norm": 2.3163606052703796, "learning_rate": 2.4248579291935225e-06, "loss": 0.258, "step": 31500 }, { "epoch": 2.3412114455592716, "grad_norm": 2.129892300751286, "learning_rate": 2.4243341525969486e-06, "loss": 0.1918, "step": 31501 }, { "epoch": 2.3412857673727236, "grad_norm": 3.0889254822071552, "learning_rate": 2.423810424772115e-06, "loss": 0.3033, "step": 31502 }, { "epoch": 2.341360089186176, "grad_norm": 2.6067961464474507, "learning_rate": 2.423286745722404e-06, "loss": 0.2508, "step": 31503 }, { "epoch": 2.3414344109996286, "grad_norm": 2.25604865607511, "learning_rate": 2.4227631154511734e-06, "loss": 0.2911, "step": 31504 }, { "epoch": 2.3415087328130806, "grad_norm": 2.1759095829626887, "learning_rate": 2.4222395339618034e-06, "loss": 0.26, "step": 31505 }, { "epoch": 2.341583054626533, "grad_norm": 2.736878328249155, "learning_rate": 2.4217160012576614e-06, "loss": 0.4064, "step": 31506 }, { "epoch": 2.341657376439985, "grad_norm": 2.0602008775499505, "learning_rate": 2.421192517342117e-06, "loss": 0.2071, "step": 31507 }, { "epoch": 2.3417316982534375, "grad_norm": 2.4745108258355755, "learning_rate": 2.420669082218544e-06, "loss": 0.2308, "step": 31508 }, { "epoch": 2.3418060200668895, "grad_norm": 2.079353382832993, "learning_rate": 2.4201456958903068e-06, "loss": 0.2696, "step": 31509 }, { "epoch": 2.341880341880342, "grad_norm": 2.444074097783282, "learning_rate": 2.4196223583607823e-06, "loss": 0.2728, "step": 31510 }, { "epoch": 2.341954663693794, "grad_norm": 2.0602827348481823, "learning_rate": 2.419099069633336e-06, "loss": 0.2264, "step": 31511 }, { "epoch": 2.3420289855072465, "grad_norm": 2.896034399868111, "learning_rate": 2.4185758297113327e-06, "loss": 0.3215, "step": 31512 }, { "epoch": 2.3421033073206985, "grad_norm": 2.2415544140927968, "learning_rate": 2.4180526385981485e-06, "loss": 0.2225, "step": 31513 }, { "epoch": 2.342177629134151, "grad_norm": 2.4773306264871255, "learning_rate": 2.4175294962971442e-06, "loss": 0.2838, "step": 31514 }, { "epoch": 2.342251950947603, "grad_norm": 2.6199994659602392, "learning_rate": 2.417006402811698e-06, "loss": 0.327, "step": 31515 }, { "epoch": 2.3423262727610554, "grad_norm": 2.1454305653662757, "learning_rate": 2.4164833581451654e-06, "loss": 0.2936, "step": 31516 }, { "epoch": 2.3424005945745074, "grad_norm": 1.962074759797611, "learning_rate": 2.415960362300922e-06, "loss": 0.2915, "step": 31517 }, { "epoch": 2.34247491638796, "grad_norm": 2.2512012016862286, "learning_rate": 2.4154374152823323e-06, "loss": 0.2303, "step": 31518 }, { "epoch": 2.342549238201412, "grad_norm": 2.8025166802327472, "learning_rate": 2.4149145170927603e-06, "loss": 0.3042, "step": 31519 }, { "epoch": 2.3426235600148644, "grad_norm": 2.0467386203791107, "learning_rate": 2.414391667735577e-06, "loss": 0.2422, "step": 31520 }, { "epoch": 2.3426978818283164, "grad_norm": 1.7978571400470025, "learning_rate": 2.4138688672141474e-06, "loss": 0.253, "step": 31521 }, { "epoch": 2.342772203641769, "grad_norm": 2.252578542916368, "learning_rate": 2.4133461155318337e-06, "loss": 0.2733, "step": 31522 }, { "epoch": 2.3428465254552213, "grad_norm": 1.9466447775432336, "learning_rate": 2.412823412692007e-06, "loss": 0.2644, "step": 31523 }, { "epoch": 2.3429208472686733, "grad_norm": 1.9848567025364752, "learning_rate": 2.4123007586980265e-06, "loss": 0.2468, "step": 31524 }, { "epoch": 2.342995169082126, "grad_norm": 2.5500397419833942, "learning_rate": 2.4117781535532626e-06, "loss": 0.3702, "step": 31525 }, { "epoch": 2.343069490895578, "grad_norm": 2.3611738247831817, "learning_rate": 2.4112555972610774e-06, "loss": 0.2892, "step": 31526 }, { "epoch": 2.3431438127090303, "grad_norm": 2.3519724533276176, "learning_rate": 2.410733089824835e-06, "loss": 0.2712, "step": 31527 }, { "epoch": 2.3432181345224823, "grad_norm": 2.2016900696281425, "learning_rate": 2.410210631247899e-06, "loss": 0.2585, "step": 31528 }, { "epoch": 2.3432924563359347, "grad_norm": 2.0654376413800364, "learning_rate": 2.4096882215336303e-06, "loss": 0.2696, "step": 31529 }, { "epoch": 2.3433667781493868, "grad_norm": 2.226409018674664, "learning_rate": 2.4091658606853983e-06, "loss": 0.2082, "step": 31530 }, { "epoch": 2.3434410999628392, "grad_norm": 2.078425366460693, "learning_rate": 2.4086435487065594e-06, "loss": 0.214, "step": 31531 }, { "epoch": 2.3435154217762912, "grad_norm": 2.3770928173234136, "learning_rate": 2.408121285600482e-06, "loss": 0.3658, "step": 31532 }, { "epoch": 2.3435897435897437, "grad_norm": 2.1490175721106737, "learning_rate": 2.4075990713705267e-06, "loss": 0.298, "step": 31533 }, { "epoch": 2.3436640654031957, "grad_norm": 2.0299453543307435, "learning_rate": 2.4070769060200515e-06, "loss": 0.256, "step": 31534 }, { "epoch": 2.343738387216648, "grad_norm": 2.2613801072335455, "learning_rate": 2.406554789552423e-06, "loss": 0.275, "step": 31535 }, { "epoch": 2.3438127090301, "grad_norm": 2.2638191629565543, "learning_rate": 2.406032721970999e-06, "loss": 0.2991, "step": 31536 }, { "epoch": 2.3438870308435527, "grad_norm": 2.4910221124395466, "learning_rate": 2.4055107032791482e-06, "loss": 0.2868, "step": 31537 }, { "epoch": 2.3439613526570047, "grad_norm": 2.254712523699136, "learning_rate": 2.404988733480219e-06, "loss": 0.3082, "step": 31538 }, { "epoch": 2.344035674470457, "grad_norm": 2.407785855186451, "learning_rate": 2.4044668125775815e-06, "loss": 0.2323, "step": 31539 }, { "epoch": 2.344109996283909, "grad_norm": 2.19423793362609, "learning_rate": 2.403944940574592e-06, "loss": 0.2472, "step": 31540 }, { "epoch": 2.3441843180973616, "grad_norm": 2.380514560292823, "learning_rate": 2.403423117474609e-06, "loss": 0.2486, "step": 31541 }, { "epoch": 2.344258639910814, "grad_norm": 2.2648936885102304, "learning_rate": 2.4029013432809953e-06, "loss": 0.2218, "step": 31542 }, { "epoch": 2.344332961724266, "grad_norm": 2.2003602583537707, "learning_rate": 2.4023796179971104e-06, "loss": 0.3152, "step": 31543 }, { "epoch": 2.344407283537718, "grad_norm": 2.0105433963169332, "learning_rate": 2.4018579416263068e-06, "loss": 0.2296, "step": 31544 }, { "epoch": 2.3444816053511706, "grad_norm": 3.3466268223652786, "learning_rate": 2.4013363141719515e-06, "loss": 0.3413, "step": 31545 }, { "epoch": 2.344555927164623, "grad_norm": 3.127317336806812, "learning_rate": 2.4008147356373946e-06, "loss": 0.3002, "step": 31546 }, { "epoch": 2.344630248978075, "grad_norm": 2.00054474140908, "learning_rate": 2.400293206026002e-06, "loss": 0.2204, "step": 31547 }, { "epoch": 2.3447045707915275, "grad_norm": 2.165405989051869, "learning_rate": 2.399771725341128e-06, "loss": 0.2044, "step": 31548 }, { "epoch": 2.3447788926049795, "grad_norm": 2.3820577827739107, "learning_rate": 2.3992502935861282e-06, "loss": 0.2856, "step": 31549 }, { "epoch": 2.344853214418432, "grad_norm": 1.9789047302555425, "learning_rate": 2.398728910764361e-06, "loss": 0.2094, "step": 31550 }, { "epoch": 2.344927536231884, "grad_norm": 2.870025946386949, "learning_rate": 2.3982075768791793e-06, "loss": 0.287, "step": 31551 }, { "epoch": 2.3450018580453365, "grad_norm": 2.304125527459233, "learning_rate": 2.397686291933946e-06, "loss": 0.3051, "step": 31552 }, { "epoch": 2.3450761798587885, "grad_norm": 1.8371088238155686, "learning_rate": 2.3971650559320116e-06, "loss": 0.2226, "step": 31553 }, { "epoch": 2.345150501672241, "grad_norm": 2.4665714278637214, "learning_rate": 2.396643868876736e-06, "loss": 0.2777, "step": 31554 }, { "epoch": 2.345224823485693, "grad_norm": 2.661973832931766, "learning_rate": 2.396122730771473e-06, "loss": 0.3786, "step": 31555 }, { "epoch": 2.3452991452991454, "grad_norm": 1.4811259193401163, "learning_rate": 2.395601641619575e-06, "loss": 0.1681, "step": 31556 }, { "epoch": 2.3453734671125974, "grad_norm": 2.1453021457262453, "learning_rate": 2.395080601424401e-06, "loss": 0.3075, "step": 31557 }, { "epoch": 2.34544778892605, "grad_norm": 2.318301198794309, "learning_rate": 2.3945596101893044e-06, "loss": 0.3098, "step": 31558 }, { "epoch": 2.345522110739502, "grad_norm": 2.5397350223258934, "learning_rate": 2.3940386679176353e-06, "loss": 0.3047, "step": 31559 }, { "epoch": 2.3455964325529544, "grad_norm": 2.6861102565055406, "learning_rate": 2.393517774612756e-06, "loss": 0.3627, "step": 31560 }, { "epoch": 2.3456707543664064, "grad_norm": 2.7781721367782883, "learning_rate": 2.392996930278009e-06, "loss": 0.3228, "step": 31561 }, { "epoch": 2.345745076179859, "grad_norm": 2.149615516266979, "learning_rate": 2.3924761349167556e-06, "loss": 0.286, "step": 31562 }, { "epoch": 2.345819397993311, "grad_norm": 2.4660857539797716, "learning_rate": 2.391955388532344e-06, "loss": 0.3179, "step": 31563 }, { "epoch": 2.3458937198067633, "grad_norm": 2.1367105196481577, "learning_rate": 2.3914346911281318e-06, "loss": 0.2392, "step": 31564 }, { "epoch": 2.3459680416202158, "grad_norm": 1.74530696221664, "learning_rate": 2.3909140427074675e-06, "loss": 0.2808, "step": 31565 }, { "epoch": 2.346042363433668, "grad_norm": 2.3401301795950062, "learning_rate": 2.390393443273701e-06, "loss": 0.3511, "step": 31566 }, { "epoch": 2.34611668524712, "grad_norm": 2.0579197352021428, "learning_rate": 2.3898728928301894e-06, "loss": 0.2155, "step": 31567 }, { "epoch": 2.3461910070605723, "grad_norm": 2.1331032512290458, "learning_rate": 2.38935239138028e-06, "loss": 0.25, "step": 31568 }, { "epoch": 2.3462653288740247, "grad_norm": 2.1706951192685957, "learning_rate": 2.388831938927326e-06, "loss": 0.3151, "step": 31569 }, { "epoch": 2.3463396506874767, "grad_norm": 2.0654518724683553, "learning_rate": 2.3883115354746778e-06, "loss": 0.2674, "step": 31570 }, { "epoch": 2.346413972500929, "grad_norm": 1.6778525785395464, "learning_rate": 2.387791181025685e-06, "loss": 0.2226, "step": 31571 }, { "epoch": 2.346488294314381, "grad_norm": 1.6937828658096026, "learning_rate": 2.387270875583697e-06, "loss": 0.2058, "step": 31572 }, { "epoch": 2.3465626161278337, "grad_norm": 2.4922981953361716, "learning_rate": 2.3867506191520618e-06, "loss": 0.2785, "step": 31573 }, { "epoch": 2.3466369379412857, "grad_norm": 3.2285229420265833, "learning_rate": 2.386230411734134e-06, "loss": 0.4297, "step": 31574 }, { "epoch": 2.346711259754738, "grad_norm": 2.4018594539688567, "learning_rate": 2.3857102533332568e-06, "loss": 0.2894, "step": 31575 }, { "epoch": 2.34678558156819, "grad_norm": 2.350538280968462, "learning_rate": 2.3851901439527846e-06, "loss": 0.2673, "step": 31576 }, { "epoch": 2.3468599033816426, "grad_norm": 2.589205399860022, "learning_rate": 2.3846700835960632e-06, "loss": 0.3285, "step": 31577 }, { "epoch": 2.3469342251950946, "grad_norm": 2.561814931477445, "learning_rate": 2.3841500722664377e-06, "loss": 0.255, "step": 31578 }, { "epoch": 2.347008547008547, "grad_norm": 3.0253211347780504, "learning_rate": 2.3836301099672623e-06, "loss": 0.3699, "step": 31579 }, { "epoch": 2.347082868821999, "grad_norm": 2.367041009730774, "learning_rate": 2.383110196701881e-06, "loss": 0.2366, "step": 31580 }, { "epoch": 2.3471571906354516, "grad_norm": 2.042877597890231, "learning_rate": 2.382590332473638e-06, "loss": 0.2329, "step": 31581 }, { "epoch": 2.3472315124489036, "grad_norm": 2.5332304034253674, "learning_rate": 2.382070517285888e-06, "loss": 0.2873, "step": 31582 }, { "epoch": 2.347305834262356, "grad_norm": 2.590715754183648, "learning_rate": 2.381550751141969e-06, "loss": 0.2862, "step": 31583 }, { "epoch": 2.347380156075808, "grad_norm": 2.3913391328386684, "learning_rate": 2.3810310340452327e-06, "loss": 0.2787, "step": 31584 }, { "epoch": 2.3474544778892605, "grad_norm": 2.5497026244632797, "learning_rate": 2.38051136599902e-06, "loss": 0.2883, "step": 31585 }, { "epoch": 2.3475287997027126, "grad_norm": 3.0667234595010506, "learning_rate": 2.3799917470066826e-06, "loss": 0.3579, "step": 31586 }, { "epoch": 2.347603121516165, "grad_norm": 1.7748566611891439, "learning_rate": 2.3794721770715633e-06, "loss": 0.2472, "step": 31587 }, { "epoch": 2.3476774433296175, "grad_norm": 2.236036183018847, "learning_rate": 2.3789526561970044e-06, "loss": 0.2375, "step": 31588 }, { "epoch": 2.3477517651430695, "grad_norm": 2.5992023863093734, "learning_rate": 2.378433184386355e-06, "loss": 0.3346, "step": 31589 }, { "epoch": 2.3478260869565215, "grad_norm": 4.1560158645149565, "learning_rate": 2.3779137616429547e-06, "loss": 0.2502, "step": 31590 }, { "epoch": 2.347900408769974, "grad_norm": 2.0096487581042553, "learning_rate": 2.3773943879701534e-06, "loss": 0.2415, "step": 31591 }, { "epoch": 2.3479747305834264, "grad_norm": 2.5534816503526274, "learning_rate": 2.3768750633712913e-06, "loss": 0.3298, "step": 31592 }, { "epoch": 2.3480490523968784, "grad_norm": 2.205027957700774, "learning_rate": 2.376355787849709e-06, "loss": 0.1956, "step": 31593 }, { "epoch": 2.348123374210331, "grad_norm": 1.9307684166340513, "learning_rate": 2.3758365614087575e-06, "loss": 0.2706, "step": 31594 }, { "epoch": 2.348197696023783, "grad_norm": 2.1673626893602873, "learning_rate": 2.3753173840517697e-06, "loss": 0.2427, "step": 31595 }, { "epoch": 2.3482720178372354, "grad_norm": 2.7060367321683074, "learning_rate": 2.374798255782096e-06, "loss": 0.3115, "step": 31596 }, { "epoch": 2.3483463396506874, "grad_norm": 2.0237447162571325, "learning_rate": 2.3742791766030728e-06, "loss": 0.2308, "step": 31597 }, { "epoch": 2.34842066146414, "grad_norm": 2.361224153693418, "learning_rate": 2.373760146518047e-06, "loss": 0.2705, "step": 31598 }, { "epoch": 2.348494983277592, "grad_norm": 2.139538818255865, "learning_rate": 2.373241165530358e-06, "loss": 0.3178, "step": 31599 }, { "epoch": 2.3485693050910443, "grad_norm": 2.762988186477203, "learning_rate": 2.372722233643343e-06, "loss": 0.3185, "step": 31600 }, { "epoch": 2.3486436269044964, "grad_norm": 2.56962178592495, "learning_rate": 2.3722033508603504e-06, "loss": 0.2735, "step": 31601 }, { "epoch": 2.348717948717949, "grad_norm": 2.444576445737337, "learning_rate": 2.371684517184716e-06, "loss": 0.297, "step": 31602 }, { "epoch": 2.348792270531401, "grad_norm": 2.2106932729284234, "learning_rate": 2.371165732619778e-06, "loss": 0.3249, "step": 31603 }, { "epoch": 2.3488665923448533, "grad_norm": 1.9705928755571047, "learning_rate": 2.3706469971688826e-06, "loss": 0.2281, "step": 31604 }, { "epoch": 2.3489409141583053, "grad_norm": 2.851990817620323, "learning_rate": 2.3701283108353655e-06, "loss": 0.2879, "step": 31605 }, { "epoch": 2.3490152359717578, "grad_norm": 2.4610342720325074, "learning_rate": 2.369609673622567e-06, "loss": 0.2971, "step": 31606 }, { "epoch": 2.34908955778521, "grad_norm": 3.587854851461534, "learning_rate": 2.3690910855338223e-06, "loss": 0.2355, "step": 31607 }, { "epoch": 2.3491638795986622, "grad_norm": 2.680071625798058, "learning_rate": 2.3685725465724763e-06, "loss": 0.2844, "step": 31608 }, { "epoch": 2.3492382014121143, "grad_norm": 2.4023915712184825, "learning_rate": 2.368054056741864e-06, "loss": 0.2008, "step": 31609 }, { "epoch": 2.3493125232255667, "grad_norm": 2.2772103662523895, "learning_rate": 2.367535616045321e-06, "loss": 0.3196, "step": 31610 }, { "epoch": 2.349386845039019, "grad_norm": 2.7809994409111094, "learning_rate": 2.367017224486191e-06, "loss": 0.323, "step": 31611 }, { "epoch": 2.349461166852471, "grad_norm": 2.168465306764202, "learning_rate": 2.3664988820678047e-06, "loss": 0.2978, "step": 31612 }, { "epoch": 2.349535488665923, "grad_norm": 2.474063644275876, "learning_rate": 2.365980588793505e-06, "loss": 0.3259, "step": 31613 }, { "epoch": 2.3496098104793757, "grad_norm": 2.4644087447108607, "learning_rate": 2.365462344666627e-06, "loss": 0.2485, "step": 31614 }, { "epoch": 2.349684132292828, "grad_norm": 2.1076724348390035, "learning_rate": 2.3649441496905025e-06, "loss": 0.2525, "step": 31615 }, { "epoch": 2.34975845410628, "grad_norm": 1.8769915866916203, "learning_rate": 2.3644260038684787e-06, "loss": 0.2175, "step": 31616 }, { "epoch": 2.3498327759197326, "grad_norm": 2.6333544256408197, "learning_rate": 2.363907907203876e-06, "loss": 0.2973, "step": 31617 }, { "epoch": 2.3499070977331846, "grad_norm": 3.4221275304208505, "learning_rate": 2.3633898597000425e-06, "loss": 0.256, "step": 31618 }, { "epoch": 2.349981419546637, "grad_norm": 3.0959296328492574, "learning_rate": 2.3628718613603064e-06, "loss": 0.3006, "step": 31619 }, { "epoch": 2.350055741360089, "grad_norm": 2.1861487162692983, "learning_rate": 2.3623539121880067e-06, "loss": 0.2445, "step": 31620 }, { "epoch": 2.3501300631735416, "grad_norm": 2.1695070486018992, "learning_rate": 2.361836012186477e-06, "loss": 0.2858, "step": 31621 }, { "epoch": 2.3502043849869936, "grad_norm": 2.304370224230801, "learning_rate": 2.3613181613590474e-06, "loss": 0.2063, "step": 31622 }, { "epoch": 2.350278706800446, "grad_norm": 1.6120284184709266, "learning_rate": 2.3608003597090577e-06, "loss": 0.1994, "step": 31623 }, { "epoch": 2.350353028613898, "grad_norm": 2.489087517741168, "learning_rate": 2.36028260723984e-06, "loss": 0.274, "step": 31624 }, { "epoch": 2.3504273504273505, "grad_norm": 1.9730520958916058, "learning_rate": 2.3597649039547223e-06, "loss": 0.2216, "step": 31625 }, { "epoch": 2.3505016722408025, "grad_norm": 2.469740396104996, "learning_rate": 2.3592472498570453e-06, "loss": 0.2527, "step": 31626 }, { "epoch": 2.350575994054255, "grad_norm": 1.9962465843019774, "learning_rate": 2.358729644950135e-06, "loss": 0.2449, "step": 31627 }, { "epoch": 2.350650315867707, "grad_norm": 2.016166831454396, "learning_rate": 2.358212089237333e-06, "loss": 0.2063, "step": 31628 }, { "epoch": 2.3507246376811595, "grad_norm": 1.990947378308229, "learning_rate": 2.3576945827219587e-06, "loss": 0.1884, "step": 31629 }, { "epoch": 2.3507989594946115, "grad_norm": 2.294437849727875, "learning_rate": 2.357177125407353e-06, "loss": 0.2317, "step": 31630 }, { "epoch": 2.350873281308064, "grad_norm": 2.2179144257682193, "learning_rate": 2.3566597172968454e-06, "loss": 0.2835, "step": 31631 }, { "epoch": 2.350947603121516, "grad_norm": 2.347630094371665, "learning_rate": 2.3561423583937614e-06, "loss": 0.3147, "step": 31632 }, { "epoch": 2.3510219249349684, "grad_norm": 2.6789629741925247, "learning_rate": 2.355625048701441e-06, "loss": 0.3355, "step": 31633 }, { "epoch": 2.351096246748421, "grad_norm": 2.6087831459062243, "learning_rate": 2.3551077882232055e-06, "loss": 0.2697, "step": 31634 }, { "epoch": 2.351170568561873, "grad_norm": 2.515117282364889, "learning_rate": 2.3545905769623933e-06, "loss": 0.2898, "step": 31635 }, { "epoch": 2.351244890375325, "grad_norm": 2.6911233434776585, "learning_rate": 2.35407341492233e-06, "loss": 0.3226, "step": 31636 }, { "epoch": 2.3513192121887774, "grad_norm": 2.164083577176674, "learning_rate": 2.3535563021063423e-06, "loss": 0.2267, "step": 31637 }, { "epoch": 2.35139353400223, "grad_norm": 2.198164553981793, "learning_rate": 2.3530392385177646e-06, "loss": 0.3005, "step": 31638 }, { "epoch": 2.351467855815682, "grad_norm": 1.9044659282836134, "learning_rate": 2.3525222241599246e-06, "loss": 0.2298, "step": 31639 }, { "epoch": 2.3515421776291343, "grad_norm": 2.919819643873188, "learning_rate": 2.3520052590361486e-06, "loss": 0.2431, "step": 31640 }, { "epoch": 2.3516164994425863, "grad_norm": 2.894305815844845, "learning_rate": 2.351488343149764e-06, "loss": 0.3167, "step": 31641 }, { "epoch": 2.351690821256039, "grad_norm": 2.161766102630191, "learning_rate": 2.3509714765041035e-06, "loss": 0.2945, "step": 31642 }, { "epoch": 2.351765143069491, "grad_norm": 2.338359144058807, "learning_rate": 2.350454659102491e-06, "loss": 0.2119, "step": 31643 }, { "epoch": 2.3518394648829433, "grad_norm": 2.2824939760091074, "learning_rate": 2.3499378909482527e-06, "loss": 0.2934, "step": 31644 }, { "epoch": 2.3519137866963953, "grad_norm": 2.431346255445839, "learning_rate": 2.349421172044719e-06, "loss": 0.2231, "step": 31645 }, { "epoch": 2.3519881085098477, "grad_norm": 2.3593250595939628, "learning_rate": 2.3489045023952164e-06, "loss": 0.3449, "step": 31646 }, { "epoch": 2.3520624303232998, "grad_norm": 2.2486626575534823, "learning_rate": 2.3483878820030647e-06, "loss": 0.2619, "step": 31647 }, { "epoch": 2.352136752136752, "grad_norm": 2.1374442859664926, "learning_rate": 2.347871310871599e-06, "loss": 0.2617, "step": 31648 }, { "epoch": 2.3522110739502042, "grad_norm": 2.589878957442933, "learning_rate": 2.347354789004138e-06, "loss": 0.3434, "step": 31649 }, { "epoch": 2.3522853957636567, "grad_norm": 2.244629491139498, "learning_rate": 2.3468383164040154e-06, "loss": 0.1323, "step": 31650 }, { "epoch": 2.3523597175771087, "grad_norm": 2.4143199851544157, "learning_rate": 2.3463218930745444e-06, "loss": 0.2068, "step": 31651 }, { "epoch": 2.352434039390561, "grad_norm": 2.7237456424812674, "learning_rate": 2.345805519019059e-06, "loss": 0.2655, "step": 31652 }, { "epoch": 2.352508361204013, "grad_norm": 1.8612646105621418, "learning_rate": 2.3452891942408807e-06, "loss": 0.2587, "step": 31653 }, { "epoch": 2.3525826830174656, "grad_norm": 2.098034194889938, "learning_rate": 2.34477291874333e-06, "loss": 0.3052, "step": 31654 }, { "epoch": 2.3526570048309177, "grad_norm": 2.389451787594726, "learning_rate": 2.344256692529737e-06, "loss": 0.3034, "step": 31655 }, { "epoch": 2.35273132664437, "grad_norm": 1.9265459005072476, "learning_rate": 2.3437405156034197e-06, "loss": 0.2586, "step": 31656 }, { "epoch": 2.3528056484578226, "grad_norm": 2.413207488885256, "learning_rate": 2.3432243879677063e-06, "loss": 0.3084, "step": 31657 }, { "epoch": 2.3528799702712746, "grad_norm": 2.2349690052696074, "learning_rate": 2.3427083096259175e-06, "loss": 0.2541, "step": 31658 }, { "epoch": 2.352954292084727, "grad_norm": 2.3837639413943816, "learning_rate": 2.3421922805813722e-06, "loss": 0.298, "step": 31659 }, { "epoch": 2.353028613898179, "grad_norm": 3.3642031839904725, "learning_rate": 2.3416763008373988e-06, "loss": 0.3864, "step": 31660 }, { "epoch": 2.3531029357116315, "grad_norm": 2.5655118837180386, "learning_rate": 2.341160370397315e-06, "loss": 0.3358, "step": 31661 }, { "epoch": 2.3531772575250836, "grad_norm": 2.3021032935897945, "learning_rate": 2.340644489264444e-06, "loss": 0.2232, "step": 31662 }, { "epoch": 2.353251579338536, "grad_norm": 3.3136755924420136, "learning_rate": 2.3401286574421045e-06, "loss": 0.4479, "step": 31663 }, { "epoch": 2.353325901151988, "grad_norm": 2.994167559657819, "learning_rate": 2.3396128749336212e-06, "loss": 0.329, "step": 31664 }, { "epoch": 2.3534002229654405, "grad_norm": 1.9374260791845044, "learning_rate": 2.3390971417423124e-06, "loss": 0.2422, "step": 31665 }, { "epoch": 2.3534745447788925, "grad_norm": 2.121536119360117, "learning_rate": 2.338581457871496e-06, "loss": 0.2358, "step": 31666 }, { "epoch": 2.353548866592345, "grad_norm": 2.286657557498298, "learning_rate": 2.338065823324498e-06, "loss": 0.2698, "step": 31667 }, { "epoch": 2.353623188405797, "grad_norm": 2.976024265911285, "learning_rate": 2.3375502381046335e-06, "loss": 0.3279, "step": 31668 }, { "epoch": 2.3536975102192494, "grad_norm": 2.6386927352702525, "learning_rate": 2.337034702215222e-06, "loss": 0.3021, "step": 31669 }, { "epoch": 2.3537718320327015, "grad_norm": 2.1465130772929184, "learning_rate": 2.336519215659585e-06, "loss": 0.2645, "step": 31670 }, { "epoch": 2.353846153846154, "grad_norm": 1.9994010021317632, "learning_rate": 2.336003778441036e-06, "loss": 0.2214, "step": 31671 }, { "epoch": 2.353920475659606, "grad_norm": 2.3354017123419153, "learning_rate": 2.335488390562901e-06, "loss": 0.2326, "step": 31672 }, { "epoch": 2.3539947974730584, "grad_norm": 2.083764464763939, "learning_rate": 2.334973052028494e-06, "loss": 0.2561, "step": 31673 }, { "epoch": 2.3540691192865104, "grad_norm": 2.132408806938576, "learning_rate": 2.3344577628411335e-06, "loss": 0.2507, "step": 31674 }, { "epoch": 2.354143441099963, "grad_norm": 1.6436572238197829, "learning_rate": 2.3339425230041356e-06, "loss": 0.2585, "step": 31675 }, { "epoch": 2.3542177629134153, "grad_norm": 2.044380230092881, "learning_rate": 2.3334273325208147e-06, "loss": 0.2658, "step": 31676 }, { "epoch": 2.3542920847268674, "grad_norm": 1.9782876383561256, "learning_rate": 2.332912191394494e-06, "loss": 0.2379, "step": 31677 }, { "epoch": 2.3543664065403194, "grad_norm": 2.271321980424323, "learning_rate": 2.332397099628484e-06, "loss": 0.2571, "step": 31678 }, { "epoch": 2.354440728353772, "grad_norm": 2.2159939463740543, "learning_rate": 2.331882057226106e-06, "loss": 0.2916, "step": 31679 }, { "epoch": 2.3545150501672243, "grad_norm": 2.449471810291575, "learning_rate": 2.3313670641906737e-06, "loss": 0.3184, "step": 31680 }, { "epoch": 2.3545893719806763, "grad_norm": 2.727403348245227, "learning_rate": 2.3308521205254996e-06, "loss": 0.3106, "step": 31681 }, { "epoch": 2.3546636937941288, "grad_norm": 2.0936675417662713, "learning_rate": 2.3303372262339053e-06, "loss": 0.254, "step": 31682 }, { "epoch": 2.354738015607581, "grad_norm": 2.8310117712825367, "learning_rate": 2.329822381319198e-06, "loss": 0.3591, "step": 31683 }, { "epoch": 2.3548123374210332, "grad_norm": 2.4102539114494763, "learning_rate": 2.3293075857847025e-06, "loss": 0.3161, "step": 31684 }, { "epoch": 2.3548866592344853, "grad_norm": 2.4377235119694167, "learning_rate": 2.3287928396337234e-06, "loss": 0.2986, "step": 31685 }, { "epoch": 2.3549609810479377, "grad_norm": 2.086000328930149, "learning_rate": 2.3282781428695754e-06, "loss": 0.2471, "step": 31686 }, { "epoch": 2.3550353028613897, "grad_norm": 2.6173072608801977, "learning_rate": 2.3277634954955776e-06, "loss": 0.2569, "step": 31687 }, { "epoch": 2.355109624674842, "grad_norm": 2.6920671575475303, "learning_rate": 2.3272488975150377e-06, "loss": 0.3213, "step": 31688 }, { "epoch": 2.355183946488294, "grad_norm": 2.200223338758494, "learning_rate": 2.326734348931273e-06, "loss": 0.2595, "step": 31689 }, { "epoch": 2.3552582683017467, "grad_norm": 2.354020363307574, "learning_rate": 2.326219849747596e-06, "loss": 0.2202, "step": 31690 }, { "epoch": 2.3553325901151987, "grad_norm": 2.175074991219377, "learning_rate": 2.325705399967314e-06, "loss": 0.3104, "step": 31691 }, { "epoch": 2.355406911928651, "grad_norm": 2.626464427951596, "learning_rate": 2.325190999593745e-06, "loss": 0.2584, "step": 31692 }, { "epoch": 2.355481233742103, "grad_norm": 2.5146281003376383, "learning_rate": 2.3246766486301954e-06, "loss": 0.3178, "step": 31693 }, { "epoch": 2.3555555555555556, "grad_norm": 2.121245923783267, "learning_rate": 2.3241623470799824e-06, "loss": 0.2343, "step": 31694 }, { "epoch": 2.3556298773690076, "grad_norm": 2.1536392147309784, "learning_rate": 2.3236480949464135e-06, "loss": 0.2647, "step": 31695 }, { "epoch": 2.35570419918246, "grad_norm": 2.39727594161251, "learning_rate": 2.3231338922327993e-06, "loss": 0.1995, "step": 31696 }, { "epoch": 2.355778520995912, "grad_norm": 2.098697874476477, "learning_rate": 2.322619738942452e-06, "loss": 0.2537, "step": 31697 }, { "epoch": 2.3558528428093646, "grad_norm": 2.716755002384791, "learning_rate": 2.3221056350786775e-06, "loss": 0.3216, "step": 31698 }, { "epoch": 2.355927164622817, "grad_norm": 2.3479692527899476, "learning_rate": 2.321591580644791e-06, "loss": 0.2137, "step": 31699 }, { "epoch": 2.356001486436269, "grad_norm": 2.8573157440186803, "learning_rate": 2.3210775756440963e-06, "loss": 0.2754, "step": 31700 }, { "epoch": 2.356075808249721, "grad_norm": 1.9349041553367337, "learning_rate": 2.32056362007991e-06, "loss": 0.2746, "step": 31701 }, { "epoch": 2.3561501300631735, "grad_norm": 2.8775497525745197, "learning_rate": 2.3200497139555345e-06, "loss": 0.3344, "step": 31702 }, { "epoch": 2.356224451876626, "grad_norm": 2.5047727913606157, "learning_rate": 2.319535857274279e-06, "loss": 0.3248, "step": 31703 }, { "epoch": 2.356298773690078, "grad_norm": 2.19580173900913, "learning_rate": 2.3190220500394555e-06, "loss": 0.2552, "step": 31704 }, { "epoch": 2.3563730955035305, "grad_norm": 2.314052320262102, "learning_rate": 2.31850829225437e-06, "loss": 0.2873, "step": 31705 }, { "epoch": 2.3564474173169825, "grad_norm": 2.341075729716967, "learning_rate": 2.317994583922326e-06, "loss": 0.3419, "step": 31706 }, { "epoch": 2.356521739130435, "grad_norm": 2.854403091469062, "learning_rate": 2.3174809250466414e-06, "loss": 0.3249, "step": 31707 }, { "epoch": 2.356596060943887, "grad_norm": 2.4279021692106717, "learning_rate": 2.316967315630608e-06, "loss": 0.3067, "step": 31708 }, { "epoch": 2.3566703827573394, "grad_norm": 2.4418086760366298, "learning_rate": 2.3164537556775457e-06, "loss": 0.2737, "step": 31709 }, { "epoch": 2.3567447045707914, "grad_norm": 2.115945972349631, "learning_rate": 2.315940245190751e-06, "loss": 0.2915, "step": 31710 }, { "epoch": 2.356819026384244, "grad_norm": 1.8431510511013274, "learning_rate": 2.3154267841735376e-06, "loss": 0.2336, "step": 31711 }, { "epoch": 2.356893348197696, "grad_norm": 2.3415073204420533, "learning_rate": 2.3149133726292085e-06, "loss": 0.3207, "step": 31712 }, { "epoch": 2.3569676700111484, "grad_norm": 2.125114556655388, "learning_rate": 2.314400010561064e-06, "loss": 0.2444, "step": 31713 }, { "epoch": 2.3570419918246004, "grad_norm": 2.499576233009118, "learning_rate": 2.3138866979724174e-06, "loss": 0.2779, "step": 31714 }, { "epoch": 2.357116313638053, "grad_norm": 2.643045069364229, "learning_rate": 2.313373434866566e-06, "loss": 0.3153, "step": 31715 }, { "epoch": 2.357190635451505, "grad_norm": 2.1363884335479537, "learning_rate": 2.312860221246821e-06, "loss": 0.232, "step": 31716 }, { "epoch": 2.3572649572649573, "grad_norm": 2.5538468119906246, "learning_rate": 2.312347057116483e-06, "loss": 0.3183, "step": 31717 }, { "epoch": 2.3573392790784093, "grad_norm": 2.0654380766454916, "learning_rate": 2.311833942478855e-06, "loss": 0.3007, "step": 31718 }, { "epoch": 2.357413600891862, "grad_norm": 2.1956694815811106, "learning_rate": 2.311320877337242e-06, "loss": 0.2429, "step": 31719 }, { "epoch": 2.357487922705314, "grad_norm": 2.2788713089120054, "learning_rate": 2.3108078616949435e-06, "loss": 0.2753, "step": 31720 }, { "epoch": 2.3575622445187663, "grad_norm": 2.1597894340974966, "learning_rate": 2.3102948955552673e-06, "loss": 0.264, "step": 31721 }, { "epoch": 2.3576365663322187, "grad_norm": 2.6404932303286723, "learning_rate": 2.3097819789215104e-06, "loss": 0.3284, "step": 31722 }, { "epoch": 2.3577108881456708, "grad_norm": 2.185030159478558, "learning_rate": 2.3092691117969813e-06, "loss": 0.2848, "step": 31723 }, { "epoch": 2.3577852099591228, "grad_norm": 2.372843447787636, "learning_rate": 2.3087562941849796e-06, "loss": 0.2778, "step": 31724 }, { "epoch": 2.3578595317725752, "grad_norm": 2.296156345291121, "learning_rate": 2.3082435260888016e-06, "loss": 0.2609, "step": 31725 }, { "epoch": 2.3579338535860277, "grad_norm": 2.7314528652435235, "learning_rate": 2.3077308075117565e-06, "loss": 0.3818, "step": 31726 }, { "epoch": 2.3580081753994797, "grad_norm": 2.1775022262662165, "learning_rate": 2.3072181384571404e-06, "loss": 0.2185, "step": 31727 }, { "epoch": 2.358082497212932, "grad_norm": 2.4663763758568407, "learning_rate": 2.3067055189282526e-06, "loss": 0.329, "step": 31728 }, { "epoch": 2.358156819026384, "grad_norm": 1.8549282996238632, "learning_rate": 2.306192948928401e-06, "loss": 0.2029, "step": 31729 }, { "epoch": 2.3582311408398366, "grad_norm": 2.324560493152376, "learning_rate": 2.305680428460875e-06, "loss": 0.3062, "step": 31730 }, { "epoch": 2.3583054626532887, "grad_norm": 2.3993028660927957, "learning_rate": 2.305167957528981e-06, "loss": 0.3091, "step": 31731 }, { "epoch": 2.358379784466741, "grad_norm": 4.681345725809549, "learning_rate": 2.3046555361360134e-06, "loss": 0.3414, "step": 31732 }, { "epoch": 2.358454106280193, "grad_norm": 2.4579221135448157, "learning_rate": 2.3041431642852773e-06, "loss": 0.2371, "step": 31733 }, { "epoch": 2.3585284280936456, "grad_norm": 2.5451970124135075, "learning_rate": 2.303630841980069e-06, "loss": 0.3567, "step": 31734 }, { "epoch": 2.3586027499070976, "grad_norm": 2.0719695678577725, "learning_rate": 2.3031185692236823e-06, "loss": 0.2365, "step": 31735 }, { "epoch": 2.35867707172055, "grad_norm": 2.4728963485886335, "learning_rate": 2.3026063460194226e-06, "loss": 0.2711, "step": 31736 }, { "epoch": 2.358751393534002, "grad_norm": 2.3159682765733463, "learning_rate": 2.3020941723705803e-06, "loss": 0.2397, "step": 31737 }, { "epoch": 2.3588257153474546, "grad_norm": 3.2028569944113308, "learning_rate": 2.3015820482804583e-06, "loss": 0.368, "step": 31738 }, { "epoch": 2.3589000371609066, "grad_norm": 2.552589472519135, "learning_rate": 2.3010699737523534e-06, "loss": 0.2783, "step": 31739 }, { "epoch": 2.358974358974359, "grad_norm": 2.636112007675589, "learning_rate": 2.3005579487895566e-06, "loss": 0.3575, "step": 31740 }, { "epoch": 2.359048680787811, "grad_norm": 2.599095681080195, "learning_rate": 2.300045973395374e-06, "loss": 0.3262, "step": 31741 }, { "epoch": 2.3591230026012635, "grad_norm": 2.4871360417274633, "learning_rate": 2.2995340475730908e-06, "loss": 0.2355, "step": 31742 }, { "epoch": 2.3591973244147155, "grad_norm": 1.7863581752808975, "learning_rate": 2.29902217132601e-06, "loss": 0.2507, "step": 31743 }, { "epoch": 2.359271646228168, "grad_norm": 2.043029942263127, "learning_rate": 2.298510344657423e-06, "loss": 0.2302, "step": 31744 }, { "epoch": 2.3593459680416204, "grad_norm": 2.261282632193673, "learning_rate": 2.2979985675706286e-06, "loss": 0.2558, "step": 31745 }, { "epoch": 2.3594202898550725, "grad_norm": 2.55134881941501, "learning_rate": 2.29748684006892e-06, "loss": 0.3146, "step": 31746 }, { "epoch": 2.3594946116685245, "grad_norm": 3.7018742053605016, "learning_rate": 2.296975162155589e-06, "loss": 0.2725, "step": 31747 }, { "epoch": 2.359568933481977, "grad_norm": 2.987395772871108, "learning_rate": 2.2964635338339346e-06, "loss": 0.3417, "step": 31748 }, { "epoch": 2.3596432552954294, "grad_norm": 2.7267265880945657, "learning_rate": 2.2959519551072484e-06, "loss": 0.3749, "step": 31749 }, { "epoch": 2.3597175771088814, "grad_norm": 2.23217082669747, "learning_rate": 2.295440425978821e-06, "loss": 0.255, "step": 31750 }, { "epoch": 2.359791898922334, "grad_norm": 3.149710239400334, "learning_rate": 2.294928946451952e-06, "loss": 0.3349, "step": 31751 }, { "epoch": 2.359866220735786, "grad_norm": 2.0560679381733435, "learning_rate": 2.2944175165299297e-06, "loss": 0.2631, "step": 31752 }, { "epoch": 2.3599405425492384, "grad_norm": 2.554875441613341, "learning_rate": 2.2939061362160476e-06, "loss": 0.3014, "step": 31753 }, { "epoch": 2.3600148643626904, "grad_norm": 2.4842311414446727, "learning_rate": 2.293394805513596e-06, "loss": 0.2712, "step": 31754 }, { "epoch": 2.360089186176143, "grad_norm": 2.2414455153362813, "learning_rate": 2.292883524425871e-06, "loss": 0.2513, "step": 31755 }, { "epoch": 2.360163507989595, "grad_norm": 2.0147891418363195, "learning_rate": 2.292372292956162e-06, "loss": 0.3027, "step": 31756 }, { "epoch": 2.3602378298030473, "grad_norm": 2.2200750384600103, "learning_rate": 2.291861111107757e-06, "loss": 0.1974, "step": 31757 }, { "epoch": 2.3603121516164993, "grad_norm": 2.7812654587826255, "learning_rate": 2.2913499788839532e-06, "loss": 0.2612, "step": 31758 }, { "epoch": 2.360386473429952, "grad_norm": 2.090718402012514, "learning_rate": 2.290838896288036e-06, "loss": 0.1881, "step": 31759 }, { "epoch": 2.360460795243404, "grad_norm": 2.3047684662586545, "learning_rate": 2.2903278633233005e-06, "loss": 0.2622, "step": 31760 }, { "epoch": 2.3605351170568563, "grad_norm": 2.2261197723563426, "learning_rate": 2.2898168799930343e-06, "loss": 0.2934, "step": 31761 }, { "epoch": 2.3606094388703083, "grad_norm": 2.251377646783659, "learning_rate": 2.289305946300524e-06, "loss": 0.2253, "step": 31762 }, { "epoch": 2.3606837606837607, "grad_norm": 2.5589747716919633, "learning_rate": 2.288795062249067e-06, "loss": 0.322, "step": 31763 }, { "epoch": 2.3607580824972128, "grad_norm": 2.2610230548631156, "learning_rate": 2.2882842278419425e-06, "loss": 0.25, "step": 31764 }, { "epoch": 2.360832404310665, "grad_norm": 2.2920243048123567, "learning_rate": 2.2877734430824462e-06, "loss": 0.2775, "step": 31765 }, { "epoch": 2.3609067261241172, "grad_norm": 2.348759194514455, "learning_rate": 2.2872627079738618e-06, "loss": 0.2939, "step": 31766 }, { "epoch": 2.3609810479375697, "grad_norm": 2.1047578669600835, "learning_rate": 2.286752022519483e-06, "loss": 0.2361, "step": 31767 }, { "epoch": 2.361055369751022, "grad_norm": 2.5483137565523952, "learning_rate": 2.2862413867225943e-06, "loss": 0.2505, "step": 31768 }, { "epoch": 2.361129691564474, "grad_norm": 3.119847718897047, "learning_rate": 2.285730800586481e-06, "loss": 0.3246, "step": 31769 }, { "epoch": 2.361204013377926, "grad_norm": 1.949493765808305, "learning_rate": 2.2852202641144337e-06, "loss": 0.203, "step": 31770 }, { "epoch": 2.3612783351913786, "grad_norm": 2.1534920883572397, "learning_rate": 2.2847097773097393e-06, "loss": 0.2334, "step": 31771 }, { "epoch": 2.361352657004831, "grad_norm": 1.8940678502605448, "learning_rate": 2.28419934017568e-06, "loss": 0.2446, "step": 31772 }, { "epoch": 2.361426978818283, "grad_norm": 2.9997155482642506, "learning_rate": 2.283688952715548e-06, "loss": 0.2943, "step": 31773 }, { "epoch": 2.3615013006317356, "grad_norm": 7.286126824390613, "learning_rate": 2.283178614932626e-06, "loss": 0.3452, "step": 31774 }, { "epoch": 2.3615756224451876, "grad_norm": 3.53857086255429, "learning_rate": 2.2826683268301997e-06, "loss": 0.2982, "step": 31775 }, { "epoch": 2.36164994425864, "grad_norm": 2.55483909931141, "learning_rate": 2.2821580884115525e-06, "loss": 0.2372, "step": 31776 }, { "epoch": 2.361724266072092, "grad_norm": 2.6647282949356894, "learning_rate": 2.281647899679972e-06, "loss": 0.2677, "step": 31777 }, { "epoch": 2.3617985878855445, "grad_norm": 2.3324893111654297, "learning_rate": 2.281137760638743e-06, "loss": 0.2178, "step": 31778 }, { "epoch": 2.3618729096989965, "grad_norm": 2.3564037161333333, "learning_rate": 2.280627671291146e-06, "loss": 0.3222, "step": 31779 }, { "epoch": 2.361947231512449, "grad_norm": 2.8626640269958528, "learning_rate": 2.2801176316404692e-06, "loss": 0.2835, "step": 31780 }, { "epoch": 2.362021553325901, "grad_norm": 2.9070398622473306, "learning_rate": 2.2796076416899924e-06, "loss": 0.3236, "step": 31781 }, { "epoch": 2.3620958751393535, "grad_norm": 2.040080078837232, "learning_rate": 2.2790977014430037e-06, "loss": 0.2558, "step": 31782 }, { "epoch": 2.3621701969528055, "grad_norm": 1.932509830020921, "learning_rate": 2.2785878109027835e-06, "loss": 0.1989, "step": 31783 }, { "epoch": 2.362244518766258, "grad_norm": 2.1008511479515164, "learning_rate": 2.278077970072612e-06, "loss": 0.2212, "step": 31784 }, { "epoch": 2.36231884057971, "grad_norm": 2.0638758954268335, "learning_rate": 2.277568178955776e-06, "loss": 0.2442, "step": 31785 }, { "epoch": 2.3623931623931624, "grad_norm": 2.551895143084806, "learning_rate": 2.277058437555555e-06, "loss": 0.3227, "step": 31786 }, { "epoch": 2.3624674842066145, "grad_norm": 2.376143041367738, "learning_rate": 2.276548745875232e-06, "loss": 0.2454, "step": 31787 }, { "epoch": 2.362541806020067, "grad_norm": 3.201852312060186, "learning_rate": 2.276039103918084e-06, "loss": 0.3334, "step": 31788 }, { "epoch": 2.362616127833519, "grad_norm": 1.6959486795370675, "learning_rate": 2.2755295116873987e-06, "loss": 0.2612, "step": 31789 }, { "epoch": 2.3626904496469714, "grad_norm": 2.467127093243796, "learning_rate": 2.2750199691864537e-06, "loss": 0.2087, "step": 31790 }, { "epoch": 2.362764771460424, "grad_norm": 2.678660962436166, "learning_rate": 2.2745104764185256e-06, "loss": 0.2814, "step": 31791 }, { "epoch": 2.362839093273876, "grad_norm": 2.23140396236076, "learning_rate": 2.274001033386902e-06, "loss": 0.2735, "step": 31792 }, { "epoch": 2.362913415087328, "grad_norm": 1.931326177587113, "learning_rate": 2.273491640094858e-06, "loss": 0.233, "step": 31793 }, { "epoch": 2.3629877369007803, "grad_norm": 1.9485583763765741, "learning_rate": 2.2729822965456716e-06, "loss": 0.261, "step": 31794 }, { "epoch": 2.363062058714233, "grad_norm": 2.2494516677293435, "learning_rate": 2.272473002742628e-06, "loss": 0.2746, "step": 31795 }, { "epoch": 2.363136380527685, "grad_norm": 2.482377662476987, "learning_rate": 2.2719637586889974e-06, "loss": 0.2948, "step": 31796 }, { "epoch": 2.3632107023411373, "grad_norm": 3.8356713743537556, "learning_rate": 2.2714545643880704e-06, "loss": 0.4925, "step": 31797 }, { "epoch": 2.3632850241545893, "grad_norm": 2.26333524005064, "learning_rate": 2.2709454198431114e-06, "loss": 0.282, "step": 31798 }, { "epoch": 2.3633593459680418, "grad_norm": 2.6379814355870734, "learning_rate": 2.270436325057408e-06, "loss": 0.3386, "step": 31799 }, { "epoch": 2.3634336677814938, "grad_norm": 2.4670520238296234, "learning_rate": 2.2699272800342353e-06, "loss": 0.2777, "step": 31800 }, { "epoch": 2.3635079895949462, "grad_norm": 3.056894726021525, "learning_rate": 2.2694182847768652e-06, "loss": 0.2913, "step": 31801 }, { "epoch": 2.3635823114083983, "grad_norm": 1.8541459106498226, "learning_rate": 2.268909339288583e-06, "loss": 0.2118, "step": 31802 }, { "epoch": 2.3636566332218507, "grad_norm": 2.221824614231872, "learning_rate": 2.268400443572658e-06, "loss": 0.2674, "step": 31803 }, { "epoch": 2.3637309550353027, "grad_norm": 1.9673909634220788, "learning_rate": 2.2678915976323734e-06, "loss": 0.2711, "step": 31804 }, { "epoch": 2.363805276848755, "grad_norm": 2.1285962385483317, "learning_rate": 2.267382801471001e-06, "loss": 0.2224, "step": 31805 }, { "epoch": 2.363879598662207, "grad_norm": 1.9607043499751584, "learning_rate": 2.266874055091813e-06, "loss": 0.2511, "step": 31806 }, { "epoch": 2.3639539204756597, "grad_norm": 2.2313555775895155, "learning_rate": 2.2663653584980926e-06, "loss": 0.344, "step": 31807 }, { "epoch": 2.3640282422891117, "grad_norm": 1.8021902531109106, "learning_rate": 2.2658567116931105e-06, "loss": 0.1896, "step": 31808 }, { "epoch": 2.364102564102564, "grad_norm": 2.724402578652259, "learning_rate": 2.2653481146801416e-06, "loss": 0.2982, "step": 31809 }, { "epoch": 2.364176885916016, "grad_norm": 2.2773386123737613, "learning_rate": 2.26483956746246e-06, "loss": 0.3045, "step": 31810 }, { "epoch": 2.3642512077294686, "grad_norm": 3.1533251177108013, "learning_rate": 2.2643310700433375e-06, "loss": 0.2647, "step": 31811 }, { "epoch": 2.3643255295429206, "grad_norm": 2.7711198080778883, "learning_rate": 2.263822622426053e-06, "loss": 0.2898, "step": 31812 }, { "epoch": 2.364399851356373, "grad_norm": 2.0949192465709925, "learning_rate": 2.263314224613874e-06, "loss": 0.2442, "step": 31813 }, { "epoch": 2.3644741731698256, "grad_norm": 1.8465448709265775, "learning_rate": 2.26280587661008e-06, "loss": 0.227, "step": 31814 }, { "epoch": 2.3645484949832776, "grad_norm": 1.8722452819599689, "learning_rate": 2.262297578417939e-06, "loss": 0.1933, "step": 31815 }, { "epoch": 2.36462281679673, "grad_norm": 2.5336341188666966, "learning_rate": 2.2617893300407234e-06, "loss": 0.2669, "step": 31816 }, { "epoch": 2.364697138610182, "grad_norm": 4.24002706968589, "learning_rate": 2.2612811314817086e-06, "loss": 0.2879, "step": 31817 }, { "epoch": 2.3647714604236345, "grad_norm": 2.504939140109682, "learning_rate": 2.2607729827441617e-06, "loss": 0.3328, "step": 31818 }, { "epoch": 2.3648457822370865, "grad_norm": 3.1067755165822404, "learning_rate": 2.260264883831359e-06, "loss": 0.2866, "step": 31819 }, { "epoch": 2.364920104050539, "grad_norm": 2.431509777403952, "learning_rate": 2.2597568347465705e-06, "loss": 0.289, "step": 31820 }, { "epoch": 2.364994425863991, "grad_norm": 2.160979295121723, "learning_rate": 2.259248835493065e-06, "loss": 0.2738, "step": 31821 }, { "epoch": 2.3650687476774435, "grad_norm": 2.2484025488986807, "learning_rate": 2.2587408860741145e-06, "loss": 0.2624, "step": 31822 }, { "epoch": 2.3651430694908955, "grad_norm": 2.229160284810903, "learning_rate": 2.2582329864929853e-06, "loss": 0.2221, "step": 31823 }, { "epoch": 2.365217391304348, "grad_norm": 1.9259024807633522, "learning_rate": 2.2577251367529527e-06, "loss": 0.2152, "step": 31824 }, { "epoch": 2.3652917131178, "grad_norm": 3.1089473861129626, "learning_rate": 2.2572173368572824e-06, "loss": 0.328, "step": 31825 }, { "epoch": 2.3653660349312524, "grad_norm": 2.9207539210754447, "learning_rate": 2.256709586809247e-06, "loss": 0.3789, "step": 31826 }, { "epoch": 2.3654403567447044, "grad_norm": 2.3605472260549485, "learning_rate": 2.256201886612114e-06, "loss": 0.2495, "step": 31827 }, { "epoch": 2.365514678558157, "grad_norm": 2.5640688894161516, "learning_rate": 2.2556942362691482e-06, "loss": 0.2671, "step": 31828 }, { "epoch": 2.365589000371609, "grad_norm": 2.5738005567457183, "learning_rate": 2.255186635783624e-06, "loss": 0.349, "step": 31829 }, { "epoch": 2.3656633221850614, "grad_norm": 2.3449721787703997, "learning_rate": 2.2546790851588064e-06, "loss": 0.3219, "step": 31830 }, { "epoch": 2.3657376439985134, "grad_norm": 2.8662551799788467, "learning_rate": 2.254171584397963e-06, "loss": 0.3848, "step": 31831 }, { "epoch": 2.365811965811966, "grad_norm": 2.4203270689512246, "learning_rate": 2.253664133504362e-06, "loss": 0.2645, "step": 31832 }, { "epoch": 2.3658862876254183, "grad_norm": 2.112033009427027, "learning_rate": 2.2531567324812655e-06, "loss": 0.2393, "step": 31833 }, { "epoch": 2.3659606094388703, "grad_norm": 2.5030788812405897, "learning_rate": 2.252649381331946e-06, "loss": 0.2318, "step": 31834 }, { "epoch": 2.3660349312523223, "grad_norm": 3.0353737801972995, "learning_rate": 2.252142080059666e-06, "loss": 0.3968, "step": 31835 }, { "epoch": 2.366109253065775, "grad_norm": 2.709784548731214, "learning_rate": 2.251634828667697e-06, "loss": 0.3019, "step": 31836 }, { "epoch": 2.3661835748792273, "grad_norm": 2.6092961142216207, "learning_rate": 2.2511276271592997e-06, "loss": 0.3489, "step": 31837 }, { "epoch": 2.3662578966926793, "grad_norm": 2.20007506002783, "learning_rate": 2.250620475537738e-06, "loss": 0.2872, "step": 31838 }, { "epoch": 2.3663322185061317, "grad_norm": 2.479608304154999, "learning_rate": 2.250113373806283e-06, "loss": 0.2074, "step": 31839 }, { "epoch": 2.3664065403195838, "grad_norm": 2.281325362153605, "learning_rate": 2.249606321968193e-06, "loss": 0.2308, "step": 31840 }, { "epoch": 2.366480862133036, "grad_norm": 2.111428551983987, "learning_rate": 2.2490993200267373e-06, "loss": 0.2411, "step": 31841 }, { "epoch": 2.3665551839464882, "grad_norm": 2.2111697758118676, "learning_rate": 2.2485923679851784e-06, "loss": 0.2975, "step": 31842 }, { "epoch": 2.3666295057599407, "grad_norm": 2.951146255361716, "learning_rate": 2.24808546584678e-06, "loss": 0.4051, "step": 31843 }, { "epoch": 2.3667038275733927, "grad_norm": 2.308866759675728, "learning_rate": 2.2475786136148046e-06, "loss": 0.2688, "step": 31844 }, { "epoch": 2.366778149386845, "grad_norm": 2.682553548748232, "learning_rate": 2.247071811292514e-06, "loss": 0.3234, "step": 31845 }, { "epoch": 2.366852471200297, "grad_norm": 2.2555661338328044, "learning_rate": 2.2465650588831757e-06, "loss": 0.325, "step": 31846 }, { "epoch": 2.3669267930137496, "grad_norm": 2.4220311864615014, "learning_rate": 2.2460583563900452e-06, "loss": 0.2234, "step": 31847 }, { "epoch": 2.3670011148272017, "grad_norm": 2.48313505939771, "learning_rate": 2.245551703816392e-06, "loss": 0.3281, "step": 31848 }, { "epoch": 2.367075436640654, "grad_norm": 2.8179573929750905, "learning_rate": 2.245045101165475e-06, "loss": 0.2969, "step": 31849 }, { "epoch": 2.367149758454106, "grad_norm": 2.329812857302358, "learning_rate": 2.2445385484405524e-06, "loss": 0.3054, "step": 31850 }, { "epoch": 2.3672240802675586, "grad_norm": 2.714053366621977, "learning_rate": 2.2440320456448904e-06, "loss": 0.2185, "step": 31851 }, { "epoch": 2.3672984020810106, "grad_norm": 2.4929998288961785, "learning_rate": 2.2435255927817477e-06, "loss": 0.3162, "step": 31852 }, { "epoch": 2.367372723894463, "grad_norm": 2.463069112581425, "learning_rate": 2.243019189854383e-06, "loss": 0.3336, "step": 31853 }, { "epoch": 2.367447045707915, "grad_norm": 1.8814139290732055, "learning_rate": 2.2425128368660633e-06, "loss": 0.2224, "step": 31854 }, { "epoch": 2.3675213675213675, "grad_norm": 2.3270585653145752, "learning_rate": 2.242006533820037e-06, "loss": 0.2707, "step": 31855 }, { "epoch": 2.36759568933482, "grad_norm": 2.0899638565954506, "learning_rate": 2.241500280719574e-06, "loss": 0.2761, "step": 31856 }, { "epoch": 2.367670011148272, "grad_norm": 1.7074039276277282, "learning_rate": 2.240994077567926e-06, "loss": 0.2003, "step": 31857 }, { "epoch": 2.367744332961724, "grad_norm": 2.6329101100535284, "learning_rate": 2.2404879243683596e-06, "loss": 0.3704, "step": 31858 }, { "epoch": 2.3678186547751765, "grad_norm": 2.217379825013863, "learning_rate": 2.2399818211241273e-06, "loss": 0.2118, "step": 31859 }, { "epoch": 2.367892976588629, "grad_norm": 2.2109328186782804, "learning_rate": 2.2394757678384883e-06, "loss": 0.2863, "step": 31860 }, { "epoch": 2.367967298402081, "grad_norm": 2.1026014235976334, "learning_rate": 2.2389697645147047e-06, "loss": 0.2682, "step": 31861 }, { "epoch": 2.3680416202155334, "grad_norm": 2.2106657676531274, "learning_rate": 2.2384638111560277e-06, "loss": 0.2533, "step": 31862 }, { "epoch": 2.3681159420289855, "grad_norm": 2.6352306524406464, "learning_rate": 2.2379579077657198e-06, "loss": 0.2821, "step": 31863 }, { "epoch": 2.368190263842438, "grad_norm": 2.0631317945230516, "learning_rate": 2.2374520543470367e-06, "loss": 0.248, "step": 31864 }, { "epoch": 2.36826458565589, "grad_norm": 1.8767724950795808, "learning_rate": 2.236946250903235e-06, "loss": 0.2308, "step": 31865 }, { "epoch": 2.3683389074693424, "grad_norm": 2.174039288175187, "learning_rate": 2.236440497437571e-06, "loss": 0.2946, "step": 31866 }, { "epoch": 2.3684132292827944, "grad_norm": 2.7332864365694554, "learning_rate": 2.235934793953296e-06, "loss": 0.327, "step": 31867 }, { "epoch": 2.368487551096247, "grad_norm": 2.086242234370871, "learning_rate": 2.235429140453673e-06, "loss": 0.2192, "step": 31868 }, { "epoch": 2.368561872909699, "grad_norm": 2.35090641058785, "learning_rate": 2.234923536941953e-06, "loss": 0.2919, "step": 31869 }, { "epoch": 2.3686361947231513, "grad_norm": 2.793200601215874, "learning_rate": 2.234417983421394e-06, "loss": 0.3427, "step": 31870 }, { "epoch": 2.3687105165366034, "grad_norm": 2.416374589313669, "learning_rate": 2.2339124798952494e-06, "loss": 0.2466, "step": 31871 }, { "epoch": 2.368784838350056, "grad_norm": 3.0787709643020564, "learning_rate": 2.2334070263667694e-06, "loss": 0.2821, "step": 31872 }, { "epoch": 2.368859160163508, "grad_norm": 3.33215476683779, "learning_rate": 2.2329016228392165e-06, "loss": 0.324, "step": 31873 }, { "epoch": 2.3689334819769603, "grad_norm": 2.5185554825886824, "learning_rate": 2.2323962693158386e-06, "loss": 0.3589, "step": 31874 }, { "epoch": 2.3690078037904123, "grad_norm": 2.737514636748002, "learning_rate": 2.2318909657998887e-06, "loss": 0.3083, "step": 31875 }, { "epoch": 2.3690821256038648, "grad_norm": 3.1170305446851296, "learning_rate": 2.231385712294627e-06, "loss": 0.291, "step": 31876 }, { "epoch": 2.369156447417317, "grad_norm": 2.180956009619985, "learning_rate": 2.230880508803295e-06, "loss": 0.3027, "step": 31877 }, { "epoch": 2.3692307692307693, "grad_norm": 2.504683172366489, "learning_rate": 2.230375355329154e-06, "loss": 0.2994, "step": 31878 }, { "epoch": 2.3693050910442217, "grad_norm": 2.356427516039312, "learning_rate": 2.229870251875451e-06, "loss": 0.2863, "step": 31879 }, { "epoch": 2.3693794128576737, "grad_norm": 2.2439661758626706, "learning_rate": 2.2293651984454436e-06, "loss": 0.2291, "step": 31880 }, { "epoch": 2.3694537346711257, "grad_norm": 2.377121055912577, "learning_rate": 2.2288601950423784e-06, "loss": 0.3164, "step": 31881 }, { "epoch": 2.369528056484578, "grad_norm": 2.7829158395556806, "learning_rate": 2.2283552416695053e-06, "loss": 0.4182, "step": 31882 }, { "epoch": 2.3696023782980307, "grad_norm": 2.1342747860507347, "learning_rate": 2.227850338330082e-06, "loss": 0.2371, "step": 31883 }, { "epoch": 2.3696767001114827, "grad_norm": 1.8567292374705269, "learning_rate": 2.227345485027351e-06, "loss": 0.2045, "step": 31884 }, { "epoch": 2.369751021924935, "grad_norm": 2.465432945471503, "learning_rate": 2.2268406817645693e-06, "loss": 0.2879, "step": 31885 }, { "epoch": 2.369825343738387, "grad_norm": 2.2099928286111217, "learning_rate": 2.226335928544985e-06, "loss": 0.3023, "step": 31886 }, { "epoch": 2.3698996655518396, "grad_norm": 2.501571492470638, "learning_rate": 2.225831225371846e-06, "loss": 0.2987, "step": 31887 }, { "epoch": 2.3699739873652916, "grad_norm": 1.8174403452463341, "learning_rate": 2.2253265722484017e-06, "loss": 0.2307, "step": 31888 }, { "epoch": 2.370048309178744, "grad_norm": 2.412894462854256, "learning_rate": 2.224821969177899e-06, "loss": 0.3116, "step": 31889 }, { "epoch": 2.370122630992196, "grad_norm": 2.486794379111112, "learning_rate": 2.2243174161635927e-06, "loss": 0.3467, "step": 31890 }, { "epoch": 2.3701969528056486, "grad_norm": 2.2866204716431184, "learning_rate": 2.2238129132087237e-06, "loss": 0.2956, "step": 31891 }, { "epoch": 2.3702712746191006, "grad_norm": 2.1914057410252266, "learning_rate": 2.2233084603165465e-06, "loss": 0.2795, "step": 31892 }, { "epoch": 2.370345596432553, "grad_norm": 2.543511223190918, "learning_rate": 2.2228040574903066e-06, "loss": 0.3435, "step": 31893 }, { "epoch": 2.370419918246005, "grad_norm": 2.2020758463417254, "learning_rate": 2.2222997047332473e-06, "loss": 0.2877, "step": 31894 }, { "epoch": 2.3704942400594575, "grad_norm": 2.410983761633189, "learning_rate": 2.221795402048622e-06, "loss": 0.304, "step": 31895 }, { "epoch": 2.3705685618729095, "grad_norm": 1.9012633240524823, "learning_rate": 2.221291149439674e-06, "loss": 0.2005, "step": 31896 }, { "epoch": 2.370642883686362, "grad_norm": 2.407337453459941, "learning_rate": 2.2207869469096476e-06, "loss": 0.3516, "step": 31897 }, { "epoch": 2.370717205499814, "grad_norm": 2.778377198026983, "learning_rate": 2.2202827944617933e-06, "loss": 0.3458, "step": 31898 }, { "epoch": 2.3707915273132665, "grad_norm": 2.0179927723580544, "learning_rate": 2.219778692099356e-06, "loss": 0.2414, "step": 31899 }, { "epoch": 2.3708658491267185, "grad_norm": 2.1077020814382723, "learning_rate": 2.219274639825578e-06, "loss": 0.2902, "step": 31900 }, { "epoch": 2.370940170940171, "grad_norm": 2.3049597246747626, "learning_rate": 2.2187706376437047e-06, "loss": 0.2123, "step": 31901 }, { "epoch": 2.3710144927536234, "grad_norm": 2.4843416354229397, "learning_rate": 2.2182666855569845e-06, "loss": 0.2377, "step": 31902 }, { "epoch": 2.3710888145670754, "grad_norm": 2.8527314301934457, "learning_rate": 2.2177627835686598e-06, "loss": 0.2746, "step": 31903 }, { "epoch": 2.3711631363805274, "grad_norm": 3.3568319832091875, "learning_rate": 2.2172589316819713e-06, "loss": 0.2996, "step": 31904 }, { "epoch": 2.37123745819398, "grad_norm": 2.74830977983871, "learning_rate": 2.21675512990017e-06, "loss": 0.3366, "step": 31905 }, { "epoch": 2.3713117800074324, "grad_norm": 2.4123210713576135, "learning_rate": 2.2162513782264914e-06, "loss": 0.2931, "step": 31906 }, { "epoch": 2.3713861018208844, "grad_norm": 2.351492403878309, "learning_rate": 2.215747676664186e-06, "loss": 0.3203, "step": 31907 }, { "epoch": 2.371460423634337, "grad_norm": 2.58394487668287, "learning_rate": 2.215244025216493e-06, "loss": 0.2626, "step": 31908 }, { "epoch": 2.371534745447789, "grad_norm": 2.8823667183272206, "learning_rate": 2.214740423886652e-06, "loss": 0.2635, "step": 31909 }, { "epoch": 2.3716090672612413, "grad_norm": 2.1325601846796385, "learning_rate": 2.214236872677914e-06, "loss": 0.3266, "step": 31910 }, { "epoch": 2.3716833890746933, "grad_norm": 2.212419865544049, "learning_rate": 2.2137333715935093e-06, "loss": 0.2396, "step": 31911 }, { "epoch": 2.371757710888146, "grad_norm": 3.1367588381547318, "learning_rate": 2.213229920636688e-06, "loss": 0.3317, "step": 31912 }, { "epoch": 2.371832032701598, "grad_norm": 1.7490481056134186, "learning_rate": 2.2127265198106852e-06, "loss": 0.2336, "step": 31913 }, { "epoch": 2.3719063545150503, "grad_norm": 2.778719930606323, "learning_rate": 2.212223169118748e-06, "loss": 0.3271, "step": 31914 }, { "epoch": 2.3719806763285023, "grad_norm": 2.1852308557254507, "learning_rate": 2.2117198685641138e-06, "loss": 0.2822, "step": 31915 }, { "epoch": 2.3720549981419548, "grad_norm": 2.6035336295651064, "learning_rate": 2.211216618150022e-06, "loss": 0.2334, "step": 31916 }, { "epoch": 2.3721293199554068, "grad_norm": 2.2254712466048976, "learning_rate": 2.210713417879714e-06, "loss": 0.2618, "step": 31917 }, { "epoch": 2.3722036417688592, "grad_norm": 2.7561376477165678, "learning_rate": 2.2102102677564298e-06, "loss": 0.3429, "step": 31918 }, { "epoch": 2.3722779635823112, "grad_norm": 2.0405115701470473, "learning_rate": 2.2097071677834047e-06, "loss": 0.2239, "step": 31919 }, { "epoch": 2.3723522853957637, "grad_norm": 2.209526673108554, "learning_rate": 2.2092041179638836e-06, "loss": 0.257, "step": 31920 }, { "epoch": 2.3724266072092157, "grad_norm": 2.220762059639634, "learning_rate": 2.208701118301102e-06, "loss": 0.2629, "step": 31921 }, { "epoch": 2.372500929022668, "grad_norm": 2.372876272238209, "learning_rate": 2.208198168798299e-06, "loss": 0.3141, "step": 31922 }, { "epoch": 2.37257525083612, "grad_norm": 1.9894902875605431, "learning_rate": 2.2076952694587083e-06, "loss": 0.2598, "step": 31923 }, { "epoch": 2.3726495726495727, "grad_norm": 2.1504146998284073, "learning_rate": 2.2071924202855745e-06, "loss": 0.222, "step": 31924 }, { "epoch": 2.372723894463025, "grad_norm": 1.772479567584705, "learning_rate": 2.206689621282131e-06, "loss": 0.1858, "step": 31925 }, { "epoch": 2.372798216276477, "grad_norm": 3.439415875599021, "learning_rate": 2.206186872451612e-06, "loss": 0.2487, "step": 31926 }, { "epoch": 2.372872538089929, "grad_norm": 1.9387547818666957, "learning_rate": 2.2056841737972613e-06, "loss": 0.2484, "step": 31927 }, { "epoch": 2.3729468599033816, "grad_norm": 2.3233351526852237, "learning_rate": 2.2051815253223086e-06, "loss": 0.3411, "step": 31928 }, { "epoch": 2.373021181716834, "grad_norm": 2.4972329007542045, "learning_rate": 2.204678927029996e-06, "loss": 0.312, "step": 31929 }, { "epoch": 2.373095503530286, "grad_norm": 2.2354290222103947, "learning_rate": 2.204176378923555e-06, "loss": 0.2795, "step": 31930 }, { "epoch": 2.3731698253437385, "grad_norm": 2.4717610627785356, "learning_rate": 2.2036738810062195e-06, "loss": 0.3342, "step": 31931 }, { "epoch": 2.3732441471571906, "grad_norm": 2.3926773227401155, "learning_rate": 2.2031714332812306e-06, "loss": 0.2926, "step": 31932 }, { "epoch": 2.373318468970643, "grad_norm": 2.3310122304611336, "learning_rate": 2.202669035751819e-06, "loss": 0.2759, "step": 31933 }, { "epoch": 2.373392790784095, "grad_norm": 3.192757935804835, "learning_rate": 2.202166688421219e-06, "loss": 0.3504, "step": 31934 }, { "epoch": 2.3734671125975475, "grad_norm": 2.3529257283604132, "learning_rate": 2.201664391292666e-06, "loss": 0.276, "step": 31935 }, { "epoch": 2.3735414344109995, "grad_norm": 3.082489969317973, "learning_rate": 2.20116214436939e-06, "loss": 0.2905, "step": 31936 }, { "epoch": 2.373615756224452, "grad_norm": 1.7721611555168841, "learning_rate": 2.20065994765463e-06, "loss": 0.239, "step": 31937 }, { "epoch": 2.373690078037904, "grad_norm": 1.9126482824121058, "learning_rate": 2.2001578011516132e-06, "loss": 0.2277, "step": 31938 }, { "epoch": 2.3737643998513565, "grad_norm": 2.253532916570933, "learning_rate": 2.1996557048635803e-06, "loss": 0.2826, "step": 31939 }, { "epoch": 2.3738387216648085, "grad_norm": 2.116207597807251, "learning_rate": 2.199153658793758e-06, "loss": 0.2954, "step": 31940 }, { "epoch": 2.373913043478261, "grad_norm": 1.9425072182646719, "learning_rate": 2.198651662945377e-06, "loss": 0.1958, "step": 31941 }, { "epoch": 2.373987365291713, "grad_norm": 2.0204172247505916, "learning_rate": 2.1981497173216747e-06, "loss": 0.2909, "step": 31942 }, { "epoch": 2.3740616871051654, "grad_norm": 2.224655756913443, "learning_rate": 2.197647821925877e-06, "loss": 0.2856, "step": 31943 }, { "epoch": 2.3741360089186174, "grad_norm": 1.8780116645641773, "learning_rate": 2.1971459767612234e-06, "loss": 0.2301, "step": 31944 }, { "epoch": 2.37421033073207, "grad_norm": 2.0494860150862904, "learning_rate": 2.1966441818309337e-06, "loss": 0.2132, "step": 31945 }, { "epoch": 2.374284652545522, "grad_norm": 2.34839607246726, "learning_rate": 2.1961424371382456e-06, "loss": 0.2525, "step": 31946 }, { "epoch": 2.3743589743589744, "grad_norm": 2.0867689922787958, "learning_rate": 2.195640742686389e-06, "loss": 0.2828, "step": 31947 }, { "epoch": 2.374433296172427, "grad_norm": 2.899868947201371, "learning_rate": 2.1951390984785894e-06, "loss": 0.2681, "step": 31948 }, { "epoch": 2.374507617985879, "grad_norm": 2.1083979048100794, "learning_rate": 2.194637504518081e-06, "loss": 0.2473, "step": 31949 }, { "epoch": 2.374581939799331, "grad_norm": 1.9458534673604115, "learning_rate": 2.1941359608080892e-06, "loss": 0.2131, "step": 31950 }, { "epoch": 2.3746562616127833, "grad_norm": 2.453902497939459, "learning_rate": 2.1936344673518484e-06, "loss": 0.3007, "step": 31951 }, { "epoch": 2.3747305834262358, "grad_norm": 1.6472566325789268, "learning_rate": 2.193133024152583e-06, "loss": 0.2092, "step": 31952 }, { "epoch": 2.374804905239688, "grad_norm": 2.5637189189201286, "learning_rate": 2.1926316312135197e-06, "loss": 0.3349, "step": 31953 }, { "epoch": 2.3748792270531403, "grad_norm": 2.5063536103096227, "learning_rate": 2.1921302885378912e-06, "loss": 0.356, "step": 31954 }, { "epoch": 2.3749535488665923, "grad_norm": 2.884362530580179, "learning_rate": 2.191628996128924e-06, "loss": 0.32, "step": 31955 }, { "epoch": 2.3750278706800447, "grad_norm": 2.0735254101625946, "learning_rate": 2.1911277539898434e-06, "loss": 0.3242, "step": 31956 }, { "epoch": 2.3751021924934967, "grad_norm": 2.0085340755787606, "learning_rate": 2.1906265621238763e-06, "loss": 0.2217, "step": 31957 }, { "epoch": 2.375176514306949, "grad_norm": 2.889436347730868, "learning_rate": 2.190125420534248e-06, "loss": 0.2948, "step": 31958 }, { "epoch": 2.375250836120401, "grad_norm": 2.48307174194706, "learning_rate": 2.1896243292241904e-06, "loss": 0.2629, "step": 31959 }, { "epoch": 2.3753251579338537, "grad_norm": 2.1035668843322397, "learning_rate": 2.189123288196924e-06, "loss": 0.2554, "step": 31960 }, { "epoch": 2.3753994797473057, "grad_norm": 1.8349626334404898, "learning_rate": 2.1886222974556783e-06, "loss": 0.2482, "step": 31961 }, { "epoch": 2.375473801560758, "grad_norm": 2.6457831515479344, "learning_rate": 2.1881213570036763e-06, "loss": 0.3959, "step": 31962 }, { "epoch": 2.37554812337421, "grad_norm": 2.41920646720262, "learning_rate": 2.1876204668441415e-06, "loss": 0.2558, "step": 31963 }, { "epoch": 2.3756224451876626, "grad_norm": 2.0026096415644536, "learning_rate": 2.1871196269803043e-06, "loss": 0.2289, "step": 31964 }, { "epoch": 2.3756967670011147, "grad_norm": 2.068653934856377, "learning_rate": 2.186618837415382e-06, "loss": 0.2345, "step": 31965 }, { "epoch": 2.375771088814567, "grad_norm": 2.3723648413695924, "learning_rate": 2.1861180981526077e-06, "loss": 0.1986, "step": 31966 }, { "epoch": 2.3758454106280196, "grad_norm": 1.9614004778243053, "learning_rate": 2.1856174091951943e-06, "loss": 0.2358, "step": 31967 }, { "epoch": 2.3759197324414716, "grad_norm": 2.161816574219034, "learning_rate": 2.1851167705463728e-06, "loss": 0.2721, "step": 31968 }, { "epoch": 2.3759940542549236, "grad_norm": 2.4577123205873317, "learning_rate": 2.184616182209365e-06, "loss": 0.2063, "step": 31969 }, { "epoch": 2.376068376068376, "grad_norm": 2.8617781938075195, "learning_rate": 2.184115644187389e-06, "loss": 0.3099, "step": 31970 }, { "epoch": 2.3761426978818285, "grad_norm": 2.5068807506645716, "learning_rate": 2.183615156483675e-06, "loss": 0.3494, "step": 31971 }, { "epoch": 2.3762170196952805, "grad_norm": 1.9960943894452046, "learning_rate": 2.183114719101437e-06, "loss": 0.2743, "step": 31972 }, { "epoch": 2.376291341508733, "grad_norm": 2.5743232996040413, "learning_rate": 2.1826143320439042e-06, "loss": 0.3324, "step": 31973 }, { "epoch": 2.376365663322185, "grad_norm": 1.7688345149149716, "learning_rate": 2.1821139953142944e-06, "loss": 0.2257, "step": 31974 }, { "epoch": 2.3764399851356375, "grad_norm": 2.15145831720895, "learning_rate": 2.1816137089158274e-06, "loss": 0.3286, "step": 31975 }, { "epoch": 2.3765143069490895, "grad_norm": 2.789708435105974, "learning_rate": 2.181113472851727e-06, "loss": 0.2908, "step": 31976 }, { "epoch": 2.376588628762542, "grad_norm": 2.9521266512941904, "learning_rate": 2.180613287125213e-06, "loss": 0.2908, "step": 31977 }, { "epoch": 2.376662950575994, "grad_norm": 1.7666757442894003, "learning_rate": 2.1801131517395047e-06, "loss": 0.1595, "step": 31978 }, { "epoch": 2.3767372723894464, "grad_norm": 2.38586703079518, "learning_rate": 2.179613066697822e-06, "loss": 0.2921, "step": 31979 }, { "epoch": 2.3768115942028984, "grad_norm": 2.662233818008069, "learning_rate": 2.1791130320033825e-06, "loss": 0.3801, "step": 31980 }, { "epoch": 2.376885916016351, "grad_norm": 2.0870749191333116, "learning_rate": 2.1786130476594103e-06, "loss": 0.2337, "step": 31981 }, { "epoch": 2.376960237829803, "grad_norm": 2.049744149370564, "learning_rate": 2.178113113669118e-06, "loss": 0.2096, "step": 31982 }, { "epoch": 2.3770345596432554, "grad_norm": 2.251243133813408, "learning_rate": 2.1776132300357313e-06, "loss": 0.2397, "step": 31983 }, { "epoch": 2.3771088814567074, "grad_norm": 2.3010109993433963, "learning_rate": 2.1771133967624637e-06, "loss": 0.2721, "step": 31984 }, { "epoch": 2.37718320327016, "grad_norm": 2.347816197371336, "learning_rate": 2.1766136138525317e-06, "loss": 0.2502, "step": 31985 }, { "epoch": 2.377257525083612, "grad_norm": 1.8718795202876122, "learning_rate": 2.1761138813091586e-06, "loss": 0.1847, "step": 31986 }, { "epoch": 2.3773318468970643, "grad_norm": 2.0651910260307793, "learning_rate": 2.1756141991355553e-06, "loss": 0.2449, "step": 31987 }, { "epoch": 2.3774061687105164, "grad_norm": 2.229339695204606, "learning_rate": 2.175114567334945e-06, "loss": 0.2988, "step": 31988 }, { "epoch": 2.377480490523969, "grad_norm": 1.8375999739262652, "learning_rate": 2.1746149859105405e-06, "loss": 0.1912, "step": 31989 }, { "epoch": 2.3775548123374213, "grad_norm": 2.681509499822894, "learning_rate": 2.1741154548655584e-06, "loss": 0.3633, "step": 31990 }, { "epoch": 2.3776291341508733, "grad_norm": 2.7839517709877177, "learning_rate": 2.173615974203216e-06, "loss": 0.2822, "step": 31991 }, { "epoch": 2.3777034559643253, "grad_norm": 2.8182200644248963, "learning_rate": 2.1731165439267243e-06, "loss": 0.2975, "step": 31992 }, { "epoch": 2.3777777777777778, "grad_norm": 2.133109761504754, "learning_rate": 2.1726171640393046e-06, "loss": 0.2855, "step": 31993 }, { "epoch": 2.3778520995912302, "grad_norm": 2.1660094462838866, "learning_rate": 2.1721178345441675e-06, "loss": 0.3001, "step": 31994 }, { "epoch": 2.3779264214046822, "grad_norm": 2.427903136743206, "learning_rate": 2.1716185554445324e-06, "loss": 0.3007, "step": 31995 }, { "epoch": 2.3780007432181347, "grad_norm": 3.679263766326148, "learning_rate": 2.1711193267436105e-06, "loss": 0.2592, "step": 31996 }, { "epoch": 2.3780750650315867, "grad_norm": 2.3800287578919668, "learning_rate": 2.1706201484446134e-06, "loss": 0.2715, "step": 31997 }, { "epoch": 2.378149386845039, "grad_norm": 3.4419080324124627, "learning_rate": 2.1701210205507605e-06, "loss": 0.3834, "step": 31998 }, { "epoch": 2.378223708658491, "grad_norm": 2.348441143233909, "learning_rate": 2.1696219430652622e-06, "loss": 0.2367, "step": 31999 }, { "epoch": 2.3782980304719437, "grad_norm": 2.86001438464382, "learning_rate": 2.1691229159913317e-06, "loss": 0.3789, "step": 32000 }, { "epoch": 2.3783723522853957, "grad_norm": 2.319026309405656, "learning_rate": 2.1686239393321805e-06, "loss": 0.3421, "step": 32001 }, { "epoch": 2.378446674098848, "grad_norm": 1.835259829689257, "learning_rate": 2.1681250130910205e-06, "loss": 0.1831, "step": 32002 }, { "epoch": 2.3785209959123, "grad_norm": 2.4668631617484715, "learning_rate": 2.1676261372710684e-06, "loss": 0.2705, "step": 32003 }, { "epoch": 2.3785953177257526, "grad_norm": 2.8515889956493976, "learning_rate": 2.167127311875529e-06, "loss": 0.2656, "step": 32004 }, { "epoch": 2.3786696395392046, "grad_norm": 2.085106499707082, "learning_rate": 2.1666285369076203e-06, "loss": 0.2322, "step": 32005 }, { "epoch": 2.378743961352657, "grad_norm": 2.1254770667770564, "learning_rate": 2.166129812370551e-06, "loss": 0.2686, "step": 32006 }, { "epoch": 2.378818283166109, "grad_norm": 2.680905441634146, "learning_rate": 2.1656311382675287e-06, "loss": 0.3498, "step": 32007 }, { "epoch": 2.3788926049795616, "grad_norm": 3.0264622957230327, "learning_rate": 2.165132514601769e-06, "loss": 0.2425, "step": 32008 }, { "epoch": 2.3789669267930136, "grad_norm": 2.4360928161429745, "learning_rate": 2.164633941376477e-06, "loss": 0.2922, "step": 32009 }, { "epoch": 2.379041248606466, "grad_norm": 2.014672887199641, "learning_rate": 2.1641354185948684e-06, "loss": 0.2414, "step": 32010 }, { "epoch": 2.379115570419918, "grad_norm": 2.079710419390063, "learning_rate": 2.163636946260149e-06, "loss": 0.2134, "step": 32011 }, { "epoch": 2.3791898922333705, "grad_norm": 2.1255578399566346, "learning_rate": 2.1631385243755287e-06, "loss": 0.2383, "step": 32012 }, { "epoch": 2.379264214046823, "grad_norm": 2.900420540458688, "learning_rate": 2.1626401529442167e-06, "loss": 0.3599, "step": 32013 }, { "epoch": 2.379338535860275, "grad_norm": 2.7844561817950977, "learning_rate": 2.1621418319694163e-06, "loss": 0.3513, "step": 32014 }, { "epoch": 2.379412857673727, "grad_norm": 2.354111418417048, "learning_rate": 2.161643561454344e-06, "loss": 0.2811, "step": 32015 }, { "epoch": 2.3794871794871795, "grad_norm": 1.9963766615399152, "learning_rate": 2.1611453414022e-06, "loss": 0.2291, "step": 32016 }, { "epoch": 2.379561501300632, "grad_norm": 2.149997561553729, "learning_rate": 2.160647171816199e-06, "loss": 0.2608, "step": 32017 }, { "epoch": 2.379635823114084, "grad_norm": 2.0060421479203097, "learning_rate": 2.160149052699545e-06, "loss": 0.2694, "step": 32018 }, { "epoch": 2.3797101449275364, "grad_norm": 2.326719565302091, "learning_rate": 2.159650984055441e-06, "loss": 0.2388, "step": 32019 }, { "epoch": 2.3797844667409884, "grad_norm": 1.894921604484005, "learning_rate": 2.1591529658871014e-06, "loss": 0.2385, "step": 32020 }, { "epoch": 2.379858788554441, "grad_norm": 2.521370906067398, "learning_rate": 2.158654998197727e-06, "loss": 0.2404, "step": 32021 }, { "epoch": 2.379933110367893, "grad_norm": 2.841451689012017, "learning_rate": 2.1581570809905217e-06, "loss": 0.363, "step": 32022 }, { "epoch": 2.3800074321813454, "grad_norm": 1.914243849869475, "learning_rate": 2.1576592142687e-06, "loss": 0.2427, "step": 32023 }, { "epoch": 2.3800817539947974, "grad_norm": 2.1026671799206516, "learning_rate": 2.1571613980354566e-06, "loss": 0.2439, "step": 32024 }, { "epoch": 2.38015607580825, "grad_norm": 2.3524694164598734, "learning_rate": 2.1566636322940037e-06, "loss": 0.1896, "step": 32025 }, { "epoch": 2.380230397621702, "grad_norm": 3.2237162019122687, "learning_rate": 2.1561659170475403e-06, "loss": 0.3117, "step": 32026 }, { "epoch": 2.3803047194351543, "grad_norm": 2.5146711851842736, "learning_rate": 2.1556682522992766e-06, "loss": 0.2635, "step": 32027 }, { "epoch": 2.3803790412486063, "grad_norm": 2.47932708550887, "learning_rate": 2.155170638052414e-06, "loss": 0.2993, "step": 32028 }, { "epoch": 2.380453363062059, "grad_norm": 1.748747740909789, "learning_rate": 2.154673074310153e-06, "loss": 0.1739, "step": 32029 }, { "epoch": 2.380527684875511, "grad_norm": 2.564678266563166, "learning_rate": 2.154175561075702e-06, "loss": 0.326, "step": 32030 }, { "epoch": 2.3806020066889633, "grad_norm": 2.1321837074619654, "learning_rate": 2.1536780983522587e-06, "loss": 0.3185, "step": 32031 }, { "epoch": 2.3806763285024153, "grad_norm": 2.5369513931880485, "learning_rate": 2.1531806861430325e-06, "loss": 0.3104, "step": 32032 }, { "epoch": 2.3807506503158677, "grad_norm": 2.8624956532722674, "learning_rate": 2.1526833244512215e-06, "loss": 0.3444, "step": 32033 }, { "epoch": 2.3808249721293198, "grad_norm": 2.5698362175017717, "learning_rate": 2.1521860132800277e-06, "loss": 0.3162, "step": 32034 }, { "epoch": 2.380899293942772, "grad_norm": 1.9543405970685352, "learning_rate": 2.1516887526326522e-06, "loss": 0.1984, "step": 32035 }, { "epoch": 2.3809736157562247, "grad_norm": 2.430878873247921, "learning_rate": 2.1511915425122964e-06, "loss": 0.3376, "step": 32036 }, { "epoch": 2.3810479375696767, "grad_norm": 2.2821606989073344, "learning_rate": 2.1506943829221637e-06, "loss": 0.29, "step": 32037 }, { "epoch": 2.3811222593831287, "grad_norm": 7.2991300001444746, "learning_rate": 2.1501972738654508e-06, "loss": 0.3083, "step": 32038 }, { "epoch": 2.381196581196581, "grad_norm": 2.2306727532631863, "learning_rate": 2.1497002153453637e-06, "loss": 0.2583, "step": 32039 }, { "epoch": 2.3812709030100336, "grad_norm": 3.295022864015214, "learning_rate": 2.149203207365099e-06, "loss": 0.4372, "step": 32040 }, { "epoch": 2.3813452248234857, "grad_norm": 2.3488400652943446, "learning_rate": 2.1487062499278543e-06, "loss": 0.2552, "step": 32041 }, { "epoch": 2.381419546636938, "grad_norm": 2.4122749247390205, "learning_rate": 2.1482093430368346e-06, "loss": 0.2456, "step": 32042 }, { "epoch": 2.38149386845039, "grad_norm": 1.4937160396600786, "learning_rate": 2.147712486695235e-06, "loss": 0.1854, "step": 32043 }, { "epoch": 2.3815681902638426, "grad_norm": 1.8907618045003547, "learning_rate": 2.1472156809062516e-06, "loss": 0.2403, "step": 32044 }, { "epoch": 2.3816425120772946, "grad_norm": 2.111766630753038, "learning_rate": 2.14671892567309e-06, "loss": 0.2478, "step": 32045 }, { "epoch": 2.381716833890747, "grad_norm": 2.3599430412846076, "learning_rate": 2.1462222209989437e-06, "loss": 0.3533, "step": 32046 }, { "epoch": 2.381791155704199, "grad_norm": 2.5757861644370563, "learning_rate": 2.1457255668870115e-06, "loss": 0.2786, "step": 32047 }, { "epoch": 2.3818654775176515, "grad_norm": 2.52367320898722, "learning_rate": 2.1452289633404877e-06, "loss": 0.2924, "step": 32048 }, { "epoch": 2.3819397993311036, "grad_norm": 2.209892986516153, "learning_rate": 2.1447324103625754e-06, "loss": 0.2696, "step": 32049 }, { "epoch": 2.382014121144556, "grad_norm": 1.8920827427181774, "learning_rate": 2.144235907956469e-06, "loss": 0.2284, "step": 32050 }, { "epoch": 2.382088442958008, "grad_norm": 1.9449180974947884, "learning_rate": 2.14373945612536e-06, "loss": 0.2247, "step": 32051 }, { "epoch": 2.3821627647714605, "grad_norm": 2.386826369090751, "learning_rate": 2.1432430548724526e-06, "loss": 0.2699, "step": 32052 }, { "epoch": 2.3822370865849125, "grad_norm": 2.204437302386383, "learning_rate": 2.1427467042009355e-06, "loss": 0.247, "step": 32053 }, { "epoch": 2.382311408398365, "grad_norm": 2.7112981945438666, "learning_rate": 2.14225040411401e-06, "loss": 0.2742, "step": 32054 }, { "epoch": 2.382385730211817, "grad_norm": 2.128184404396002, "learning_rate": 2.141754154614869e-06, "loss": 0.2689, "step": 32055 }, { "epoch": 2.3824600520252694, "grad_norm": 3.4277999442822757, "learning_rate": 2.1412579557067035e-06, "loss": 0.2874, "step": 32056 }, { "epoch": 2.3825343738387215, "grad_norm": 2.475437701035944, "learning_rate": 2.1407618073927183e-06, "loss": 0.2669, "step": 32057 }, { "epoch": 2.382608695652174, "grad_norm": 2.3023232405571923, "learning_rate": 2.140265709676095e-06, "loss": 0.2176, "step": 32058 }, { "epoch": 2.3826830174656264, "grad_norm": 2.494099040209852, "learning_rate": 2.139769662560035e-06, "loss": 0.2699, "step": 32059 }, { "epoch": 2.3827573392790784, "grad_norm": 2.370502080296136, "learning_rate": 2.1392736660477277e-06, "loss": 0.2843, "step": 32060 }, { "epoch": 2.3828316610925304, "grad_norm": 2.9905290382377765, "learning_rate": 2.138777720142372e-06, "loss": 0.368, "step": 32061 }, { "epoch": 2.382905982905983, "grad_norm": 2.0550737772497727, "learning_rate": 2.138281824847157e-06, "loss": 0.2731, "step": 32062 }, { "epoch": 2.3829803047194353, "grad_norm": 2.1317457792108123, "learning_rate": 2.1377859801652736e-06, "loss": 0.3147, "step": 32063 }, { "epoch": 2.3830546265328874, "grad_norm": 1.955838710616327, "learning_rate": 2.1372901860999173e-06, "loss": 0.2477, "step": 32064 }, { "epoch": 2.38312894834634, "grad_norm": 2.6305843080296274, "learning_rate": 2.136794442654281e-06, "loss": 0.3043, "step": 32065 }, { "epoch": 2.383203270159792, "grad_norm": 2.257769282072746, "learning_rate": 2.1362987498315502e-06, "loss": 0.2465, "step": 32066 }, { "epoch": 2.3832775919732443, "grad_norm": 2.8490239995686797, "learning_rate": 2.135803107634922e-06, "loss": 0.3705, "step": 32067 }, { "epoch": 2.3833519137866963, "grad_norm": 2.951230849915813, "learning_rate": 2.1353075160675863e-06, "loss": 0.366, "step": 32068 }, { "epoch": 2.3834262356001488, "grad_norm": 2.659838248631798, "learning_rate": 2.1348119751327324e-06, "loss": 0.2446, "step": 32069 }, { "epoch": 2.383500557413601, "grad_norm": 2.445354208515328, "learning_rate": 2.1343164848335483e-06, "loss": 0.2376, "step": 32070 }, { "epoch": 2.3835748792270532, "grad_norm": 2.4145738559488255, "learning_rate": 2.13382104517323e-06, "loss": 0.3035, "step": 32071 }, { "epoch": 2.3836492010405053, "grad_norm": 2.6232017177689073, "learning_rate": 2.133325656154962e-06, "loss": 0.3547, "step": 32072 }, { "epoch": 2.3837235228539577, "grad_norm": 2.510572655060312, "learning_rate": 2.132830317781934e-06, "loss": 0.2915, "step": 32073 }, { "epoch": 2.3837978446674097, "grad_norm": 1.852472915958392, "learning_rate": 2.1323350300573387e-06, "loss": 0.2811, "step": 32074 }, { "epoch": 2.383872166480862, "grad_norm": 2.6228438480113323, "learning_rate": 2.1318397929843594e-06, "loss": 0.286, "step": 32075 }, { "epoch": 2.383946488294314, "grad_norm": 2.2977653296368774, "learning_rate": 2.131344606566189e-06, "loss": 0.2473, "step": 32076 }, { "epoch": 2.3840208101077667, "grad_norm": 2.621886691979714, "learning_rate": 2.1308494708060146e-06, "loss": 0.2717, "step": 32077 }, { "epoch": 2.3840951319212187, "grad_norm": 2.6310641596160806, "learning_rate": 2.130354385707021e-06, "loss": 0.2856, "step": 32078 }, { "epoch": 2.384169453734671, "grad_norm": 2.724710913366683, "learning_rate": 2.129859351272402e-06, "loss": 0.3816, "step": 32079 }, { "epoch": 2.384243775548123, "grad_norm": 2.5152943010202367, "learning_rate": 2.1293643675053355e-06, "loss": 0.3067, "step": 32080 }, { "epoch": 2.3843180973615756, "grad_norm": 2.6042831016588637, "learning_rate": 2.1288694344090144e-06, "loss": 0.3047, "step": 32081 }, { "epoch": 2.384392419175028, "grad_norm": 2.40447703378856, "learning_rate": 2.128374551986624e-06, "loss": 0.3152, "step": 32082 }, { "epoch": 2.38446674098848, "grad_norm": 1.638790652859252, "learning_rate": 2.127879720241347e-06, "loss": 0.2067, "step": 32083 }, { "epoch": 2.384541062801932, "grad_norm": 2.4124068081668772, "learning_rate": 2.127384939176376e-06, "loss": 0.3108, "step": 32084 }, { "epoch": 2.3846153846153846, "grad_norm": 2.2475737244050564, "learning_rate": 2.1268902087948875e-06, "loss": 0.3205, "step": 32085 }, { "epoch": 2.384689706428837, "grad_norm": 2.372690517729964, "learning_rate": 2.1263955291000747e-06, "loss": 0.3357, "step": 32086 }, { "epoch": 2.384764028242289, "grad_norm": 2.6217626606966817, "learning_rate": 2.125900900095119e-06, "loss": 0.2777, "step": 32087 }, { "epoch": 2.3848383500557415, "grad_norm": 2.276132635300764, "learning_rate": 2.125406321783202e-06, "loss": 0.2883, "step": 32088 }, { "epoch": 2.3849126718691935, "grad_norm": 3.4987333638254996, "learning_rate": 2.1249117941675135e-06, "loss": 0.393, "step": 32089 }, { "epoch": 2.384986993682646, "grad_norm": 2.3440440721535842, "learning_rate": 2.1244173172512316e-06, "loss": 0.2278, "step": 32090 }, { "epoch": 2.385061315496098, "grad_norm": 2.252803492567691, "learning_rate": 2.1239228910375475e-06, "loss": 0.2928, "step": 32091 }, { "epoch": 2.3851356373095505, "grad_norm": 2.1571091621006144, "learning_rate": 2.123428515529634e-06, "loss": 0.298, "step": 32092 }, { "epoch": 2.3852099591230025, "grad_norm": 2.0326669372173267, "learning_rate": 2.122934190730682e-06, "loss": 0.153, "step": 32093 }, { "epoch": 2.385284280936455, "grad_norm": 2.121246418865969, "learning_rate": 2.12243991664387e-06, "loss": 0.2985, "step": 32094 }, { "epoch": 2.385358602749907, "grad_norm": 1.8828865738182643, "learning_rate": 2.121945693272379e-06, "loss": 0.2058, "step": 32095 }, { "epoch": 2.3854329245633594, "grad_norm": 2.216215253301232, "learning_rate": 2.1214515206193965e-06, "loss": 0.2228, "step": 32096 }, { "epoch": 2.3855072463768114, "grad_norm": 2.086521558413241, "learning_rate": 2.120957398688096e-06, "loss": 0.2878, "step": 32097 }, { "epoch": 2.385581568190264, "grad_norm": 2.26165664235841, "learning_rate": 2.1204633274816665e-06, "loss": 0.2698, "step": 32098 }, { "epoch": 2.385655890003716, "grad_norm": 2.8359348616832833, "learning_rate": 2.1199693070032846e-06, "loss": 0.3527, "step": 32099 }, { "epoch": 2.3857302118171684, "grad_norm": 2.2244569673422014, "learning_rate": 2.119475337256129e-06, "loss": 0.2538, "step": 32100 }, { "epoch": 2.3858045336306204, "grad_norm": 2.367298772299861, "learning_rate": 2.1189814182433853e-06, "loss": 0.3074, "step": 32101 }, { "epoch": 2.385878855444073, "grad_norm": 2.0455858433509513, "learning_rate": 2.1184875499682313e-06, "loss": 0.2647, "step": 32102 }, { "epoch": 2.385953177257525, "grad_norm": 1.8363421868788694, "learning_rate": 2.117993732433844e-06, "loss": 0.2411, "step": 32103 }, { "epoch": 2.3860274990709773, "grad_norm": 2.007397515346255, "learning_rate": 2.1174999656434048e-06, "loss": 0.2442, "step": 32104 }, { "epoch": 2.38610182088443, "grad_norm": 1.8101174306869177, "learning_rate": 2.117006249600089e-06, "loss": 0.1835, "step": 32105 }, { "epoch": 2.386176142697882, "grad_norm": 2.704113563096152, "learning_rate": 2.11651258430708e-06, "loss": 0.2631, "step": 32106 }, { "epoch": 2.3862504645113343, "grad_norm": 2.271799981533294, "learning_rate": 2.1160189697675514e-06, "loss": 0.2752, "step": 32107 }, { "epoch": 2.3863247863247863, "grad_norm": 2.4828145355987146, "learning_rate": 2.1155254059846864e-06, "loss": 0.2618, "step": 32108 }, { "epoch": 2.3863991081382387, "grad_norm": 1.9137421153817997, "learning_rate": 2.11503189296166e-06, "loss": 0.2127, "step": 32109 }, { "epoch": 2.3864734299516908, "grad_norm": 2.6570426530395665, "learning_rate": 2.114538430701647e-06, "loss": 0.2991, "step": 32110 }, { "epoch": 2.386547751765143, "grad_norm": 2.5878830202119865, "learning_rate": 2.1140450192078275e-06, "loss": 0.3449, "step": 32111 }, { "epoch": 2.3866220735785952, "grad_norm": 3.2418432913479864, "learning_rate": 2.113551658483376e-06, "loss": 0.3265, "step": 32112 }, { "epoch": 2.3866963953920477, "grad_norm": 1.9655584056321305, "learning_rate": 2.113058348531475e-06, "loss": 0.2433, "step": 32113 }, { "epoch": 2.3867707172054997, "grad_norm": 2.3951120284901353, "learning_rate": 2.1125650893552886e-06, "loss": 0.2237, "step": 32114 }, { "epoch": 2.386845039018952, "grad_norm": 2.2918374234748464, "learning_rate": 2.112071880958002e-06, "loss": 0.216, "step": 32115 }, { "epoch": 2.386919360832404, "grad_norm": 2.696624680983655, "learning_rate": 2.1115787233427877e-06, "loss": 0.3208, "step": 32116 }, { "epoch": 2.3869936826458567, "grad_norm": 2.6244896493797865, "learning_rate": 2.111085616512817e-06, "loss": 0.3104, "step": 32117 }, { "epoch": 2.3870680044593087, "grad_norm": 2.2744585093196727, "learning_rate": 2.1105925604712708e-06, "loss": 0.3207, "step": 32118 }, { "epoch": 2.387142326272761, "grad_norm": 2.763957364802056, "learning_rate": 2.1100995552213175e-06, "loss": 0.3153, "step": 32119 }, { "epoch": 2.387216648086213, "grad_norm": 1.9660634822891545, "learning_rate": 2.1096066007661374e-06, "loss": 0.3059, "step": 32120 }, { "epoch": 2.3872909698996656, "grad_norm": 2.4466034903487075, "learning_rate": 2.1091136971089e-06, "loss": 0.3414, "step": 32121 }, { "epoch": 2.3873652917131176, "grad_norm": 2.664973645727933, "learning_rate": 2.1086208442527765e-06, "loss": 0.2718, "step": 32122 }, { "epoch": 2.38743961352657, "grad_norm": 2.352899800365557, "learning_rate": 2.1081280422009455e-06, "loss": 0.2463, "step": 32123 }, { "epoch": 2.3875139353400225, "grad_norm": 2.402208382860907, "learning_rate": 2.1076352909565758e-06, "loss": 0.3015, "step": 32124 }, { "epoch": 2.3875882571534746, "grad_norm": 2.4127093391893712, "learning_rate": 2.1071425905228415e-06, "loss": 0.3022, "step": 32125 }, { "epoch": 2.3876625789669266, "grad_norm": 1.8991990058166217, "learning_rate": 2.1066499409029127e-06, "loss": 0.1646, "step": 32126 }, { "epoch": 2.387736900780379, "grad_norm": 2.1845319276063258, "learning_rate": 2.1061573420999604e-06, "loss": 0.3245, "step": 32127 }, { "epoch": 2.3878112225938315, "grad_norm": 2.9992096402704678, "learning_rate": 2.1056647941171603e-06, "loss": 0.3225, "step": 32128 }, { "epoch": 2.3878855444072835, "grad_norm": 2.0038824605091956, "learning_rate": 2.105172296957677e-06, "loss": 0.2852, "step": 32129 }, { "epoch": 2.387959866220736, "grad_norm": 2.238907706236388, "learning_rate": 2.104679850624688e-06, "loss": 0.284, "step": 32130 }, { "epoch": 2.388034188034188, "grad_norm": 2.536445694421104, "learning_rate": 2.1041874551213604e-06, "loss": 0.3416, "step": 32131 }, { "epoch": 2.3881085098476404, "grad_norm": 3.4108957309860712, "learning_rate": 2.1036951104508618e-06, "loss": 0.3761, "step": 32132 }, { "epoch": 2.3881828316610925, "grad_norm": 2.2655977190965735, "learning_rate": 2.1032028166163678e-06, "loss": 0.2604, "step": 32133 }, { "epoch": 2.388257153474545, "grad_norm": 2.9915450624418947, "learning_rate": 2.1027105736210394e-06, "loss": 0.3534, "step": 32134 }, { "epoch": 2.388331475287997, "grad_norm": 1.9730957651106324, "learning_rate": 2.102218381468055e-06, "loss": 0.3123, "step": 32135 }, { "epoch": 2.3884057971014494, "grad_norm": 2.100167339641, "learning_rate": 2.101726240160579e-06, "loss": 0.2649, "step": 32136 }, { "epoch": 2.3884801189149014, "grad_norm": 2.1777796277114283, "learning_rate": 2.1012341497017786e-06, "loss": 0.2755, "step": 32137 }, { "epoch": 2.388554440728354, "grad_norm": 2.3397797890814815, "learning_rate": 2.1007421100948234e-06, "loss": 0.2586, "step": 32138 }, { "epoch": 2.388628762541806, "grad_norm": 2.6006965665219486, "learning_rate": 2.1002501213428783e-06, "loss": 0.4188, "step": 32139 }, { "epoch": 2.3887030843552584, "grad_norm": 2.4943086488241306, "learning_rate": 2.0997581834491153e-06, "loss": 0.3076, "step": 32140 }, { "epoch": 2.3887774061687104, "grad_norm": 1.9132713249785613, "learning_rate": 2.0992662964166967e-06, "loss": 0.2495, "step": 32141 }, { "epoch": 2.388851727982163, "grad_norm": 1.7380071963326322, "learning_rate": 2.0987744602487946e-06, "loss": 0.1478, "step": 32142 }, { "epoch": 2.388926049795615, "grad_norm": 2.0963323723790266, "learning_rate": 2.098282674948572e-06, "loss": 0.2314, "step": 32143 }, { "epoch": 2.3890003716090673, "grad_norm": 2.375859382781699, "learning_rate": 2.0977909405191933e-06, "loss": 0.2905, "step": 32144 }, { "epoch": 2.3890746934225193, "grad_norm": 1.9083931518379973, "learning_rate": 2.0972992569638283e-06, "loss": 0.2074, "step": 32145 }, { "epoch": 2.389149015235972, "grad_norm": 2.4370999573089054, "learning_rate": 2.0968076242856416e-06, "loss": 0.3603, "step": 32146 }, { "epoch": 2.3892233370494242, "grad_norm": 2.2050489605958745, "learning_rate": 2.096316042487797e-06, "loss": 0.2525, "step": 32147 }, { "epoch": 2.3892976588628763, "grad_norm": 5.577292752397164, "learning_rate": 2.09582451157346e-06, "loss": 0.3379, "step": 32148 }, { "epoch": 2.3893719806763283, "grad_norm": 2.79622006843355, "learning_rate": 2.095333031545792e-06, "loss": 0.3211, "step": 32149 }, { "epoch": 2.3894463024897807, "grad_norm": 2.3759359842378505, "learning_rate": 2.094841602407962e-06, "loss": 0.3308, "step": 32150 }, { "epoch": 2.389520624303233, "grad_norm": 2.940836316367991, "learning_rate": 2.0943502241631297e-06, "loss": 0.2782, "step": 32151 }, { "epoch": 2.389594946116685, "grad_norm": 2.6673661673612163, "learning_rate": 2.0938588968144624e-06, "loss": 0.28, "step": 32152 }, { "epoch": 2.3896692679301377, "grad_norm": 2.2862373343397486, "learning_rate": 2.093367620365121e-06, "loss": 0.322, "step": 32153 }, { "epoch": 2.3897435897435897, "grad_norm": 2.3861748468326005, "learning_rate": 2.092876394818266e-06, "loss": 0.2932, "step": 32154 }, { "epoch": 2.389817911557042, "grad_norm": 2.4342597870202622, "learning_rate": 2.0923852201770666e-06, "loss": 0.2693, "step": 32155 }, { "epoch": 2.389892233370494, "grad_norm": 2.6755898973389773, "learning_rate": 2.0918940964446766e-06, "loss": 0.2912, "step": 32156 }, { "epoch": 2.3899665551839466, "grad_norm": 2.2890762292607136, "learning_rate": 2.0914030236242655e-06, "loss": 0.3259, "step": 32157 }, { "epoch": 2.3900408769973986, "grad_norm": 2.5512980070100775, "learning_rate": 2.090912001718991e-06, "loss": 0.2766, "step": 32158 }, { "epoch": 2.390115198810851, "grad_norm": 2.6648065917636075, "learning_rate": 2.090421030732015e-06, "loss": 0.3647, "step": 32159 }, { "epoch": 2.390189520624303, "grad_norm": 2.658547201972697, "learning_rate": 2.0899301106664983e-06, "loss": 0.3628, "step": 32160 }, { "epoch": 2.3902638424377556, "grad_norm": 1.9748347509192807, "learning_rate": 2.089439241525598e-06, "loss": 0.2743, "step": 32161 }, { "epoch": 2.3903381642512076, "grad_norm": 1.8425953889166375, "learning_rate": 2.0889484233124802e-06, "loss": 0.1964, "step": 32162 }, { "epoch": 2.39041248606466, "grad_norm": 1.8874966925859153, "learning_rate": 2.0884576560302993e-06, "loss": 0.2302, "step": 32163 }, { "epoch": 2.390486807878112, "grad_norm": 2.3645497215499742, "learning_rate": 2.08796693968222e-06, "loss": 0.2704, "step": 32164 }, { "epoch": 2.3905611296915645, "grad_norm": 2.7527346154251986, "learning_rate": 2.0874762742713983e-06, "loss": 0.3275, "step": 32165 }, { "epoch": 2.3906354515050166, "grad_norm": 2.5178969912652067, "learning_rate": 2.086985659800992e-06, "loss": 0.2785, "step": 32166 }, { "epoch": 2.390709773318469, "grad_norm": 2.0982715465841433, "learning_rate": 2.086495096274165e-06, "loss": 0.3133, "step": 32167 }, { "epoch": 2.390784095131921, "grad_norm": 2.275081863654418, "learning_rate": 2.08600458369407e-06, "loss": 0.231, "step": 32168 }, { "epoch": 2.3908584169453735, "grad_norm": 1.914169343081007, "learning_rate": 2.0855141220638654e-06, "loss": 0.2435, "step": 32169 }, { "epoch": 2.390932738758826, "grad_norm": 2.3335644561385664, "learning_rate": 2.0850237113867155e-06, "loss": 0.2315, "step": 32170 }, { "epoch": 2.391007060572278, "grad_norm": 2.6444434672648853, "learning_rate": 2.084533351665767e-06, "loss": 0.2671, "step": 32171 }, { "epoch": 2.39108138238573, "grad_norm": 2.4975273407865686, "learning_rate": 2.0840430429041847e-06, "loss": 0.2667, "step": 32172 }, { "epoch": 2.3911557041991824, "grad_norm": 2.130297709426666, "learning_rate": 2.0835527851051197e-06, "loss": 0.2427, "step": 32173 }, { "epoch": 2.391230026012635, "grad_norm": 2.6247163963639317, "learning_rate": 2.083062578271734e-06, "loss": 0.2627, "step": 32174 }, { "epoch": 2.391304347826087, "grad_norm": 2.6310673413164682, "learning_rate": 2.0825724224071796e-06, "loss": 0.2589, "step": 32175 }, { "epoch": 2.3913786696395394, "grad_norm": 2.5527537594726426, "learning_rate": 2.082082317514612e-06, "loss": 0.3196, "step": 32176 }, { "epoch": 2.3914529914529914, "grad_norm": 2.1968696486153343, "learning_rate": 2.0815922635971897e-06, "loss": 0.2974, "step": 32177 }, { "epoch": 2.391527313266444, "grad_norm": 2.3788392657351007, "learning_rate": 2.081102260658062e-06, "loss": 0.308, "step": 32178 }, { "epoch": 2.391601635079896, "grad_norm": 2.3571631987823505, "learning_rate": 2.0806123087003906e-06, "loss": 0.3089, "step": 32179 }, { "epoch": 2.3916759568933483, "grad_norm": 4.016488030386634, "learning_rate": 2.0801224077273263e-06, "loss": 0.3225, "step": 32180 }, { "epoch": 2.3917502787068003, "grad_norm": 2.422881106259014, "learning_rate": 2.0796325577420227e-06, "loss": 0.2567, "step": 32181 }, { "epoch": 2.391824600520253, "grad_norm": 2.026969207983954, "learning_rate": 2.079142758747634e-06, "loss": 0.2474, "step": 32182 }, { "epoch": 2.391898922333705, "grad_norm": 2.376432875231906, "learning_rate": 2.07865301074731e-06, "loss": 0.2162, "step": 32183 }, { "epoch": 2.3919732441471573, "grad_norm": 2.0744666866039276, "learning_rate": 2.0781633137442104e-06, "loss": 0.2575, "step": 32184 }, { "epoch": 2.3920475659606093, "grad_norm": 2.3017423473380925, "learning_rate": 2.077673667741481e-06, "loss": 0.277, "step": 32185 }, { "epoch": 2.3921218877740618, "grad_norm": 2.352584894180394, "learning_rate": 2.077184072742281e-06, "loss": 0.2076, "step": 32186 }, { "epoch": 2.3921962095875138, "grad_norm": 2.53648467318042, "learning_rate": 2.0766945287497584e-06, "loss": 0.2586, "step": 32187 }, { "epoch": 2.3922705314009662, "grad_norm": 1.7782473718340268, "learning_rate": 2.0762050357670627e-06, "loss": 0.1908, "step": 32188 }, { "epoch": 2.3923448532144183, "grad_norm": 2.308434308216217, "learning_rate": 2.0757155937973507e-06, "loss": 0.2615, "step": 32189 }, { "epoch": 2.3924191750278707, "grad_norm": 2.709884722564617, "learning_rate": 2.0752262028437698e-06, "loss": 0.326, "step": 32190 }, { "epoch": 2.3924934968413227, "grad_norm": 2.8535879688442662, "learning_rate": 2.074736862909469e-06, "loss": 0.2471, "step": 32191 }, { "epoch": 2.392567818654775, "grad_norm": 2.3950625202427487, "learning_rate": 2.0742475739976063e-06, "loss": 0.2769, "step": 32192 }, { "epoch": 2.3926421404682277, "grad_norm": 1.6643832839843533, "learning_rate": 2.0737583361113213e-06, "loss": 0.1919, "step": 32193 }, { "epoch": 2.3927164622816797, "grad_norm": 2.858898980236219, "learning_rate": 2.0732691492537714e-06, "loss": 0.2417, "step": 32194 }, { "epoch": 2.3927907840951317, "grad_norm": 2.4794373246497385, "learning_rate": 2.0727800134281016e-06, "loss": 0.3146, "step": 32195 }, { "epoch": 2.392865105908584, "grad_norm": 1.8149854138264732, "learning_rate": 2.072290928637465e-06, "loss": 0.1914, "step": 32196 }, { "epoch": 2.3929394277220366, "grad_norm": 2.413340432744933, "learning_rate": 2.0718018948850072e-06, "loss": 0.2496, "step": 32197 }, { "epoch": 2.3930137495354886, "grad_norm": 2.608868135239798, "learning_rate": 2.071312912173876e-06, "loss": 0.3287, "step": 32198 }, { "epoch": 2.393088071348941, "grad_norm": 2.488683304114372, "learning_rate": 2.0708239805072227e-06, "loss": 0.273, "step": 32199 }, { "epoch": 2.393162393162393, "grad_norm": 1.9920548008785293, "learning_rate": 2.070335099888191e-06, "loss": 0.2579, "step": 32200 }, { "epoch": 2.3932367149758456, "grad_norm": 1.935116410472549, "learning_rate": 2.069846270319933e-06, "loss": 0.2147, "step": 32201 }, { "epoch": 2.3933110367892976, "grad_norm": 2.421706443679656, "learning_rate": 2.0693574918055924e-06, "loss": 0.3315, "step": 32202 }, { "epoch": 2.39338535860275, "grad_norm": 2.1817011413962755, "learning_rate": 2.068868764348315e-06, "loss": 0.2501, "step": 32203 }, { "epoch": 2.393459680416202, "grad_norm": 2.4768493340077495, "learning_rate": 2.068380087951255e-06, "loss": 0.2858, "step": 32204 }, { "epoch": 2.3935340022296545, "grad_norm": 2.251962464640321, "learning_rate": 2.067891462617546e-06, "loss": 0.2973, "step": 32205 }, { "epoch": 2.3936083240431065, "grad_norm": 2.064173033532634, "learning_rate": 2.0674028883503428e-06, "loss": 0.2567, "step": 32206 }, { "epoch": 2.393682645856559, "grad_norm": 1.9929793895409298, "learning_rate": 2.066914365152789e-06, "loss": 0.236, "step": 32207 }, { "epoch": 2.393756967670011, "grad_norm": 2.9235798913507303, "learning_rate": 2.066425893028026e-06, "loss": 0.3314, "step": 32208 }, { "epoch": 2.3938312894834635, "grad_norm": 1.6377097292467886, "learning_rate": 2.0659374719792036e-06, "loss": 0.1618, "step": 32209 }, { "epoch": 2.3939056112969155, "grad_norm": 1.976944236293898, "learning_rate": 2.0654491020094624e-06, "loss": 0.2506, "step": 32210 }, { "epoch": 2.393979933110368, "grad_norm": 2.1067155197553125, "learning_rate": 2.0649607831219497e-06, "loss": 0.2823, "step": 32211 }, { "epoch": 2.39405425492382, "grad_norm": 2.184013440865205, "learning_rate": 2.0644725153198085e-06, "loss": 0.3292, "step": 32212 }, { "epoch": 2.3941285767372724, "grad_norm": 1.925541381286533, "learning_rate": 2.0639842986061788e-06, "loss": 0.2258, "step": 32213 }, { "epoch": 2.3942028985507244, "grad_norm": 2.249879062863234, "learning_rate": 2.0634961329842084e-06, "loss": 0.2827, "step": 32214 }, { "epoch": 2.394277220364177, "grad_norm": 2.193788481430356, "learning_rate": 2.0630080184570387e-06, "loss": 0.2926, "step": 32215 }, { "epoch": 2.3943515421776294, "grad_norm": 2.584451645630322, "learning_rate": 2.0625199550278117e-06, "loss": 0.2985, "step": 32216 }, { "epoch": 2.3944258639910814, "grad_norm": 2.1781281984383454, "learning_rate": 2.062031942699666e-06, "loss": 0.3015, "step": 32217 }, { "epoch": 2.3945001858045334, "grad_norm": 2.3272208367672564, "learning_rate": 2.06154398147575e-06, "loss": 0.3361, "step": 32218 }, { "epoch": 2.394574507617986, "grad_norm": 2.2822086985430943, "learning_rate": 2.061056071359202e-06, "loss": 0.3178, "step": 32219 }, { "epoch": 2.3946488294314383, "grad_norm": 3.2530175769333343, "learning_rate": 2.06056821235316e-06, "loss": 0.3093, "step": 32220 }, { "epoch": 2.3947231512448903, "grad_norm": 2.1354189016744507, "learning_rate": 2.0600804044607716e-06, "loss": 0.2551, "step": 32221 }, { "epoch": 2.394797473058343, "grad_norm": 2.473065454382725, "learning_rate": 2.0595926476851703e-06, "loss": 0.2786, "step": 32222 }, { "epoch": 2.394871794871795, "grad_norm": 2.07722185936147, "learning_rate": 2.0591049420295016e-06, "loss": 0.2333, "step": 32223 }, { "epoch": 2.3949461166852473, "grad_norm": 2.451059269981563, "learning_rate": 2.0586172874969036e-06, "loss": 0.2698, "step": 32224 }, { "epoch": 2.3950204384986993, "grad_norm": 3.25347867684438, "learning_rate": 2.058129684090513e-06, "loss": 0.2822, "step": 32225 }, { "epoch": 2.3950947603121517, "grad_norm": 2.9350529789530238, "learning_rate": 2.0576421318134765e-06, "loss": 0.3558, "step": 32226 }, { "epoch": 2.3951690821256038, "grad_norm": 2.225843431123598, "learning_rate": 2.0571546306689226e-06, "loss": 0.3035, "step": 32227 }, { "epoch": 2.395243403939056, "grad_norm": 2.144566550786833, "learning_rate": 2.056667180659998e-06, "loss": 0.2435, "step": 32228 }, { "epoch": 2.3953177257525082, "grad_norm": 2.223337767688304, "learning_rate": 2.0561797817898376e-06, "loss": 0.3006, "step": 32229 }, { "epoch": 2.3953920475659607, "grad_norm": 2.132037694564023, "learning_rate": 2.055692434061577e-06, "loss": 0.2722, "step": 32230 }, { "epoch": 2.3954663693794127, "grad_norm": 1.8411072392716468, "learning_rate": 2.05520513747836e-06, "loss": 0.183, "step": 32231 }, { "epoch": 2.395540691192865, "grad_norm": 2.506913456917684, "learning_rate": 2.0547178920433165e-06, "loss": 0.3192, "step": 32232 }, { "epoch": 2.395615013006317, "grad_norm": 1.759022973245661, "learning_rate": 2.0542306977595895e-06, "loss": 0.2077, "step": 32233 }, { "epoch": 2.3956893348197696, "grad_norm": 2.754770807881997, "learning_rate": 2.053743554630313e-06, "loss": 0.2607, "step": 32234 }, { "epoch": 2.3957636566332217, "grad_norm": 2.1510883774926293, "learning_rate": 2.0532564626586203e-06, "loss": 0.2758, "step": 32235 }, { "epoch": 2.395837978446674, "grad_norm": 2.712408559001155, "learning_rate": 2.0527694218476534e-06, "loss": 0.2974, "step": 32236 }, { "epoch": 2.395912300260126, "grad_norm": 2.2434935530543596, "learning_rate": 2.052282432200542e-06, "loss": 0.2703, "step": 32237 }, { "epoch": 2.3959866220735786, "grad_norm": 2.530245373059084, "learning_rate": 2.0517954937204287e-06, "loss": 0.3031, "step": 32238 }, { "epoch": 2.396060943887031, "grad_norm": 2.6117571065465666, "learning_rate": 2.051308606410438e-06, "loss": 0.2386, "step": 32239 }, { "epoch": 2.396135265700483, "grad_norm": 4.241575176680304, "learning_rate": 2.050821770273713e-06, "loss": 0.2716, "step": 32240 }, { "epoch": 2.396209587513935, "grad_norm": 2.240638342886104, "learning_rate": 2.050334985313386e-06, "loss": 0.2549, "step": 32241 }, { "epoch": 2.3962839093273876, "grad_norm": 2.736425945016524, "learning_rate": 2.0498482515325856e-06, "loss": 0.3795, "step": 32242 }, { "epoch": 2.39635823114084, "grad_norm": 2.69101423343839, "learning_rate": 2.0493615689344537e-06, "loss": 0.2777, "step": 32243 }, { "epoch": 2.396432552954292, "grad_norm": 2.191177339342479, "learning_rate": 2.048874937522116e-06, "loss": 0.2402, "step": 32244 }, { "epoch": 2.3965068747677445, "grad_norm": 2.878884745254167, "learning_rate": 2.048388357298712e-06, "loss": 0.3832, "step": 32245 }, { "epoch": 2.3965811965811965, "grad_norm": 2.1061736019026607, "learning_rate": 2.0479018282673712e-06, "loss": 0.2548, "step": 32246 }, { "epoch": 2.396655518394649, "grad_norm": 2.243733051088016, "learning_rate": 2.0474153504312233e-06, "loss": 0.2934, "step": 32247 }, { "epoch": 2.396729840208101, "grad_norm": 2.3494101169965185, "learning_rate": 2.0469289237934054e-06, "loss": 0.326, "step": 32248 }, { "epoch": 2.3968041620215534, "grad_norm": 2.4060372073519396, "learning_rate": 2.046442548357046e-06, "loss": 0.3279, "step": 32249 }, { "epoch": 2.3968784838350055, "grad_norm": 2.1018448304300614, "learning_rate": 2.045956224125276e-06, "loss": 0.2765, "step": 32250 }, { "epoch": 2.396952805648458, "grad_norm": 2.3726969000009874, "learning_rate": 2.045469951101229e-06, "loss": 0.3406, "step": 32251 }, { "epoch": 2.39702712746191, "grad_norm": 2.5332196825259157, "learning_rate": 2.0449837292880294e-06, "loss": 0.2769, "step": 32252 }, { "epoch": 2.3971014492753624, "grad_norm": 1.8252613539939215, "learning_rate": 2.044497558688816e-06, "loss": 0.2234, "step": 32253 }, { "epoch": 2.3971757710888144, "grad_norm": 2.132712219793122, "learning_rate": 2.04401143930671e-06, "loss": 0.2546, "step": 32254 }, { "epoch": 2.397250092902267, "grad_norm": 2.1290819561978336, "learning_rate": 2.04352537114485e-06, "loss": 0.275, "step": 32255 }, { "epoch": 2.397324414715719, "grad_norm": 2.7756086793755577, "learning_rate": 2.0430393542063597e-06, "loss": 0.3337, "step": 32256 }, { "epoch": 2.3973987365291713, "grad_norm": 2.692231605084658, "learning_rate": 2.042553388494367e-06, "loss": 0.3292, "step": 32257 }, { "epoch": 2.397473058342624, "grad_norm": 2.4102464485210113, "learning_rate": 2.042067474012005e-06, "loss": 0.2827, "step": 32258 }, { "epoch": 2.397547380156076, "grad_norm": 2.294309299921592, "learning_rate": 2.0415816107623975e-06, "loss": 0.3139, "step": 32259 }, { "epoch": 2.397621701969528, "grad_norm": 2.364769351095236, "learning_rate": 2.04109579874868e-06, "loss": 0.3333, "step": 32260 }, { "epoch": 2.3976960237829803, "grad_norm": 2.403715796136944, "learning_rate": 2.040610037973969e-06, "loss": 0.2764, "step": 32261 }, { "epoch": 2.3977703455964328, "grad_norm": 3.1751957966281883, "learning_rate": 2.0401243284414006e-06, "loss": 0.261, "step": 32262 }, { "epoch": 2.3978446674098848, "grad_norm": 2.3816043006924277, "learning_rate": 2.0396386701540984e-06, "loss": 0.2807, "step": 32263 }, { "epoch": 2.3979189892233372, "grad_norm": 2.3753290959776714, "learning_rate": 2.0391530631151867e-06, "loss": 0.3063, "step": 32264 }, { "epoch": 2.3979933110367893, "grad_norm": 2.9845522326455662, "learning_rate": 2.0386675073277984e-06, "loss": 0.2789, "step": 32265 }, { "epoch": 2.3980676328502417, "grad_norm": 2.486796146485065, "learning_rate": 2.0381820027950518e-06, "loss": 0.3401, "step": 32266 }, { "epoch": 2.3981419546636937, "grad_norm": 2.3503024346638868, "learning_rate": 2.0376965495200783e-06, "loss": 0.2239, "step": 32267 }, { "epoch": 2.398216276477146, "grad_norm": 2.598405583356664, "learning_rate": 2.037211147506003e-06, "loss": 0.2811, "step": 32268 }, { "epoch": 2.398290598290598, "grad_norm": 2.48316607787985, "learning_rate": 2.036725796755945e-06, "loss": 0.3179, "step": 32269 }, { "epoch": 2.3983649201040507, "grad_norm": 2.4057888384194333, "learning_rate": 2.0362404972730376e-06, "loss": 0.2561, "step": 32270 }, { "epoch": 2.3984392419175027, "grad_norm": 2.3614154061248582, "learning_rate": 2.035755249060399e-06, "loss": 0.2771, "step": 32271 }, { "epoch": 2.398513563730955, "grad_norm": 2.08217826201503, "learning_rate": 2.0352700521211554e-06, "loss": 0.2561, "step": 32272 }, { "epoch": 2.398587885544407, "grad_norm": 2.506426295702745, "learning_rate": 2.03478490645843e-06, "loss": 0.3456, "step": 32273 }, { "epoch": 2.3986622073578596, "grad_norm": 2.2575192264440878, "learning_rate": 2.034299812075343e-06, "loss": 0.2944, "step": 32274 }, { "epoch": 2.3987365291713116, "grad_norm": 1.7353118718289975, "learning_rate": 2.0338147689750244e-06, "loss": 0.2796, "step": 32275 }, { "epoch": 2.398810850984764, "grad_norm": 1.8805781017327678, "learning_rate": 2.0333297771605885e-06, "loss": 0.2323, "step": 32276 }, { "epoch": 2.398885172798216, "grad_norm": 1.7697248888627983, "learning_rate": 2.032844836635166e-06, "loss": 0.2145, "step": 32277 }, { "epoch": 2.3989594946116686, "grad_norm": 2.470342908505457, "learning_rate": 2.032359947401875e-06, "loss": 0.3007, "step": 32278 }, { "epoch": 2.3990338164251206, "grad_norm": 2.858602233869228, "learning_rate": 2.0318751094638346e-06, "loss": 0.2491, "step": 32279 }, { "epoch": 2.399108138238573, "grad_norm": 2.5842645184156976, "learning_rate": 2.0313903228241715e-06, "loss": 0.3882, "step": 32280 }, { "epoch": 2.3991824600520255, "grad_norm": 1.9828358769543288, "learning_rate": 2.0309055874860008e-06, "loss": 0.2842, "step": 32281 }, { "epoch": 2.3992567818654775, "grad_norm": 2.193683587192136, "learning_rate": 2.03042090345245e-06, "loss": 0.3119, "step": 32282 }, { "epoch": 2.3993311036789295, "grad_norm": 2.3506173343266346, "learning_rate": 2.0299362707266356e-06, "loss": 0.3306, "step": 32283 }, { "epoch": 2.399405425492382, "grad_norm": 2.2736210232406706, "learning_rate": 2.029451689311678e-06, "loss": 0.2695, "step": 32284 }, { "epoch": 2.3994797473058345, "grad_norm": 2.810589183660174, "learning_rate": 2.0289671592106964e-06, "loss": 0.2979, "step": 32285 }, { "epoch": 2.3995540691192865, "grad_norm": 2.161849500031489, "learning_rate": 2.0284826804268076e-06, "loss": 0.3088, "step": 32286 }, { "epoch": 2.399628390932739, "grad_norm": 2.3090737610132566, "learning_rate": 2.027998252963137e-06, "loss": 0.2612, "step": 32287 }, { "epoch": 2.399702712746191, "grad_norm": 2.1747656914628464, "learning_rate": 2.0275138768227976e-06, "loss": 0.3099, "step": 32288 }, { "epoch": 2.3997770345596434, "grad_norm": 2.3822747276743614, "learning_rate": 2.0270295520089124e-06, "loss": 0.2753, "step": 32289 }, { "epoch": 2.3998513563730954, "grad_norm": 2.0093841789542375, "learning_rate": 2.0265452785245977e-06, "loss": 0.2426, "step": 32290 }, { "epoch": 2.399925678186548, "grad_norm": 2.5549734655507943, "learning_rate": 2.026061056372968e-06, "loss": 0.294, "step": 32291 }, { "epoch": 2.4, "grad_norm": 2.0555981268502372, "learning_rate": 2.0255768855571457e-06, "loss": 0.2398, "step": 32292 }, { "epoch": 2.4000743218134524, "grad_norm": 4.919469045542795, "learning_rate": 2.0250927660802455e-06, "loss": 0.3288, "step": 32293 }, { "epoch": 2.4001486436269044, "grad_norm": 2.360994381009672, "learning_rate": 2.024608697945385e-06, "loss": 0.321, "step": 32294 }, { "epoch": 2.400222965440357, "grad_norm": 2.7866626926082096, "learning_rate": 2.024124681155679e-06, "loss": 0.288, "step": 32295 }, { "epoch": 2.400297287253809, "grad_norm": 2.4699742102012285, "learning_rate": 2.0236407157142423e-06, "loss": 0.3527, "step": 32296 }, { "epoch": 2.4003716090672613, "grad_norm": 2.449860817517604, "learning_rate": 2.0231568016241954e-06, "loss": 0.271, "step": 32297 }, { "epoch": 2.4004459308807133, "grad_norm": 1.9895360993236948, "learning_rate": 2.022672938888649e-06, "loss": 0.2234, "step": 32298 }, { "epoch": 2.400520252694166, "grad_norm": 2.7795242053412506, "learning_rate": 2.022189127510722e-06, "loss": 0.3466, "step": 32299 }, { "epoch": 2.400594574507618, "grad_norm": 2.5198056501699004, "learning_rate": 2.021705367493528e-06, "loss": 0.2632, "step": 32300 }, { "epoch": 2.4006688963210703, "grad_norm": 3.162257996687269, "learning_rate": 2.021221658840179e-06, "loss": 0.2797, "step": 32301 }, { "epoch": 2.4007432181345223, "grad_norm": 2.656405042603623, "learning_rate": 2.0207380015537923e-06, "loss": 0.3367, "step": 32302 }, { "epoch": 2.4008175399479748, "grad_norm": 2.5165162396138725, "learning_rate": 2.020254395637479e-06, "loss": 0.3076, "step": 32303 }, { "epoch": 2.400891861761427, "grad_norm": 1.8148081083479641, "learning_rate": 2.019770841094355e-06, "loss": 0.1809, "step": 32304 }, { "epoch": 2.4009661835748792, "grad_norm": 2.5746912060565297, "learning_rate": 2.0192873379275337e-06, "loss": 0.3505, "step": 32305 }, { "epoch": 2.4010405053883312, "grad_norm": 2.82755384189226, "learning_rate": 2.018803886140125e-06, "loss": 0.3835, "step": 32306 }, { "epoch": 2.4011148272017837, "grad_norm": 2.13502500205362, "learning_rate": 2.0183204857352435e-06, "loss": 0.2418, "step": 32307 }, { "epoch": 2.401189149015236, "grad_norm": 2.3277906816275267, "learning_rate": 2.017837136715998e-06, "loss": 0.2201, "step": 32308 }, { "epoch": 2.401263470828688, "grad_norm": 2.1641236485885815, "learning_rate": 2.0173538390855053e-06, "loss": 0.2147, "step": 32309 }, { "epoch": 2.4013377926421406, "grad_norm": 2.178083429567644, "learning_rate": 2.016870592846872e-06, "loss": 0.2742, "step": 32310 }, { "epoch": 2.4014121144555927, "grad_norm": 2.405875357867628, "learning_rate": 2.0163873980032146e-06, "loss": 0.3082, "step": 32311 }, { "epoch": 2.401486436269045, "grad_norm": 2.466336298831246, "learning_rate": 2.0159042545576403e-06, "loss": 0.2384, "step": 32312 }, { "epoch": 2.401560758082497, "grad_norm": 2.4697766874145666, "learning_rate": 2.0154211625132568e-06, "loss": 0.2982, "step": 32313 }, { "epoch": 2.4016350798959496, "grad_norm": 2.7535995964428936, "learning_rate": 2.0149381218731814e-06, "loss": 0.2938, "step": 32314 }, { "epoch": 2.4017094017094016, "grad_norm": 2.5934856598778278, "learning_rate": 2.01445513264052e-06, "loss": 0.3084, "step": 32315 }, { "epoch": 2.401783723522854, "grad_norm": 2.2133682221730115, "learning_rate": 2.013972194818379e-06, "loss": 0.3204, "step": 32316 }, { "epoch": 2.401858045336306, "grad_norm": 2.369227094866187, "learning_rate": 2.013489308409876e-06, "loss": 0.3197, "step": 32317 }, { "epoch": 2.4019323671497586, "grad_norm": 2.4571645394254107, "learning_rate": 2.01300647341811e-06, "loss": 0.2724, "step": 32318 }, { "epoch": 2.4020066889632106, "grad_norm": 1.9082006751405196, "learning_rate": 2.0125236898461953e-06, "loss": 0.2274, "step": 32319 }, { "epoch": 2.402081010776663, "grad_norm": 3.072997080564245, "learning_rate": 2.0120409576972366e-06, "loss": 0.3246, "step": 32320 }, { "epoch": 2.402155332590115, "grad_norm": 2.141991298687429, "learning_rate": 2.011558276974346e-06, "loss": 0.2659, "step": 32321 }, { "epoch": 2.4022296544035675, "grad_norm": 3.392034646330388, "learning_rate": 2.011075647680629e-06, "loss": 0.2814, "step": 32322 }, { "epoch": 2.4023039762170195, "grad_norm": 2.8124253280764457, "learning_rate": 2.01059306981919e-06, "loss": 0.3081, "step": 32323 }, { "epoch": 2.402378298030472, "grad_norm": 2.05318083164647, "learning_rate": 2.01011054339314e-06, "loss": 0.2284, "step": 32324 }, { "epoch": 2.402452619843924, "grad_norm": 2.0580804793450516, "learning_rate": 2.0096280684055826e-06, "loss": 0.3073, "step": 32325 }, { "epoch": 2.4025269416573765, "grad_norm": 2.3027170054463326, "learning_rate": 2.009145644859627e-06, "loss": 0.2378, "step": 32326 }, { "epoch": 2.402601263470829, "grad_norm": 2.529750678200306, "learning_rate": 2.0086632727583767e-06, "loss": 0.2781, "step": 32327 }, { "epoch": 2.402675585284281, "grad_norm": 2.28931504468456, "learning_rate": 2.0081809521049377e-06, "loss": 0.2855, "step": 32328 }, { "epoch": 2.402749907097733, "grad_norm": 1.789457934160761, "learning_rate": 2.0076986829024157e-06, "loss": 0.175, "step": 32329 }, { "epoch": 2.4028242289111854, "grad_norm": 3.516278550905026, "learning_rate": 2.0072164651539117e-06, "loss": 0.3342, "step": 32330 }, { "epoch": 2.402898550724638, "grad_norm": 2.234560908933569, "learning_rate": 2.006734298862536e-06, "loss": 0.2532, "step": 32331 }, { "epoch": 2.40297287253809, "grad_norm": 2.306829011402032, "learning_rate": 2.006252184031391e-06, "loss": 0.2772, "step": 32332 }, { "epoch": 2.4030471943515423, "grad_norm": 1.6670648195352276, "learning_rate": 2.005770120663576e-06, "loss": 0.2122, "step": 32333 }, { "epoch": 2.4031215161649944, "grad_norm": 1.8731754854194647, "learning_rate": 2.005288108762201e-06, "loss": 0.2125, "step": 32334 }, { "epoch": 2.403195837978447, "grad_norm": 1.6998634197321596, "learning_rate": 2.004806148330365e-06, "loss": 0.2289, "step": 32335 }, { "epoch": 2.403270159791899, "grad_norm": 2.494025736594641, "learning_rate": 2.0043242393711728e-06, "loss": 0.241, "step": 32336 }, { "epoch": 2.4033444816053513, "grad_norm": 2.002557766089098, "learning_rate": 2.0038423818877275e-06, "loss": 0.2732, "step": 32337 }, { "epoch": 2.4034188034188033, "grad_norm": 2.603389557133927, "learning_rate": 2.0033605758831275e-06, "loss": 0.2752, "step": 32338 }, { "epoch": 2.403493125232256, "grad_norm": 2.621741624297911, "learning_rate": 2.002878821360482e-06, "loss": 0.3053, "step": 32339 }, { "epoch": 2.403567447045708, "grad_norm": 2.5867600302479126, "learning_rate": 2.0023971183228818e-06, "loss": 0.3152, "step": 32340 }, { "epoch": 2.4036417688591603, "grad_norm": 3.0120032577093747, "learning_rate": 2.0019154667734375e-06, "loss": 0.3005, "step": 32341 }, { "epoch": 2.4037160906726123, "grad_norm": 2.1097719706772375, "learning_rate": 2.0014338667152446e-06, "loss": 0.2584, "step": 32342 }, { "epoch": 2.4037904124860647, "grad_norm": 2.7857383180758104, "learning_rate": 2.0009523181514067e-06, "loss": 0.3568, "step": 32343 }, { "epoch": 2.4038647342995167, "grad_norm": 2.4085729518414327, "learning_rate": 2.000470821085023e-06, "loss": 0.2493, "step": 32344 }, { "epoch": 2.403939056112969, "grad_norm": 2.3995477672362457, "learning_rate": 1.99998937551919e-06, "loss": 0.2925, "step": 32345 }, { "epoch": 2.4040133779264212, "grad_norm": 2.305989833346134, "learning_rate": 1.999507981457014e-06, "loss": 0.2619, "step": 32346 }, { "epoch": 2.4040876997398737, "grad_norm": 1.9621462843485409, "learning_rate": 1.999026638901588e-06, "loss": 0.2132, "step": 32347 }, { "epoch": 2.4041620215533257, "grad_norm": 2.7580144873925083, "learning_rate": 1.9985453478560147e-06, "loss": 0.2723, "step": 32348 }, { "epoch": 2.404236343366778, "grad_norm": 2.661992118501903, "learning_rate": 1.998064108323392e-06, "loss": 0.3595, "step": 32349 }, { "epoch": 2.4043106651802306, "grad_norm": 2.3225794738063197, "learning_rate": 1.9975829203068153e-06, "loss": 0.222, "step": 32350 }, { "epoch": 2.4043849869936826, "grad_norm": 2.219340306480708, "learning_rate": 1.997101783809389e-06, "loss": 0.2058, "step": 32351 }, { "epoch": 2.4044593088071347, "grad_norm": 2.4371252384610735, "learning_rate": 1.996620698834201e-06, "loss": 0.2961, "step": 32352 }, { "epoch": 2.404533630620587, "grad_norm": 2.2526449624808085, "learning_rate": 1.996139665384357e-06, "loss": 0.277, "step": 32353 }, { "epoch": 2.4046079524340396, "grad_norm": 2.406354565658555, "learning_rate": 1.99565868346295e-06, "loss": 0.2583, "step": 32354 }, { "epoch": 2.4046822742474916, "grad_norm": 2.176748672074417, "learning_rate": 1.9951777530730744e-06, "loss": 0.2304, "step": 32355 }, { "epoch": 2.404756596060944, "grad_norm": 2.0613520089867525, "learning_rate": 1.994696874217832e-06, "loss": 0.2194, "step": 32356 }, { "epoch": 2.404830917874396, "grad_norm": 3.6624324903455694, "learning_rate": 1.9942160469003125e-06, "loss": 0.4105, "step": 32357 }, { "epoch": 2.4049052396878485, "grad_norm": 2.2673445645983694, "learning_rate": 1.993735271123618e-06, "loss": 0.2704, "step": 32358 }, { "epoch": 2.4049795615013005, "grad_norm": 2.4054834684704485, "learning_rate": 1.9932545468908395e-06, "loss": 0.2517, "step": 32359 }, { "epoch": 2.405053883314753, "grad_norm": 2.4078029139154067, "learning_rate": 1.9927738742050706e-06, "loss": 0.2704, "step": 32360 }, { "epoch": 2.405128205128205, "grad_norm": 2.3528466298670376, "learning_rate": 1.9922932530694118e-06, "loss": 0.2667, "step": 32361 }, { "epoch": 2.4052025269416575, "grad_norm": 1.862135179721371, "learning_rate": 1.991812683486952e-06, "loss": 0.2318, "step": 32362 }, { "epoch": 2.4052768487551095, "grad_norm": 2.622976358705722, "learning_rate": 1.9913321654607876e-06, "loss": 0.2697, "step": 32363 }, { "epoch": 2.405351170568562, "grad_norm": 3.0115228106632337, "learning_rate": 1.9908516989940073e-06, "loss": 0.3833, "step": 32364 }, { "epoch": 2.405425492382014, "grad_norm": 2.9541113763077953, "learning_rate": 1.9903712840897127e-06, "loss": 0.2999, "step": 32365 }, { "epoch": 2.4054998141954664, "grad_norm": 2.8063824763958993, "learning_rate": 1.9898909207509907e-06, "loss": 0.3031, "step": 32366 }, { "epoch": 2.4055741360089185, "grad_norm": 2.330832770467465, "learning_rate": 1.9894106089809327e-06, "loss": 0.2681, "step": 32367 }, { "epoch": 2.405648457822371, "grad_norm": 2.4959559749334974, "learning_rate": 1.9889303487826374e-06, "loss": 0.2835, "step": 32368 }, { "epoch": 2.405722779635823, "grad_norm": 3.007535774471791, "learning_rate": 1.9884501401591893e-06, "loss": 0.2453, "step": 32369 }, { "epoch": 2.4057971014492754, "grad_norm": 2.3770968112650954, "learning_rate": 1.9879699831136866e-06, "loss": 0.3203, "step": 32370 }, { "epoch": 2.4058714232627274, "grad_norm": 2.11119213904685, "learning_rate": 1.987489877649217e-06, "loss": 0.256, "step": 32371 }, { "epoch": 2.40594574507618, "grad_norm": 2.16817912871686, "learning_rate": 1.9870098237688695e-06, "loss": 0.233, "step": 32372 }, { "epoch": 2.4060200668896323, "grad_norm": 2.318843884588649, "learning_rate": 1.9865298214757424e-06, "loss": 0.3069, "step": 32373 }, { "epoch": 2.4060943887030843, "grad_norm": 2.136784435059696, "learning_rate": 1.9860498707729146e-06, "loss": 0.22, "step": 32374 }, { "epoch": 2.4061687105165364, "grad_norm": 2.141591620208917, "learning_rate": 1.985569971663485e-06, "loss": 0.2524, "step": 32375 }, { "epoch": 2.406243032329989, "grad_norm": 2.989719791343873, "learning_rate": 1.9850901241505394e-06, "loss": 0.265, "step": 32376 }, { "epoch": 2.4063173541434413, "grad_norm": 2.2925694108905113, "learning_rate": 1.984610328237165e-06, "loss": 0.2899, "step": 32377 }, { "epoch": 2.4063916759568933, "grad_norm": 2.703095263826277, "learning_rate": 1.984130583926458e-06, "loss": 0.3122, "step": 32378 }, { "epoch": 2.4064659977703458, "grad_norm": 2.076800176759543, "learning_rate": 1.983650891221498e-06, "loss": 0.2624, "step": 32379 }, { "epoch": 2.4065403195837978, "grad_norm": 1.7989547568636255, "learning_rate": 1.983171250125381e-06, "loss": 0.2187, "step": 32380 }, { "epoch": 2.4066146413972502, "grad_norm": 2.468829024440814, "learning_rate": 1.9826916606411907e-06, "loss": 0.3285, "step": 32381 }, { "epoch": 2.4066889632107022, "grad_norm": 2.4760877489843782, "learning_rate": 1.9822121227720138e-06, "loss": 0.2285, "step": 32382 }, { "epoch": 2.4067632850241547, "grad_norm": 2.260686289363545, "learning_rate": 1.981732636520941e-06, "loss": 0.2375, "step": 32383 }, { "epoch": 2.4068376068376067, "grad_norm": 2.912082527394828, "learning_rate": 1.981253201891058e-06, "loss": 0.2383, "step": 32384 }, { "epoch": 2.406911928651059, "grad_norm": 2.142024651148959, "learning_rate": 1.980773818885451e-06, "loss": 0.28, "step": 32385 }, { "epoch": 2.406986250464511, "grad_norm": 2.4042290940756517, "learning_rate": 1.980294487507203e-06, "loss": 0.2689, "step": 32386 }, { "epoch": 2.4070605722779637, "grad_norm": 1.8769829207749498, "learning_rate": 1.9798152077594056e-06, "loss": 0.2555, "step": 32387 }, { "epoch": 2.4071348940914157, "grad_norm": 2.1273631534912587, "learning_rate": 1.9793359796451427e-06, "loss": 0.2416, "step": 32388 }, { "epoch": 2.407209215904868, "grad_norm": 2.187609792798488, "learning_rate": 1.9788568031674948e-06, "loss": 0.2126, "step": 32389 }, { "epoch": 2.40728353771832, "grad_norm": 2.352101276357476, "learning_rate": 1.978377678329554e-06, "loss": 0.2776, "step": 32390 }, { "epoch": 2.4073578595317726, "grad_norm": 2.2729163510892096, "learning_rate": 1.9778986051343984e-06, "loss": 0.3529, "step": 32391 }, { "epoch": 2.4074321813452246, "grad_norm": 2.397577257074034, "learning_rate": 1.9774195835851184e-06, "loss": 0.2502, "step": 32392 }, { "epoch": 2.407506503158677, "grad_norm": 2.4973588046124533, "learning_rate": 1.9769406136847947e-06, "loss": 0.2322, "step": 32393 }, { "epoch": 2.407580824972129, "grad_norm": 2.6975959367249502, "learning_rate": 1.976461695436508e-06, "loss": 0.2851, "step": 32394 }, { "epoch": 2.4076551467855816, "grad_norm": 2.5654748473560685, "learning_rate": 1.975982828843347e-06, "loss": 0.3316, "step": 32395 }, { "epoch": 2.407729468599034, "grad_norm": 2.300384620617534, "learning_rate": 1.9755040139083925e-06, "loss": 0.2975, "step": 32396 }, { "epoch": 2.407803790412486, "grad_norm": 2.547102906605381, "learning_rate": 1.975025250634728e-06, "loss": 0.3289, "step": 32397 }, { "epoch": 2.4078781122259385, "grad_norm": 2.5263782911807153, "learning_rate": 1.9745465390254324e-06, "loss": 0.2536, "step": 32398 }, { "epoch": 2.4079524340393905, "grad_norm": 2.04571809099789, "learning_rate": 1.974067879083589e-06, "loss": 0.2537, "step": 32399 }, { "epoch": 2.408026755852843, "grad_norm": 2.5780700061418216, "learning_rate": 1.9735892708122816e-06, "loss": 0.3347, "step": 32400 }, { "epoch": 2.408101077666295, "grad_norm": 2.305686576318621, "learning_rate": 1.9731107142145877e-06, "loss": 0.2424, "step": 32401 }, { "epoch": 2.4081753994797475, "grad_norm": 2.373744476597311, "learning_rate": 1.9726322092935933e-06, "loss": 0.2764, "step": 32402 }, { "epoch": 2.4082497212931995, "grad_norm": 2.0352593444840985, "learning_rate": 1.9721537560523775e-06, "loss": 0.2237, "step": 32403 }, { "epoch": 2.408324043106652, "grad_norm": 2.0612809400019114, "learning_rate": 1.971675354494015e-06, "loss": 0.2124, "step": 32404 }, { "epoch": 2.408398364920104, "grad_norm": 2.1582816125471997, "learning_rate": 1.971197004621592e-06, "loss": 0.2303, "step": 32405 }, { "epoch": 2.4084726867335564, "grad_norm": 2.569915118029696, "learning_rate": 1.970718706438185e-06, "loss": 0.2993, "step": 32406 }, { "epoch": 2.4085470085470084, "grad_norm": 2.9314094235721653, "learning_rate": 1.970240459946879e-06, "loss": 0.2857, "step": 32407 }, { "epoch": 2.408621330360461, "grad_norm": 2.7322401435476045, "learning_rate": 1.969762265150743e-06, "loss": 0.2539, "step": 32408 }, { "epoch": 2.408695652173913, "grad_norm": 2.532666075017885, "learning_rate": 1.9692841220528647e-06, "loss": 0.3089, "step": 32409 }, { "epoch": 2.4087699739873654, "grad_norm": 2.157440751799667, "learning_rate": 1.9688060306563173e-06, "loss": 0.214, "step": 32410 }, { "epoch": 2.4088442958008174, "grad_norm": 2.776003091625991, "learning_rate": 1.968327990964177e-06, "loss": 0.2595, "step": 32411 }, { "epoch": 2.40891861761427, "grad_norm": 2.1146565787505534, "learning_rate": 1.967850002979528e-06, "loss": 0.2293, "step": 32412 }, { "epoch": 2.408992939427722, "grad_norm": 2.283168633878439, "learning_rate": 1.967372066705441e-06, "loss": 0.3081, "step": 32413 }, { "epoch": 2.4090672612411743, "grad_norm": 2.8143746785493775, "learning_rate": 1.9668941821449973e-06, "loss": 0.2997, "step": 32414 }, { "epoch": 2.409141583054627, "grad_norm": 2.501180340668335, "learning_rate": 1.9664163493012734e-06, "loss": 0.3401, "step": 32415 }, { "epoch": 2.409215904868079, "grad_norm": 2.269712142199132, "learning_rate": 1.9659385681773414e-06, "loss": 0.293, "step": 32416 }, { "epoch": 2.409290226681531, "grad_norm": 1.914864290568895, "learning_rate": 1.965460838776282e-06, "loss": 0.2082, "step": 32417 }, { "epoch": 2.4093645484949833, "grad_norm": 2.133160137554402, "learning_rate": 1.9649831611011692e-06, "loss": 0.3657, "step": 32418 }, { "epoch": 2.4094388703084357, "grad_norm": 1.5394338210427356, "learning_rate": 1.964505535155078e-06, "loss": 0.1687, "step": 32419 }, { "epoch": 2.4095131921218877, "grad_norm": 2.3718766767679864, "learning_rate": 1.964027960941084e-06, "loss": 0.2379, "step": 32420 }, { "epoch": 2.40958751393534, "grad_norm": 2.597188926705092, "learning_rate": 1.9635504384622574e-06, "loss": 0.323, "step": 32421 }, { "epoch": 2.4096618357487922, "grad_norm": 2.3388502927188424, "learning_rate": 1.9630729677216786e-06, "loss": 0.3022, "step": 32422 }, { "epoch": 2.4097361575622447, "grad_norm": 2.8067826043712483, "learning_rate": 1.962595548722417e-06, "loss": 0.348, "step": 32423 }, { "epoch": 2.4098104793756967, "grad_norm": 1.6916373642475322, "learning_rate": 1.9621181814675503e-06, "loss": 0.2314, "step": 32424 }, { "epoch": 2.409884801189149, "grad_norm": 2.1813683405085094, "learning_rate": 1.9616408659601503e-06, "loss": 0.2267, "step": 32425 }, { "epoch": 2.409959123002601, "grad_norm": 2.2804654331811283, "learning_rate": 1.9611636022032865e-06, "loss": 0.3111, "step": 32426 }, { "epoch": 2.4100334448160536, "grad_norm": 2.7716233078633565, "learning_rate": 1.960686390200037e-06, "loss": 0.3052, "step": 32427 }, { "epoch": 2.4101077666295057, "grad_norm": 2.481034411723218, "learning_rate": 1.960209229953468e-06, "loss": 0.3535, "step": 32428 }, { "epoch": 2.410182088442958, "grad_norm": 2.267728853124237, "learning_rate": 1.959732121466659e-06, "loss": 0.2669, "step": 32429 }, { "epoch": 2.41025641025641, "grad_norm": 2.2797690707853353, "learning_rate": 1.9592550647426756e-06, "loss": 0.2667, "step": 32430 }, { "epoch": 2.4103307320698626, "grad_norm": 2.6154682016109922, "learning_rate": 1.9587780597845917e-06, "loss": 0.3027, "step": 32431 }, { "epoch": 2.4104050538833146, "grad_norm": 2.0160824877212504, "learning_rate": 1.958301106595478e-06, "loss": 0.2776, "step": 32432 }, { "epoch": 2.410479375696767, "grad_norm": 2.196273571011122, "learning_rate": 1.9578242051784014e-06, "loss": 0.2759, "step": 32433 }, { "epoch": 2.410553697510219, "grad_norm": 2.5616242359253847, "learning_rate": 1.9573473555364376e-06, "loss": 0.2947, "step": 32434 }, { "epoch": 2.4106280193236715, "grad_norm": 2.2883898859711445, "learning_rate": 1.9568705576726522e-06, "loss": 0.2503, "step": 32435 }, { "epoch": 2.4107023411371236, "grad_norm": 2.525907458713801, "learning_rate": 1.9563938115901195e-06, "loss": 0.3117, "step": 32436 }, { "epoch": 2.410776662950576, "grad_norm": 2.134101779809056, "learning_rate": 1.9559171172919057e-06, "loss": 0.2643, "step": 32437 }, { "epoch": 2.4108509847640285, "grad_norm": 1.810666628862975, "learning_rate": 1.9554404747810775e-06, "loss": 0.1878, "step": 32438 }, { "epoch": 2.4109253065774805, "grad_norm": 2.2030560189400226, "learning_rate": 1.954963884060708e-06, "loss": 0.2582, "step": 32439 }, { "epoch": 2.4109996283909325, "grad_norm": 2.1303282131351904, "learning_rate": 1.954487345133864e-06, "loss": 0.2579, "step": 32440 }, { "epoch": 2.411073950204385, "grad_norm": 2.47512718353202, "learning_rate": 1.9540108580036143e-06, "loss": 0.268, "step": 32441 }, { "epoch": 2.4111482720178374, "grad_norm": 2.241946118961789, "learning_rate": 1.9535344226730233e-06, "loss": 0.2578, "step": 32442 }, { "epoch": 2.4112225938312895, "grad_norm": 2.2044820359211035, "learning_rate": 1.9530580391451593e-06, "loss": 0.2529, "step": 32443 }, { "epoch": 2.411296915644742, "grad_norm": 2.5931474645435793, "learning_rate": 1.9525817074230912e-06, "loss": 0.338, "step": 32444 }, { "epoch": 2.411371237458194, "grad_norm": 2.9270674891250352, "learning_rate": 1.9521054275098827e-06, "loss": 0.2739, "step": 32445 }, { "epoch": 2.4114455592716464, "grad_norm": 5.167605297255141, "learning_rate": 1.9516291994086047e-06, "loss": 0.2689, "step": 32446 }, { "epoch": 2.4115198810850984, "grad_norm": 4.1562099504461765, "learning_rate": 1.951153023122321e-06, "loss": 0.3039, "step": 32447 }, { "epoch": 2.411594202898551, "grad_norm": 2.749182934410628, "learning_rate": 1.9506768986540925e-06, "loss": 0.3322, "step": 32448 }, { "epoch": 2.411668524712003, "grad_norm": 2.2753009060304468, "learning_rate": 1.9502008260069916e-06, "loss": 0.295, "step": 32449 }, { "epoch": 2.4117428465254553, "grad_norm": 2.809662320902545, "learning_rate": 1.949724805184078e-06, "loss": 0.2664, "step": 32450 }, { "epoch": 2.4118171683389074, "grad_norm": 2.5955380727714914, "learning_rate": 1.94924883618842e-06, "loss": 0.2442, "step": 32451 }, { "epoch": 2.41189149015236, "grad_norm": 2.5343765200694834, "learning_rate": 1.948772919023081e-06, "loss": 0.2262, "step": 32452 }, { "epoch": 2.411965811965812, "grad_norm": 2.2826332902528703, "learning_rate": 1.9482970536911237e-06, "loss": 0.2827, "step": 32453 }, { "epoch": 2.4120401337792643, "grad_norm": 7.087594052927866, "learning_rate": 1.947821240195612e-06, "loss": 0.2759, "step": 32454 }, { "epoch": 2.4121144555927163, "grad_norm": 2.1547846319046906, "learning_rate": 1.9473454785396074e-06, "loss": 0.2118, "step": 32455 }, { "epoch": 2.4121887774061688, "grad_norm": 1.843695797410097, "learning_rate": 1.9468697687261774e-06, "loss": 0.2391, "step": 32456 }, { "epoch": 2.412263099219621, "grad_norm": 2.0317722397308398, "learning_rate": 1.9463941107583818e-06, "loss": 0.2716, "step": 32457 }, { "epoch": 2.4123374210330732, "grad_norm": 2.0644580901827903, "learning_rate": 1.9459185046392804e-06, "loss": 0.2372, "step": 32458 }, { "epoch": 2.4124117428465253, "grad_norm": 2.4117332469796047, "learning_rate": 1.9454429503719396e-06, "loss": 0.2831, "step": 32459 }, { "epoch": 2.4124860646599777, "grad_norm": 2.3866313499261014, "learning_rate": 1.9449674479594183e-06, "loss": 0.3189, "step": 32460 }, { "epoch": 2.41256038647343, "grad_norm": 2.171814042251025, "learning_rate": 1.9444919974047795e-06, "loss": 0.2618, "step": 32461 }, { "epoch": 2.412634708286882, "grad_norm": 2.29054305148986, "learning_rate": 1.944016598711085e-06, "loss": 0.2616, "step": 32462 }, { "epoch": 2.412709030100334, "grad_norm": 2.9024488183209423, "learning_rate": 1.943541251881389e-06, "loss": 0.2984, "step": 32463 }, { "epoch": 2.4127833519137867, "grad_norm": 2.4354128390593988, "learning_rate": 1.9430659569187625e-06, "loss": 0.239, "step": 32464 }, { "epoch": 2.412857673727239, "grad_norm": 2.633930196327543, "learning_rate": 1.9425907138262547e-06, "loss": 0.3065, "step": 32465 }, { "epoch": 2.412931995540691, "grad_norm": 2.37287752072706, "learning_rate": 1.9421155226069323e-06, "loss": 0.3011, "step": 32466 }, { "epoch": 2.4130063173541436, "grad_norm": 2.1957775105475386, "learning_rate": 1.9416403832638485e-06, "loss": 0.2636, "step": 32467 }, { "epoch": 2.4130806391675956, "grad_norm": 2.022909925793452, "learning_rate": 1.9411652958000694e-06, "loss": 0.2613, "step": 32468 }, { "epoch": 2.413154960981048, "grad_norm": 2.391058643627018, "learning_rate": 1.9406902602186494e-06, "loss": 0.3501, "step": 32469 }, { "epoch": 2.4132292827945, "grad_norm": 2.9150592906192605, "learning_rate": 1.9402152765226456e-06, "loss": 0.4375, "step": 32470 }, { "epoch": 2.4133036046079526, "grad_norm": 3.0675835307665613, "learning_rate": 1.93974034471512e-06, "loss": 0.3442, "step": 32471 }, { "epoch": 2.4133779264214046, "grad_norm": 2.330491295463391, "learning_rate": 1.939265464799124e-06, "loss": 0.3017, "step": 32472 }, { "epoch": 2.413452248234857, "grad_norm": 2.0884115174084776, "learning_rate": 1.9387906367777222e-06, "loss": 0.2494, "step": 32473 }, { "epoch": 2.413526570048309, "grad_norm": 2.269077984055825, "learning_rate": 1.9383158606539675e-06, "loss": 0.3335, "step": 32474 }, { "epoch": 2.4136008918617615, "grad_norm": 2.4702382977798663, "learning_rate": 1.937841136430918e-06, "loss": 0.3026, "step": 32475 }, { "epoch": 2.4136752136752135, "grad_norm": 2.1402281276466386, "learning_rate": 1.9373664641116285e-06, "loss": 0.2699, "step": 32476 }, { "epoch": 2.413749535488666, "grad_norm": 2.100265774814237, "learning_rate": 1.936891843699152e-06, "loss": 0.1816, "step": 32477 }, { "epoch": 2.413823857302118, "grad_norm": 2.593459486311406, "learning_rate": 1.9364172751965513e-06, "loss": 0.2524, "step": 32478 }, { "epoch": 2.4138981791155705, "grad_norm": 2.3761106030593035, "learning_rate": 1.935942758606877e-06, "loss": 0.3474, "step": 32479 }, { "epoch": 2.4139725009290225, "grad_norm": 2.1337187324276314, "learning_rate": 1.9354682939331826e-06, "loss": 0.2537, "step": 32480 }, { "epoch": 2.414046822742475, "grad_norm": 2.2615206687738536, "learning_rate": 1.9349938811785273e-06, "loss": 0.2353, "step": 32481 }, { "epoch": 2.414121144555927, "grad_norm": 2.572482380682727, "learning_rate": 1.93451952034596e-06, "loss": 0.3099, "step": 32482 }, { "epoch": 2.4141954663693794, "grad_norm": 2.1182652487396885, "learning_rate": 1.934045211438541e-06, "loss": 0.2644, "step": 32483 }, { "epoch": 2.414269788182832, "grad_norm": 1.8123776229484476, "learning_rate": 1.9335709544593196e-06, "loss": 0.176, "step": 32484 }, { "epoch": 2.414344109996284, "grad_norm": 2.414073104189578, "learning_rate": 1.9330967494113473e-06, "loss": 0.3201, "step": 32485 }, { "epoch": 2.414418431809736, "grad_norm": 2.6604835500867274, "learning_rate": 1.9326225962976852e-06, "loss": 0.3791, "step": 32486 }, { "epoch": 2.4144927536231884, "grad_norm": 2.2240118727608356, "learning_rate": 1.9321484951213755e-06, "loss": 0.2405, "step": 32487 }, { "epoch": 2.414567075436641, "grad_norm": 2.694813925166947, "learning_rate": 1.9316744458854764e-06, "loss": 0.3402, "step": 32488 }, { "epoch": 2.414641397250093, "grad_norm": 1.9984249441542763, "learning_rate": 1.9312004485930367e-06, "loss": 0.253, "step": 32489 }, { "epoch": 2.4147157190635453, "grad_norm": 2.0356803502210465, "learning_rate": 1.9307265032471122e-06, "loss": 0.2734, "step": 32490 }, { "epoch": 2.4147900408769973, "grad_norm": 2.6771503812025075, "learning_rate": 1.930252609850752e-06, "loss": 0.3268, "step": 32491 }, { "epoch": 2.41486436269045, "grad_norm": 2.138072310690792, "learning_rate": 1.9297787684070046e-06, "loss": 0.2383, "step": 32492 }, { "epoch": 2.414938684503902, "grad_norm": 2.4763417604791864, "learning_rate": 1.9293049789189257e-06, "loss": 0.3157, "step": 32493 }, { "epoch": 2.4150130063173543, "grad_norm": 1.9552724866616584, "learning_rate": 1.9288312413895583e-06, "loss": 0.2424, "step": 32494 }, { "epoch": 2.4150873281308063, "grad_norm": 2.3777191225419716, "learning_rate": 1.9283575558219603e-06, "loss": 0.2648, "step": 32495 }, { "epoch": 2.4151616499442587, "grad_norm": 3.1413164324591687, "learning_rate": 1.927883922219178e-06, "loss": 0.2352, "step": 32496 }, { "epoch": 2.4152359717577108, "grad_norm": 2.765381052310809, "learning_rate": 1.9274103405842594e-06, "loss": 0.3493, "step": 32497 }, { "epoch": 2.4153102935711632, "grad_norm": 2.5022822129201447, "learning_rate": 1.9269368109202535e-06, "loss": 0.2876, "step": 32498 }, { "epoch": 2.4153846153846152, "grad_norm": 2.558543174197401, "learning_rate": 1.9264633332302087e-06, "loss": 0.2864, "step": 32499 }, { "epoch": 2.4154589371980677, "grad_norm": 2.2448761902271865, "learning_rate": 1.9259899075171752e-06, "loss": 0.2907, "step": 32500 }, { "epoch": 2.4155332590115197, "grad_norm": 2.692424590541386, "learning_rate": 1.9255165337842006e-06, "loss": 0.2827, "step": 32501 }, { "epoch": 2.415607580824972, "grad_norm": 1.9819302910117202, "learning_rate": 1.925043212034329e-06, "loss": 0.2743, "step": 32502 }, { "epoch": 2.415681902638424, "grad_norm": 2.0068067454777543, "learning_rate": 1.924569942270612e-06, "loss": 0.2863, "step": 32503 }, { "epoch": 2.4157562244518767, "grad_norm": 2.9565388544246614, "learning_rate": 1.9240967244960927e-06, "loss": 0.2867, "step": 32504 }, { "epoch": 2.4158305462653287, "grad_norm": 2.4191496425237227, "learning_rate": 1.923623558713822e-06, "loss": 0.2968, "step": 32505 }, { "epoch": 2.415904868078781, "grad_norm": 1.9868095033520996, "learning_rate": 1.9231504449268432e-06, "loss": 0.2479, "step": 32506 }, { "epoch": 2.4159791898922336, "grad_norm": 2.443349011779422, "learning_rate": 1.922677383138202e-06, "loss": 0.3087, "step": 32507 }, { "epoch": 2.4160535117056856, "grad_norm": 2.1414873767586995, "learning_rate": 1.9222043733509453e-06, "loss": 0.2816, "step": 32508 }, { "epoch": 2.4161278335191376, "grad_norm": 2.526996832044625, "learning_rate": 1.921731415568119e-06, "loss": 0.3278, "step": 32509 }, { "epoch": 2.41620215533259, "grad_norm": 1.7836029592730087, "learning_rate": 1.921258509792766e-06, "loss": 0.2644, "step": 32510 }, { "epoch": 2.4162764771460425, "grad_norm": 2.7348258314625524, "learning_rate": 1.920785656027929e-06, "loss": 0.3266, "step": 32511 }, { "epoch": 2.4163507989594946, "grad_norm": 2.219579068374629, "learning_rate": 1.9203128542766582e-06, "loss": 0.2615, "step": 32512 }, { "epoch": 2.416425120772947, "grad_norm": 2.6300393954167447, "learning_rate": 1.9198401045419935e-06, "loss": 0.3469, "step": 32513 }, { "epoch": 2.416499442586399, "grad_norm": 2.6638362343866158, "learning_rate": 1.9193674068269764e-06, "loss": 0.2699, "step": 32514 }, { "epoch": 2.4165737643998515, "grad_norm": 2.684348426260579, "learning_rate": 1.9188947611346544e-06, "loss": 0.2833, "step": 32515 }, { "epoch": 2.4166480862133035, "grad_norm": 2.231343462787032, "learning_rate": 1.9184221674680668e-06, "loss": 0.2716, "step": 32516 }, { "epoch": 2.416722408026756, "grad_norm": 3.779104407845243, "learning_rate": 1.917949625830261e-06, "loss": 0.286, "step": 32517 }, { "epoch": 2.416796729840208, "grad_norm": 2.5229253895472796, "learning_rate": 1.917477136224275e-06, "loss": 0.2759, "step": 32518 }, { "epoch": 2.4168710516536605, "grad_norm": 2.2058674063586365, "learning_rate": 1.91700469865315e-06, "loss": 0.2319, "step": 32519 }, { "epoch": 2.4169453734671125, "grad_norm": 1.7873996067117714, "learning_rate": 1.9165323131199344e-06, "loss": 0.1944, "step": 32520 }, { "epoch": 2.417019695280565, "grad_norm": 2.43938695808123, "learning_rate": 1.916059979627659e-06, "loss": 0.2531, "step": 32521 }, { "epoch": 2.417094017094017, "grad_norm": 2.6057036852883, "learning_rate": 1.915587698179373e-06, "loss": 0.3011, "step": 32522 }, { "epoch": 2.4171683389074694, "grad_norm": 2.3756105190723593, "learning_rate": 1.9151154687781136e-06, "loss": 0.3147, "step": 32523 }, { "epoch": 2.4172426607209214, "grad_norm": 2.1843320960131862, "learning_rate": 1.9146432914269184e-06, "loss": 0.2274, "step": 32524 }, { "epoch": 2.417316982534374, "grad_norm": 2.313885596044104, "learning_rate": 1.914171166128834e-06, "loss": 0.3036, "step": 32525 }, { "epoch": 2.417391304347826, "grad_norm": 3.2182223610412306, "learning_rate": 1.9136990928868927e-06, "loss": 0.3619, "step": 32526 }, { "epoch": 2.4174656261612784, "grad_norm": 2.127127335151097, "learning_rate": 1.913227071704139e-06, "loss": 0.241, "step": 32527 }, { "epoch": 2.4175399479747304, "grad_norm": 2.582505947440428, "learning_rate": 1.912755102583611e-06, "loss": 0.3548, "step": 32528 }, { "epoch": 2.417614269788183, "grad_norm": 1.7798078901621774, "learning_rate": 1.912283185528343e-06, "loss": 0.1966, "step": 32529 }, { "epoch": 2.4176885916016353, "grad_norm": 1.877220377614088, "learning_rate": 1.9118113205413778e-06, "loss": 0.2386, "step": 32530 }, { "epoch": 2.4177629134150873, "grad_norm": 2.059870394770535, "learning_rate": 1.9113395076257533e-06, "loss": 0.2052, "step": 32531 }, { "epoch": 2.4178372352285393, "grad_norm": 1.708762406145588, "learning_rate": 1.910867746784505e-06, "loss": 0.2376, "step": 32532 }, { "epoch": 2.417911557041992, "grad_norm": 2.82948367950299, "learning_rate": 1.9103960380206665e-06, "loss": 0.2616, "step": 32533 }, { "epoch": 2.4179858788554442, "grad_norm": 2.1551490594297467, "learning_rate": 1.9099243813372825e-06, "loss": 0.2572, "step": 32534 }, { "epoch": 2.4180602006688963, "grad_norm": 4.015146530102202, "learning_rate": 1.909452776737385e-06, "loss": 0.2414, "step": 32535 }, { "epoch": 2.4181345224823487, "grad_norm": 2.5795428277201027, "learning_rate": 1.9089812242240092e-06, "loss": 0.2783, "step": 32536 }, { "epoch": 2.4182088442958007, "grad_norm": 2.284718275524668, "learning_rate": 1.9085097238001935e-06, "loss": 0.2863, "step": 32537 }, { "epoch": 2.418283166109253, "grad_norm": 2.2576107999753745, "learning_rate": 1.9080382754689706e-06, "loss": 0.1821, "step": 32538 }, { "epoch": 2.418357487922705, "grad_norm": 2.196707845123897, "learning_rate": 1.90756687923338e-06, "loss": 0.261, "step": 32539 }, { "epoch": 2.4184318097361577, "grad_norm": 2.6302242975815036, "learning_rate": 1.9070955350964537e-06, "loss": 0.2939, "step": 32540 }, { "epoch": 2.4185061315496097, "grad_norm": 2.464956503166137, "learning_rate": 1.9066242430612236e-06, "loss": 0.2958, "step": 32541 }, { "epoch": 2.418580453363062, "grad_norm": 2.00384785136103, "learning_rate": 1.9061530031307295e-06, "loss": 0.2943, "step": 32542 }, { "epoch": 2.418654775176514, "grad_norm": 2.3538975290408803, "learning_rate": 1.9056818153080026e-06, "loss": 0.3419, "step": 32543 }, { "epoch": 2.4187290969899666, "grad_norm": 2.534131900377612, "learning_rate": 1.9052106795960766e-06, "loss": 0.2751, "step": 32544 }, { "epoch": 2.4188034188034186, "grad_norm": 2.1904759979482202, "learning_rate": 1.904739595997983e-06, "loss": 0.2802, "step": 32545 }, { "epoch": 2.418877740616871, "grad_norm": 2.1185197871159467, "learning_rate": 1.9042685645167547e-06, "loss": 0.2552, "step": 32546 }, { "epoch": 2.418952062430323, "grad_norm": 2.5638142585237387, "learning_rate": 1.9037975851554269e-06, "loss": 0.281, "step": 32547 }, { "epoch": 2.4190263842437756, "grad_norm": 2.349031540130352, "learning_rate": 1.9033266579170274e-06, "loss": 0.2752, "step": 32548 }, { "epoch": 2.419100706057228, "grad_norm": 2.4079298021883857, "learning_rate": 1.9028557828045946e-06, "loss": 0.2465, "step": 32549 }, { "epoch": 2.41917502787068, "grad_norm": 2.552486873713885, "learning_rate": 1.9023849598211552e-06, "loss": 0.2903, "step": 32550 }, { "epoch": 2.419249349684132, "grad_norm": 2.175709015228048, "learning_rate": 1.901914188969739e-06, "loss": 0.2216, "step": 32551 }, { "epoch": 2.4193236714975845, "grad_norm": 2.4056582943118494, "learning_rate": 1.9014434702533813e-06, "loss": 0.3111, "step": 32552 }, { "epoch": 2.419397993311037, "grad_norm": 2.2171818727377666, "learning_rate": 1.9009728036751073e-06, "loss": 0.2147, "step": 32553 }, { "epoch": 2.419472315124489, "grad_norm": 2.580638171451839, "learning_rate": 1.900502189237956e-06, "loss": 0.3131, "step": 32554 }, { "epoch": 2.4195466369379415, "grad_norm": 2.738497958628133, "learning_rate": 1.900031626944947e-06, "loss": 0.3205, "step": 32555 }, { "epoch": 2.4196209587513935, "grad_norm": 2.1349197202278596, "learning_rate": 1.8995611167991156e-06, "loss": 0.2035, "step": 32556 }, { "epoch": 2.419695280564846, "grad_norm": 2.175491744556853, "learning_rate": 1.8990906588034897e-06, "loss": 0.3087, "step": 32557 }, { "epoch": 2.419769602378298, "grad_norm": 1.9270865173139868, "learning_rate": 1.8986202529610954e-06, "loss": 0.2915, "step": 32558 }, { "epoch": 2.4198439241917504, "grad_norm": 2.147798505756018, "learning_rate": 1.8981498992749659e-06, "loss": 0.2957, "step": 32559 }, { "epoch": 2.4199182460052024, "grad_norm": 2.186022599207691, "learning_rate": 1.8976795977481243e-06, "loss": 0.2458, "step": 32560 }, { "epoch": 2.419992567818655, "grad_norm": 3.2237833237221403, "learning_rate": 1.8972093483836041e-06, "loss": 0.2036, "step": 32561 }, { "epoch": 2.420066889632107, "grad_norm": 2.850379636555318, "learning_rate": 1.8967391511844292e-06, "loss": 0.3269, "step": 32562 }, { "epoch": 2.4201412114455594, "grad_norm": 2.9152013253484483, "learning_rate": 1.8962690061536249e-06, "loss": 0.3439, "step": 32563 }, { "epoch": 2.4202155332590114, "grad_norm": 1.9015021888503745, "learning_rate": 1.8957989132942223e-06, "loss": 0.325, "step": 32564 }, { "epoch": 2.420289855072464, "grad_norm": 2.2510962960444734, "learning_rate": 1.8953288726092467e-06, "loss": 0.3707, "step": 32565 }, { "epoch": 2.420364176885916, "grad_norm": 2.2394840832045366, "learning_rate": 1.8948588841017223e-06, "loss": 0.2412, "step": 32566 }, { "epoch": 2.4204384986993683, "grad_norm": 2.17043796998561, "learning_rate": 1.894388947774677e-06, "loss": 0.2716, "step": 32567 }, { "epoch": 2.4205128205128204, "grad_norm": 2.3273190426051746, "learning_rate": 1.893919063631131e-06, "loss": 0.2831, "step": 32568 }, { "epoch": 2.420587142326273, "grad_norm": 2.882876716567632, "learning_rate": 1.893449231674117e-06, "loss": 0.3028, "step": 32569 }, { "epoch": 2.420661464139725, "grad_norm": 2.2968223818323295, "learning_rate": 1.8929794519066535e-06, "loss": 0.3193, "step": 32570 }, { "epoch": 2.4207357859531773, "grad_norm": 2.3222879666263228, "learning_rate": 1.8925097243317714e-06, "loss": 0.2228, "step": 32571 }, { "epoch": 2.4208101077666297, "grad_norm": 1.8284726858314373, "learning_rate": 1.8920400489524892e-06, "loss": 0.1708, "step": 32572 }, { "epoch": 2.4208844295800818, "grad_norm": 2.383259911268801, "learning_rate": 1.8915704257718304e-06, "loss": 0.2792, "step": 32573 }, { "epoch": 2.420958751393534, "grad_norm": 2.3327510701017635, "learning_rate": 1.8911008547928234e-06, "loss": 0.2395, "step": 32574 }, { "epoch": 2.4210330732069862, "grad_norm": 1.9804084105343895, "learning_rate": 1.8906313360184858e-06, "loss": 0.258, "step": 32575 }, { "epoch": 2.4211073950204387, "grad_norm": 2.402214065645341, "learning_rate": 1.8901618694518476e-06, "loss": 0.291, "step": 32576 }, { "epoch": 2.4211817168338907, "grad_norm": 2.498352807897259, "learning_rate": 1.8896924550959218e-06, "loss": 0.2436, "step": 32577 }, { "epoch": 2.421256038647343, "grad_norm": 2.6002017146674077, "learning_rate": 1.889223092953738e-06, "loss": 0.3008, "step": 32578 }, { "epoch": 2.421330360460795, "grad_norm": 1.6576416544687458, "learning_rate": 1.8887537830283142e-06, "loss": 0.1746, "step": 32579 }, { "epoch": 2.4214046822742477, "grad_norm": 2.6217383935643666, "learning_rate": 1.8882845253226711e-06, "loss": 0.3184, "step": 32580 }, { "epoch": 2.4214790040876997, "grad_norm": 1.9960277667523907, "learning_rate": 1.887815319839833e-06, "loss": 0.232, "step": 32581 }, { "epoch": 2.421553325901152, "grad_norm": 1.9990853705649663, "learning_rate": 1.8873461665828174e-06, "loss": 0.2356, "step": 32582 }, { "epoch": 2.421627647714604, "grad_norm": 2.3754477496354403, "learning_rate": 1.8868770655546476e-06, "loss": 0.2895, "step": 32583 }, { "epoch": 2.4217019695280566, "grad_norm": 2.4846337700226657, "learning_rate": 1.8864080167583432e-06, "loss": 0.2798, "step": 32584 }, { "epoch": 2.4217762913415086, "grad_norm": 2.9641844193353926, "learning_rate": 1.8859390201969196e-06, "loss": 0.3467, "step": 32585 }, { "epoch": 2.421850613154961, "grad_norm": 2.506278006553949, "learning_rate": 1.8854700758734034e-06, "loss": 0.3315, "step": 32586 }, { "epoch": 2.421924934968413, "grad_norm": 2.091106114029775, "learning_rate": 1.885001183790809e-06, "loss": 0.2295, "step": 32587 }, { "epoch": 2.4219992567818656, "grad_norm": 2.108337183357235, "learning_rate": 1.8845323439521557e-06, "loss": 0.1979, "step": 32588 }, { "epoch": 2.4220735785953176, "grad_norm": 2.8073150807103415, "learning_rate": 1.8840635563604625e-06, "loss": 0.3358, "step": 32589 }, { "epoch": 2.42214790040877, "grad_norm": 2.9806742646165363, "learning_rate": 1.8835948210187438e-06, "loss": 0.3525, "step": 32590 }, { "epoch": 2.422222222222222, "grad_norm": 2.0858321135863838, "learning_rate": 1.8831261379300236e-06, "loss": 0.2639, "step": 32591 }, { "epoch": 2.4222965440356745, "grad_norm": 2.100100696812482, "learning_rate": 1.882657507097314e-06, "loss": 0.2392, "step": 32592 }, { "epoch": 2.4223708658491265, "grad_norm": 2.598850222329277, "learning_rate": 1.882188928523636e-06, "loss": 0.2809, "step": 32593 }, { "epoch": 2.422445187662579, "grad_norm": 1.8381675097092953, "learning_rate": 1.881720402212005e-06, "loss": 0.2402, "step": 32594 }, { "epoch": 2.4225195094760315, "grad_norm": 3.253349767633522, "learning_rate": 1.881251928165434e-06, "loss": 0.3073, "step": 32595 }, { "epoch": 2.4225938312894835, "grad_norm": 2.5557926298429434, "learning_rate": 1.8807835063869451e-06, "loss": 0.2761, "step": 32596 }, { "epoch": 2.4226681531029355, "grad_norm": 2.5629641362788624, "learning_rate": 1.880315136879547e-06, "loss": 0.3458, "step": 32597 }, { "epoch": 2.422742474916388, "grad_norm": 2.2124945843631765, "learning_rate": 1.8798468196462617e-06, "loss": 0.2611, "step": 32598 }, { "epoch": 2.4228167967298404, "grad_norm": 3.2097179725354312, "learning_rate": 1.8793785546901022e-06, "loss": 0.4374, "step": 32599 }, { "epoch": 2.4228911185432924, "grad_norm": 1.921181574057807, "learning_rate": 1.8789103420140808e-06, "loss": 0.2464, "step": 32600 }, { "epoch": 2.422965440356745, "grad_norm": 2.2646020285346067, "learning_rate": 1.8784421816212139e-06, "loss": 0.2693, "step": 32601 }, { "epoch": 2.423039762170197, "grad_norm": 2.290052763009232, "learning_rate": 1.8779740735145125e-06, "loss": 0.2584, "step": 32602 }, { "epoch": 2.4231140839836494, "grad_norm": 1.8359197882427016, "learning_rate": 1.877506017696995e-06, "loss": 0.2642, "step": 32603 }, { "epoch": 2.4231884057971014, "grad_norm": 2.283783494283524, "learning_rate": 1.8770380141716716e-06, "loss": 0.3475, "step": 32604 }, { "epoch": 2.423262727610554, "grad_norm": 2.3204809855669883, "learning_rate": 1.876570062941554e-06, "loss": 0.2786, "step": 32605 }, { "epoch": 2.423337049424006, "grad_norm": 2.5784243622373086, "learning_rate": 1.87610216400966e-06, "loss": 0.3273, "step": 32606 }, { "epoch": 2.4234113712374583, "grad_norm": 3.252296442867407, "learning_rate": 1.875634317378996e-06, "loss": 0.2233, "step": 32607 }, { "epoch": 2.4234856930509103, "grad_norm": 2.2040450404948806, "learning_rate": 1.8751665230525784e-06, "loss": 0.2753, "step": 32608 }, { "epoch": 2.423560014864363, "grad_norm": 2.2696480595569737, "learning_rate": 1.874698781033417e-06, "loss": 0.2974, "step": 32609 }, { "epoch": 2.423634336677815, "grad_norm": 2.655604805648038, "learning_rate": 1.8742310913245244e-06, "loss": 0.2245, "step": 32610 }, { "epoch": 2.4237086584912673, "grad_norm": 2.398097358777818, "learning_rate": 1.8737634539289095e-06, "loss": 0.2883, "step": 32611 }, { "epoch": 2.4237829803047193, "grad_norm": 2.9545032599933223, "learning_rate": 1.8732958688495816e-06, "loss": 0.3872, "step": 32612 }, { "epoch": 2.4238573021181717, "grad_norm": 2.1131251294545303, "learning_rate": 1.8728283360895561e-06, "loss": 0.2799, "step": 32613 }, { "epoch": 2.4239316239316238, "grad_norm": 2.184957751416708, "learning_rate": 1.872360855651838e-06, "loss": 0.1835, "step": 32614 }, { "epoch": 2.424005945745076, "grad_norm": 2.4104959511613018, "learning_rate": 1.8718934275394408e-06, "loss": 0.3276, "step": 32615 }, { "epoch": 2.4240802675585282, "grad_norm": 2.7525783609269756, "learning_rate": 1.8714260517553716e-06, "loss": 0.3791, "step": 32616 }, { "epoch": 2.4241545893719807, "grad_norm": 1.9115730238323312, "learning_rate": 1.8709587283026376e-06, "loss": 0.2743, "step": 32617 }, { "epoch": 2.424228911185433, "grad_norm": 2.8244015882405655, "learning_rate": 1.8704914571842525e-06, "loss": 0.3458, "step": 32618 }, { "epoch": 2.424303232998885, "grad_norm": 2.304614110909661, "learning_rate": 1.8700242384032185e-06, "loss": 0.2484, "step": 32619 }, { "epoch": 2.424377554812337, "grad_norm": 2.7034670945253922, "learning_rate": 1.8695570719625488e-06, "loss": 0.3675, "step": 32620 }, { "epoch": 2.4244518766257896, "grad_norm": 3.576491356322799, "learning_rate": 1.86908995786525e-06, "loss": 0.2982, "step": 32621 }, { "epoch": 2.424526198439242, "grad_norm": 2.6592814115380414, "learning_rate": 1.868622896114327e-06, "loss": 0.2986, "step": 32622 }, { "epoch": 2.424600520252694, "grad_norm": 2.826488647423153, "learning_rate": 1.868155886712788e-06, "loss": 0.3385, "step": 32623 }, { "epoch": 2.4246748420661466, "grad_norm": 2.4056207080648453, "learning_rate": 1.867688929663638e-06, "loss": 0.2799, "step": 32624 }, { "epoch": 2.4247491638795986, "grad_norm": 2.0455949406824105, "learning_rate": 1.8672220249698857e-06, "loss": 0.2735, "step": 32625 }, { "epoch": 2.424823485693051, "grad_norm": 2.609469120984466, "learning_rate": 1.8667551726345379e-06, "loss": 0.3547, "step": 32626 }, { "epoch": 2.424897807506503, "grad_norm": 2.4951282116349067, "learning_rate": 1.866288372660594e-06, "loss": 0.2641, "step": 32627 }, { "epoch": 2.4249721293199555, "grad_norm": 2.583868312043122, "learning_rate": 1.8658216250510674e-06, "loss": 0.3041, "step": 32628 }, { "epoch": 2.4250464511334076, "grad_norm": 2.3680530059452756, "learning_rate": 1.8653549298089557e-06, "loss": 0.2902, "step": 32629 }, { "epoch": 2.42512077294686, "grad_norm": 1.9804183438196916, "learning_rate": 1.8648882869372697e-06, "loss": 0.2676, "step": 32630 }, { "epoch": 2.425195094760312, "grad_norm": 2.4306961417676103, "learning_rate": 1.8644216964390104e-06, "loss": 0.255, "step": 32631 }, { "epoch": 2.4252694165737645, "grad_norm": 1.666606834917155, "learning_rate": 1.86395515831718e-06, "loss": 0.2452, "step": 32632 }, { "epoch": 2.4253437383872165, "grad_norm": 2.417248960156377, "learning_rate": 1.8634886725747892e-06, "loss": 0.2762, "step": 32633 }, { "epoch": 2.425418060200669, "grad_norm": 2.3942574988961467, "learning_rate": 1.8630222392148312e-06, "loss": 0.2822, "step": 32634 }, { "epoch": 2.425492382014121, "grad_norm": 2.4412281774979756, "learning_rate": 1.8625558582403159e-06, "loss": 0.3158, "step": 32635 }, { "epoch": 2.4255667038275734, "grad_norm": 3.351654865646466, "learning_rate": 1.8620895296542418e-06, "loss": 0.3349, "step": 32636 }, { "epoch": 2.4256410256410255, "grad_norm": 2.0817201312087867, "learning_rate": 1.8616232534596156e-06, "loss": 0.2415, "step": 32637 }, { "epoch": 2.425715347454478, "grad_norm": 2.033093302293175, "learning_rate": 1.861157029659436e-06, "loss": 0.2286, "step": 32638 }, { "epoch": 2.42578966926793, "grad_norm": 2.7023446317252358, "learning_rate": 1.860690858256703e-06, "loss": 0.34, "step": 32639 }, { "epoch": 2.4258639910813824, "grad_norm": 2.080811842759078, "learning_rate": 1.8602247392544225e-06, "loss": 0.2423, "step": 32640 }, { "epoch": 2.425938312894835, "grad_norm": 2.9461690882795004, "learning_rate": 1.8597586726555895e-06, "loss": 0.3169, "step": 32641 }, { "epoch": 2.426012634708287, "grad_norm": 2.5903920607203035, "learning_rate": 1.859292658463211e-06, "loss": 0.2759, "step": 32642 }, { "epoch": 2.426086956521739, "grad_norm": 2.3387555744734607, "learning_rate": 1.8588266966802847e-06, "loss": 0.2654, "step": 32643 }, { "epoch": 2.4261612783351914, "grad_norm": 2.283959836181473, "learning_rate": 1.8583607873098087e-06, "loss": 0.3625, "step": 32644 }, { "epoch": 2.426235600148644, "grad_norm": 2.487053733420431, "learning_rate": 1.8578949303547832e-06, "loss": 0.2696, "step": 32645 }, { "epoch": 2.426309921962096, "grad_norm": 2.526010377180253, "learning_rate": 1.8574291258182064e-06, "loss": 0.2582, "step": 32646 }, { "epoch": 2.4263842437755483, "grad_norm": 2.214497020378099, "learning_rate": 1.85696337370308e-06, "loss": 0.1814, "step": 32647 }, { "epoch": 2.4264585655890003, "grad_norm": 2.55455210770609, "learning_rate": 1.8564976740124018e-06, "loss": 0.3117, "step": 32648 }, { "epoch": 2.4265328874024528, "grad_norm": 2.6329407427441627, "learning_rate": 1.8560320267491649e-06, "loss": 0.2565, "step": 32649 }, { "epoch": 2.426607209215905, "grad_norm": 2.86083754275022, "learning_rate": 1.8555664319163757e-06, "loss": 0.2695, "step": 32650 }, { "epoch": 2.4266815310293572, "grad_norm": 2.1824834696326794, "learning_rate": 1.855100889517023e-06, "loss": 0.2518, "step": 32651 }, { "epoch": 2.4267558528428093, "grad_norm": 2.033485591230338, "learning_rate": 1.8546353995541122e-06, "loss": 0.2125, "step": 32652 }, { "epoch": 2.4268301746562617, "grad_norm": 2.5033422841447104, "learning_rate": 1.8541699620306353e-06, "loss": 0.2526, "step": 32653 }, { "epoch": 2.4269044964697137, "grad_norm": 2.4380997330546945, "learning_rate": 1.8537045769495864e-06, "loss": 0.2022, "step": 32654 }, { "epoch": 2.426978818283166, "grad_norm": 2.341531691014376, "learning_rate": 1.853239244313968e-06, "loss": 0.2816, "step": 32655 }, { "epoch": 2.427053140096618, "grad_norm": 1.9767816875649291, "learning_rate": 1.8527739641267727e-06, "loss": 0.2357, "step": 32656 }, { "epoch": 2.4271274619100707, "grad_norm": 2.006679690524916, "learning_rate": 1.8523087363909952e-06, "loss": 0.2579, "step": 32657 }, { "epoch": 2.4272017837235227, "grad_norm": 2.059548905510631, "learning_rate": 1.8518435611096297e-06, "loss": 0.2703, "step": 32658 }, { "epoch": 2.427276105536975, "grad_norm": 2.8223610737069356, "learning_rate": 1.8513784382856747e-06, "loss": 0.3061, "step": 32659 }, { "epoch": 2.427350427350427, "grad_norm": 2.0693844427143264, "learning_rate": 1.8509133679221225e-06, "loss": 0.2291, "step": 32660 }, { "epoch": 2.4274247491638796, "grad_norm": 2.2866913100841626, "learning_rate": 1.850448350021964e-06, "loss": 0.2981, "step": 32661 }, { "epoch": 2.4274990709773316, "grad_norm": 2.2421853982864133, "learning_rate": 1.8499833845881997e-06, "loss": 0.2828, "step": 32662 }, { "epoch": 2.427573392790784, "grad_norm": 2.480558556142419, "learning_rate": 1.849518471623817e-06, "loss": 0.3345, "step": 32663 }, { "epoch": 2.4276477146042366, "grad_norm": 1.8461232632507967, "learning_rate": 1.8490536111318135e-06, "loss": 0.247, "step": 32664 }, { "epoch": 2.4277220364176886, "grad_norm": 2.636456974468274, "learning_rate": 1.8485888031151799e-06, "loss": 0.2752, "step": 32665 }, { "epoch": 2.4277963582311406, "grad_norm": 2.117699802114176, "learning_rate": 1.8481240475769057e-06, "loss": 0.2584, "step": 32666 }, { "epoch": 2.427870680044593, "grad_norm": 2.395467270422812, "learning_rate": 1.847659344519992e-06, "loss": 0.2358, "step": 32667 }, { "epoch": 2.4279450018580455, "grad_norm": 2.187232237603531, "learning_rate": 1.8471946939474195e-06, "loss": 0.2446, "step": 32668 }, { "epoch": 2.4280193236714975, "grad_norm": 2.785411263542818, "learning_rate": 1.8467300958621858e-06, "loss": 0.3048, "step": 32669 }, { "epoch": 2.42809364548495, "grad_norm": 2.4099659720821647, "learning_rate": 1.846265550267281e-06, "loss": 0.2443, "step": 32670 }, { "epoch": 2.428167967298402, "grad_norm": 2.474814787857348, "learning_rate": 1.8458010571656937e-06, "loss": 0.3585, "step": 32671 }, { "epoch": 2.4282422891118545, "grad_norm": 2.094923269224925, "learning_rate": 1.8453366165604182e-06, "loss": 0.2338, "step": 32672 }, { "epoch": 2.4283166109253065, "grad_norm": 1.928024844964995, "learning_rate": 1.8448722284544408e-06, "loss": 0.2525, "step": 32673 }, { "epoch": 2.428390932738759, "grad_norm": 4.435654505082898, "learning_rate": 1.8444078928507537e-06, "loss": 0.3202, "step": 32674 }, { "epoch": 2.428465254552211, "grad_norm": 2.718890976275442, "learning_rate": 1.8439436097523467e-06, "loss": 0.3435, "step": 32675 }, { "epoch": 2.4285395763656634, "grad_norm": 1.9181744848891311, "learning_rate": 1.8434793791622041e-06, "loss": 0.2852, "step": 32676 }, { "epoch": 2.4286138981791154, "grad_norm": 2.224711997514624, "learning_rate": 1.8430152010833213e-06, "loss": 0.2727, "step": 32677 }, { "epoch": 2.428688219992568, "grad_norm": 2.808398011893846, "learning_rate": 1.8425510755186838e-06, "loss": 0.2917, "step": 32678 }, { "epoch": 2.42876254180602, "grad_norm": 2.58054045474819, "learning_rate": 1.842087002471279e-06, "loss": 0.3278, "step": 32679 }, { "epoch": 2.4288368636194724, "grad_norm": 2.295933985113314, "learning_rate": 1.8416229819440912e-06, "loss": 0.2298, "step": 32680 }, { "epoch": 2.4289111854329244, "grad_norm": 3.0289758750229407, "learning_rate": 1.8411590139401148e-06, "loss": 0.3148, "step": 32681 }, { "epoch": 2.428985507246377, "grad_norm": 3.0731820658308395, "learning_rate": 1.8406950984623328e-06, "loss": 0.3168, "step": 32682 }, { "epoch": 2.429059829059829, "grad_norm": 2.0540266385236063, "learning_rate": 1.8402312355137309e-06, "loss": 0.2242, "step": 32683 }, { "epoch": 2.4291341508732813, "grad_norm": 2.087939902771745, "learning_rate": 1.8397674250972985e-06, "loss": 0.3463, "step": 32684 }, { "epoch": 2.4292084726867333, "grad_norm": 3.2520227017008505, "learning_rate": 1.8393036672160169e-06, "loss": 0.3301, "step": 32685 }, { "epoch": 2.429282794500186, "grad_norm": 2.446538669191721, "learning_rate": 1.8388399618728792e-06, "loss": 0.3292, "step": 32686 }, { "epoch": 2.4293571163136383, "grad_norm": 2.018668023341133, "learning_rate": 1.838376309070865e-06, "loss": 0.2282, "step": 32687 }, { "epoch": 2.4294314381270903, "grad_norm": 2.3878126833893525, "learning_rate": 1.8379127088129578e-06, "loss": 0.3048, "step": 32688 }, { "epoch": 2.4295057599405427, "grad_norm": 2.353179190275126, "learning_rate": 1.8374491611021517e-06, "loss": 0.3009, "step": 32689 }, { "epoch": 2.4295800817539948, "grad_norm": 1.963005567773651, "learning_rate": 1.8369856659414175e-06, "loss": 0.22, "step": 32690 }, { "epoch": 2.429654403567447, "grad_norm": 2.305033248765746, "learning_rate": 1.8365222233337498e-06, "loss": 0.2904, "step": 32691 }, { "epoch": 2.4297287253808992, "grad_norm": 2.630249919665049, "learning_rate": 1.8360588332821284e-06, "loss": 0.2546, "step": 32692 }, { "epoch": 2.4298030471943517, "grad_norm": 2.0755106269469428, "learning_rate": 1.835595495789534e-06, "loss": 0.2971, "step": 32693 }, { "epoch": 2.4298773690078037, "grad_norm": 2.1238202522318814, "learning_rate": 1.8351322108589543e-06, "loss": 0.1653, "step": 32694 }, { "epoch": 2.429951690821256, "grad_norm": 2.592568500083263, "learning_rate": 1.834668978493368e-06, "loss": 0.2535, "step": 32695 }, { "epoch": 2.430026012634708, "grad_norm": 2.6676105953948013, "learning_rate": 1.8342057986957607e-06, "loss": 0.2902, "step": 32696 }, { "epoch": 2.4301003344481606, "grad_norm": 2.2589320150955805, "learning_rate": 1.8337426714691131e-06, "loss": 0.279, "step": 32697 }, { "epoch": 2.4301746562616127, "grad_norm": 2.0301916675920437, "learning_rate": 1.833279596816404e-06, "loss": 0.2604, "step": 32698 }, { "epoch": 2.430248978075065, "grad_norm": 2.4961325038385556, "learning_rate": 1.8328165747406202e-06, "loss": 0.284, "step": 32699 }, { "epoch": 2.430323299888517, "grad_norm": 1.9485982849537413, "learning_rate": 1.8323536052447366e-06, "loss": 0.2515, "step": 32700 }, { "epoch": 2.4303976217019696, "grad_norm": 2.5914029617919256, "learning_rate": 1.8318906883317412e-06, "loss": 0.3535, "step": 32701 }, { "epoch": 2.4304719435154216, "grad_norm": 2.2520328033087686, "learning_rate": 1.8314278240046058e-06, "loss": 0.2722, "step": 32702 }, { "epoch": 2.430546265328874, "grad_norm": 2.19573748233844, "learning_rate": 1.830965012266316e-06, "loss": 0.23, "step": 32703 }, { "epoch": 2.430620587142326, "grad_norm": 1.7787167286652281, "learning_rate": 1.8305022531198491e-06, "loss": 0.156, "step": 32704 }, { "epoch": 2.4306949089557786, "grad_norm": 2.184427843771993, "learning_rate": 1.830039546568183e-06, "loss": 0.2159, "step": 32705 }, { "epoch": 2.430769230769231, "grad_norm": 2.6010548703923377, "learning_rate": 1.829576892614301e-06, "loss": 0.3075, "step": 32706 }, { "epoch": 2.430843552582683, "grad_norm": 2.314330082174672, "learning_rate": 1.8291142912611759e-06, "loss": 0.2809, "step": 32707 }, { "epoch": 2.430917874396135, "grad_norm": 2.2910416609023385, "learning_rate": 1.8286517425117922e-06, "loss": 0.2662, "step": 32708 }, { "epoch": 2.4309921962095875, "grad_norm": 2.002753900330403, "learning_rate": 1.8281892463691241e-06, "loss": 0.2327, "step": 32709 }, { "epoch": 2.43106651802304, "grad_norm": 2.2966734230111756, "learning_rate": 1.8277268028361483e-06, "loss": 0.3146, "step": 32710 }, { "epoch": 2.431140839836492, "grad_norm": 2.61244465827019, "learning_rate": 1.8272644119158445e-06, "loss": 0.3368, "step": 32711 }, { "epoch": 2.4312151616499444, "grad_norm": 2.933791556051827, "learning_rate": 1.8268020736111891e-06, "loss": 0.2408, "step": 32712 }, { "epoch": 2.4312894834633965, "grad_norm": 2.92940184060352, "learning_rate": 1.8263397879251576e-06, "loss": 0.3087, "step": 32713 }, { "epoch": 2.431363805276849, "grad_norm": 2.1761285428467105, "learning_rate": 1.8258775548607255e-06, "loss": 0.2451, "step": 32714 }, { "epoch": 2.431438127090301, "grad_norm": 2.3453108551322512, "learning_rate": 1.825415374420868e-06, "loss": 0.3069, "step": 32715 }, { "epoch": 2.4315124489037534, "grad_norm": 2.615530161329851, "learning_rate": 1.8249532466085651e-06, "loss": 0.3034, "step": 32716 }, { "epoch": 2.4315867707172054, "grad_norm": 2.5856715693379084, "learning_rate": 1.8244911714267855e-06, "loss": 0.3236, "step": 32717 }, { "epoch": 2.431661092530658, "grad_norm": 2.3605652421348564, "learning_rate": 1.8240291488785111e-06, "loss": 0.3068, "step": 32718 }, { "epoch": 2.43173541434411, "grad_norm": 2.698840670255202, "learning_rate": 1.823567178966712e-06, "loss": 0.3275, "step": 32719 }, { "epoch": 2.4318097361575624, "grad_norm": 2.639236121131908, "learning_rate": 1.8231052616943613e-06, "loss": 0.3103, "step": 32720 }, { "epoch": 2.4318840579710144, "grad_norm": 2.1958140079562876, "learning_rate": 1.8226433970644365e-06, "loss": 0.235, "step": 32721 }, { "epoch": 2.431958379784467, "grad_norm": 1.9297997882646805, "learning_rate": 1.8221815850799073e-06, "loss": 0.2963, "step": 32722 }, { "epoch": 2.432032701597919, "grad_norm": 2.254603167010041, "learning_rate": 1.821719825743753e-06, "loss": 0.3028, "step": 32723 }, { "epoch": 2.4321070234113713, "grad_norm": 2.803242299679518, "learning_rate": 1.8212581190589385e-06, "loss": 0.2973, "step": 32724 }, { "epoch": 2.4321813452248233, "grad_norm": 2.0083004850578, "learning_rate": 1.8207964650284404e-06, "loss": 0.3129, "step": 32725 }, { "epoch": 2.432255667038276, "grad_norm": 2.668732098774645, "learning_rate": 1.820334863655232e-06, "loss": 0.3649, "step": 32726 }, { "epoch": 2.432329988851728, "grad_norm": 2.1655257137569963, "learning_rate": 1.8198733149422798e-06, "loss": 0.2639, "step": 32727 }, { "epoch": 2.4324043106651803, "grad_norm": 2.4031265015964434, "learning_rate": 1.8194118188925614e-06, "loss": 0.2935, "step": 32728 }, { "epoch": 2.4324786324786327, "grad_norm": 1.9617447554273924, "learning_rate": 1.8189503755090454e-06, "loss": 0.2429, "step": 32729 }, { "epoch": 2.4325529542920847, "grad_norm": 2.5623969162365614, "learning_rate": 1.8184889847947008e-06, "loss": 0.3415, "step": 32730 }, { "epoch": 2.4326272761055368, "grad_norm": 2.579614890183574, "learning_rate": 1.8180276467525004e-06, "loss": 0.4011, "step": 32731 }, { "epoch": 2.432701597918989, "grad_norm": 2.1795450390956455, "learning_rate": 1.8175663613854123e-06, "loss": 0.2462, "step": 32732 }, { "epoch": 2.4327759197324417, "grad_norm": 2.3456987978482813, "learning_rate": 1.8171051286964092e-06, "loss": 0.3139, "step": 32733 }, { "epoch": 2.4328502415458937, "grad_norm": 2.408553133241429, "learning_rate": 1.8166439486884591e-06, "loss": 0.2479, "step": 32734 }, { "epoch": 2.432924563359346, "grad_norm": 2.2882997285947444, "learning_rate": 1.8161828213645305e-06, "loss": 0.2872, "step": 32735 }, { "epoch": 2.432998885172798, "grad_norm": 1.6680672504228988, "learning_rate": 1.8157217467275923e-06, "loss": 0.225, "step": 32736 }, { "epoch": 2.4330732069862506, "grad_norm": 2.4406682589116726, "learning_rate": 1.8152607247806108e-06, "loss": 0.2456, "step": 32737 }, { "epoch": 2.4331475287997026, "grad_norm": 2.5126503294105276, "learning_rate": 1.8147997555265572e-06, "loss": 0.304, "step": 32738 }, { "epoch": 2.433221850613155, "grad_norm": 3.0350400862480047, "learning_rate": 1.8143388389683969e-06, "loss": 0.3458, "step": 32739 }, { "epoch": 2.433296172426607, "grad_norm": 2.1323614743437083, "learning_rate": 1.8138779751091008e-06, "loss": 0.192, "step": 32740 }, { "epoch": 2.4333704942400596, "grad_norm": 1.728976746496861, "learning_rate": 1.813417163951633e-06, "loss": 0.2178, "step": 32741 }, { "epoch": 2.4334448160535116, "grad_norm": 2.2015428696742987, "learning_rate": 1.8129564054989579e-06, "loss": 0.2398, "step": 32742 }, { "epoch": 2.433519137866964, "grad_norm": 2.739642169397935, "learning_rate": 1.8124956997540478e-06, "loss": 0.334, "step": 32743 }, { "epoch": 2.433593459680416, "grad_norm": 2.5667483045585615, "learning_rate": 1.8120350467198633e-06, "loss": 0.2493, "step": 32744 }, { "epoch": 2.4336677814938685, "grad_norm": 2.1713349294802198, "learning_rate": 1.8115744463993745e-06, "loss": 0.2669, "step": 32745 }, { "epoch": 2.4337421033073205, "grad_norm": 2.4078438097573174, "learning_rate": 1.8111138987955445e-06, "loss": 0.2644, "step": 32746 }, { "epoch": 2.433816425120773, "grad_norm": 2.566610324822345, "learning_rate": 1.8106534039113388e-06, "loss": 0.2734, "step": 32747 }, { "epoch": 2.433890746934225, "grad_norm": 2.4877936054212197, "learning_rate": 1.8101929617497205e-06, "loss": 0.2594, "step": 32748 }, { "epoch": 2.4339650687476775, "grad_norm": 1.7895668145352677, "learning_rate": 1.8097325723136538e-06, "loss": 0.2195, "step": 32749 }, { "epoch": 2.4340393905611295, "grad_norm": 2.5113108511878517, "learning_rate": 1.8092722356061054e-06, "loss": 0.2934, "step": 32750 }, { "epoch": 2.434113712374582, "grad_norm": 2.187974378908584, "learning_rate": 1.8088119516300373e-06, "loss": 0.21, "step": 32751 }, { "epoch": 2.4341880341880344, "grad_norm": 2.4294768969314022, "learning_rate": 1.8083517203884105e-06, "loss": 0.2413, "step": 32752 }, { "epoch": 2.4342623560014864, "grad_norm": 1.9164094966506282, "learning_rate": 1.8078915418841936e-06, "loss": 0.2865, "step": 32753 }, { "epoch": 2.4343366778149385, "grad_norm": 2.0406586609498194, "learning_rate": 1.807431416120342e-06, "loss": 0.2144, "step": 32754 }, { "epoch": 2.434410999628391, "grad_norm": 2.4971421735238652, "learning_rate": 1.806971343099826e-06, "loss": 0.2873, "step": 32755 }, { "epoch": 2.4344853214418434, "grad_norm": 2.510638027953999, "learning_rate": 1.8065113228256026e-06, "loss": 0.2908, "step": 32756 }, { "epoch": 2.4345596432552954, "grad_norm": 6.335323296534571, "learning_rate": 1.8060513553006332e-06, "loss": 0.2684, "step": 32757 }, { "epoch": 2.434633965068748, "grad_norm": 2.927570193211475, "learning_rate": 1.8055914405278817e-06, "loss": 0.2776, "step": 32758 }, { "epoch": 2.4347082868822, "grad_norm": 3.857258412145129, "learning_rate": 1.805131578510304e-06, "loss": 0.3808, "step": 32759 }, { "epoch": 2.4347826086956523, "grad_norm": 2.4138625530578763, "learning_rate": 1.8046717692508664e-06, "loss": 0.2678, "step": 32760 }, { "epoch": 2.4348569305091043, "grad_norm": 2.5621014243966003, "learning_rate": 1.8042120127525242e-06, "loss": 0.3323, "step": 32761 }, { "epoch": 2.434931252322557, "grad_norm": 2.5883137777191467, "learning_rate": 1.8037523090182418e-06, "loss": 0.3164, "step": 32762 }, { "epoch": 2.435005574136009, "grad_norm": 3.822780504928916, "learning_rate": 1.803292658050978e-06, "loss": 0.3044, "step": 32763 }, { "epoch": 2.4350798959494613, "grad_norm": 2.9214201073794697, "learning_rate": 1.802833059853687e-06, "loss": 0.3515, "step": 32764 }, { "epoch": 2.4351542177629133, "grad_norm": 2.7286094184132375, "learning_rate": 1.8023735144293353e-06, "loss": 0.3317, "step": 32765 }, { "epoch": 2.4352285395763658, "grad_norm": 2.4343093563558225, "learning_rate": 1.801914021780874e-06, "loss": 0.2448, "step": 32766 }, { "epoch": 2.4353028613898178, "grad_norm": 2.32634249734389, "learning_rate": 1.8014545819112682e-06, "loss": 0.2809, "step": 32767 }, { "epoch": 2.4353771832032702, "grad_norm": 2.699867416808902, "learning_rate": 1.8009951948234716e-06, "loss": 0.2969, "step": 32768 }, { "epoch": 2.4354515050167223, "grad_norm": 3.324961956687857, "learning_rate": 1.8005358605204427e-06, "loss": 0.3626, "step": 32769 }, { "epoch": 2.4355258268301747, "grad_norm": 2.3421250972890477, "learning_rate": 1.800076579005139e-06, "loss": 0.3122, "step": 32770 }, { "epoch": 2.4356001486436267, "grad_norm": 2.058877624478202, "learning_rate": 1.7996173502805137e-06, "loss": 0.2714, "step": 32771 }, { "epoch": 2.435674470457079, "grad_norm": 2.3344884391047707, "learning_rate": 1.799158174349529e-06, "loss": 0.316, "step": 32772 }, { "epoch": 2.435748792270531, "grad_norm": 1.9099399214654762, "learning_rate": 1.7986990512151393e-06, "loss": 0.2134, "step": 32773 }, { "epoch": 2.4358231140839837, "grad_norm": 1.8765993211953633, "learning_rate": 1.7982399808802963e-06, "loss": 0.2144, "step": 32774 }, { "epoch": 2.435897435897436, "grad_norm": 3.2477419560434653, "learning_rate": 1.7977809633479614e-06, "loss": 0.3305, "step": 32775 }, { "epoch": 2.435971757710888, "grad_norm": 1.9595957791178962, "learning_rate": 1.7973219986210843e-06, "loss": 0.2825, "step": 32776 }, { "epoch": 2.43604607952434, "grad_norm": 2.360047362013444, "learning_rate": 1.7968630867026249e-06, "loss": 0.2742, "step": 32777 }, { "epoch": 2.4361204013377926, "grad_norm": 2.614593362962774, "learning_rate": 1.7964042275955362e-06, "loss": 0.2763, "step": 32778 }, { "epoch": 2.436194723151245, "grad_norm": 2.2389901875893528, "learning_rate": 1.7959454213027683e-06, "loss": 0.2559, "step": 32779 }, { "epoch": 2.436269044964697, "grad_norm": 2.547535212648813, "learning_rate": 1.7954866678272831e-06, "loss": 0.3205, "step": 32780 }, { "epoch": 2.4363433667781496, "grad_norm": 3.214418791980211, "learning_rate": 1.7950279671720238e-06, "loss": 0.2428, "step": 32781 }, { "epoch": 2.4364176885916016, "grad_norm": 2.7623167716942194, "learning_rate": 1.7945693193399515e-06, "loss": 0.2942, "step": 32782 }, { "epoch": 2.436492010405054, "grad_norm": 3.948982979402089, "learning_rate": 1.794110724334014e-06, "loss": 0.2175, "step": 32783 }, { "epoch": 2.436566332218506, "grad_norm": 2.5262656635244367, "learning_rate": 1.7936521821571685e-06, "loss": 0.2744, "step": 32784 }, { "epoch": 2.4366406540319585, "grad_norm": 2.202601836651571, "learning_rate": 1.793193692812364e-06, "loss": 0.3116, "step": 32785 }, { "epoch": 2.4367149758454105, "grad_norm": 2.797988995032767, "learning_rate": 1.7927352563025502e-06, "loss": 0.3263, "step": 32786 }, { "epoch": 2.436789297658863, "grad_norm": 3.494778877204903, "learning_rate": 1.792276872630684e-06, "loss": 0.3135, "step": 32787 }, { "epoch": 2.436863619472315, "grad_norm": 2.075851695064546, "learning_rate": 1.7918185417997102e-06, "loss": 0.2534, "step": 32788 }, { "epoch": 2.4369379412857675, "grad_norm": 2.06438886490989, "learning_rate": 1.7913602638125848e-06, "loss": 0.2323, "step": 32789 }, { "epoch": 2.4370122630992195, "grad_norm": 2.2402943436969425, "learning_rate": 1.7909020386722575e-06, "loss": 0.3036, "step": 32790 }, { "epoch": 2.437086584912672, "grad_norm": 2.6502519615694116, "learning_rate": 1.790443866381676e-06, "loss": 0.2788, "step": 32791 }, { "epoch": 2.437160906726124, "grad_norm": 1.8757281587884183, "learning_rate": 1.789985746943791e-06, "loss": 0.2371, "step": 32792 }, { "epoch": 2.4372352285395764, "grad_norm": 1.8791222578046067, "learning_rate": 1.7895276803615503e-06, "loss": 0.199, "step": 32793 }, { "epoch": 2.4373095503530284, "grad_norm": 2.818798966782136, "learning_rate": 1.7890696666379058e-06, "loss": 0.3333, "step": 32794 }, { "epoch": 2.437383872166481, "grad_norm": 1.9880883560749072, "learning_rate": 1.7886117057758046e-06, "loss": 0.2205, "step": 32795 }, { "epoch": 2.437458193979933, "grad_norm": 2.3334117597353066, "learning_rate": 1.7881537977781926e-06, "loss": 0.3448, "step": 32796 }, { "epoch": 2.4375325157933854, "grad_norm": 2.251173691956775, "learning_rate": 1.7876959426480233e-06, "loss": 0.2702, "step": 32797 }, { "epoch": 2.437606837606838, "grad_norm": 2.384039744772792, "learning_rate": 1.787238140388239e-06, "loss": 0.3219, "step": 32798 }, { "epoch": 2.43768115942029, "grad_norm": 2.626851942316199, "learning_rate": 1.7867803910017922e-06, "loss": 0.2286, "step": 32799 }, { "epoch": 2.437755481233742, "grad_norm": 2.3510262687651906, "learning_rate": 1.7863226944916267e-06, "loss": 0.2209, "step": 32800 }, { "epoch": 2.4378298030471943, "grad_norm": 2.937070321830196, "learning_rate": 1.7858650508606867e-06, "loss": 0.2462, "step": 32801 }, { "epoch": 2.437904124860647, "grad_norm": 2.903274626215367, "learning_rate": 1.7854074601119276e-06, "loss": 0.3562, "step": 32802 }, { "epoch": 2.437978446674099, "grad_norm": 2.1641466184752445, "learning_rate": 1.7849499222482824e-06, "loss": 0.2525, "step": 32803 }, { "epoch": 2.4380527684875513, "grad_norm": 2.432177772323654, "learning_rate": 1.784492437272707e-06, "loss": 0.3274, "step": 32804 }, { "epoch": 2.4381270903010033, "grad_norm": 2.408893113294315, "learning_rate": 1.7840350051881394e-06, "loss": 0.2963, "step": 32805 }, { "epoch": 2.4382014121144557, "grad_norm": 2.474406563540429, "learning_rate": 1.7835776259975312e-06, "loss": 0.2878, "step": 32806 }, { "epoch": 2.4382757339279078, "grad_norm": 2.0641906349166796, "learning_rate": 1.7831202997038244e-06, "loss": 0.3115, "step": 32807 }, { "epoch": 2.43835005574136, "grad_norm": 1.8404038933562816, "learning_rate": 1.7826630263099599e-06, "loss": 0.209, "step": 32808 }, { "epoch": 2.4384243775548122, "grad_norm": 1.7023890701114424, "learning_rate": 1.7822058058188874e-06, "loss": 0.1894, "step": 32809 }, { "epoch": 2.4384986993682647, "grad_norm": 2.8517762950860064, "learning_rate": 1.7817486382335446e-06, "loss": 0.3023, "step": 32810 }, { "epoch": 2.4385730211817167, "grad_norm": 2.2264630864732182, "learning_rate": 1.7812915235568795e-06, "loss": 0.2819, "step": 32811 }, { "epoch": 2.438647342995169, "grad_norm": 2.8935659066330444, "learning_rate": 1.7808344617918338e-06, "loss": 0.3019, "step": 32812 }, { "epoch": 2.438721664808621, "grad_norm": 3.4041590199184375, "learning_rate": 1.7803774529413476e-06, "loss": 0.2874, "step": 32813 }, { "epoch": 2.4387959866220736, "grad_norm": 2.3734776210498776, "learning_rate": 1.7799204970083683e-06, "loss": 0.3234, "step": 32814 }, { "epoch": 2.4388703084355257, "grad_norm": 2.216710886678767, "learning_rate": 1.7794635939958304e-06, "loss": 0.2865, "step": 32815 }, { "epoch": 2.438944630248978, "grad_norm": 2.1193087209442925, "learning_rate": 1.7790067439066817e-06, "loss": 0.2714, "step": 32816 }, { "epoch": 2.43901895206243, "grad_norm": 1.8007374141702344, "learning_rate": 1.7785499467438604e-06, "loss": 0.2086, "step": 32817 }, { "epoch": 2.4390932738758826, "grad_norm": 2.9513119505524026, "learning_rate": 1.7780932025103059e-06, "loss": 0.265, "step": 32818 }, { "epoch": 2.4391675956893346, "grad_norm": 2.505375643672994, "learning_rate": 1.7776365112089644e-06, "loss": 0.2623, "step": 32819 }, { "epoch": 2.439241917502787, "grad_norm": 2.9164574936954955, "learning_rate": 1.777179872842768e-06, "loss": 0.3225, "step": 32820 }, { "epoch": 2.4393162393162395, "grad_norm": 2.212188471320685, "learning_rate": 1.7767232874146645e-06, "loss": 0.2298, "step": 32821 }, { "epoch": 2.4393905611296915, "grad_norm": 2.511895687783335, "learning_rate": 1.776266754927589e-06, "loss": 0.2733, "step": 32822 }, { "epoch": 2.4394648829431436, "grad_norm": 2.555538265809521, "learning_rate": 1.7758102753844796e-06, "loss": 0.2874, "step": 32823 }, { "epoch": 2.439539204756596, "grad_norm": 2.45976640282895, "learning_rate": 1.7753538487882794e-06, "loss": 0.221, "step": 32824 }, { "epoch": 2.4396135265700485, "grad_norm": 2.071530697746268, "learning_rate": 1.7748974751419246e-06, "loss": 0.2277, "step": 32825 }, { "epoch": 2.4396878483835005, "grad_norm": 2.402464271660278, "learning_rate": 1.774441154448353e-06, "loss": 0.2895, "step": 32826 }, { "epoch": 2.439762170196953, "grad_norm": 2.3126019565536002, "learning_rate": 1.7739848867104992e-06, "loss": 0.2771, "step": 32827 }, { "epoch": 2.439836492010405, "grad_norm": 2.386165620038547, "learning_rate": 1.7735286719313072e-06, "loss": 0.3194, "step": 32828 }, { "epoch": 2.4399108138238574, "grad_norm": 2.459530210328632, "learning_rate": 1.7730725101137102e-06, "loss": 0.3335, "step": 32829 }, { "epoch": 2.4399851356373095, "grad_norm": 2.634909937964112, "learning_rate": 1.772616401260644e-06, "loss": 0.2874, "step": 32830 }, { "epoch": 2.440059457450762, "grad_norm": 2.0702673300477006, "learning_rate": 1.772160345375048e-06, "loss": 0.2281, "step": 32831 }, { "epoch": 2.440133779264214, "grad_norm": 2.467180196153466, "learning_rate": 1.7717043424598546e-06, "loss": 0.2512, "step": 32832 }, { "epoch": 2.4402081010776664, "grad_norm": 2.4192006698824846, "learning_rate": 1.7712483925180046e-06, "loss": 0.2777, "step": 32833 }, { "epoch": 2.4402824228911184, "grad_norm": 1.620615471934006, "learning_rate": 1.7707924955524292e-06, "loss": 0.1704, "step": 32834 }, { "epoch": 2.440356744704571, "grad_norm": 2.220660341131558, "learning_rate": 1.7703366515660635e-06, "loss": 0.2965, "step": 32835 }, { "epoch": 2.440431066518023, "grad_norm": 2.434391346345475, "learning_rate": 1.7698808605618478e-06, "loss": 0.3669, "step": 32836 }, { "epoch": 2.4405053883314753, "grad_norm": 2.120369010657203, "learning_rate": 1.7694251225427072e-06, "loss": 0.3316, "step": 32837 }, { "epoch": 2.4405797101449274, "grad_norm": 2.3442575427075325, "learning_rate": 1.768969437511584e-06, "loss": 0.2895, "step": 32838 }, { "epoch": 2.44065403195838, "grad_norm": 2.679755736021047, "learning_rate": 1.768513805471408e-06, "loss": 0.2157, "step": 32839 }, { "epoch": 2.4407283537718323, "grad_norm": 2.2827902651072, "learning_rate": 1.76805822642511e-06, "loss": 0.274, "step": 32840 }, { "epoch": 2.4408026755852843, "grad_norm": 2.1063629642313213, "learning_rate": 1.767602700375628e-06, "loss": 0.2064, "step": 32841 }, { "epoch": 2.4408769973987363, "grad_norm": 2.123662971046743, "learning_rate": 1.7671472273258916e-06, "loss": 0.2484, "step": 32842 }, { "epoch": 2.4409513192121888, "grad_norm": 2.3303668943331535, "learning_rate": 1.7666918072788352e-06, "loss": 0.2456, "step": 32843 }, { "epoch": 2.4410256410256412, "grad_norm": 2.3058666730338184, "learning_rate": 1.7662364402373911e-06, "loss": 0.2744, "step": 32844 }, { "epoch": 2.4410999628390933, "grad_norm": 2.35614170377852, "learning_rate": 1.7657811262044867e-06, "loss": 0.2703, "step": 32845 }, { "epoch": 2.4411742846525457, "grad_norm": 2.7021561801171385, "learning_rate": 1.7653258651830584e-06, "loss": 0.2562, "step": 32846 }, { "epoch": 2.4412486064659977, "grad_norm": 1.9860007948871883, "learning_rate": 1.764870657176032e-06, "loss": 0.1938, "step": 32847 }, { "epoch": 2.44132292827945, "grad_norm": 2.5249608372149446, "learning_rate": 1.7644155021863474e-06, "loss": 0.2798, "step": 32848 }, { "epoch": 2.441397250092902, "grad_norm": 2.5799466038815426, "learning_rate": 1.7639604002169219e-06, "loss": 0.3805, "step": 32849 }, { "epoch": 2.4414715719063547, "grad_norm": 2.2626125314646957, "learning_rate": 1.7635053512706956e-06, "loss": 0.3169, "step": 32850 }, { "epoch": 2.4415458937198067, "grad_norm": 2.0865844918748526, "learning_rate": 1.763050355350594e-06, "loss": 0.2157, "step": 32851 }, { "epoch": 2.441620215533259, "grad_norm": 2.812021437572092, "learning_rate": 1.7625954124595445e-06, "loss": 0.3454, "step": 32852 }, { "epoch": 2.441694537346711, "grad_norm": 2.0161268022300867, "learning_rate": 1.7621405226004806e-06, "loss": 0.2818, "step": 32853 }, { "epoch": 2.4417688591601636, "grad_norm": 2.121595707189651, "learning_rate": 1.7616856857763286e-06, "loss": 0.2254, "step": 32854 }, { "epoch": 2.4418431809736156, "grad_norm": 1.5829584828415317, "learning_rate": 1.7612309019900142e-06, "loss": 0.192, "step": 32855 }, { "epoch": 2.441917502787068, "grad_norm": 3.4460539932366054, "learning_rate": 1.7607761712444704e-06, "loss": 0.3677, "step": 32856 }, { "epoch": 2.44199182460052, "grad_norm": 1.9794455563918891, "learning_rate": 1.7603214935426193e-06, "loss": 0.2581, "step": 32857 }, { "epoch": 2.4420661464139726, "grad_norm": 2.1270735602011213, "learning_rate": 1.7598668688873942e-06, "loss": 0.2861, "step": 32858 }, { "epoch": 2.4421404682274246, "grad_norm": 2.2035475564775533, "learning_rate": 1.7594122972817186e-06, "loss": 0.2935, "step": 32859 }, { "epoch": 2.442214790040877, "grad_norm": 1.9096308540084135, "learning_rate": 1.7589577787285184e-06, "loss": 0.2711, "step": 32860 }, { "epoch": 2.442289111854329, "grad_norm": 2.0383525764057038, "learning_rate": 1.7585033132307206e-06, "loss": 0.2856, "step": 32861 }, { "epoch": 2.4423634336677815, "grad_norm": 1.838645458151073, "learning_rate": 1.7580489007912483e-06, "loss": 0.2482, "step": 32862 }, { "epoch": 2.442437755481234, "grad_norm": 1.909271084269022, "learning_rate": 1.7575945414130314e-06, "loss": 0.2481, "step": 32863 }, { "epoch": 2.442512077294686, "grad_norm": 2.0580109531145303, "learning_rate": 1.757140235098992e-06, "loss": 0.2739, "step": 32864 }, { "epoch": 2.442586399108138, "grad_norm": 2.291666014738921, "learning_rate": 1.7566859818520577e-06, "loss": 0.2517, "step": 32865 }, { "epoch": 2.4426607209215905, "grad_norm": 1.9728966459267339, "learning_rate": 1.7562317816751518e-06, "loss": 0.2517, "step": 32866 }, { "epoch": 2.442735042735043, "grad_norm": 2.4833134142275157, "learning_rate": 1.7557776345711951e-06, "loss": 0.2783, "step": 32867 }, { "epoch": 2.442809364548495, "grad_norm": 2.1198128670817455, "learning_rate": 1.7553235405431169e-06, "loss": 0.2376, "step": 32868 }, { "epoch": 2.4428836863619474, "grad_norm": 2.5171184966246174, "learning_rate": 1.7548694995938354e-06, "loss": 0.2804, "step": 32869 }, { "epoch": 2.4429580081753994, "grad_norm": 2.4315149595311145, "learning_rate": 1.7544155117262807e-06, "loss": 0.2907, "step": 32870 }, { "epoch": 2.443032329988852, "grad_norm": 2.414038920914278, "learning_rate": 1.753961576943367e-06, "loss": 0.3008, "step": 32871 }, { "epoch": 2.443106651802304, "grad_norm": 2.6223141430702843, "learning_rate": 1.7535076952480234e-06, "loss": 0.2782, "step": 32872 }, { "epoch": 2.4431809736157564, "grad_norm": 2.3969469582819443, "learning_rate": 1.7530538666431684e-06, "loss": 0.2377, "step": 32873 }, { "epoch": 2.4432552954292084, "grad_norm": 5.746036072279281, "learning_rate": 1.7526000911317231e-06, "loss": 0.2009, "step": 32874 }, { "epoch": 2.443329617242661, "grad_norm": 2.617509452470317, "learning_rate": 1.7521463687166119e-06, "loss": 0.3619, "step": 32875 }, { "epoch": 2.443403939056113, "grad_norm": 2.288126728807726, "learning_rate": 1.7516926994007554e-06, "loss": 0.2748, "step": 32876 }, { "epoch": 2.4434782608695653, "grad_norm": 1.9707183756695963, "learning_rate": 1.7512390831870707e-06, "loss": 0.2619, "step": 32877 }, { "epoch": 2.4435525826830173, "grad_norm": 2.5557763122929176, "learning_rate": 1.750785520078483e-06, "loss": 0.3231, "step": 32878 }, { "epoch": 2.44362690449647, "grad_norm": 2.566765241972108, "learning_rate": 1.7503320100779076e-06, "loss": 0.2677, "step": 32879 }, { "epoch": 2.443701226309922, "grad_norm": 1.8159574194665877, "learning_rate": 1.74987855318827e-06, "loss": 0.2081, "step": 32880 }, { "epoch": 2.4437755481233743, "grad_norm": 2.418914984209131, "learning_rate": 1.7494251494124847e-06, "loss": 0.2893, "step": 32881 }, { "epoch": 2.4438498699368263, "grad_norm": 2.5140720556300558, "learning_rate": 1.7489717987534738e-06, "loss": 0.3601, "step": 32882 }, { "epoch": 2.4439241917502788, "grad_norm": 2.4627023457605297, "learning_rate": 1.7485185012141536e-06, "loss": 0.2812, "step": 32883 }, { "epoch": 2.4439985135637308, "grad_norm": 2.4355515637158436, "learning_rate": 1.7480652567974398e-06, "loss": 0.2629, "step": 32884 }, { "epoch": 2.4440728353771832, "grad_norm": 2.0488931538271076, "learning_rate": 1.7476120655062579e-06, "loss": 0.2272, "step": 32885 }, { "epoch": 2.4441471571906357, "grad_norm": 2.088249670903653, "learning_rate": 1.7471589273435173e-06, "loss": 0.2458, "step": 32886 }, { "epoch": 2.4442214790040877, "grad_norm": 1.8664870452141225, "learning_rate": 1.746705842312143e-06, "loss": 0.219, "step": 32887 }, { "epoch": 2.4442958008175397, "grad_norm": 1.910186200533771, "learning_rate": 1.7462528104150479e-06, "loss": 0.2767, "step": 32888 }, { "epoch": 2.444370122630992, "grad_norm": 2.8392753466289693, "learning_rate": 1.745799831655146e-06, "loss": 0.2696, "step": 32889 }, { "epoch": 2.4444444444444446, "grad_norm": 2.7689157884280147, "learning_rate": 1.745346906035359e-06, "loss": 0.2494, "step": 32890 }, { "epoch": 2.4445187662578967, "grad_norm": 2.176309854062638, "learning_rate": 1.7448940335585984e-06, "loss": 0.1982, "step": 32891 }, { "epoch": 2.444593088071349, "grad_norm": 2.7186548339627357, "learning_rate": 1.7444412142277845e-06, "loss": 0.2786, "step": 32892 }, { "epoch": 2.444667409884801, "grad_norm": 2.9815839546059415, "learning_rate": 1.743988448045828e-06, "loss": 0.3604, "step": 32893 }, { "epoch": 2.4447417316982536, "grad_norm": 2.43014405477399, "learning_rate": 1.7435357350156469e-06, "loss": 0.2504, "step": 32894 }, { "epoch": 2.4448160535117056, "grad_norm": 2.676624410583099, "learning_rate": 1.7430830751401539e-06, "loss": 0.2943, "step": 32895 }, { "epoch": 2.444890375325158, "grad_norm": 2.0927442582632376, "learning_rate": 1.7426304684222616e-06, "loss": 0.189, "step": 32896 }, { "epoch": 2.44496469713861, "grad_norm": 2.4738055236844922, "learning_rate": 1.7421779148648877e-06, "loss": 0.3447, "step": 32897 }, { "epoch": 2.4450390189520625, "grad_norm": 2.44046863450445, "learning_rate": 1.741725414470945e-06, "loss": 0.283, "step": 32898 }, { "epoch": 2.4451133407655146, "grad_norm": 2.8824885952542867, "learning_rate": 1.7412729672433427e-06, "loss": 0.4043, "step": 32899 }, { "epoch": 2.445187662578967, "grad_norm": 4.909189374430005, "learning_rate": 1.740820573184998e-06, "loss": 0.241, "step": 32900 }, { "epoch": 2.445261984392419, "grad_norm": 2.8482213382023125, "learning_rate": 1.7403682322988202e-06, "loss": 0.2964, "step": 32901 }, { "epoch": 2.4453363062058715, "grad_norm": 2.4620509805618704, "learning_rate": 1.7399159445877255e-06, "loss": 0.3474, "step": 32902 }, { "epoch": 2.4454106280193235, "grad_norm": 2.401720830468998, "learning_rate": 1.739463710054623e-06, "loss": 0.3212, "step": 32903 }, { "epoch": 2.445484949832776, "grad_norm": 1.828459967291459, "learning_rate": 1.7390115287024257e-06, "loss": 0.2215, "step": 32904 }, { "epoch": 2.445559271646228, "grad_norm": 2.057743923952203, "learning_rate": 1.7385594005340423e-06, "loss": 0.2113, "step": 32905 }, { "epoch": 2.4456335934596805, "grad_norm": 2.235785562587599, "learning_rate": 1.7381073255523827e-06, "loss": 0.3115, "step": 32906 }, { "epoch": 2.4457079152731325, "grad_norm": 2.0307960808764207, "learning_rate": 1.7376553037603627e-06, "loss": 0.1975, "step": 32907 }, { "epoch": 2.445782237086585, "grad_norm": 2.8908834067811826, "learning_rate": 1.7372033351608852e-06, "loss": 0.211, "step": 32908 }, { "epoch": 2.4458565589000374, "grad_norm": 3.273669089240175, "learning_rate": 1.736751419756868e-06, "loss": 0.3515, "step": 32909 }, { "epoch": 2.4459308807134894, "grad_norm": 2.145032604135772, "learning_rate": 1.7362995575512154e-06, "loss": 0.2302, "step": 32910 }, { "epoch": 2.4460052025269414, "grad_norm": 2.2782264267190797, "learning_rate": 1.7358477485468351e-06, "loss": 0.3038, "step": 32911 }, { "epoch": 2.446079524340394, "grad_norm": 1.988942232014425, "learning_rate": 1.7353959927466413e-06, "loss": 0.2125, "step": 32912 }, { "epoch": 2.4461538461538463, "grad_norm": 2.294439721325118, "learning_rate": 1.7349442901535362e-06, "loss": 0.291, "step": 32913 }, { "epoch": 2.4462281679672984, "grad_norm": 1.5412020333604846, "learning_rate": 1.7344926407704333e-06, "loss": 0.2007, "step": 32914 }, { "epoch": 2.446302489780751, "grad_norm": 2.200438825973273, "learning_rate": 1.7340410446002377e-06, "loss": 0.3255, "step": 32915 }, { "epoch": 2.446376811594203, "grad_norm": 2.923935275484121, "learning_rate": 1.7335895016458581e-06, "loss": 0.3359, "step": 32916 }, { "epoch": 2.4464511334076553, "grad_norm": 2.574043617146546, "learning_rate": 1.7331380119101993e-06, "loss": 0.3047, "step": 32917 }, { "epoch": 2.4465254552211073, "grad_norm": 2.676465660006286, "learning_rate": 1.7326865753961663e-06, "loss": 0.3008, "step": 32918 }, { "epoch": 2.4465997770345598, "grad_norm": 1.6643381231086338, "learning_rate": 1.7322351921066716e-06, "loss": 0.1943, "step": 32919 }, { "epoch": 2.446674098848012, "grad_norm": 1.60138152174892, "learning_rate": 1.731783862044618e-06, "loss": 0.1862, "step": 32920 }, { "epoch": 2.4467484206614643, "grad_norm": 1.915940166430023, "learning_rate": 1.731332585212907e-06, "loss": 0.265, "step": 32921 }, { "epoch": 2.4468227424749163, "grad_norm": 2.8907619467534893, "learning_rate": 1.7308813616144515e-06, "loss": 0.3341, "step": 32922 }, { "epoch": 2.4468970642883687, "grad_norm": 2.227286156158959, "learning_rate": 1.7304301912521493e-06, "loss": 0.2597, "step": 32923 }, { "epoch": 2.4469713861018207, "grad_norm": 2.343135350592192, "learning_rate": 1.7299790741289123e-06, "loss": 0.3312, "step": 32924 }, { "epoch": 2.447045707915273, "grad_norm": 2.0959114450478467, "learning_rate": 1.7295280102476397e-06, "loss": 0.2235, "step": 32925 }, { "epoch": 2.447120029728725, "grad_norm": 1.915229290390319, "learning_rate": 1.7290769996112344e-06, "loss": 0.2326, "step": 32926 }, { "epoch": 2.4471943515421777, "grad_norm": 2.538885514828896, "learning_rate": 1.7286260422226075e-06, "loss": 0.2544, "step": 32927 }, { "epoch": 2.4472686733556297, "grad_norm": 2.766827588359517, "learning_rate": 1.7281751380846523e-06, "loss": 0.3664, "step": 32928 }, { "epoch": 2.447342995169082, "grad_norm": 2.112663959271545, "learning_rate": 1.7277242872002774e-06, "loss": 0.2488, "step": 32929 }, { "epoch": 2.447417316982534, "grad_norm": 2.3445453155343663, "learning_rate": 1.727273489572383e-06, "loss": 0.2399, "step": 32930 }, { "epoch": 2.4474916387959866, "grad_norm": 2.151188371004384, "learning_rate": 1.7268227452038755e-06, "loss": 0.2525, "step": 32931 }, { "epoch": 2.447565960609439, "grad_norm": 1.9991385613688837, "learning_rate": 1.7263720540976535e-06, "loss": 0.2554, "step": 32932 }, { "epoch": 2.447640282422891, "grad_norm": 2.0544792186881478, "learning_rate": 1.7259214162566163e-06, "loss": 0.2018, "step": 32933 }, { "epoch": 2.447714604236343, "grad_norm": 2.1404182590502896, "learning_rate": 1.72547083168367e-06, "loss": 0.2434, "step": 32934 }, { "epoch": 2.4477889260497956, "grad_norm": 2.870887028533064, "learning_rate": 1.72502030038171e-06, "loss": 0.2736, "step": 32935 }, { "epoch": 2.447863247863248, "grad_norm": 2.2893173125416486, "learning_rate": 1.7245698223536434e-06, "loss": 0.2495, "step": 32936 }, { "epoch": 2.4479375696767, "grad_norm": 2.437843983320183, "learning_rate": 1.7241193976023674e-06, "loss": 0.2628, "step": 32937 }, { "epoch": 2.4480118914901525, "grad_norm": 2.7562097659689027, "learning_rate": 1.7236690261307808e-06, "loss": 0.2992, "step": 32938 }, { "epoch": 2.4480862133036045, "grad_norm": 2.7351516359246015, "learning_rate": 1.7232187079417828e-06, "loss": 0.3136, "step": 32939 }, { "epoch": 2.448160535117057, "grad_norm": 2.9984589175618632, "learning_rate": 1.722768443038272e-06, "loss": 0.3042, "step": 32940 }, { "epoch": 2.448234856930509, "grad_norm": 2.117397284075836, "learning_rate": 1.72231823142315e-06, "loss": 0.2159, "step": 32941 }, { "epoch": 2.4483091787439615, "grad_norm": 2.604358253162395, "learning_rate": 1.7218680730993143e-06, "loss": 0.2306, "step": 32942 }, { "epoch": 2.4483835005574135, "grad_norm": 1.893694799944065, "learning_rate": 1.7214179680696597e-06, "loss": 0.2259, "step": 32943 }, { "epoch": 2.448457822370866, "grad_norm": 2.481292449194374, "learning_rate": 1.7209679163370897e-06, "loss": 0.2205, "step": 32944 }, { "epoch": 2.448532144184318, "grad_norm": 2.094667494498057, "learning_rate": 1.7205179179044963e-06, "loss": 0.2387, "step": 32945 }, { "epoch": 2.4486064659977704, "grad_norm": 2.422464677643755, "learning_rate": 1.7200679727747805e-06, "loss": 0.2173, "step": 32946 }, { "epoch": 2.4486807878112224, "grad_norm": 1.8799372493972066, "learning_rate": 1.7196180809508379e-06, "loss": 0.1998, "step": 32947 }, { "epoch": 2.448755109624675, "grad_norm": 2.338551990524804, "learning_rate": 1.7191682424355615e-06, "loss": 0.2288, "step": 32948 }, { "epoch": 2.448829431438127, "grad_norm": 2.786874557764622, "learning_rate": 1.718718457231856e-06, "loss": 0.3613, "step": 32949 }, { "epoch": 2.4489037532515794, "grad_norm": 2.60021715952975, "learning_rate": 1.7182687253426056e-06, "loss": 0.3139, "step": 32950 }, { "epoch": 2.4489780750650314, "grad_norm": 3.523487861118974, "learning_rate": 1.7178190467707146e-06, "loss": 0.3671, "step": 32951 }, { "epoch": 2.449052396878484, "grad_norm": 2.6297110765210854, "learning_rate": 1.7173694215190728e-06, "loss": 0.2827, "step": 32952 }, { "epoch": 2.449126718691936, "grad_norm": 2.1746714015067425, "learning_rate": 1.7169198495905782e-06, "loss": 0.2189, "step": 32953 }, { "epoch": 2.4492010405053883, "grad_norm": 2.4201858191212033, "learning_rate": 1.7164703309881249e-06, "loss": 0.2822, "step": 32954 }, { "epoch": 2.449275362318841, "grad_norm": 2.433184662036202, "learning_rate": 1.7160208657146027e-06, "loss": 0.2985, "step": 32955 }, { "epoch": 2.449349684132293, "grad_norm": 2.203031684657579, "learning_rate": 1.7155714537729106e-06, "loss": 0.3305, "step": 32956 }, { "epoch": 2.449424005945745, "grad_norm": 2.250052263161187, "learning_rate": 1.7151220951659375e-06, "loss": 0.2982, "step": 32957 }, { "epoch": 2.4494983277591973, "grad_norm": 2.5705145646165968, "learning_rate": 1.714672789896581e-06, "loss": 0.3368, "step": 32958 }, { "epoch": 2.4495726495726498, "grad_norm": 2.8358670388785896, "learning_rate": 1.714223537967732e-06, "loss": 0.3338, "step": 32959 }, { "epoch": 2.4496469713861018, "grad_norm": 2.4476403890570246, "learning_rate": 1.713774339382278e-06, "loss": 0.2788, "step": 32960 }, { "epoch": 2.4497212931995542, "grad_norm": 2.1401259573714486, "learning_rate": 1.7133251941431205e-06, "loss": 0.3196, "step": 32961 }, { "epoch": 2.4497956150130062, "grad_norm": 1.9219141249734528, "learning_rate": 1.7128761022531403e-06, "loss": 0.2524, "step": 32962 }, { "epoch": 2.4498699368264587, "grad_norm": 2.6533357772947483, "learning_rate": 1.7124270637152362e-06, "loss": 0.2543, "step": 32963 }, { "epoch": 2.4499442586399107, "grad_norm": 2.6294112376033296, "learning_rate": 1.711978078532297e-06, "loss": 0.2794, "step": 32964 }, { "epoch": 2.450018580453363, "grad_norm": 1.8293165155934685, "learning_rate": 1.7115291467072104e-06, "loss": 0.2419, "step": 32965 }, { "epoch": 2.450092902266815, "grad_norm": 2.335538821019203, "learning_rate": 1.7110802682428717e-06, "loss": 0.2238, "step": 32966 }, { "epoch": 2.4501672240802677, "grad_norm": 3.1653540661645816, "learning_rate": 1.7106314431421656e-06, "loss": 0.4394, "step": 32967 }, { "epoch": 2.4502415458937197, "grad_norm": 2.2472247326399515, "learning_rate": 1.7101826714079862e-06, "loss": 0.2676, "step": 32968 }, { "epoch": 2.450315867707172, "grad_norm": 3.1782535943924275, "learning_rate": 1.709733953043221e-06, "loss": 0.3046, "step": 32969 }, { "epoch": 2.450390189520624, "grad_norm": 2.0381266183897475, "learning_rate": 1.709285288050756e-06, "loss": 0.219, "step": 32970 }, { "epoch": 2.4504645113340766, "grad_norm": 1.865838279214814, "learning_rate": 1.708836676433484e-06, "loss": 0.1865, "step": 32971 }, { "epoch": 2.4505388331475286, "grad_norm": 2.349322722962081, "learning_rate": 1.708388118194292e-06, "loss": 0.2819, "step": 32972 }, { "epoch": 2.450613154960981, "grad_norm": 2.407929089804469, "learning_rate": 1.707939613336067e-06, "loss": 0.2859, "step": 32973 }, { "epoch": 2.450687476774433, "grad_norm": 2.125258344539706, "learning_rate": 1.707491161861693e-06, "loss": 0.309, "step": 32974 }, { "epoch": 2.4507617985878856, "grad_norm": 2.7482327732329015, "learning_rate": 1.707042763774065e-06, "loss": 0.3554, "step": 32975 }, { "epoch": 2.4508361204013376, "grad_norm": 2.1408234515819777, "learning_rate": 1.706594419076064e-06, "loss": 0.2123, "step": 32976 }, { "epoch": 2.45091044221479, "grad_norm": 2.0825807146827264, "learning_rate": 1.706146127770576e-06, "loss": 0.289, "step": 32977 }, { "epoch": 2.4509847640282425, "grad_norm": 2.32642727911975, "learning_rate": 1.7056978898604903e-06, "loss": 0.3232, "step": 32978 }, { "epoch": 2.4510590858416945, "grad_norm": 2.0224158099835794, "learning_rate": 1.7052497053486927e-06, "loss": 0.2144, "step": 32979 }, { "epoch": 2.451133407655147, "grad_norm": 2.195783466393889, "learning_rate": 1.704801574238063e-06, "loss": 0.3282, "step": 32980 }, { "epoch": 2.451207729468599, "grad_norm": 2.396011823727958, "learning_rate": 1.7043534965314946e-06, "loss": 0.3374, "step": 32981 }, { "epoch": 2.4512820512820515, "grad_norm": 2.037975774132536, "learning_rate": 1.7039054722318637e-06, "loss": 0.2782, "step": 32982 }, { "epoch": 2.4513563730955035, "grad_norm": 2.897752572113504, "learning_rate": 1.7034575013420651e-06, "loss": 0.3012, "step": 32983 }, { "epoch": 2.451430694908956, "grad_norm": 2.3380358581640475, "learning_rate": 1.7030095838649718e-06, "loss": 0.3272, "step": 32984 }, { "epoch": 2.451505016722408, "grad_norm": 2.9334033209792074, "learning_rate": 1.7025617198034738e-06, "loss": 0.3685, "step": 32985 }, { "epoch": 2.4515793385358604, "grad_norm": 2.2092286805199297, "learning_rate": 1.7021139091604532e-06, "loss": 0.2483, "step": 32986 }, { "epoch": 2.4516536603493124, "grad_norm": 3.217282902022471, "learning_rate": 1.701666151938791e-06, "loss": 0.3748, "step": 32987 }, { "epoch": 2.451727982162765, "grad_norm": 1.9246247849784959, "learning_rate": 1.701218448141374e-06, "loss": 0.2401, "step": 32988 }, { "epoch": 2.451802303976217, "grad_norm": 2.6090761888354232, "learning_rate": 1.7007707977710786e-06, "loss": 0.2992, "step": 32989 }, { "epoch": 2.4518766257896694, "grad_norm": 1.7270775643960232, "learning_rate": 1.7003232008307936e-06, "loss": 0.2108, "step": 32990 }, { "epoch": 2.4519509476031214, "grad_norm": 3.3195787503796, "learning_rate": 1.6998756573233965e-06, "loss": 0.3127, "step": 32991 }, { "epoch": 2.452025269416574, "grad_norm": 2.5255541428696917, "learning_rate": 1.6994281672517666e-06, "loss": 0.2209, "step": 32992 }, { "epoch": 2.452099591230026, "grad_norm": 2.686839572097941, "learning_rate": 1.6989807306187912e-06, "loss": 0.2847, "step": 32993 }, { "epoch": 2.4521739130434783, "grad_norm": 1.9796561171735456, "learning_rate": 1.6985333474273458e-06, "loss": 0.2094, "step": 32994 }, { "epoch": 2.4522482348569303, "grad_norm": 2.614080568318253, "learning_rate": 1.6980860176803126e-06, "loss": 0.2901, "step": 32995 }, { "epoch": 2.452322556670383, "grad_norm": 3.0114099730083455, "learning_rate": 1.6976387413805684e-06, "loss": 0.2668, "step": 32996 }, { "epoch": 2.4523968784838353, "grad_norm": 5.456583625714824, "learning_rate": 1.6971915185309973e-06, "loss": 0.3094, "step": 32997 }, { "epoch": 2.4524712002972873, "grad_norm": 2.6007745045737756, "learning_rate": 1.6967443491344771e-06, "loss": 0.2681, "step": 32998 }, { "epoch": 2.4525455221107393, "grad_norm": 2.598452793060608, "learning_rate": 1.696297233193882e-06, "loss": 0.2442, "step": 32999 }, { "epoch": 2.4526198439241917, "grad_norm": 2.6895583405326966, "learning_rate": 1.695850170712099e-06, "loss": 0.2805, "step": 33000 }, { "epoch": 2.452694165737644, "grad_norm": 2.056782923731028, "learning_rate": 1.6954031616919997e-06, "loss": 0.1998, "step": 33001 }, { "epoch": 2.452768487551096, "grad_norm": 2.459815500492045, "learning_rate": 1.6949562061364632e-06, "loss": 0.2998, "step": 33002 }, { "epoch": 2.4528428093645487, "grad_norm": 2.6376717921839323, "learning_rate": 1.6945093040483695e-06, "loss": 0.2401, "step": 33003 }, { "epoch": 2.4529171311780007, "grad_norm": 2.9617179849191486, "learning_rate": 1.6940624554305908e-06, "loss": 0.3709, "step": 33004 }, { "epoch": 2.452991452991453, "grad_norm": 2.7646332913804432, "learning_rate": 1.693615660286011e-06, "loss": 0.3674, "step": 33005 }, { "epoch": 2.453065774804905, "grad_norm": 2.082781778065238, "learning_rate": 1.6931689186175015e-06, "loss": 0.2656, "step": 33006 }, { "epoch": 2.4531400966183576, "grad_norm": 2.6941775007086077, "learning_rate": 1.692722230427939e-06, "loss": 0.3311, "step": 33007 }, { "epoch": 2.4532144184318097, "grad_norm": 2.090009672229689, "learning_rate": 1.6922755957202007e-06, "loss": 0.3077, "step": 33008 }, { "epoch": 2.453288740245262, "grad_norm": 2.3158859979563418, "learning_rate": 1.6918290144971582e-06, "loss": 0.2794, "step": 33009 }, { "epoch": 2.453363062058714, "grad_norm": 2.664712437673429, "learning_rate": 1.6913824867616923e-06, "loss": 0.2632, "step": 33010 }, { "epoch": 2.4534373838721666, "grad_norm": 2.7774423318278636, "learning_rate": 1.6909360125166718e-06, "loss": 0.3106, "step": 33011 }, { "epoch": 2.4535117056856186, "grad_norm": 1.7910079755740493, "learning_rate": 1.6904895917649777e-06, "loss": 0.2338, "step": 33012 }, { "epoch": 2.453586027499071, "grad_norm": 2.2969259769557158, "learning_rate": 1.69004322450948e-06, "loss": 0.3093, "step": 33013 }, { "epoch": 2.453660349312523, "grad_norm": 2.411536522329177, "learning_rate": 1.6895969107530497e-06, "loss": 0.2186, "step": 33014 }, { "epoch": 2.4537346711259755, "grad_norm": 2.388066055756181, "learning_rate": 1.6891506504985655e-06, "loss": 0.257, "step": 33015 }, { "epoch": 2.4538089929394276, "grad_norm": 2.5764598864252943, "learning_rate": 1.6887044437488965e-06, "loss": 0.2954, "step": 33016 }, { "epoch": 2.45388331475288, "grad_norm": 1.9728975323860636, "learning_rate": 1.6882582905069222e-06, "loss": 0.2572, "step": 33017 }, { "epoch": 2.453957636566332, "grad_norm": 2.6612705097379026, "learning_rate": 1.6878121907755041e-06, "loss": 0.2611, "step": 33018 }, { "epoch": 2.4540319583797845, "grad_norm": 2.568352080367614, "learning_rate": 1.687366144557523e-06, "loss": 0.3079, "step": 33019 }, { "epoch": 2.454106280193237, "grad_norm": 3.4712368338144337, "learning_rate": 1.6869201518558476e-06, "loss": 0.2248, "step": 33020 }, { "epoch": 2.454180602006689, "grad_norm": 2.4633424853689836, "learning_rate": 1.6864742126733458e-06, "loss": 0.2674, "step": 33021 }, { "epoch": 2.454254923820141, "grad_norm": 2.162647305709961, "learning_rate": 1.6860283270128942e-06, "loss": 0.3088, "step": 33022 }, { "epoch": 2.4543292456335934, "grad_norm": 2.1654151780047926, "learning_rate": 1.6855824948773604e-06, "loss": 0.24, "step": 33023 }, { "epoch": 2.454403567447046, "grad_norm": 2.342284617694837, "learning_rate": 1.685136716269613e-06, "loss": 0.2915, "step": 33024 }, { "epoch": 2.454477889260498, "grad_norm": 2.710001855324717, "learning_rate": 1.684690991192527e-06, "loss": 0.2968, "step": 33025 }, { "epoch": 2.4545522110739504, "grad_norm": 2.772862670771083, "learning_rate": 1.684245319648965e-06, "loss": 0.308, "step": 33026 }, { "epoch": 2.4546265328874024, "grad_norm": 2.1897568671587475, "learning_rate": 1.683799701641804e-06, "loss": 0.2647, "step": 33027 }, { "epoch": 2.454700854700855, "grad_norm": 1.9748543393060303, "learning_rate": 1.6833541371739092e-06, "loss": 0.2537, "step": 33028 }, { "epoch": 2.454775176514307, "grad_norm": 2.5437368744307105, "learning_rate": 1.6829086262481476e-06, "loss": 0.3257, "step": 33029 }, { "epoch": 2.4548494983277593, "grad_norm": 2.4281767319964835, "learning_rate": 1.68246316886739e-06, "loss": 0.2893, "step": 33030 }, { "epoch": 2.4549238201412114, "grad_norm": 1.7192803096473452, "learning_rate": 1.6820177650344993e-06, "loss": 0.2114, "step": 33031 }, { "epoch": 2.454998141954664, "grad_norm": 1.8639741236661542, "learning_rate": 1.6815724147523503e-06, "loss": 0.2144, "step": 33032 }, { "epoch": 2.455072463768116, "grad_norm": 2.1887732196536542, "learning_rate": 1.681127118023803e-06, "loss": 0.3094, "step": 33033 }, { "epoch": 2.4551467855815683, "grad_norm": 2.0746051978773052, "learning_rate": 1.6806818748517318e-06, "loss": 0.3017, "step": 33034 }, { "epoch": 2.4552211073950203, "grad_norm": 2.3043457029865095, "learning_rate": 1.6802366852389973e-06, "loss": 0.2778, "step": 33035 }, { "epoch": 2.4552954292084728, "grad_norm": 3.2284011004764084, "learning_rate": 1.6797915491884654e-06, "loss": 0.3486, "step": 33036 }, { "epoch": 2.455369751021925, "grad_norm": 1.9272285759945829, "learning_rate": 1.6793464667030068e-06, "loss": 0.2294, "step": 33037 }, { "epoch": 2.4554440728353772, "grad_norm": 2.5301910842436057, "learning_rate": 1.6789014377854807e-06, "loss": 0.2879, "step": 33038 }, { "epoch": 2.4555183946488293, "grad_norm": 2.0708127158426857, "learning_rate": 1.678456462438758e-06, "loss": 0.3098, "step": 33039 }, { "epoch": 2.4555927164622817, "grad_norm": 2.2822161368743705, "learning_rate": 1.6780115406657006e-06, "loss": 0.2969, "step": 33040 }, { "epoch": 2.4556670382757337, "grad_norm": 2.1565866080534946, "learning_rate": 1.6775666724691741e-06, "loss": 0.2767, "step": 33041 }, { "epoch": 2.455741360089186, "grad_norm": 2.710179203507647, "learning_rate": 1.677121857852041e-06, "loss": 0.3398, "step": 33042 }, { "epoch": 2.4558156819026387, "grad_norm": 2.3116679273368463, "learning_rate": 1.676677096817163e-06, "loss": 0.3524, "step": 33043 }, { "epoch": 2.4558900037160907, "grad_norm": 2.4309715277588086, "learning_rate": 1.6762323893674082e-06, "loss": 0.2736, "step": 33044 }, { "epoch": 2.4559643255295427, "grad_norm": 2.8720201421138585, "learning_rate": 1.6757877355056374e-06, "loss": 0.259, "step": 33045 }, { "epoch": 2.456038647342995, "grad_norm": 1.8159276667256212, "learning_rate": 1.6753431352347105e-06, "loss": 0.2082, "step": 33046 }, { "epoch": 2.4561129691564476, "grad_norm": 2.268733697392696, "learning_rate": 1.6748985885574963e-06, "loss": 0.2894, "step": 33047 }, { "epoch": 2.4561872909698996, "grad_norm": 2.1341822830687813, "learning_rate": 1.6744540954768495e-06, "loss": 0.2584, "step": 33048 }, { "epoch": 2.456261612783352, "grad_norm": 1.8878738257556467, "learning_rate": 1.6740096559956376e-06, "loss": 0.2294, "step": 33049 }, { "epoch": 2.456335934596804, "grad_norm": 1.9068777039716955, "learning_rate": 1.6735652701167205e-06, "loss": 0.2409, "step": 33050 }, { "epoch": 2.4564102564102566, "grad_norm": 2.587390977162462, "learning_rate": 1.673120937842957e-06, "loss": 0.2636, "step": 33051 }, { "epoch": 2.4564845782237086, "grad_norm": 2.6761453769639942, "learning_rate": 1.6726766591772093e-06, "loss": 0.3968, "step": 33052 }, { "epoch": 2.456558900037161, "grad_norm": 2.7555459194860457, "learning_rate": 1.672232434122335e-06, "loss": 0.3528, "step": 33053 }, { "epoch": 2.456633221850613, "grad_norm": 2.4247777160721298, "learning_rate": 1.6717882626811987e-06, "loss": 0.2948, "step": 33054 }, { "epoch": 2.4567075436640655, "grad_norm": 1.9178243759201785, "learning_rate": 1.6713441448566549e-06, "loss": 0.2141, "step": 33055 }, { "epoch": 2.4567818654775175, "grad_norm": 2.5547633280095536, "learning_rate": 1.6709000806515674e-06, "loss": 0.3226, "step": 33056 }, { "epoch": 2.45685618729097, "grad_norm": 2.3398395272690427, "learning_rate": 1.6704560700687933e-06, "loss": 0.2991, "step": 33057 }, { "epoch": 2.456930509104422, "grad_norm": 2.5437744142573657, "learning_rate": 1.670012113111189e-06, "loss": 0.3451, "step": 33058 }, { "epoch": 2.4570048309178745, "grad_norm": 2.6190826091599764, "learning_rate": 1.6695682097816158e-06, "loss": 0.2535, "step": 33059 }, { "epoch": 2.4570791527313265, "grad_norm": 2.3882682254591145, "learning_rate": 1.6691243600829289e-06, "loss": 0.2876, "step": 33060 }, { "epoch": 2.457153474544779, "grad_norm": 2.974667148998323, "learning_rate": 1.6686805640179882e-06, "loss": 0.3812, "step": 33061 }, { "epoch": 2.457227796358231, "grad_norm": 1.8933307443676635, "learning_rate": 1.6682368215896506e-06, "loss": 0.2252, "step": 33062 }, { "epoch": 2.4573021181716834, "grad_norm": 3.101785456447272, "learning_rate": 1.6677931328007725e-06, "loss": 0.4032, "step": 33063 }, { "epoch": 2.4573764399851354, "grad_norm": 2.158825027120466, "learning_rate": 1.6673494976542093e-06, "loss": 0.2518, "step": 33064 }, { "epoch": 2.457450761798588, "grad_norm": 2.1848934300539313, "learning_rate": 1.6669059161528144e-06, "loss": 0.3105, "step": 33065 }, { "epoch": 2.4575250836120404, "grad_norm": 2.4306205870935704, "learning_rate": 1.6664623882994513e-06, "loss": 0.2499, "step": 33066 }, { "epoch": 2.4575994054254924, "grad_norm": 2.5141511299639774, "learning_rate": 1.6660189140969696e-06, "loss": 0.3811, "step": 33067 }, { "epoch": 2.4576737272389444, "grad_norm": 2.145056052787843, "learning_rate": 1.6655754935482227e-06, "loss": 0.3126, "step": 33068 }, { "epoch": 2.457748049052397, "grad_norm": 2.128759118502663, "learning_rate": 1.6651321266560715e-06, "loss": 0.1961, "step": 33069 }, { "epoch": 2.4578223708658493, "grad_norm": 2.441434430712704, "learning_rate": 1.6646888134233652e-06, "loss": 0.3122, "step": 33070 }, { "epoch": 2.4578966926793013, "grad_norm": 1.9510328138415172, "learning_rate": 1.6642455538529622e-06, "loss": 0.2488, "step": 33071 }, { "epoch": 2.457971014492754, "grad_norm": 2.5445209852081976, "learning_rate": 1.6638023479477128e-06, "loss": 0.282, "step": 33072 }, { "epoch": 2.458045336306206, "grad_norm": 2.462732288067007, "learning_rate": 1.66335919571047e-06, "loss": 0.2849, "step": 33073 }, { "epoch": 2.4581196581196583, "grad_norm": 2.1198494551509275, "learning_rate": 1.6629160971440928e-06, "loss": 0.2279, "step": 33074 }, { "epoch": 2.4581939799331103, "grad_norm": 2.7175825340813793, "learning_rate": 1.6624730522514242e-06, "loss": 0.3304, "step": 33075 }, { "epoch": 2.4582683017465627, "grad_norm": 3.06164713459224, "learning_rate": 1.6620300610353234e-06, "loss": 0.3096, "step": 33076 }, { "epoch": 2.4583426235600148, "grad_norm": 2.318807050232185, "learning_rate": 1.661587123498639e-06, "loss": 0.2848, "step": 33077 }, { "epoch": 2.458416945373467, "grad_norm": 3.319723039177573, "learning_rate": 1.6611442396442257e-06, "loss": 0.319, "step": 33078 }, { "epoch": 2.4584912671869192, "grad_norm": 2.524115299661972, "learning_rate": 1.660701409474933e-06, "loss": 0.3442, "step": 33079 }, { "epoch": 2.4585655890003717, "grad_norm": 2.1139379012707398, "learning_rate": 1.66025863299361e-06, "loss": 0.2236, "step": 33080 }, { "epoch": 2.4586399108138237, "grad_norm": 4.132397216870845, "learning_rate": 1.659815910203112e-06, "loss": 0.2335, "step": 33081 }, { "epoch": 2.458714232627276, "grad_norm": 2.966787808653048, "learning_rate": 1.6593732411062825e-06, "loss": 0.2633, "step": 33082 }, { "epoch": 2.458788554440728, "grad_norm": 2.5090910905161583, "learning_rate": 1.658930625705979e-06, "loss": 0.3495, "step": 33083 }, { "epoch": 2.4588628762541807, "grad_norm": 2.017434069945996, "learning_rate": 1.658488064005047e-06, "loss": 0.2964, "step": 33084 }, { "epoch": 2.4589371980676327, "grad_norm": 1.8706407897855042, "learning_rate": 1.6580455560063357e-06, "loss": 0.2568, "step": 33085 }, { "epoch": 2.459011519881085, "grad_norm": 1.8463387662167048, "learning_rate": 1.6576031017126947e-06, "loss": 0.2895, "step": 33086 }, { "epoch": 2.459085841694537, "grad_norm": 2.345292838127166, "learning_rate": 1.6571607011269697e-06, "loss": 0.3175, "step": 33087 }, { "epoch": 2.4591601635079896, "grad_norm": 2.819529674039392, "learning_rate": 1.6567183542520137e-06, "loss": 0.2902, "step": 33088 }, { "epoch": 2.459234485321442, "grad_norm": 3.1596599512680497, "learning_rate": 1.6562760610906726e-06, "loss": 0.3057, "step": 33089 }, { "epoch": 2.459308807134894, "grad_norm": 2.4510881649662206, "learning_rate": 1.6558338216457892e-06, "loss": 0.3436, "step": 33090 }, { "epoch": 2.459383128948346, "grad_norm": 1.8358287811586989, "learning_rate": 1.6553916359202182e-06, "loss": 0.2299, "step": 33091 }, { "epoch": 2.4594574507617986, "grad_norm": 1.8336505791083617, "learning_rate": 1.6549495039168006e-06, "loss": 0.2458, "step": 33092 }, { "epoch": 2.459531772575251, "grad_norm": 2.05290710552425, "learning_rate": 1.6545074256383875e-06, "loss": 0.2805, "step": 33093 }, { "epoch": 2.459606094388703, "grad_norm": 2.619543452231121, "learning_rate": 1.654065401087822e-06, "loss": 0.358, "step": 33094 }, { "epoch": 2.4596804162021555, "grad_norm": 2.392715588755297, "learning_rate": 1.6536234302679487e-06, "loss": 0.2227, "step": 33095 }, { "epoch": 2.4597547380156075, "grad_norm": 3.0894901204293403, "learning_rate": 1.6531815131816188e-06, "loss": 0.4459, "step": 33096 }, { "epoch": 2.45982905982906, "grad_norm": 1.9106231177138882, "learning_rate": 1.6527396498316684e-06, "loss": 0.2538, "step": 33097 }, { "epoch": 2.459903381642512, "grad_norm": 2.2060923986680754, "learning_rate": 1.65229784022095e-06, "loss": 0.2933, "step": 33098 }, { "epoch": 2.4599777034559644, "grad_norm": 3.151735369644225, "learning_rate": 1.6518560843523023e-06, "loss": 0.3604, "step": 33099 }, { "epoch": 2.4600520252694165, "grad_norm": 1.9118551234778247, "learning_rate": 1.6514143822285744e-06, "loss": 0.1953, "step": 33100 }, { "epoch": 2.460126347082869, "grad_norm": 2.80222727793402, "learning_rate": 1.6509727338526082e-06, "loss": 0.3187, "step": 33101 }, { "epoch": 2.460200668896321, "grad_norm": 1.8760163685403648, "learning_rate": 1.6505311392272427e-06, "loss": 0.2451, "step": 33102 }, { "epoch": 2.4602749907097734, "grad_norm": 2.401911301580005, "learning_rate": 1.6500895983553278e-06, "loss": 0.2999, "step": 33103 }, { "epoch": 2.4603493125232254, "grad_norm": 2.58825657413194, "learning_rate": 1.6496481112397e-06, "loss": 0.3263, "step": 33104 }, { "epoch": 2.460423634336678, "grad_norm": 2.3493012013387675, "learning_rate": 1.6492066778832072e-06, "loss": 0.3244, "step": 33105 }, { "epoch": 2.46049795615013, "grad_norm": 2.4198241749384235, "learning_rate": 1.6487652982886882e-06, "loss": 0.2602, "step": 33106 }, { "epoch": 2.4605722779635824, "grad_norm": 2.9782283703632753, "learning_rate": 1.648323972458985e-06, "loss": 0.3158, "step": 33107 }, { "epoch": 2.4606465997770344, "grad_norm": 1.6908537784960136, "learning_rate": 1.647882700396939e-06, "loss": 0.1962, "step": 33108 }, { "epoch": 2.460720921590487, "grad_norm": 2.293776544575172, "learning_rate": 1.6474414821053874e-06, "loss": 0.2649, "step": 33109 }, { "epoch": 2.460795243403939, "grad_norm": 2.2810128721872234, "learning_rate": 1.6470003175871775e-06, "loss": 0.2539, "step": 33110 }, { "epoch": 2.4608695652173913, "grad_norm": 2.195528928334732, "learning_rate": 1.6465592068451452e-06, "loss": 0.2441, "step": 33111 }, { "epoch": 2.4609438870308438, "grad_norm": 4.962489613447834, "learning_rate": 1.6461181498821298e-06, "loss": 0.3236, "step": 33112 }, { "epoch": 2.461018208844296, "grad_norm": 2.655837617628185, "learning_rate": 1.6456771467009747e-06, "loss": 0.3271, "step": 33113 }, { "epoch": 2.461092530657748, "grad_norm": 2.7924875678007637, "learning_rate": 1.645236197304514e-06, "loss": 0.2577, "step": 33114 }, { "epoch": 2.4611668524712003, "grad_norm": 1.8079577999421959, "learning_rate": 1.6447953016955908e-06, "loss": 0.2224, "step": 33115 }, { "epoch": 2.4612411742846527, "grad_norm": 2.3891729556903685, "learning_rate": 1.6443544598770433e-06, "loss": 0.3396, "step": 33116 }, { "epoch": 2.4613154960981047, "grad_norm": 2.41979167447472, "learning_rate": 1.6439136718517045e-06, "loss": 0.2905, "step": 33117 }, { "epoch": 2.461389817911557, "grad_norm": 1.9115746798448476, "learning_rate": 1.6434729376224201e-06, "loss": 0.1876, "step": 33118 }, { "epoch": 2.461464139725009, "grad_norm": 2.522534470918924, "learning_rate": 1.6430322571920222e-06, "loss": 0.3333, "step": 33119 }, { "epoch": 2.4615384615384617, "grad_norm": 2.507666055118919, "learning_rate": 1.6425916305633484e-06, "loss": 0.3473, "step": 33120 }, { "epoch": 2.4616127833519137, "grad_norm": 2.31781525069255, "learning_rate": 1.6421510577392342e-06, "loss": 0.2794, "step": 33121 }, { "epoch": 2.461687105165366, "grad_norm": 2.9094526993705006, "learning_rate": 1.6417105387225208e-06, "loss": 0.262, "step": 33122 }, { "epoch": 2.461761426978818, "grad_norm": 2.4022749998479536, "learning_rate": 1.6412700735160413e-06, "loss": 0.2046, "step": 33123 }, { "epoch": 2.4618357487922706, "grad_norm": 2.1347835702938065, "learning_rate": 1.6408296621226282e-06, "loss": 0.2008, "step": 33124 }, { "epoch": 2.4619100706057226, "grad_norm": 2.342224008190138, "learning_rate": 1.6403893045451224e-06, "loss": 0.3096, "step": 33125 }, { "epoch": 2.461984392419175, "grad_norm": 2.753415702937488, "learning_rate": 1.6399490007863573e-06, "loss": 0.3057, "step": 33126 }, { "epoch": 2.462058714232627, "grad_norm": 2.4816181098733625, "learning_rate": 1.6395087508491636e-06, "loss": 0.3002, "step": 33127 }, { "epoch": 2.4621330360460796, "grad_norm": 2.303106316834191, "learning_rate": 1.6390685547363816e-06, "loss": 0.2528, "step": 33128 }, { "epoch": 2.4622073578595316, "grad_norm": 2.029494061450689, "learning_rate": 1.63862841245084e-06, "loss": 0.2518, "step": 33129 }, { "epoch": 2.462281679672984, "grad_norm": 2.250308970559446, "learning_rate": 1.6381883239953788e-06, "loss": 0.2191, "step": 33130 }, { "epoch": 2.4623560014864365, "grad_norm": 2.266548662487303, "learning_rate": 1.6377482893728236e-06, "loss": 0.2934, "step": 33131 }, { "epoch": 2.4624303232998885, "grad_norm": 3.427095446617724, "learning_rate": 1.637308308586013e-06, "loss": 0.2995, "step": 33132 }, { "epoch": 2.4625046451133406, "grad_norm": 1.7929976799418896, "learning_rate": 1.6368683816377772e-06, "loss": 0.2007, "step": 33133 }, { "epoch": 2.462578966926793, "grad_norm": 2.184464029491877, "learning_rate": 1.6364285085309473e-06, "loss": 0.2136, "step": 33134 }, { "epoch": 2.4626532887402455, "grad_norm": 2.5547739943109957, "learning_rate": 1.6359886892683585e-06, "loss": 0.2709, "step": 33135 }, { "epoch": 2.4627276105536975, "grad_norm": 1.9074961156914616, "learning_rate": 1.635548923852839e-06, "loss": 0.2483, "step": 33136 }, { "epoch": 2.46280193236715, "grad_norm": 2.5183401288804994, "learning_rate": 1.6351092122872225e-06, "loss": 0.2816, "step": 33137 }, { "epoch": 2.462876254180602, "grad_norm": 2.2148845815423286, "learning_rate": 1.6346695545743397e-06, "loss": 0.2671, "step": 33138 }, { "epoch": 2.4629505759940544, "grad_norm": 2.1378926243586656, "learning_rate": 1.6342299507170178e-06, "loss": 0.28, "step": 33139 }, { "epoch": 2.4630248978075064, "grad_norm": 2.3546936569628385, "learning_rate": 1.6337904007180915e-06, "loss": 0.2876, "step": 33140 }, { "epoch": 2.463099219620959, "grad_norm": 2.789351793748675, "learning_rate": 1.6333509045803886e-06, "loss": 0.3014, "step": 33141 }, { "epoch": 2.463173541434411, "grad_norm": 3.222791579319735, "learning_rate": 1.6329114623067387e-06, "loss": 0.3953, "step": 33142 }, { "epoch": 2.4632478632478634, "grad_norm": 2.4500650573697826, "learning_rate": 1.632472073899969e-06, "loss": 0.2785, "step": 33143 }, { "epoch": 2.4633221850613154, "grad_norm": 1.9682909490278748, "learning_rate": 1.632032739362911e-06, "loss": 0.2257, "step": 33144 }, { "epoch": 2.463396506874768, "grad_norm": 2.080334164602606, "learning_rate": 1.6315934586983928e-06, "loss": 0.2861, "step": 33145 }, { "epoch": 2.46347082868822, "grad_norm": 2.314466864773365, "learning_rate": 1.6311542319092389e-06, "loss": 0.276, "step": 33146 }, { "epoch": 2.4635451505016723, "grad_norm": 1.9916896938703064, "learning_rate": 1.6307150589982822e-06, "loss": 0.1935, "step": 33147 }, { "epoch": 2.4636194723151243, "grad_norm": 1.5633732455692078, "learning_rate": 1.6302759399683477e-06, "loss": 0.2007, "step": 33148 }, { "epoch": 2.463693794128577, "grad_norm": 2.052497175430603, "learning_rate": 1.6298368748222604e-06, "loss": 0.2086, "step": 33149 }, { "epoch": 2.463768115942029, "grad_norm": 2.570139817060362, "learning_rate": 1.6293978635628515e-06, "loss": 0.314, "step": 33150 }, { "epoch": 2.4638424377554813, "grad_norm": 2.4012530112656827, "learning_rate": 1.6289589061929422e-06, "loss": 0.2647, "step": 33151 }, { "epoch": 2.4639167595689333, "grad_norm": 2.2851857521117274, "learning_rate": 1.6285200027153637e-06, "loss": 0.2164, "step": 33152 }, { "epoch": 2.4639910813823858, "grad_norm": 2.235333899537634, "learning_rate": 1.6280811531329387e-06, "loss": 0.2969, "step": 33153 }, { "epoch": 2.464065403195838, "grad_norm": 2.0588570649971976, "learning_rate": 1.6276423574484922e-06, "loss": 0.222, "step": 33154 }, { "epoch": 2.4641397250092902, "grad_norm": 2.169029288237216, "learning_rate": 1.6272036156648507e-06, "loss": 0.2853, "step": 33155 }, { "epoch": 2.4642140468227423, "grad_norm": 2.563382040267568, "learning_rate": 1.6267649277848353e-06, "loss": 0.3408, "step": 33156 }, { "epoch": 2.4642883686361947, "grad_norm": 2.6152399059734965, "learning_rate": 1.6263262938112756e-06, "loss": 0.332, "step": 33157 }, { "epoch": 2.464362690449647, "grad_norm": 3.1284129068860196, "learning_rate": 1.6258877137469896e-06, "loss": 0.3203, "step": 33158 }, { "epoch": 2.464437012263099, "grad_norm": 2.2347005273651033, "learning_rate": 1.6254491875948074e-06, "loss": 0.2758, "step": 33159 }, { "epoch": 2.4645113340765517, "grad_norm": 2.4961586616013176, "learning_rate": 1.6250107153575478e-06, "loss": 0.3212, "step": 33160 }, { "epoch": 2.4645856558900037, "grad_norm": 2.079519934820307, "learning_rate": 1.624572297038033e-06, "loss": 0.2682, "step": 33161 }, { "epoch": 2.464659977703456, "grad_norm": 2.4146797302637166, "learning_rate": 1.6241339326390892e-06, "loss": 0.2926, "step": 33162 }, { "epoch": 2.464734299516908, "grad_norm": 2.2483311363614655, "learning_rate": 1.6236956221635348e-06, "loss": 0.2519, "step": 33163 }, { "epoch": 2.4648086213303606, "grad_norm": 2.118063592563465, "learning_rate": 1.623257365614197e-06, "loss": 0.2847, "step": 33164 }, { "epoch": 2.4648829431438126, "grad_norm": 3.1937501566733064, "learning_rate": 1.6228191629938894e-06, "loss": 0.3063, "step": 33165 }, { "epoch": 2.464957264957265, "grad_norm": 2.839004043415727, "learning_rate": 1.6223810143054397e-06, "loss": 0.3575, "step": 33166 }, { "epoch": 2.465031586770717, "grad_norm": 2.21195258743614, "learning_rate": 1.6219429195516667e-06, "loss": 0.3004, "step": 33167 }, { "epoch": 2.4651059085841696, "grad_norm": 2.8780589517422897, "learning_rate": 1.6215048787353883e-06, "loss": 0.2557, "step": 33168 }, { "epoch": 2.4651802303976216, "grad_norm": 2.357102852948786, "learning_rate": 1.6210668918594285e-06, "loss": 0.2512, "step": 33169 }, { "epoch": 2.465254552211074, "grad_norm": 2.615540522079342, "learning_rate": 1.6206289589266056e-06, "loss": 0.236, "step": 33170 }, { "epoch": 2.465328874024526, "grad_norm": 2.0063466070030516, "learning_rate": 1.6201910799397357e-06, "loss": 0.195, "step": 33171 }, { "epoch": 2.4654031958379785, "grad_norm": 2.2825339653766448, "learning_rate": 1.6197532549016437e-06, "loss": 0.2511, "step": 33172 }, { "epoch": 2.4654775176514305, "grad_norm": 2.867232167783474, "learning_rate": 1.6193154838151426e-06, "loss": 0.2986, "step": 33173 }, { "epoch": 2.465551839464883, "grad_norm": 2.3993150938440446, "learning_rate": 1.6188777666830568e-06, "loss": 0.2954, "step": 33174 }, { "epoch": 2.465626161278335, "grad_norm": 2.322832824764601, "learning_rate": 1.6184401035082009e-06, "loss": 0.2444, "step": 33175 }, { "epoch": 2.4657004830917875, "grad_norm": 2.7001568378199634, "learning_rate": 1.6180024942933925e-06, "loss": 0.3395, "step": 33176 }, { "epoch": 2.46577480490524, "grad_norm": 1.9963899005979413, "learning_rate": 1.6175649390414482e-06, "loss": 0.1717, "step": 33177 }, { "epoch": 2.465849126718692, "grad_norm": 2.362764663192007, "learning_rate": 1.617127437755185e-06, "loss": 0.2619, "step": 33178 }, { "epoch": 2.465923448532144, "grad_norm": 1.9320040610622022, "learning_rate": 1.6166899904374222e-06, "loss": 0.225, "step": 33179 }, { "epoch": 2.4659977703455964, "grad_norm": 3.26105396451668, "learning_rate": 1.6162525970909715e-06, "loss": 0.2274, "step": 33180 }, { "epoch": 2.466072092159049, "grad_norm": 2.7073957007761162, "learning_rate": 1.6158152577186548e-06, "loss": 0.3327, "step": 33181 }, { "epoch": 2.466146413972501, "grad_norm": 2.415683836712471, "learning_rate": 1.6153779723232843e-06, "loss": 0.3681, "step": 33182 }, { "epoch": 2.4662207357859534, "grad_norm": 3.3839377216943616, "learning_rate": 1.6149407409076722e-06, "loss": 0.3229, "step": 33183 }, { "epoch": 2.4662950575994054, "grad_norm": 1.8118378911376467, "learning_rate": 1.6145035634746399e-06, "loss": 0.1543, "step": 33184 }, { "epoch": 2.466369379412858, "grad_norm": 2.6031004008627363, "learning_rate": 1.6140664400269945e-06, "loss": 0.2621, "step": 33185 }, { "epoch": 2.46644370122631, "grad_norm": 2.362036236632704, "learning_rate": 1.6136293705675588e-06, "loss": 0.2746, "step": 33186 }, { "epoch": 2.4665180230397623, "grad_norm": 2.41619794286635, "learning_rate": 1.61319235509914e-06, "loss": 0.3121, "step": 33187 }, { "epoch": 2.4665923448532143, "grad_norm": 2.218732481103153, "learning_rate": 1.6127553936245554e-06, "loss": 0.2625, "step": 33188 }, { "epoch": 2.466666666666667, "grad_norm": 3.6446589215061023, "learning_rate": 1.6123184861466156e-06, "loss": 0.2773, "step": 33189 }, { "epoch": 2.466740988480119, "grad_norm": 2.2221390685434583, "learning_rate": 1.611881632668132e-06, "loss": 0.1907, "step": 33190 }, { "epoch": 2.4668153102935713, "grad_norm": 1.8749131709906146, "learning_rate": 1.6114448331919218e-06, "loss": 0.3026, "step": 33191 }, { "epoch": 2.4668896321070233, "grad_norm": 1.7989515992397889, "learning_rate": 1.6110080877207934e-06, "loss": 0.2517, "step": 33192 }, { "epoch": 2.4669639539204757, "grad_norm": 3.6556825155789694, "learning_rate": 1.6105713962575587e-06, "loss": 0.3237, "step": 33193 }, { "epoch": 2.4670382757339278, "grad_norm": 2.4731813403305125, "learning_rate": 1.6101347588050321e-06, "loss": 0.3121, "step": 33194 }, { "epoch": 2.46711259754738, "grad_norm": 3.02516430957319, "learning_rate": 1.609698175366019e-06, "loss": 0.2925, "step": 33195 }, { "epoch": 2.4671869193608322, "grad_norm": 2.8599953854148965, "learning_rate": 1.6092616459433375e-06, "loss": 0.2022, "step": 33196 }, { "epoch": 2.4672612411742847, "grad_norm": 3.347627443901327, "learning_rate": 1.608825170539794e-06, "loss": 0.4062, "step": 33197 }, { "epoch": 2.4673355629877367, "grad_norm": 2.582052609577103, "learning_rate": 1.6083887491581985e-06, "loss": 0.3124, "step": 33198 }, { "epoch": 2.467409884801189, "grad_norm": 2.5257661377019827, "learning_rate": 1.6079523818013597e-06, "loss": 0.2705, "step": 33199 }, { "epoch": 2.4674842066146416, "grad_norm": 2.0898372843364132, "learning_rate": 1.607516068472087e-06, "loss": 0.2576, "step": 33200 }, { "epoch": 2.4675585284280936, "grad_norm": 2.4332797259633874, "learning_rate": 1.6070798091731921e-06, "loss": 0.3156, "step": 33201 }, { "epoch": 2.4676328502415457, "grad_norm": 2.654610739274984, "learning_rate": 1.6066436039074807e-06, "loss": 0.3627, "step": 33202 }, { "epoch": 2.467707172054998, "grad_norm": 1.8191066428592348, "learning_rate": 1.6062074526777637e-06, "loss": 0.2322, "step": 33203 }, { "epoch": 2.4677814938684506, "grad_norm": 2.3235094293011787, "learning_rate": 1.6057713554868471e-06, "loss": 0.2625, "step": 33204 }, { "epoch": 2.4678558156819026, "grad_norm": 2.2493257691559005, "learning_rate": 1.605335312337537e-06, "loss": 0.3175, "step": 33205 }, { "epoch": 2.467930137495355, "grad_norm": 2.601787079192325, "learning_rate": 1.6048993232326449e-06, "loss": 0.3314, "step": 33206 }, { "epoch": 2.468004459308807, "grad_norm": 2.32885417771649, "learning_rate": 1.6044633881749737e-06, "loss": 0.2422, "step": 33207 }, { "epoch": 2.4680787811222595, "grad_norm": 3.230487816296396, "learning_rate": 1.6040275071673339e-06, "loss": 0.3324, "step": 33208 }, { "epoch": 2.4681531029357116, "grad_norm": 2.1604153048492063, "learning_rate": 1.6035916802125285e-06, "loss": 0.2546, "step": 33209 }, { "epoch": 2.468227424749164, "grad_norm": 2.357263112461446, "learning_rate": 1.6031559073133651e-06, "loss": 0.2574, "step": 33210 }, { "epoch": 2.468301746562616, "grad_norm": 5.4604417885692484, "learning_rate": 1.6027201884726472e-06, "loss": 0.293, "step": 33211 }, { "epoch": 2.4683760683760685, "grad_norm": 3.1444755669296844, "learning_rate": 1.6022845236931795e-06, "loss": 0.3187, "step": 33212 }, { "epoch": 2.4684503901895205, "grad_norm": 2.2026605070911374, "learning_rate": 1.6018489129777704e-06, "loss": 0.2456, "step": 33213 }, { "epoch": 2.468524712002973, "grad_norm": 3.558231509093155, "learning_rate": 1.6014133563292222e-06, "loss": 0.2288, "step": 33214 }, { "epoch": 2.468599033816425, "grad_norm": 2.7744526993290246, "learning_rate": 1.6009778537503374e-06, "loss": 0.2865, "step": 33215 }, { "epoch": 2.4686733556298774, "grad_norm": 2.3188706237765717, "learning_rate": 1.6005424052439234e-06, "loss": 0.3392, "step": 33216 }, { "epoch": 2.4687476774433295, "grad_norm": 2.345715537380817, "learning_rate": 1.600107010812778e-06, "loss": 0.2336, "step": 33217 }, { "epoch": 2.468821999256782, "grad_norm": 3.751448468932107, "learning_rate": 1.5996716704597115e-06, "loss": 0.2526, "step": 33218 }, { "epoch": 2.468896321070234, "grad_norm": 2.66910921056963, "learning_rate": 1.5992363841875224e-06, "loss": 0.2609, "step": 33219 }, { "epoch": 2.4689706428836864, "grad_norm": 2.760159189414134, "learning_rate": 1.5988011519990143e-06, "loss": 0.2922, "step": 33220 }, { "epoch": 2.4690449646971384, "grad_norm": 2.1047823895125988, "learning_rate": 1.598365973896987e-06, "loss": 0.2065, "step": 33221 }, { "epoch": 2.469119286510591, "grad_norm": 2.4545938555806837, "learning_rate": 1.5979308498842427e-06, "loss": 0.2564, "step": 33222 }, { "epoch": 2.4691936083240433, "grad_norm": 1.6048232436086645, "learning_rate": 1.597495779963586e-06, "loss": 0.172, "step": 33223 }, { "epoch": 2.4692679301374953, "grad_norm": 1.7471767132578169, "learning_rate": 1.5970607641378122e-06, "loss": 0.2153, "step": 33224 }, { "epoch": 2.4693422519509474, "grad_norm": 2.6942481661453153, "learning_rate": 1.5966258024097281e-06, "loss": 0.3557, "step": 33225 }, { "epoch": 2.4694165737644, "grad_norm": 2.5864686438303224, "learning_rate": 1.5961908947821304e-06, "loss": 0.3688, "step": 33226 }, { "epoch": 2.4694908955778523, "grad_norm": 2.824938917719059, "learning_rate": 1.5957560412578177e-06, "loss": 0.2291, "step": 33227 }, { "epoch": 2.4695652173913043, "grad_norm": 2.477238884128048, "learning_rate": 1.5953212418395946e-06, "loss": 0.2822, "step": 33228 }, { "epoch": 2.4696395392047568, "grad_norm": 2.7293841860955705, "learning_rate": 1.5948864965302546e-06, "loss": 0.257, "step": 33229 }, { "epoch": 2.469713861018209, "grad_norm": 2.2071444384746854, "learning_rate": 1.5944518053326007e-06, "loss": 0.2688, "step": 33230 }, { "epoch": 2.4697881828316612, "grad_norm": 2.605169568830236, "learning_rate": 1.594017168249431e-06, "loss": 0.3671, "step": 33231 }, { "epoch": 2.4698625046451133, "grad_norm": 2.1574235397536703, "learning_rate": 1.5935825852835419e-06, "loss": 0.2415, "step": 33232 }, { "epoch": 2.4699368264585657, "grad_norm": 2.0427854747193046, "learning_rate": 1.5931480564377321e-06, "loss": 0.2561, "step": 33233 }, { "epoch": 2.4700111482720177, "grad_norm": 1.8676303377106718, "learning_rate": 1.5927135817147965e-06, "loss": 0.2133, "step": 33234 }, { "epoch": 2.47008547008547, "grad_norm": 2.264165938116724, "learning_rate": 1.592279161117537e-06, "loss": 0.2978, "step": 33235 }, { "epoch": 2.470159791898922, "grad_norm": 2.552677640934412, "learning_rate": 1.5918447946487491e-06, "loss": 0.311, "step": 33236 }, { "epoch": 2.4702341137123747, "grad_norm": 2.4744817099351786, "learning_rate": 1.5914104823112243e-06, "loss": 0.2348, "step": 33237 }, { "epoch": 2.4703084355258267, "grad_norm": 1.9662568997866197, "learning_rate": 1.5909762241077653e-06, "loss": 0.2625, "step": 33238 }, { "epoch": 2.470382757339279, "grad_norm": 1.86865051778152, "learning_rate": 1.5905420200411637e-06, "loss": 0.1736, "step": 33239 }, { "epoch": 2.470457079152731, "grad_norm": 3.1330372946763254, "learning_rate": 1.5901078701142169e-06, "loss": 0.3142, "step": 33240 }, { "epoch": 2.4705314009661836, "grad_norm": 2.104933709178178, "learning_rate": 1.5896737743297209e-06, "loss": 0.3065, "step": 33241 }, { "epoch": 2.4706057227796356, "grad_norm": 2.4727143728192593, "learning_rate": 1.5892397326904651e-06, "loss": 0.3023, "step": 33242 }, { "epoch": 2.470680044593088, "grad_norm": 2.535935620951172, "learning_rate": 1.5888057451992521e-06, "loss": 0.2279, "step": 33243 }, { "epoch": 2.47075436640654, "grad_norm": 1.4363807994276787, "learning_rate": 1.5883718118588675e-06, "loss": 0.1477, "step": 33244 }, { "epoch": 2.4708286882199926, "grad_norm": 2.2563742903639397, "learning_rate": 1.5879379326721111e-06, "loss": 0.3221, "step": 33245 }, { "epoch": 2.470903010033445, "grad_norm": 2.5173842949788847, "learning_rate": 1.5875041076417708e-06, "loss": 0.2279, "step": 33246 }, { "epoch": 2.470977331846897, "grad_norm": 2.4642386536376257, "learning_rate": 1.5870703367706452e-06, "loss": 0.2616, "step": 33247 }, { "epoch": 2.471051653660349, "grad_norm": 2.1226327280760207, "learning_rate": 1.5866366200615247e-06, "loss": 0.2387, "step": 33248 }, { "epoch": 2.4711259754738015, "grad_norm": 2.0526922117562427, "learning_rate": 1.586202957517199e-06, "loss": 0.2186, "step": 33249 }, { "epoch": 2.471200297287254, "grad_norm": 2.040143153063603, "learning_rate": 1.5857693491404635e-06, "loss": 0.1833, "step": 33250 }, { "epoch": 2.471274619100706, "grad_norm": 1.721908102922896, "learning_rate": 1.5853357949341087e-06, "loss": 0.2159, "step": 33251 }, { "epoch": 2.4713489409141585, "grad_norm": 2.5838274165449318, "learning_rate": 1.584902294900923e-06, "loss": 0.3207, "step": 33252 }, { "epoch": 2.4714232627276105, "grad_norm": 2.2890226173792882, "learning_rate": 1.584468849043702e-06, "loss": 0.2522, "step": 33253 }, { "epoch": 2.471497584541063, "grad_norm": 3.3746610200938045, "learning_rate": 1.5840354573652338e-06, "loss": 0.4054, "step": 33254 }, { "epoch": 2.471571906354515, "grad_norm": 2.0355081798813592, "learning_rate": 1.5836021198683082e-06, "loss": 0.2245, "step": 33255 }, { "epoch": 2.4716462281679674, "grad_norm": 2.5188097905416926, "learning_rate": 1.5831688365557141e-06, "loss": 0.3544, "step": 33256 }, { "epoch": 2.4717205499814194, "grad_norm": 2.25236660430544, "learning_rate": 1.5827356074302447e-06, "loss": 0.2609, "step": 33257 }, { "epoch": 2.471794871794872, "grad_norm": 2.396653733308832, "learning_rate": 1.5823024324946857e-06, "loss": 0.2292, "step": 33258 }, { "epoch": 2.471869193608324, "grad_norm": 2.4563568499760584, "learning_rate": 1.5818693117518258e-06, "loss": 0.3323, "step": 33259 }, { "epoch": 2.4719435154217764, "grad_norm": 2.3051625934554285, "learning_rate": 1.5814362452044563e-06, "loss": 0.2978, "step": 33260 }, { "epoch": 2.4720178372352284, "grad_norm": 2.0829406714922944, "learning_rate": 1.5810032328553604e-06, "loss": 0.2559, "step": 33261 }, { "epoch": 2.472092159048681, "grad_norm": 2.411019947273183, "learning_rate": 1.5805702747073315e-06, "loss": 0.2582, "step": 33262 }, { "epoch": 2.472166480862133, "grad_norm": 2.453658768837748, "learning_rate": 1.580137370763155e-06, "loss": 0.3512, "step": 33263 }, { "epoch": 2.4722408026755853, "grad_norm": 2.418406354952364, "learning_rate": 1.5797045210256146e-06, "loss": 0.2854, "step": 33264 }, { "epoch": 2.4723151244890373, "grad_norm": 2.657336821754984, "learning_rate": 1.5792717254975021e-06, "loss": 0.2338, "step": 33265 }, { "epoch": 2.47238944630249, "grad_norm": 2.4635885778133013, "learning_rate": 1.5788389841816009e-06, "loss": 0.2643, "step": 33266 }, { "epoch": 2.472463768115942, "grad_norm": 2.255180415903444, "learning_rate": 1.578406297080697e-06, "loss": 0.2401, "step": 33267 }, { "epoch": 2.4725380899293943, "grad_norm": 1.8876723773135609, "learning_rate": 1.5779736641975752e-06, "loss": 0.1849, "step": 33268 }, { "epoch": 2.4726124117428467, "grad_norm": 2.125666338234401, "learning_rate": 1.5775410855350237e-06, "loss": 0.2698, "step": 33269 }, { "epoch": 2.4726867335562988, "grad_norm": 1.9133081128138185, "learning_rate": 1.5771085610958248e-06, "loss": 0.2232, "step": 33270 }, { "epoch": 2.472761055369751, "grad_norm": 2.1455128652455344, "learning_rate": 1.5766760908827627e-06, "loss": 0.265, "step": 33271 }, { "epoch": 2.4728353771832032, "grad_norm": 1.9762447891943618, "learning_rate": 1.5762436748986254e-06, "loss": 0.2258, "step": 33272 }, { "epoch": 2.4729096989966557, "grad_norm": 2.125726728011817, "learning_rate": 1.575811313146194e-06, "loss": 0.2398, "step": 33273 }, { "epoch": 2.4729840208101077, "grad_norm": 1.9196253510739665, "learning_rate": 1.5753790056282492e-06, "loss": 0.2588, "step": 33274 }, { "epoch": 2.47305834262356, "grad_norm": 2.254316290320477, "learning_rate": 1.5749467523475803e-06, "loss": 0.3, "step": 33275 }, { "epoch": 2.473132664437012, "grad_norm": 2.4507950805327505, "learning_rate": 1.574514553306964e-06, "loss": 0.3132, "step": 33276 }, { "epoch": 2.4732069862504646, "grad_norm": 2.0885080604620017, "learning_rate": 1.5740824085091911e-06, "loss": 0.2347, "step": 33277 }, { "epoch": 2.4732813080639167, "grad_norm": 1.9765203594543186, "learning_rate": 1.5736503179570339e-06, "loss": 0.236, "step": 33278 }, { "epoch": 2.473355629877369, "grad_norm": 2.6732885940615043, "learning_rate": 1.5732182816532815e-06, "loss": 0.3348, "step": 33279 }, { "epoch": 2.473429951690821, "grad_norm": 2.328939744897539, "learning_rate": 1.5727862996007115e-06, "loss": 0.2756, "step": 33280 }, { "epoch": 2.4735042735042736, "grad_norm": 2.578322690505267, "learning_rate": 1.572354371802104e-06, "loss": 0.3175, "step": 33281 }, { "epoch": 2.4735785953177256, "grad_norm": 2.926211026100581, "learning_rate": 1.5719224982602443e-06, "loss": 0.2175, "step": 33282 }, { "epoch": 2.473652917131178, "grad_norm": 2.872902843262549, "learning_rate": 1.5714906789779084e-06, "loss": 0.2701, "step": 33283 }, { "epoch": 2.47372723894463, "grad_norm": 1.9693454359272864, "learning_rate": 1.571058913957879e-06, "loss": 0.2228, "step": 33284 }, { "epoch": 2.4738015607580826, "grad_norm": 2.396831334721515, "learning_rate": 1.5706272032029368e-06, "loss": 0.1989, "step": 33285 }, { "epoch": 2.4738758825715346, "grad_norm": 2.8169526524508632, "learning_rate": 1.5701955467158559e-06, "loss": 0.2495, "step": 33286 }, { "epoch": 2.473950204384987, "grad_norm": 3.3172237456594686, "learning_rate": 1.5697639444994217e-06, "loss": 0.267, "step": 33287 }, { "epoch": 2.4740245261984395, "grad_norm": 2.229333016823764, "learning_rate": 1.569332396556409e-06, "loss": 0.2741, "step": 33288 }, { "epoch": 2.4740988480118915, "grad_norm": 2.168932939543642, "learning_rate": 1.5689009028895974e-06, "loss": 0.2599, "step": 33289 }, { "epoch": 2.4741731698253435, "grad_norm": 2.4717430496048967, "learning_rate": 1.5684694635017618e-06, "loss": 0.3528, "step": 33290 }, { "epoch": 2.474247491638796, "grad_norm": 2.430011577991757, "learning_rate": 1.568038078395685e-06, "loss": 0.2966, "step": 33291 }, { "epoch": 2.4743218134522484, "grad_norm": 2.566505896117665, "learning_rate": 1.5676067475741408e-06, "loss": 0.3624, "step": 33292 }, { "epoch": 2.4743961352657005, "grad_norm": 2.3423171786025874, "learning_rate": 1.5671754710399044e-06, "loss": 0.2724, "step": 33293 }, { "epoch": 2.474470457079153, "grad_norm": 2.138816398166119, "learning_rate": 1.5667442487957584e-06, "loss": 0.2652, "step": 33294 }, { "epoch": 2.474544778892605, "grad_norm": 2.449414061498631, "learning_rate": 1.5663130808444749e-06, "loss": 0.3132, "step": 33295 }, { "epoch": 2.4746191007060574, "grad_norm": 2.574597051592001, "learning_rate": 1.565881967188827e-06, "loss": 0.3192, "step": 33296 }, { "epoch": 2.4746934225195094, "grad_norm": 1.8012609765078678, "learning_rate": 1.565450907831596e-06, "loss": 0.1965, "step": 33297 }, { "epoch": 2.474767744332962, "grad_norm": 2.202611326769995, "learning_rate": 1.5650199027755531e-06, "loss": 0.283, "step": 33298 }, { "epoch": 2.474842066146414, "grad_norm": 2.7345059323474263, "learning_rate": 1.564588952023478e-06, "loss": 0.2855, "step": 33299 }, { "epoch": 2.4749163879598663, "grad_norm": 2.4354409055368413, "learning_rate": 1.5641580555781377e-06, "loss": 0.3529, "step": 33300 }, { "epoch": 2.4749907097733184, "grad_norm": 2.411537854008747, "learning_rate": 1.563727213442311e-06, "loss": 0.3138, "step": 33301 }, { "epoch": 2.475065031586771, "grad_norm": 2.3943971754665236, "learning_rate": 1.5632964256187722e-06, "loss": 0.2991, "step": 33302 }, { "epoch": 2.475139353400223, "grad_norm": 1.735191813695216, "learning_rate": 1.5628656921102902e-06, "loss": 0.2112, "step": 33303 }, { "epoch": 2.4752136752136753, "grad_norm": 2.525116236279931, "learning_rate": 1.5624350129196442e-06, "loss": 0.2953, "step": 33304 }, { "epoch": 2.4752879970271273, "grad_norm": 2.546179811499229, "learning_rate": 1.5620043880496006e-06, "loss": 0.2446, "step": 33305 }, { "epoch": 2.47536231884058, "grad_norm": 2.224628596879544, "learning_rate": 1.5615738175029383e-06, "loss": 0.2857, "step": 33306 }, { "epoch": 2.475436640654032, "grad_norm": 3.075881341248319, "learning_rate": 1.5611433012824252e-06, "loss": 0.3443, "step": 33307 }, { "epoch": 2.4755109624674843, "grad_norm": 2.1799320591354974, "learning_rate": 1.560712839390831e-06, "loss": 0.2511, "step": 33308 }, { "epoch": 2.4755852842809363, "grad_norm": 1.95064321647209, "learning_rate": 1.5602824318309317e-06, "loss": 0.2724, "step": 33309 }, { "epoch": 2.4756596060943887, "grad_norm": 2.5357959478759837, "learning_rate": 1.5598520786054948e-06, "loss": 0.1732, "step": 33310 }, { "epoch": 2.475733927907841, "grad_norm": 2.3667950337819095, "learning_rate": 1.5594217797172973e-06, "loss": 0.298, "step": 33311 }, { "epoch": 2.475808249721293, "grad_norm": 3.0341263116539117, "learning_rate": 1.5589915351690988e-06, "loss": 0.3017, "step": 33312 }, { "epoch": 2.4758825715347452, "grad_norm": 2.4281743632704176, "learning_rate": 1.5585613449636772e-06, "loss": 0.3405, "step": 33313 }, { "epoch": 2.4759568933481977, "grad_norm": 2.629386317358882, "learning_rate": 1.5581312091038003e-06, "loss": 0.3127, "step": 33314 }, { "epoch": 2.47603121516165, "grad_norm": 2.014257642452861, "learning_rate": 1.5577011275922339e-06, "loss": 0.2387, "step": 33315 }, { "epoch": 2.476105536975102, "grad_norm": 2.125967512584705, "learning_rate": 1.557271100431752e-06, "loss": 0.2381, "step": 33316 }, { "epoch": 2.4761798587885546, "grad_norm": 2.8886663377642168, "learning_rate": 1.5568411276251206e-06, "loss": 0.3152, "step": 33317 }, { "epoch": 2.4762541806020066, "grad_norm": 2.2583295261397947, "learning_rate": 1.556411209175106e-06, "loss": 0.2661, "step": 33318 }, { "epoch": 2.476328502415459, "grad_norm": 2.280593571443891, "learning_rate": 1.5559813450844797e-06, "loss": 0.27, "step": 33319 }, { "epoch": 2.476402824228911, "grad_norm": 2.048887506557084, "learning_rate": 1.5555515353560059e-06, "loss": 0.2422, "step": 33320 }, { "epoch": 2.4764771460423636, "grad_norm": 1.8632155843645661, "learning_rate": 1.5551217799924546e-06, "loss": 0.2398, "step": 33321 }, { "epoch": 2.4765514678558156, "grad_norm": 2.102947340281774, "learning_rate": 1.5546920789965914e-06, "loss": 0.2377, "step": 33322 }, { "epoch": 2.476625789669268, "grad_norm": 2.4055521620023383, "learning_rate": 1.5542624323711819e-06, "loss": 0.2879, "step": 33323 }, { "epoch": 2.47670011148272, "grad_norm": 2.498321957467228, "learning_rate": 1.553832840118994e-06, "loss": 0.3361, "step": 33324 }, { "epoch": 2.4767744332961725, "grad_norm": 2.4687176659364773, "learning_rate": 1.5534033022427886e-06, "loss": 0.2728, "step": 33325 }, { "epoch": 2.4768487551096245, "grad_norm": 2.162307350132416, "learning_rate": 1.552973818745337e-06, "loss": 0.1719, "step": 33326 }, { "epoch": 2.476923076923077, "grad_norm": 2.3801816285921804, "learning_rate": 1.552544389629399e-06, "loss": 0.263, "step": 33327 }, { "epoch": 2.476997398736529, "grad_norm": 2.4547872038131273, "learning_rate": 1.5521150148977448e-06, "loss": 0.2594, "step": 33328 }, { "epoch": 2.4770717205499815, "grad_norm": 2.3746363823392467, "learning_rate": 1.5516856945531355e-06, "loss": 0.1796, "step": 33329 }, { "epoch": 2.4771460423634335, "grad_norm": 1.9447079728036616, "learning_rate": 1.5512564285983333e-06, "loss": 0.2517, "step": 33330 }, { "epoch": 2.477220364176886, "grad_norm": 2.144461971348119, "learning_rate": 1.550827217036106e-06, "loss": 0.2944, "step": 33331 }, { "epoch": 2.477294685990338, "grad_norm": 2.0458041162300113, "learning_rate": 1.5503980598692126e-06, "loss": 0.2647, "step": 33332 }, { "epoch": 2.4773690078037904, "grad_norm": 1.9435385660579538, "learning_rate": 1.5499689571004229e-06, "loss": 0.2335, "step": 33333 }, { "epoch": 2.477443329617243, "grad_norm": 1.9684750523887935, "learning_rate": 1.5495399087324892e-06, "loss": 0.2192, "step": 33334 }, { "epoch": 2.477517651430695, "grad_norm": 2.359566609768055, "learning_rate": 1.5491109147681815e-06, "loss": 0.3121, "step": 33335 }, { "epoch": 2.477591973244147, "grad_norm": 2.041328806120137, "learning_rate": 1.54868197521026e-06, "loss": 0.2385, "step": 33336 }, { "epoch": 2.4776662950575994, "grad_norm": 1.8315934059728483, "learning_rate": 1.5482530900614823e-06, "loss": 0.2457, "step": 33337 }, { "epoch": 2.477740616871052, "grad_norm": 2.6780380336675798, "learning_rate": 1.547824259324615e-06, "loss": 0.2692, "step": 33338 }, { "epoch": 2.477814938684504, "grad_norm": 2.2586989178267936, "learning_rate": 1.5473954830024175e-06, "loss": 0.3264, "step": 33339 }, { "epoch": 2.4778892604979563, "grad_norm": 2.299557426040688, "learning_rate": 1.5469667610976457e-06, "loss": 0.2688, "step": 33340 }, { "epoch": 2.4779635823114083, "grad_norm": 2.2281377828602187, "learning_rate": 1.546538093613067e-06, "loss": 0.2327, "step": 33341 }, { "epoch": 2.478037904124861, "grad_norm": 2.4341126937107083, "learning_rate": 1.5461094805514343e-06, "loss": 0.3023, "step": 33342 }, { "epoch": 2.478112225938313, "grad_norm": 2.4893254255448456, "learning_rate": 1.5456809219155122e-06, "loss": 0.3271, "step": 33343 }, { "epoch": 2.4781865477517653, "grad_norm": 2.3340099219751376, "learning_rate": 1.5452524177080585e-06, "loss": 0.2335, "step": 33344 }, { "epoch": 2.4782608695652173, "grad_norm": 2.408818674643788, "learning_rate": 1.54482396793183e-06, "loss": 0.2997, "step": 33345 }, { "epoch": 2.4783351913786698, "grad_norm": 2.3800800386032535, "learning_rate": 1.5443955725895864e-06, "loss": 0.276, "step": 33346 }, { "epoch": 2.4784095131921218, "grad_norm": 2.1090003705443188, "learning_rate": 1.5439672316840837e-06, "loss": 0.2535, "step": 33347 }, { "epoch": 2.4784838350055742, "grad_norm": 2.3107712894632577, "learning_rate": 1.5435389452180837e-06, "loss": 0.3088, "step": 33348 }, { "epoch": 2.4785581568190262, "grad_norm": 2.2286865229751593, "learning_rate": 1.5431107131943378e-06, "loss": 0.2547, "step": 33349 }, { "epoch": 2.4786324786324787, "grad_norm": 2.5598918886631967, "learning_rate": 1.5426825356156095e-06, "loss": 0.3565, "step": 33350 }, { "epoch": 2.4787068004459307, "grad_norm": 2.462656980630872, "learning_rate": 1.5422544124846518e-06, "loss": 0.2822, "step": 33351 }, { "epoch": 2.478781122259383, "grad_norm": 2.694139280041716, "learning_rate": 1.5418263438042192e-06, "loss": 0.3197, "step": 33352 }, { "epoch": 2.478855444072835, "grad_norm": 2.353315791751781, "learning_rate": 1.5413983295770718e-06, "loss": 0.2163, "step": 33353 }, { "epoch": 2.4789297658862877, "grad_norm": 2.2060229089550174, "learning_rate": 1.5409703698059609e-06, "loss": 0.2541, "step": 33354 }, { "epoch": 2.4790040876997397, "grad_norm": 2.476826195885567, "learning_rate": 1.5405424644936461e-06, "loss": 0.2832, "step": 33355 }, { "epoch": 2.479078409513192, "grad_norm": 2.5242809961688457, "learning_rate": 1.5401146136428824e-06, "loss": 0.3023, "step": 33356 }, { "epoch": 2.4791527313266446, "grad_norm": 2.146235980051786, "learning_rate": 1.5396868172564172e-06, "loss": 0.2631, "step": 33357 }, { "epoch": 2.4792270531400966, "grad_norm": 2.5641874121542654, "learning_rate": 1.5392590753370119e-06, "loss": 0.248, "step": 33358 }, { "epoch": 2.4793013749535486, "grad_norm": 2.1250893078997386, "learning_rate": 1.538831387887415e-06, "loss": 0.2682, "step": 33359 }, { "epoch": 2.479375696767001, "grad_norm": 1.6504861418619043, "learning_rate": 1.5384037549103847e-06, "loss": 0.1964, "step": 33360 }, { "epoch": 2.4794500185804536, "grad_norm": 2.272469710767882, "learning_rate": 1.5379761764086721e-06, "loss": 0.253, "step": 33361 }, { "epoch": 2.4795243403939056, "grad_norm": 1.775935796492726, "learning_rate": 1.5375486523850281e-06, "loss": 0.2053, "step": 33362 }, { "epoch": 2.479598662207358, "grad_norm": 2.71438591557251, "learning_rate": 1.5371211828422083e-06, "loss": 0.2291, "step": 33363 }, { "epoch": 2.47967298402081, "grad_norm": 2.310684197999981, "learning_rate": 1.5366937677829619e-06, "loss": 0.2587, "step": 33364 }, { "epoch": 2.4797473058342625, "grad_norm": 2.3048353800590826, "learning_rate": 1.536266407210043e-06, "loss": 0.2997, "step": 33365 }, { "epoch": 2.4798216276477145, "grad_norm": 2.8655737606076093, "learning_rate": 1.535839101126203e-06, "loss": 0.2848, "step": 33366 }, { "epoch": 2.479895949461167, "grad_norm": 2.29860362141311, "learning_rate": 1.5354118495341907e-06, "loss": 0.3041, "step": 33367 }, { "epoch": 2.479970271274619, "grad_norm": 2.5782831221823774, "learning_rate": 1.5349846524367585e-06, "loss": 0.3026, "step": 33368 }, { "epoch": 2.4800445930880715, "grad_norm": 2.2790216856460552, "learning_rate": 1.5345575098366528e-06, "loss": 0.1868, "step": 33369 }, { "epoch": 2.4801189149015235, "grad_norm": 2.294992697638257, "learning_rate": 1.5341304217366294e-06, "loss": 0.2521, "step": 33370 }, { "epoch": 2.480193236714976, "grad_norm": 2.5082684586154196, "learning_rate": 1.5337033881394325e-06, "loss": 0.3383, "step": 33371 }, { "epoch": 2.480267558528428, "grad_norm": 2.8652889852939976, "learning_rate": 1.533276409047817e-06, "loss": 0.2565, "step": 33372 }, { "epoch": 2.4803418803418804, "grad_norm": 1.9662238691657474, "learning_rate": 1.532849484464527e-06, "loss": 0.2773, "step": 33373 }, { "epoch": 2.4804162021553324, "grad_norm": 2.9710273377393124, "learning_rate": 1.5324226143923116e-06, "loss": 0.2671, "step": 33374 }, { "epoch": 2.480490523968785, "grad_norm": 1.9481128526681737, "learning_rate": 1.531995798833923e-06, "loss": 0.2101, "step": 33375 }, { "epoch": 2.480564845782237, "grad_norm": 2.2910126310069616, "learning_rate": 1.5315690377921044e-06, "loss": 0.2263, "step": 33376 }, { "epoch": 2.4806391675956894, "grad_norm": 2.8179374978351537, "learning_rate": 1.5311423312696038e-06, "loss": 0.3362, "step": 33377 }, { "epoch": 2.4807134894091414, "grad_norm": 2.0230870171386313, "learning_rate": 1.5307156792691712e-06, "loss": 0.2494, "step": 33378 }, { "epoch": 2.480787811222594, "grad_norm": 2.425182065139374, "learning_rate": 1.5302890817935522e-06, "loss": 0.2893, "step": 33379 }, { "epoch": 2.4808621330360463, "grad_norm": 2.4884600071939937, "learning_rate": 1.5298625388454924e-06, "loss": 0.3399, "step": 33380 }, { "epoch": 2.4809364548494983, "grad_norm": 2.4229247486268664, "learning_rate": 1.5294360504277362e-06, "loss": 0.285, "step": 33381 }, { "epoch": 2.4810107766629503, "grad_norm": 1.9755775871668595, "learning_rate": 1.5290096165430324e-06, "loss": 0.1823, "step": 33382 }, { "epoch": 2.481085098476403, "grad_norm": 3.0823850736457685, "learning_rate": 1.5285832371941257e-06, "loss": 0.3884, "step": 33383 }, { "epoch": 2.4811594202898553, "grad_norm": 2.4944567309584293, "learning_rate": 1.528156912383758e-06, "loss": 0.312, "step": 33384 }, { "epoch": 2.4812337421033073, "grad_norm": 3.3162453309486026, "learning_rate": 1.5277306421146798e-06, "loss": 0.3347, "step": 33385 }, { "epoch": 2.4813080639167597, "grad_norm": 2.902952494109821, "learning_rate": 1.5273044263896285e-06, "loss": 0.2839, "step": 33386 }, { "epoch": 2.4813823857302117, "grad_norm": 2.5915924899365486, "learning_rate": 1.5268782652113546e-06, "loss": 0.245, "step": 33387 }, { "epoch": 2.481456707543664, "grad_norm": 1.9698791293018816, "learning_rate": 1.5264521585825999e-06, "loss": 0.2351, "step": 33388 }, { "epoch": 2.4815310293571162, "grad_norm": 3.0122596367980523, "learning_rate": 1.526026106506102e-06, "loss": 0.3545, "step": 33389 }, { "epoch": 2.4816053511705687, "grad_norm": 2.9542112050195355, "learning_rate": 1.525600108984615e-06, "loss": 0.2602, "step": 33390 }, { "epoch": 2.4816796729840207, "grad_norm": 2.463985934312293, "learning_rate": 1.5251741660208685e-06, "loss": 0.369, "step": 33391 }, { "epoch": 2.481753994797473, "grad_norm": 1.825957070056587, "learning_rate": 1.5247482776176148e-06, "loss": 0.2091, "step": 33392 }, { "epoch": 2.481828316610925, "grad_norm": 2.1560437535805583, "learning_rate": 1.5243224437775883e-06, "loss": 0.2178, "step": 33393 }, { "epoch": 2.4819026384243776, "grad_norm": 2.4454473248523407, "learning_rate": 1.5238966645035381e-06, "loss": 0.2271, "step": 33394 }, { "epoch": 2.4819769602378297, "grad_norm": 2.7246283475880806, "learning_rate": 1.5234709397981994e-06, "loss": 0.26, "step": 33395 }, { "epoch": 2.482051282051282, "grad_norm": 2.2373960319323727, "learning_rate": 1.5230452696643138e-06, "loss": 0.2491, "step": 33396 }, { "epoch": 2.482125603864734, "grad_norm": 2.7245203380841643, "learning_rate": 1.5226196541046257e-06, "loss": 0.2911, "step": 33397 }, { "epoch": 2.4821999256781866, "grad_norm": 2.8229534688202285, "learning_rate": 1.5221940931218716e-06, "loss": 0.2497, "step": 33398 }, { "epoch": 2.4822742474916386, "grad_norm": 3.056690001308476, "learning_rate": 1.5217685867187891e-06, "loss": 0.2428, "step": 33399 }, { "epoch": 2.482348569305091, "grad_norm": 1.8810195325811525, "learning_rate": 1.5213431348981244e-06, "loss": 0.1771, "step": 33400 }, { "epoch": 2.482422891118543, "grad_norm": 2.2134598255846534, "learning_rate": 1.5209177376626116e-06, "loss": 0.2723, "step": 33401 }, { "epoch": 2.4824972129319955, "grad_norm": 2.5166173880726874, "learning_rate": 1.5204923950149907e-06, "loss": 0.3348, "step": 33402 }, { "epoch": 2.482571534745448, "grad_norm": 1.8096559356719197, "learning_rate": 1.5200671069579976e-06, "loss": 0.1939, "step": 33403 }, { "epoch": 2.4826458565589, "grad_norm": 3.624497511451829, "learning_rate": 1.5196418734943752e-06, "loss": 0.3666, "step": 33404 }, { "epoch": 2.482720178372352, "grad_norm": 1.5385777695245726, "learning_rate": 1.519216694626857e-06, "loss": 0.1771, "step": 33405 }, { "epoch": 2.4827945001858045, "grad_norm": 2.7561668389351386, "learning_rate": 1.5187915703581813e-06, "loss": 0.2596, "step": 33406 }, { "epoch": 2.482868821999257, "grad_norm": 2.3350315351994846, "learning_rate": 1.5183665006910863e-06, "loss": 0.2363, "step": 33407 }, { "epoch": 2.482943143812709, "grad_norm": 2.6131297856358597, "learning_rate": 1.5179414856283059e-06, "loss": 0.2924, "step": 33408 }, { "epoch": 2.4830174656261614, "grad_norm": 2.5696744070265933, "learning_rate": 1.5175165251725798e-06, "loss": 0.3124, "step": 33409 }, { "epoch": 2.4830917874396135, "grad_norm": 2.8744948025428947, "learning_rate": 1.5170916193266426e-06, "loss": 0.3079, "step": 33410 }, { "epoch": 2.483166109253066, "grad_norm": 2.3555838300816916, "learning_rate": 1.5166667680932278e-06, "loss": 0.2815, "step": 33411 }, { "epoch": 2.483240431066518, "grad_norm": 1.7484363231538373, "learning_rate": 1.5162419714750765e-06, "loss": 0.2091, "step": 33412 }, { "epoch": 2.4833147528799704, "grad_norm": 1.7258465111488637, "learning_rate": 1.515817229474914e-06, "loss": 0.1421, "step": 33413 }, { "epoch": 2.4833890746934224, "grad_norm": 2.4391055126389625, "learning_rate": 1.5153925420954818e-06, "loss": 0.3166, "step": 33414 }, { "epoch": 2.483463396506875, "grad_norm": 2.5571864676506904, "learning_rate": 1.514967909339511e-06, "loss": 0.3073, "step": 33415 }, { "epoch": 2.483537718320327, "grad_norm": 1.9369747674999191, "learning_rate": 1.5145433312097391e-06, "loss": 0.2057, "step": 33416 }, { "epoch": 2.4836120401337793, "grad_norm": 2.2214295671659943, "learning_rate": 1.5141188077088954e-06, "loss": 0.228, "step": 33417 }, { "epoch": 2.4836863619472314, "grad_norm": 2.0083216266933634, "learning_rate": 1.513694338839714e-06, "loss": 0.1899, "step": 33418 }, { "epoch": 2.483760683760684, "grad_norm": 2.2569233477163855, "learning_rate": 1.5132699246049286e-06, "loss": 0.245, "step": 33419 }, { "epoch": 2.483835005574136, "grad_norm": 3.010968366878734, "learning_rate": 1.5128455650072726e-06, "loss": 0.3649, "step": 33420 }, { "epoch": 2.4839093273875883, "grad_norm": 2.031454391386552, "learning_rate": 1.5124212600494737e-06, "loss": 0.2547, "step": 33421 }, { "epoch": 2.4839836492010403, "grad_norm": 2.968313990886003, "learning_rate": 1.5119970097342684e-06, "loss": 0.2516, "step": 33422 }, { "epoch": 2.4840579710144928, "grad_norm": 1.8713237671968241, "learning_rate": 1.5115728140643837e-06, "loss": 0.2162, "step": 33423 }, { "epoch": 2.484132292827945, "grad_norm": 1.7657176331292244, "learning_rate": 1.5111486730425574e-06, "loss": 0.1772, "step": 33424 }, { "epoch": 2.4842066146413972, "grad_norm": 2.1586572241430804, "learning_rate": 1.5107245866715114e-06, "loss": 0.2267, "step": 33425 }, { "epoch": 2.4842809364548497, "grad_norm": 2.8318399132994303, "learning_rate": 1.5103005549539817e-06, "loss": 0.298, "step": 33426 }, { "epoch": 2.4843552582683017, "grad_norm": 2.508608404419627, "learning_rate": 1.509876577892696e-06, "loss": 0.2675, "step": 33427 }, { "epoch": 2.484429580081754, "grad_norm": 2.1671656469289626, "learning_rate": 1.5094526554903832e-06, "loss": 0.2662, "step": 33428 }, { "epoch": 2.484503901895206, "grad_norm": 2.8886392617977794, "learning_rate": 1.5090287877497756e-06, "loss": 0.3622, "step": 33429 }, { "epoch": 2.4845782237086587, "grad_norm": 3.037155968577272, "learning_rate": 1.5086049746735975e-06, "loss": 0.3503, "step": 33430 }, { "epoch": 2.4846525455221107, "grad_norm": 2.0097500645024264, "learning_rate": 1.5081812162645827e-06, "loss": 0.2118, "step": 33431 }, { "epoch": 2.484726867335563, "grad_norm": 2.011890516421374, "learning_rate": 1.5077575125254563e-06, "loss": 0.2753, "step": 33432 }, { "epoch": 2.484801189149015, "grad_norm": 2.117674312403377, "learning_rate": 1.5073338634589441e-06, "loss": 0.1989, "step": 33433 }, { "epoch": 2.4848755109624676, "grad_norm": 2.154756150402836, "learning_rate": 1.5069102690677783e-06, "loss": 0.251, "step": 33434 }, { "epoch": 2.4849498327759196, "grad_norm": 2.7941667132313563, "learning_rate": 1.5064867293546837e-06, "loss": 0.3137, "step": 33435 }, { "epoch": 2.485024154589372, "grad_norm": 2.2822799162420804, "learning_rate": 1.5060632443223866e-06, "loss": 0.3381, "step": 33436 }, { "epoch": 2.485098476402824, "grad_norm": 2.714431533010676, "learning_rate": 1.5056398139736118e-06, "loss": 0.2642, "step": 33437 }, { "epoch": 2.4851727982162766, "grad_norm": 2.338979469811204, "learning_rate": 1.505216438311089e-06, "loss": 0.2268, "step": 33438 }, { "epoch": 2.4852471200297286, "grad_norm": 2.3365401044137135, "learning_rate": 1.5047931173375419e-06, "loss": 0.2472, "step": 33439 }, { "epoch": 2.485321441843181, "grad_norm": 2.3243336342742276, "learning_rate": 1.5043698510556936e-06, "loss": 0.2796, "step": 33440 }, { "epoch": 2.485395763656633, "grad_norm": 3.5368846290923543, "learning_rate": 1.5039466394682733e-06, "loss": 0.3131, "step": 33441 }, { "epoch": 2.4854700854700855, "grad_norm": 2.3044730235141215, "learning_rate": 1.5035234825780042e-06, "loss": 0.2273, "step": 33442 }, { "epoch": 2.4855444072835375, "grad_norm": 2.1474891194432937, "learning_rate": 1.5031003803876088e-06, "loss": 0.2247, "step": 33443 }, { "epoch": 2.48561872909699, "grad_norm": 2.5426926698202292, "learning_rate": 1.5026773328998134e-06, "loss": 0.3261, "step": 33444 }, { "epoch": 2.4856930509104425, "grad_norm": 2.5365258173333505, "learning_rate": 1.5022543401173384e-06, "loss": 0.3682, "step": 33445 }, { "epoch": 2.4857673727238945, "grad_norm": 2.650680524109085, "learning_rate": 1.5018314020429125e-06, "loss": 0.3213, "step": 33446 }, { "epoch": 2.4858416945373465, "grad_norm": 2.1254080821198786, "learning_rate": 1.5014085186792515e-06, "loss": 0.2092, "step": 33447 }, { "epoch": 2.485916016350799, "grad_norm": 2.244878448808218, "learning_rate": 1.5009856900290842e-06, "loss": 0.2788, "step": 33448 }, { "epoch": 2.4859903381642514, "grad_norm": 2.538951434288505, "learning_rate": 1.5005629160951297e-06, "loss": 0.3604, "step": 33449 }, { "epoch": 2.4860646599777034, "grad_norm": 2.03242011811474, "learning_rate": 1.5001401968801076e-06, "loss": 0.2153, "step": 33450 }, { "epoch": 2.486138981791156, "grad_norm": 2.2295433143569334, "learning_rate": 1.499717532386744e-06, "loss": 0.3259, "step": 33451 }, { "epoch": 2.486213303604608, "grad_norm": 2.7220858365980027, "learning_rate": 1.4992949226177566e-06, "loss": 0.2845, "step": 33452 }, { "epoch": 2.4862876254180604, "grad_norm": 2.7344264336209188, "learning_rate": 1.4988723675758687e-06, "loss": 0.2964, "step": 33453 }, { "epoch": 2.4863619472315124, "grad_norm": 3.066690192935525, "learning_rate": 1.4984498672638004e-06, "loss": 0.4705, "step": 33454 }, { "epoch": 2.486436269044965, "grad_norm": 1.9696563920482977, "learning_rate": 1.4980274216842672e-06, "loss": 0.2237, "step": 33455 }, { "epoch": 2.486510590858417, "grad_norm": 2.7005049448898544, "learning_rate": 1.497605030839996e-06, "loss": 0.3438, "step": 33456 }, { "epoch": 2.4865849126718693, "grad_norm": 2.5443793457616537, "learning_rate": 1.4971826947336986e-06, "loss": 0.3073, "step": 33457 }, { "epoch": 2.4866592344853213, "grad_norm": 2.506270092433605, "learning_rate": 1.4967604133681035e-06, "loss": 0.214, "step": 33458 }, { "epoch": 2.486733556298774, "grad_norm": 2.79963714465702, "learning_rate": 1.4963381867459192e-06, "loss": 0.3383, "step": 33459 }, { "epoch": 2.486807878112226, "grad_norm": 2.423185296160817, "learning_rate": 1.4959160148698704e-06, "loss": 0.3063, "step": 33460 }, { "epoch": 2.4868821999256783, "grad_norm": 2.8977482668355337, "learning_rate": 1.4954938977426725e-06, "loss": 0.3959, "step": 33461 }, { "epoch": 2.4869565217391303, "grad_norm": 3.218451941409188, "learning_rate": 1.4950718353670423e-06, "loss": 0.2974, "step": 33462 }, { "epoch": 2.4870308435525827, "grad_norm": 2.7797700322172454, "learning_rate": 1.4946498277457e-06, "loss": 0.3164, "step": 33463 }, { "epoch": 2.4871051653660348, "grad_norm": 2.029780974701203, "learning_rate": 1.494227874881361e-06, "loss": 0.2473, "step": 33464 }, { "epoch": 2.4871794871794872, "grad_norm": 2.3494822641247874, "learning_rate": 1.4938059767767388e-06, "loss": 0.3077, "step": 33465 }, { "epoch": 2.4872538089929392, "grad_norm": 2.1204744675583314, "learning_rate": 1.4933841334345555e-06, "loss": 0.2683, "step": 33466 }, { "epoch": 2.4873281308063917, "grad_norm": 2.252484325503528, "learning_rate": 1.4929623448575204e-06, "loss": 0.3227, "step": 33467 }, { "epoch": 2.487402452619844, "grad_norm": 2.7859005185648025, "learning_rate": 1.4925406110483554e-06, "loss": 0.2917, "step": 33468 }, { "epoch": 2.487476774433296, "grad_norm": 2.7425556961338087, "learning_rate": 1.4921189320097728e-06, "loss": 0.3367, "step": 33469 }, { "epoch": 2.487551096246748, "grad_norm": 2.7143217936312487, "learning_rate": 1.4916973077444862e-06, "loss": 0.3375, "step": 33470 }, { "epoch": 2.4876254180602007, "grad_norm": 2.1967239146960194, "learning_rate": 1.4912757382552111e-06, "loss": 0.2145, "step": 33471 }, { "epoch": 2.487699739873653, "grad_norm": 3.0585313414823476, "learning_rate": 1.4908542235446598e-06, "loss": 0.3172, "step": 33472 }, { "epoch": 2.487774061687105, "grad_norm": 2.6163243788106443, "learning_rate": 1.4904327636155491e-06, "loss": 0.3758, "step": 33473 }, { "epoch": 2.4878483835005576, "grad_norm": 2.370924575832621, "learning_rate": 1.4900113584705889e-06, "loss": 0.3451, "step": 33474 }, { "epoch": 2.4879227053140096, "grad_norm": 2.6849791879721514, "learning_rate": 1.4895900081124958e-06, "loss": 0.307, "step": 33475 }, { "epoch": 2.487997027127462, "grad_norm": 2.528830319150927, "learning_rate": 1.4891687125439803e-06, "loss": 0.269, "step": 33476 }, { "epoch": 2.488071348940914, "grad_norm": 2.490641126516456, "learning_rate": 1.4887474717677542e-06, "loss": 0.3178, "step": 33477 }, { "epoch": 2.4881456707543665, "grad_norm": 2.548152901315785, "learning_rate": 1.488326285786531e-06, "loss": 0.1993, "step": 33478 }, { "epoch": 2.4882199925678186, "grad_norm": 2.1024460825228086, "learning_rate": 1.48790515460302e-06, "loss": 0.2344, "step": 33479 }, { "epoch": 2.488294314381271, "grad_norm": 2.8917263335598906, "learning_rate": 1.4874840782199385e-06, "loss": 0.3, "step": 33480 }, { "epoch": 2.488368636194723, "grad_norm": 2.0187276171672375, "learning_rate": 1.487063056639988e-06, "loss": 0.2802, "step": 33481 }, { "epoch": 2.4884429580081755, "grad_norm": 2.2732513400083687, "learning_rate": 1.4866420898658862e-06, "loss": 0.2555, "step": 33482 }, { "epoch": 2.4885172798216275, "grad_norm": 3.2136854865865265, "learning_rate": 1.486221177900341e-06, "loss": 0.3419, "step": 33483 }, { "epoch": 2.48859160163508, "grad_norm": 2.5776809737623174, "learning_rate": 1.4858003207460602e-06, "loss": 0.315, "step": 33484 }, { "epoch": 2.488665923448532, "grad_norm": 2.6127169788743942, "learning_rate": 1.4853795184057561e-06, "loss": 0.3663, "step": 33485 }, { "epoch": 2.4887402452619845, "grad_norm": 2.333988205113639, "learning_rate": 1.484958770882139e-06, "loss": 0.2596, "step": 33486 }, { "epoch": 2.4888145670754365, "grad_norm": 2.110879823695103, "learning_rate": 1.4845380781779116e-06, "loss": 0.2625, "step": 33487 }, { "epoch": 2.488888888888889, "grad_norm": 2.19131209273206, "learning_rate": 1.4841174402957893e-06, "loss": 0.2886, "step": 33488 }, { "epoch": 2.488963210702341, "grad_norm": 2.343522947442885, "learning_rate": 1.4836968572384737e-06, "loss": 0.2887, "step": 33489 }, { "epoch": 2.4890375325157934, "grad_norm": 2.0051042011410516, "learning_rate": 1.4832763290086794e-06, "loss": 0.2675, "step": 33490 }, { "epoch": 2.489111854329246, "grad_norm": 2.0034573029053355, "learning_rate": 1.4828558556091089e-06, "loss": 0.1808, "step": 33491 }, { "epoch": 2.489186176142698, "grad_norm": 2.3848197681291996, "learning_rate": 1.482435437042472e-06, "loss": 0.2242, "step": 33492 }, { "epoch": 2.48926049795615, "grad_norm": 2.223200339785497, "learning_rate": 1.4820150733114723e-06, "loss": 0.1912, "step": 33493 }, { "epoch": 2.4893348197696024, "grad_norm": 1.7824581750274606, "learning_rate": 1.4815947644188167e-06, "loss": 0.2259, "step": 33494 }, { "epoch": 2.489409141583055, "grad_norm": 3.1563373307928124, "learning_rate": 1.4811745103672137e-06, "loss": 0.3013, "step": 33495 }, { "epoch": 2.489483463396507, "grad_norm": 2.392435672223106, "learning_rate": 1.4807543111593648e-06, "loss": 0.3207, "step": 33496 }, { "epoch": 2.4895577852099593, "grad_norm": 2.6111421649898627, "learning_rate": 1.4803341667979799e-06, "loss": 0.3112, "step": 33497 }, { "epoch": 2.4896321070234113, "grad_norm": 1.8619436948194281, "learning_rate": 1.4799140772857611e-06, "loss": 0.2464, "step": 33498 }, { "epoch": 2.4897064288368638, "grad_norm": 2.8286769251000146, "learning_rate": 1.4794940426254122e-06, "loss": 0.3823, "step": 33499 }, { "epoch": 2.489780750650316, "grad_norm": 2.3848764603975847, "learning_rate": 1.4790740628196398e-06, "loss": 0.3028, "step": 33500 }, { "epoch": 2.4898550724637682, "grad_norm": 2.4840231933680887, "learning_rate": 1.4786541378711472e-06, "loss": 0.2998, "step": 33501 }, { "epoch": 2.4899293942772203, "grad_norm": 2.789644619864593, "learning_rate": 1.4782342677826345e-06, "loss": 0.3298, "step": 33502 }, { "epoch": 2.4900037160906727, "grad_norm": 2.6319880571424683, "learning_rate": 1.4778144525568117e-06, "loss": 0.2367, "step": 33503 }, { "epoch": 2.4900780379041247, "grad_norm": 2.6822226884496168, "learning_rate": 1.4773946921963734e-06, "loss": 0.3353, "step": 33504 }, { "epoch": 2.490152359717577, "grad_norm": 2.4280467808995114, "learning_rate": 1.4769749867040272e-06, "loss": 0.3331, "step": 33505 }, { "epoch": 2.490226681531029, "grad_norm": 2.5732734019514987, "learning_rate": 1.4765553360824726e-06, "loss": 0.2691, "step": 33506 }, { "epoch": 2.4903010033444817, "grad_norm": 2.990468932782711, "learning_rate": 1.4761357403344145e-06, "loss": 0.3493, "step": 33507 }, { "epoch": 2.4903753251579337, "grad_norm": 2.0058362341870977, "learning_rate": 1.4757161994625513e-06, "loss": 0.239, "step": 33508 }, { "epoch": 2.490449646971386, "grad_norm": 2.0678022577948214, "learning_rate": 1.475296713469584e-06, "loss": 0.2909, "step": 33509 }, { "epoch": 2.490523968784838, "grad_norm": 2.0332367084454837, "learning_rate": 1.4748772823582158e-06, "loss": 0.2551, "step": 33510 }, { "epoch": 2.4905982905982906, "grad_norm": 2.5307301470633194, "learning_rate": 1.4744579061311425e-06, "loss": 0.2145, "step": 33511 }, { "epoch": 2.4906726124117426, "grad_norm": 3.043775923522066, "learning_rate": 1.4740385847910699e-06, "loss": 0.3857, "step": 33512 }, { "epoch": 2.490746934225195, "grad_norm": 2.643209835833024, "learning_rate": 1.4736193183406944e-06, "loss": 0.3087, "step": 33513 }, { "epoch": 2.4908212560386476, "grad_norm": 3.0071296484841707, "learning_rate": 1.473200106782715e-06, "loss": 0.2458, "step": 33514 }, { "epoch": 2.4908955778520996, "grad_norm": 2.2739612193004493, "learning_rate": 1.4727809501198309e-06, "loss": 0.3062, "step": 33515 }, { "epoch": 2.4909698996655516, "grad_norm": 2.790826146965274, "learning_rate": 1.4723618483547386e-06, "loss": 0.2927, "step": 33516 }, { "epoch": 2.491044221479004, "grad_norm": 2.514881837836604, "learning_rate": 1.4719428014901405e-06, "loss": 0.3213, "step": 33517 }, { "epoch": 2.4911185432924565, "grad_norm": 2.0569087573830047, "learning_rate": 1.47152380952873e-06, "loss": 0.2387, "step": 33518 }, { "epoch": 2.4911928651059085, "grad_norm": 2.299338281979331, "learning_rate": 1.471104872473209e-06, "loss": 0.2263, "step": 33519 }, { "epoch": 2.491267186919361, "grad_norm": 2.2268184385299623, "learning_rate": 1.4706859903262715e-06, "loss": 0.3076, "step": 33520 }, { "epoch": 2.491341508732813, "grad_norm": 2.221970387007767, "learning_rate": 1.470267163090614e-06, "loss": 0.2142, "step": 33521 }, { "epoch": 2.4914158305462655, "grad_norm": 2.6904545039139918, "learning_rate": 1.4698483907689353e-06, "loss": 0.2935, "step": 33522 }, { "epoch": 2.4914901523597175, "grad_norm": 1.9117157486710696, "learning_rate": 1.46942967336393e-06, "loss": 0.2507, "step": 33523 }, { "epoch": 2.49156447417317, "grad_norm": 2.226394291461035, "learning_rate": 1.4690110108782918e-06, "loss": 0.2545, "step": 33524 }, { "epoch": 2.491638795986622, "grad_norm": 2.022884567839366, "learning_rate": 1.4685924033147226e-06, "loss": 0.245, "step": 33525 }, { "epoch": 2.4917131178000744, "grad_norm": 2.369736309405715, "learning_rate": 1.468173850675908e-06, "loss": 0.2921, "step": 33526 }, { "epoch": 2.4917874396135264, "grad_norm": 3.898983989876738, "learning_rate": 1.4677553529645505e-06, "loss": 0.2593, "step": 33527 }, { "epoch": 2.491861761426979, "grad_norm": 2.7796568421293872, "learning_rate": 1.467336910183339e-06, "loss": 0.3595, "step": 33528 }, { "epoch": 2.491936083240431, "grad_norm": 2.504881929272728, "learning_rate": 1.4669185223349714e-06, "loss": 0.3288, "step": 33529 }, { "epoch": 2.4920104050538834, "grad_norm": 2.103970512629959, "learning_rate": 1.4665001894221398e-06, "loss": 0.2684, "step": 33530 }, { "epoch": 2.4920847268673354, "grad_norm": 3.0055775281601607, "learning_rate": 1.4660819114475355e-06, "loss": 0.2927, "step": 33531 }, { "epoch": 2.492159048680788, "grad_norm": 2.35784543801749, "learning_rate": 1.4656636884138542e-06, "loss": 0.275, "step": 33532 }, { "epoch": 2.49223337049424, "grad_norm": 2.973693755467673, "learning_rate": 1.4652455203237859e-06, "loss": 0.2745, "step": 33533 }, { "epoch": 2.4923076923076923, "grad_norm": 2.410134315409929, "learning_rate": 1.4648274071800262e-06, "loss": 0.2668, "step": 33534 }, { "epoch": 2.4923820141211444, "grad_norm": 6.199923485525102, "learning_rate": 1.464409348985264e-06, "loss": 0.2651, "step": 33535 }, { "epoch": 2.492456335934597, "grad_norm": 2.4518949443680693, "learning_rate": 1.4639913457421905e-06, "loss": 0.3385, "step": 33536 }, { "epoch": 2.4925306577480493, "grad_norm": 2.4082310150875443, "learning_rate": 1.4635733974535017e-06, "loss": 0.2151, "step": 33537 }, { "epoch": 2.4926049795615013, "grad_norm": 2.52584193624837, "learning_rate": 1.4631555041218804e-06, "loss": 0.2363, "step": 33538 }, { "epoch": 2.4926793013749533, "grad_norm": 2.378754923680265, "learning_rate": 1.4627376657500226e-06, "loss": 0.2937, "step": 33539 }, { "epoch": 2.4927536231884058, "grad_norm": 2.718880752119935, "learning_rate": 1.462319882340615e-06, "loss": 0.2091, "step": 33540 }, { "epoch": 2.4928279450018582, "grad_norm": 2.3219145243571013, "learning_rate": 1.4619021538963506e-06, "loss": 0.2664, "step": 33541 }, { "epoch": 2.4929022668153102, "grad_norm": 2.4304517774858683, "learning_rate": 1.461484480419918e-06, "loss": 0.2724, "step": 33542 }, { "epoch": 2.4929765886287627, "grad_norm": 1.9528020801296684, "learning_rate": 1.4610668619140022e-06, "loss": 0.2296, "step": 33543 }, { "epoch": 2.4930509104422147, "grad_norm": 3.0304823348175693, "learning_rate": 1.4606492983812982e-06, "loss": 0.3172, "step": 33544 }, { "epoch": 2.493125232255667, "grad_norm": 2.904654486014522, "learning_rate": 1.46023178982449e-06, "loss": 0.3886, "step": 33545 }, { "epoch": 2.493199554069119, "grad_norm": 2.5178660954850636, "learning_rate": 1.4598143362462648e-06, "loss": 0.2655, "step": 33546 }, { "epoch": 2.4932738758825717, "grad_norm": 2.2382816295520125, "learning_rate": 1.4593969376493134e-06, "loss": 0.2379, "step": 33547 }, { "epoch": 2.4933481976960237, "grad_norm": 2.374676521935978, "learning_rate": 1.4589795940363216e-06, "loss": 0.3638, "step": 33548 }, { "epoch": 2.493422519509476, "grad_norm": 2.0352611630503485, "learning_rate": 1.4585623054099763e-06, "loss": 0.1923, "step": 33549 }, { "epoch": 2.493496841322928, "grad_norm": 2.1215803145804304, "learning_rate": 1.4581450717729605e-06, "loss": 0.268, "step": 33550 }, { "epoch": 2.4935711631363806, "grad_norm": 2.56889177952258, "learning_rate": 1.457727893127967e-06, "loss": 0.311, "step": 33551 }, { "epoch": 2.4936454849498326, "grad_norm": 4.905679470444357, "learning_rate": 1.4573107694776779e-06, "loss": 0.3222, "step": 33552 }, { "epoch": 2.493719806763285, "grad_norm": 2.9320191777944387, "learning_rate": 1.4568937008247752e-06, "loss": 0.2533, "step": 33553 }, { "epoch": 2.493794128576737, "grad_norm": 1.9871408359083864, "learning_rate": 1.4564766871719515e-06, "loss": 0.2114, "step": 33554 }, { "epoch": 2.4938684503901896, "grad_norm": 2.6730350969206333, "learning_rate": 1.4560597285218847e-06, "loss": 0.2324, "step": 33555 }, { "epoch": 2.4939427722036416, "grad_norm": 2.2246675055771146, "learning_rate": 1.4556428248772636e-06, "loss": 0.2379, "step": 33556 }, { "epoch": 2.494017094017094, "grad_norm": 2.672275077338255, "learning_rate": 1.4552259762407717e-06, "loss": 0.2838, "step": 33557 }, { "epoch": 2.494091415830546, "grad_norm": 1.8476381361487149, "learning_rate": 1.454809182615089e-06, "loss": 0.2113, "step": 33558 }, { "epoch": 2.4941657376439985, "grad_norm": 1.9461931891526845, "learning_rate": 1.4543924440029067e-06, "loss": 0.209, "step": 33559 }, { "epoch": 2.494240059457451, "grad_norm": 2.6314151064505658, "learning_rate": 1.4539757604068972e-06, "loss": 0.3466, "step": 33560 }, { "epoch": 2.494314381270903, "grad_norm": 2.9684046512747546, "learning_rate": 1.4535591318297503e-06, "loss": 0.3035, "step": 33561 }, { "epoch": 2.4943887030843555, "grad_norm": 2.4380437401138026, "learning_rate": 1.4531425582741442e-06, "loss": 0.2733, "step": 33562 }, { "epoch": 2.4944630248978075, "grad_norm": 2.6770559040475654, "learning_rate": 1.4527260397427645e-06, "loss": 0.2999, "step": 33563 }, { "epoch": 2.49453734671126, "grad_norm": 2.1531576672634065, "learning_rate": 1.4523095762382922e-06, "loss": 0.2739, "step": 33564 }, { "epoch": 2.494611668524712, "grad_norm": 1.9635261454532604, "learning_rate": 1.451893167763405e-06, "loss": 0.1812, "step": 33565 }, { "epoch": 2.4946859903381644, "grad_norm": 2.5921220583418796, "learning_rate": 1.451476814320788e-06, "loss": 0.2523, "step": 33566 }, { "epoch": 2.4947603121516164, "grad_norm": 2.076874969221235, "learning_rate": 1.4510605159131197e-06, "loss": 0.2665, "step": 33567 }, { "epoch": 2.494834633965069, "grad_norm": 2.9202767060969417, "learning_rate": 1.4506442725430769e-06, "loss": 0.3509, "step": 33568 }, { "epoch": 2.494908955778521, "grad_norm": 2.891758110370538, "learning_rate": 1.4502280842133466e-06, "loss": 0.3452, "step": 33569 }, { "epoch": 2.4949832775919734, "grad_norm": 1.804073927212061, "learning_rate": 1.4498119509266018e-06, "loss": 0.1948, "step": 33570 }, { "epoch": 2.4950575994054254, "grad_norm": 1.9160464006717655, "learning_rate": 1.4493958726855274e-06, "loss": 0.2364, "step": 33571 }, { "epoch": 2.495131921218878, "grad_norm": 1.9984635421467363, "learning_rate": 1.4489798494927954e-06, "loss": 0.174, "step": 33572 }, { "epoch": 2.49520624303233, "grad_norm": 2.3506867574909034, "learning_rate": 1.4485638813510894e-06, "loss": 0.3091, "step": 33573 }, { "epoch": 2.4952805648457823, "grad_norm": 1.8745050043888853, "learning_rate": 1.448147968263085e-06, "loss": 0.1987, "step": 33574 }, { "epoch": 2.4953548866592343, "grad_norm": 3.154635324980504, "learning_rate": 1.4477321102314578e-06, "loss": 0.3753, "step": 33575 }, { "epoch": 2.495429208472687, "grad_norm": 2.1786784819305054, "learning_rate": 1.4473163072588902e-06, "loss": 0.2528, "step": 33576 }, { "epoch": 2.495503530286139, "grad_norm": 2.2527421021306546, "learning_rate": 1.4469005593480546e-06, "loss": 0.2983, "step": 33577 }, { "epoch": 2.4955778520995913, "grad_norm": 2.4605961610000975, "learning_rate": 1.4464848665016307e-06, "loss": 0.3229, "step": 33578 }, { "epoch": 2.4956521739130437, "grad_norm": 3.787687794914719, "learning_rate": 1.4460692287222944e-06, "loss": 0.3511, "step": 33579 }, { "epoch": 2.4957264957264957, "grad_norm": 1.8061532136184397, "learning_rate": 1.445653646012718e-06, "loss": 0.2213, "step": 33580 }, { "epoch": 2.4958008175399478, "grad_norm": 2.7272296428752614, "learning_rate": 1.4452381183755815e-06, "loss": 0.3413, "step": 33581 }, { "epoch": 2.4958751393534, "grad_norm": 2.5933380833179647, "learning_rate": 1.4448226458135584e-06, "loss": 0.2596, "step": 33582 }, { "epoch": 2.4959494611668527, "grad_norm": 1.9296731127353586, "learning_rate": 1.444407228329323e-06, "loss": 0.259, "step": 33583 }, { "epoch": 2.4960237829803047, "grad_norm": 1.9913290003327422, "learning_rate": 1.4439918659255481e-06, "loss": 0.1912, "step": 33584 }, { "epoch": 2.496098104793757, "grad_norm": 1.8161852533298009, "learning_rate": 1.443576558604912e-06, "loss": 0.1845, "step": 33585 }, { "epoch": 2.496172426607209, "grad_norm": 2.973511268248947, "learning_rate": 1.4431613063700855e-06, "loss": 0.2987, "step": 33586 }, { "epoch": 2.4962467484206616, "grad_norm": 1.8260939423325964, "learning_rate": 1.4427461092237404e-06, "loss": 0.2218, "step": 33587 }, { "epoch": 2.4963210702341136, "grad_norm": 2.0468728820713413, "learning_rate": 1.4423309671685538e-06, "loss": 0.2476, "step": 33588 }, { "epoch": 2.496395392047566, "grad_norm": 2.2425317803724982, "learning_rate": 1.4419158802071975e-06, "loss": 0.3501, "step": 33589 }, { "epoch": 2.496469713861018, "grad_norm": 2.1536461040110804, "learning_rate": 1.4415008483423387e-06, "loss": 0.2919, "step": 33590 }, { "epoch": 2.4965440356744706, "grad_norm": 3.287249066348702, "learning_rate": 1.4410858715766574e-06, "loss": 0.3877, "step": 33591 }, { "epoch": 2.4966183574879226, "grad_norm": 2.6750370767206886, "learning_rate": 1.440670949912818e-06, "loss": 0.2951, "step": 33592 }, { "epoch": 2.496692679301375, "grad_norm": 2.4662043957042283, "learning_rate": 1.4402560833534995e-06, "loss": 0.2348, "step": 33593 }, { "epoch": 2.496767001114827, "grad_norm": 1.8309320906262634, "learning_rate": 1.4398412719013645e-06, "loss": 0.1835, "step": 33594 }, { "epoch": 2.4968413229282795, "grad_norm": 2.59719530345813, "learning_rate": 1.439426515559088e-06, "loss": 0.3083, "step": 33595 }, { "epoch": 2.4969156447417316, "grad_norm": 2.4540352132403997, "learning_rate": 1.4390118143293408e-06, "loss": 0.2293, "step": 33596 }, { "epoch": 2.496989966555184, "grad_norm": 2.320691415706261, "learning_rate": 1.438597168214788e-06, "loss": 0.2752, "step": 33597 }, { "epoch": 2.497064288368636, "grad_norm": 2.5562129754230765, "learning_rate": 1.4381825772181047e-06, "loss": 0.3731, "step": 33598 }, { "epoch": 2.4971386101820885, "grad_norm": 1.676654852653049, "learning_rate": 1.437768041341956e-06, "loss": 0.1923, "step": 33599 }, { "epoch": 2.4972129319955405, "grad_norm": 1.998566465161646, "learning_rate": 1.4373535605890143e-06, "loss": 0.2745, "step": 33600 }, { "epoch": 2.497287253808993, "grad_norm": 2.4237969617707917, "learning_rate": 1.4369391349619466e-06, "loss": 0.2725, "step": 33601 }, { "epoch": 2.4973615756224454, "grad_norm": 2.04522699266242, "learning_rate": 1.4365247644634185e-06, "loss": 0.2567, "step": 33602 }, { "epoch": 2.4974358974358974, "grad_norm": 2.073601444179371, "learning_rate": 1.436110449096101e-06, "loss": 0.2423, "step": 33603 }, { "epoch": 2.4975102192493495, "grad_norm": 2.5237101302488005, "learning_rate": 1.4356961888626607e-06, "loss": 0.2301, "step": 33604 }, { "epoch": 2.497584541062802, "grad_norm": 2.44908185172717, "learning_rate": 1.4352819837657627e-06, "loss": 0.3332, "step": 33605 }, { "epoch": 2.4976588628762544, "grad_norm": 1.7010231437613454, "learning_rate": 1.4348678338080746e-06, "loss": 0.2172, "step": 33606 }, { "epoch": 2.4977331846897064, "grad_norm": 1.9691882635384168, "learning_rate": 1.434453738992264e-06, "loss": 0.2196, "step": 33607 }, { "epoch": 2.497807506503159, "grad_norm": 1.8780861570036018, "learning_rate": 1.434039699320996e-06, "loss": 0.2373, "step": 33608 }, { "epoch": 2.497881828316611, "grad_norm": 1.9800170087116151, "learning_rate": 1.4336257147969335e-06, "loss": 0.2515, "step": 33609 }, { "epoch": 2.4979561501300633, "grad_norm": 2.049265261117751, "learning_rate": 1.4332117854227467e-06, "loss": 0.27, "step": 33610 }, { "epoch": 2.4980304719435154, "grad_norm": 2.5146452231759713, "learning_rate": 1.4327979112010981e-06, "loss": 0.2906, "step": 33611 }, { "epoch": 2.498104793756968, "grad_norm": 2.2284132853705856, "learning_rate": 1.4323840921346487e-06, "loss": 0.2307, "step": 33612 }, { "epoch": 2.49817911557042, "grad_norm": 1.7469273236364948, "learning_rate": 1.4319703282260699e-06, "loss": 0.2353, "step": 33613 }, { "epoch": 2.4982534373838723, "grad_norm": 2.1864054619674858, "learning_rate": 1.4315566194780184e-06, "loss": 0.2634, "step": 33614 }, { "epoch": 2.4983277591973243, "grad_norm": 2.2156246178180874, "learning_rate": 1.431142965893163e-06, "loss": 0.236, "step": 33615 }, { "epoch": 2.4984020810107768, "grad_norm": 2.498877831418621, "learning_rate": 1.430729367474164e-06, "loss": 0.2953, "step": 33616 }, { "epoch": 2.498476402824229, "grad_norm": 2.0674732825071978, "learning_rate": 1.4303158242236858e-06, "loss": 0.2259, "step": 33617 }, { "epoch": 2.4985507246376812, "grad_norm": 2.3127771468173894, "learning_rate": 1.4299023361443898e-06, "loss": 0.268, "step": 33618 }, { "epoch": 2.4986250464511333, "grad_norm": 2.775884689242443, "learning_rate": 1.4294889032389348e-06, "loss": 0.2805, "step": 33619 }, { "epoch": 2.4986993682645857, "grad_norm": 2.682269471695434, "learning_rate": 1.4290755255099876e-06, "loss": 0.3426, "step": 33620 }, { "epoch": 2.4987736900780377, "grad_norm": 2.2474999824144763, "learning_rate": 1.428662202960206e-06, "loss": 0.2692, "step": 33621 }, { "epoch": 2.49884801189149, "grad_norm": 1.901464925224645, "learning_rate": 1.4282489355922546e-06, "loss": 0.1956, "step": 33622 }, { "epoch": 2.498922333704942, "grad_norm": 2.374981793581821, "learning_rate": 1.4278357234087914e-06, "loss": 0.2884, "step": 33623 }, { "epoch": 2.4989966555183947, "grad_norm": 2.194730821152221, "learning_rate": 1.4274225664124752e-06, "loss": 0.2304, "step": 33624 }, { "epoch": 2.499070977331847, "grad_norm": 2.2814847563798355, "learning_rate": 1.4270094646059696e-06, "loss": 0.2822, "step": 33625 }, { "epoch": 2.499145299145299, "grad_norm": 2.0877742944794004, "learning_rate": 1.42659641799193e-06, "loss": 0.2581, "step": 33626 }, { "epoch": 2.499219620958751, "grad_norm": 2.711367973641581, "learning_rate": 1.4261834265730223e-06, "loss": 0.2862, "step": 33627 }, { "epoch": 2.4992939427722036, "grad_norm": 1.9248667862467403, "learning_rate": 1.4257704903518986e-06, "loss": 0.237, "step": 33628 }, { "epoch": 2.499368264585656, "grad_norm": 2.4593410487968232, "learning_rate": 1.4253576093312172e-06, "loss": 0.3106, "step": 33629 }, { "epoch": 2.499442586399108, "grad_norm": 2.051268953816147, "learning_rate": 1.424944783513642e-06, "loss": 0.2535, "step": 33630 }, { "epoch": 2.4995169082125606, "grad_norm": 1.9411678900144613, "learning_rate": 1.4245320129018236e-06, "loss": 0.2676, "step": 33631 }, { "epoch": 2.4995912300260126, "grad_norm": 2.634419117350846, "learning_rate": 1.4241192974984264e-06, "loss": 0.3446, "step": 33632 }, { "epoch": 2.499665551839465, "grad_norm": 2.290600087406781, "learning_rate": 1.4237066373061038e-06, "loss": 0.2637, "step": 33633 }, { "epoch": 2.499739873652917, "grad_norm": 2.2983618358145423, "learning_rate": 1.42329403232751e-06, "loss": 0.2157, "step": 33634 }, { "epoch": 2.4998141954663695, "grad_norm": 2.5259105391343413, "learning_rate": 1.422881482565307e-06, "loss": 0.3228, "step": 33635 }, { "epoch": 2.4998885172798215, "grad_norm": 2.2086520450997558, "learning_rate": 1.4224689880221464e-06, "loss": 0.2315, "step": 33636 }, { "epoch": 2.499962839093274, "grad_norm": 2.0316947514856034, "learning_rate": 1.4220565487006864e-06, "loss": 0.1976, "step": 33637 }, { "epoch": 2.500037160906726, "grad_norm": 2.5782256210836785, "learning_rate": 1.4216441646035827e-06, "loss": 0.3051, "step": 33638 }, { "epoch": 2.5001114827201785, "grad_norm": 2.2375642611925, "learning_rate": 1.4212318357334876e-06, "loss": 0.3207, "step": 33639 }, { "epoch": 2.5001858045336305, "grad_norm": 1.9138714051230619, "learning_rate": 1.4208195620930576e-06, "loss": 0.2298, "step": 33640 }, { "epoch": 2.500260126347083, "grad_norm": 2.5164119948804697, "learning_rate": 1.420407343684943e-06, "loss": 0.2942, "step": 33641 }, { "epoch": 2.500334448160535, "grad_norm": 1.6551681253432682, "learning_rate": 1.419995180511804e-06, "loss": 0.1712, "step": 33642 }, { "epoch": 2.5004087699739874, "grad_norm": 1.9914280910675104, "learning_rate": 1.419583072576287e-06, "loss": 0.2728, "step": 33643 }, { "epoch": 2.50048309178744, "grad_norm": 2.2870460935308543, "learning_rate": 1.4191710198810527e-06, "loss": 0.2932, "step": 33644 }, { "epoch": 2.500557413600892, "grad_norm": 2.5757503979480836, "learning_rate": 1.4187590224287495e-06, "loss": 0.3083, "step": 33645 }, { "epoch": 2.500631735414344, "grad_norm": 2.355289532371665, "learning_rate": 1.4183470802220289e-06, "loss": 0.3105, "step": 33646 }, { "epoch": 2.5007060572277964, "grad_norm": 2.7100259766369015, "learning_rate": 1.4179351932635466e-06, "loss": 0.3111, "step": 33647 }, { "epoch": 2.500780379041249, "grad_norm": 2.481846848744669, "learning_rate": 1.4175233615559515e-06, "loss": 0.339, "step": 33648 }, { "epoch": 2.500854700854701, "grad_norm": 2.4163695739454876, "learning_rate": 1.4171115851018946e-06, "loss": 0.3075, "step": 33649 }, { "epoch": 2.500929022668153, "grad_norm": 2.3554020665549564, "learning_rate": 1.4166998639040308e-06, "loss": 0.2926, "step": 33650 }, { "epoch": 2.5010033444816053, "grad_norm": 1.917639742511988, "learning_rate": 1.4162881979650045e-06, "loss": 0.2193, "step": 33651 }, { "epoch": 2.501077666295058, "grad_norm": 2.913415429715052, "learning_rate": 1.4158765872874713e-06, "loss": 0.347, "step": 33652 }, { "epoch": 2.50115198810851, "grad_norm": 2.2961310533502606, "learning_rate": 1.4154650318740782e-06, "loss": 0.3335, "step": 33653 }, { "epoch": 2.501226309921962, "grad_norm": 2.547615948400216, "learning_rate": 1.4150535317274771e-06, "loss": 0.2888, "step": 33654 }, { "epoch": 2.5013006317354143, "grad_norm": 1.9771354073186291, "learning_rate": 1.4146420868503152e-06, "loss": 0.2595, "step": 33655 }, { "epoch": 2.5013749535488667, "grad_norm": 2.66750584152803, "learning_rate": 1.4142306972452402e-06, "loss": 0.2857, "step": 33656 }, { "epoch": 2.5014492753623188, "grad_norm": 2.2490903564885047, "learning_rate": 1.4138193629149055e-06, "loss": 0.2999, "step": 33657 }, { "epoch": 2.501523597175771, "grad_norm": 2.2152674862842265, "learning_rate": 1.4134080838619535e-06, "loss": 0.2258, "step": 33658 }, { "epoch": 2.5015979189892232, "grad_norm": 2.3763995589469733, "learning_rate": 1.4129968600890377e-06, "loss": 0.2512, "step": 33659 }, { "epoch": 2.5016722408026757, "grad_norm": 2.7706864961305038, "learning_rate": 1.4125856915988012e-06, "loss": 0.2982, "step": 33660 }, { "epoch": 2.5017465626161277, "grad_norm": 2.2665677935173023, "learning_rate": 1.4121745783938933e-06, "loss": 0.1763, "step": 33661 }, { "epoch": 2.50182088442958, "grad_norm": 2.2776582514702306, "learning_rate": 1.4117635204769598e-06, "loss": 0.2621, "step": 33662 }, { "epoch": 2.501895206243032, "grad_norm": 4.3418117450767655, "learning_rate": 1.4113525178506449e-06, "loss": 0.217, "step": 33663 }, { "epoch": 2.5019695280564846, "grad_norm": 2.09519787359651, "learning_rate": 1.410941570517599e-06, "loss": 0.2503, "step": 33664 }, { "epoch": 2.5020438498699367, "grad_norm": 2.961476246056276, "learning_rate": 1.410530678480463e-06, "loss": 0.3039, "step": 33665 }, { "epoch": 2.502118171683389, "grad_norm": 2.558226021239633, "learning_rate": 1.410119841741887e-06, "loss": 0.3197, "step": 33666 }, { "epoch": 2.5021924934968416, "grad_norm": 2.7574412630883627, "learning_rate": 1.409709060304515e-06, "loss": 0.2473, "step": 33667 }, { "epoch": 2.5022668153102936, "grad_norm": 2.352823199787721, "learning_rate": 1.4092983341709877e-06, "loss": 0.2876, "step": 33668 }, { "epoch": 2.5023411371237456, "grad_norm": 2.274933325627617, "learning_rate": 1.408887663343953e-06, "loss": 0.2066, "step": 33669 }, { "epoch": 2.502415458937198, "grad_norm": 2.300950452466487, "learning_rate": 1.4084770478260545e-06, "loss": 0.2301, "step": 33670 }, { "epoch": 2.5024897807506505, "grad_norm": 3.0038806090353205, "learning_rate": 1.4080664876199335e-06, "loss": 0.2991, "step": 33671 }, { "epoch": 2.5025641025641026, "grad_norm": 2.4778300532304605, "learning_rate": 1.4076559827282377e-06, "loss": 0.3354, "step": 33672 }, { "epoch": 2.5026384243775546, "grad_norm": 2.7408387281246234, "learning_rate": 1.4072455331536039e-06, "loss": 0.2828, "step": 33673 }, { "epoch": 2.502712746191007, "grad_norm": 2.276288679636841, "learning_rate": 1.4068351388986789e-06, "loss": 0.2812, "step": 33674 }, { "epoch": 2.5027870680044595, "grad_norm": 2.733394043663724, "learning_rate": 1.4064247999661006e-06, "loss": 0.3284, "step": 33675 }, { "epoch": 2.5028613898179115, "grad_norm": 2.3811465649810004, "learning_rate": 1.406014516358517e-06, "loss": 0.2967, "step": 33676 }, { "epoch": 2.502935711631364, "grad_norm": 2.63464205853091, "learning_rate": 1.4056042880785647e-06, "loss": 0.3178, "step": 33677 }, { "epoch": 2.503010033444816, "grad_norm": 2.16054927321924, "learning_rate": 1.405194115128885e-06, "loss": 0.2737, "step": 33678 }, { "epoch": 2.5030843552582684, "grad_norm": 3.2394518774204597, "learning_rate": 1.4047839975121214e-06, "loss": 0.3699, "step": 33679 }, { "epoch": 2.5031586770717205, "grad_norm": 2.6403536164052226, "learning_rate": 1.4043739352309106e-06, "loss": 0.3507, "step": 33680 }, { "epoch": 2.503232998885173, "grad_norm": 1.9689258569727524, "learning_rate": 1.403963928287896e-06, "loss": 0.2545, "step": 33681 }, { "epoch": 2.503307320698625, "grad_norm": 2.0981472646919883, "learning_rate": 1.4035539766857165e-06, "loss": 0.2788, "step": 33682 }, { "epoch": 2.5033816425120774, "grad_norm": 2.166953097681843, "learning_rate": 1.4031440804270069e-06, "loss": 0.2752, "step": 33683 }, { "epoch": 2.5034559643255294, "grad_norm": 2.5166760476700945, "learning_rate": 1.4027342395144151e-06, "loss": 0.2859, "step": 33684 }, { "epoch": 2.503530286138982, "grad_norm": 2.1031414719544888, "learning_rate": 1.4023244539505698e-06, "loss": 0.1991, "step": 33685 }, { "epoch": 2.503604607952434, "grad_norm": 2.1918494756047013, "learning_rate": 1.4019147237381147e-06, "loss": 0.2667, "step": 33686 }, { "epoch": 2.5036789297658864, "grad_norm": 1.895822506844462, "learning_rate": 1.4015050488796856e-06, "loss": 0.1939, "step": 33687 }, { "epoch": 2.5037532515793384, "grad_norm": 2.7119608022852546, "learning_rate": 1.4010954293779222e-06, "loss": 0.2664, "step": 33688 }, { "epoch": 2.503827573392791, "grad_norm": 2.2830575021034303, "learning_rate": 1.4006858652354604e-06, "loss": 0.2989, "step": 33689 }, { "epoch": 2.5039018952062433, "grad_norm": 2.599007451157646, "learning_rate": 1.4002763564549348e-06, "loss": 0.3168, "step": 33690 }, { "epoch": 2.5039762170196953, "grad_norm": 2.377694299714447, "learning_rate": 1.3998669030389866e-06, "loss": 0.2935, "step": 33691 }, { "epoch": 2.5040505388331473, "grad_norm": 2.7200877259677654, "learning_rate": 1.3994575049902481e-06, "loss": 0.331, "step": 33692 }, { "epoch": 2.5041248606466, "grad_norm": 2.375368573455539, "learning_rate": 1.399048162311354e-06, "loss": 0.2792, "step": 33693 }, { "epoch": 2.5041991824600522, "grad_norm": 2.09726944484223, "learning_rate": 1.398638875004944e-06, "loss": 0.2658, "step": 33694 }, { "epoch": 2.5042735042735043, "grad_norm": 2.1795008523147303, "learning_rate": 1.3982296430736509e-06, "loss": 0.2271, "step": 33695 }, { "epoch": 2.5043478260869563, "grad_norm": 2.141758684261226, "learning_rate": 1.3978204665201078e-06, "loss": 0.2253, "step": 33696 }, { "epoch": 2.5044221479004087, "grad_norm": 3.066266326673594, "learning_rate": 1.3974113453469495e-06, "loss": 0.2725, "step": 33697 }, { "epoch": 2.504496469713861, "grad_norm": 2.3169628285857264, "learning_rate": 1.3970022795568118e-06, "loss": 0.2993, "step": 33698 }, { "epoch": 2.504570791527313, "grad_norm": 3.651997931817316, "learning_rate": 1.3965932691523276e-06, "loss": 0.2976, "step": 33699 }, { "epoch": 2.5046451133407657, "grad_norm": 2.2157737172379233, "learning_rate": 1.3961843141361264e-06, "loss": 0.2793, "step": 33700 }, { "epoch": 2.5047194351542177, "grad_norm": 2.0332732887945877, "learning_rate": 1.395775414510847e-06, "loss": 0.252, "step": 33701 }, { "epoch": 2.50479375696767, "grad_norm": 2.4953793234370667, "learning_rate": 1.3953665702791176e-06, "loss": 0.2566, "step": 33702 }, { "epoch": 2.504868078781122, "grad_norm": 2.3554021700210086, "learning_rate": 1.3949577814435732e-06, "loss": 0.3375, "step": 33703 }, { "epoch": 2.5049424005945746, "grad_norm": 2.0881379766599157, "learning_rate": 1.3945490480068436e-06, "loss": 0.211, "step": 33704 }, { "epoch": 2.5050167224080266, "grad_norm": 2.1423164251775706, "learning_rate": 1.3941403699715594e-06, "loss": 0.2711, "step": 33705 }, { "epoch": 2.505091044221479, "grad_norm": 2.0189794194956248, "learning_rate": 1.3937317473403566e-06, "loss": 0.2106, "step": 33706 }, { "epoch": 2.505165366034931, "grad_norm": 2.1736551387554055, "learning_rate": 1.3933231801158587e-06, "loss": 0.3336, "step": 33707 }, { "epoch": 2.5052396878483836, "grad_norm": 2.237184411513082, "learning_rate": 1.392914668300701e-06, "loss": 0.2428, "step": 33708 }, { "epoch": 2.5053140096618356, "grad_norm": 2.6874765642029144, "learning_rate": 1.3925062118975108e-06, "loss": 0.3235, "step": 33709 }, { "epoch": 2.505388331475288, "grad_norm": 2.4824873474243856, "learning_rate": 1.39209781090892e-06, "loss": 0.2337, "step": 33710 }, { "epoch": 2.50546265328874, "grad_norm": 2.3078063938523603, "learning_rate": 1.3916894653375578e-06, "loss": 0.3463, "step": 33711 }, { "epoch": 2.5055369751021925, "grad_norm": 3.0380384094122226, "learning_rate": 1.39128117518605e-06, "loss": 0.2974, "step": 33712 }, { "epoch": 2.505611296915645, "grad_norm": 2.4342591977250208, "learning_rate": 1.3908729404570287e-06, "loss": 0.3233, "step": 33713 }, { "epoch": 2.505685618729097, "grad_norm": 1.7570702778129808, "learning_rate": 1.390464761153122e-06, "loss": 0.2215, "step": 33714 }, { "epoch": 2.505759940542549, "grad_norm": 2.4973902781481105, "learning_rate": 1.390056637276953e-06, "loss": 0.2815, "step": 33715 }, { "epoch": 2.5058342623560015, "grad_norm": 2.5694691593658945, "learning_rate": 1.3896485688311555e-06, "loss": 0.3363, "step": 33716 }, { "epoch": 2.505908584169454, "grad_norm": 2.6165769800421756, "learning_rate": 1.389240555818353e-06, "loss": 0.2931, "step": 33717 }, { "epoch": 2.505982905982906, "grad_norm": 2.201491712552023, "learning_rate": 1.3888325982411742e-06, "loss": 0.2142, "step": 33718 }, { "epoch": 2.506057227796358, "grad_norm": 3.4333050303350716, "learning_rate": 1.3884246961022408e-06, "loss": 0.2991, "step": 33719 }, { "epoch": 2.5061315496098104, "grad_norm": 2.0091217941304893, "learning_rate": 1.3880168494041856e-06, "loss": 0.2511, "step": 33720 }, { "epoch": 2.506205871423263, "grad_norm": 2.613447295654801, "learning_rate": 1.3876090581496304e-06, "loss": 0.2828, "step": 33721 }, { "epoch": 2.506280193236715, "grad_norm": 2.3088104354831156, "learning_rate": 1.3872013223411994e-06, "loss": 0.2664, "step": 33722 }, { "epoch": 2.5063545150501674, "grad_norm": 1.9419403694847013, "learning_rate": 1.386793641981522e-06, "loss": 0.1891, "step": 33723 }, { "epoch": 2.5064288368636194, "grad_norm": 2.008671876169132, "learning_rate": 1.386386017073218e-06, "loss": 0.2422, "step": 33724 }, { "epoch": 2.506503158677072, "grad_norm": 1.8305672606980008, "learning_rate": 1.3859784476189153e-06, "loss": 0.2395, "step": 33725 }, { "epoch": 2.506577480490524, "grad_norm": 2.6909125976719825, "learning_rate": 1.3855709336212364e-06, "loss": 0.3145, "step": 33726 }, { "epoch": 2.5066518023039763, "grad_norm": 2.3909810971093504, "learning_rate": 1.3851634750828037e-06, "loss": 0.2693, "step": 33727 }, { "epoch": 2.5067261241174283, "grad_norm": 2.8749446440019035, "learning_rate": 1.3847560720062426e-06, "loss": 0.2569, "step": 33728 }, { "epoch": 2.506800445930881, "grad_norm": 1.9913749446053775, "learning_rate": 1.3843487243941756e-06, "loss": 0.2082, "step": 33729 }, { "epoch": 2.506874767744333, "grad_norm": 2.743088981470795, "learning_rate": 1.383941432249224e-06, "loss": 0.2562, "step": 33730 }, { "epoch": 2.5069490895577853, "grad_norm": 1.867816432283402, "learning_rate": 1.3835341955740088e-06, "loss": 0.197, "step": 33731 }, { "epoch": 2.5070234113712373, "grad_norm": 1.9501422002823836, "learning_rate": 1.3831270143711551e-06, "loss": 0.2598, "step": 33732 }, { "epoch": 2.5070977331846898, "grad_norm": 2.046745249379053, "learning_rate": 1.3827198886432824e-06, "loss": 0.2017, "step": 33733 }, { "epoch": 2.5071720549981418, "grad_norm": 2.242365702929332, "learning_rate": 1.3823128183930102e-06, "loss": 0.2542, "step": 33734 }, { "epoch": 2.5072463768115942, "grad_norm": 3.273858698130494, "learning_rate": 1.3819058036229638e-06, "loss": 0.3198, "step": 33735 }, { "epoch": 2.5073206986250467, "grad_norm": 2.264098268406646, "learning_rate": 1.381498844335759e-06, "loss": 0.2781, "step": 33736 }, { "epoch": 2.5073950204384987, "grad_norm": 2.256742085223326, "learning_rate": 1.3810919405340161e-06, "loss": 0.271, "step": 33737 }, { "epoch": 2.5074693422519507, "grad_norm": 2.4374972250339004, "learning_rate": 1.3806850922203585e-06, "loss": 0.2931, "step": 33738 }, { "epoch": 2.507543664065403, "grad_norm": 2.4062894022694516, "learning_rate": 1.3802782993974006e-06, "loss": 0.235, "step": 33739 }, { "epoch": 2.5076179858788556, "grad_norm": 2.5906394170187528, "learning_rate": 1.3798715620677682e-06, "loss": 0.3205, "step": 33740 }, { "epoch": 2.5076923076923077, "grad_norm": 2.358018987342192, "learning_rate": 1.3794648802340716e-06, "loss": 0.2666, "step": 33741 }, { "epoch": 2.5077666295057597, "grad_norm": 4.217653098020145, "learning_rate": 1.379058253898934e-06, "loss": 0.2125, "step": 33742 }, { "epoch": 2.507840951319212, "grad_norm": 2.7614441405432184, "learning_rate": 1.3786516830649731e-06, "loss": 0.2885, "step": 33743 }, { "epoch": 2.5079152731326646, "grad_norm": 2.5046925159004525, "learning_rate": 1.378245167734802e-06, "loss": 0.299, "step": 33744 }, { "epoch": 2.5079895949461166, "grad_norm": 2.417295013351285, "learning_rate": 1.3778387079110444e-06, "loss": 0.2896, "step": 33745 }, { "epoch": 2.508063916759569, "grad_norm": 2.2708916693817622, "learning_rate": 1.377432303596311e-06, "loss": 0.2644, "step": 33746 }, { "epoch": 2.508138238573021, "grad_norm": 2.211271808263223, "learning_rate": 1.3770259547932229e-06, "loss": 0.2449, "step": 33747 }, { "epoch": 2.5082125603864736, "grad_norm": 2.318200271889431, "learning_rate": 1.3766196615043947e-06, "loss": 0.2255, "step": 33748 }, { "epoch": 2.5082868821999256, "grad_norm": 2.8522131426869457, "learning_rate": 1.37621342373244e-06, "loss": 0.3113, "step": 33749 }, { "epoch": 2.508361204013378, "grad_norm": 3.2015169069417477, "learning_rate": 1.3758072414799773e-06, "loss": 0.3974, "step": 33750 }, { "epoch": 2.50843552582683, "grad_norm": 2.6364559660093785, "learning_rate": 1.3754011147496204e-06, "loss": 0.2631, "step": 33751 }, { "epoch": 2.5085098476402825, "grad_norm": 2.756362921926673, "learning_rate": 1.3749950435439841e-06, "loss": 0.2241, "step": 33752 }, { "epoch": 2.5085841694537345, "grad_norm": 2.350436299044446, "learning_rate": 1.3745890278656815e-06, "loss": 0.3173, "step": 33753 }, { "epoch": 2.508658491267187, "grad_norm": 2.0527479048455817, "learning_rate": 1.374183067717325e-06, "loss": 0.3106, "step": 33754 }, { "epoch": 2.508732813080639, "grad_norm": 1.7854383688110766, "learning_rate": 1.373777163101533e-06, "loss": 0.2036, "step": 33755 }, { "epoch": 2.5088071348940915, "grad_norm": 2.0043626269807344, "learning_rate": 1.3733713140209126e-06, "loss": 0.2315, "step": 33756 }, { "epoch": 2.5088814567075435, "grad_norm": 2.254608309630136, "learning_rate": 1.3729655204780835e-06, "loss": 0.2736, "step": 33757 }, { "epoch": 2.508955778520996, "grad_norm": 2.4603421637574145, "learning_rate": 1.3725597824756543e-06, "loss": 0.2279, "step": 33758 }, { "epoch": 2.5090301003344484, "grad_norm": 2.115791285425698, "learning_rate": 1.3721541000162365e-06, "loss": 0.2513, "step": 33759 }, { "epoch": 2.5091044221479004, "grad_norm": 2.336464243651142, "learning_rate": 1.3717484731024432e-06, "loss": 0.2433, "step": 33760 }, { "epoch": 2.5091787439613524, "grad_norm": 2.5711677130928057, "learning_rate": 1.3713429017368852e-06, "loss": 0.3546, "step": 33761 }, { "epoch": 2.509253065774805, "grad_norm": 2.0438241971306845, "learning_rate": 1.3709373859221752e-06, "loss": 0.26, "step": 33762 }, { "epoch": 2.5093273875882574, "grad_norm": 2.111764829809217, "learning_rate": 1.3705319256609217e-06, "loss": 0.2448, "step": 33763 }, { "epoch": 2.5094017094017094, "grad_norm": 2.1946935132035863, "learning_rate": 1.3701265209557368e-06, "loss": 0.2498, "step": 33764 }, { "epoch": 2.5094760312151614, "grad_norm": 2.0260358543369485, "learning_rate": 1.3697211718092297e-06, "loss": 0.1894, "step": 33765 }, { "epoch": 2.509550353028614, "grad_norm": 1.900339307129795, "learning_rate": 1.369315878224008e-06, "loss": 0.2607, "step": 33766 }, { "epoch": 2.5096246748420663, "grad_norm": 2.0797972493033945, "learning_rate": 1.3689106402026842e-06, "loss": 0.1906, "step": 33767 }, { "epoch": 2.5096989966555183, "grad_norm": 2.3421915583915465, "learning_rate": 1.3685054577478641e-06, "loss": 0.3764, "step": 33768 }, { "epoch": 2.509773318468971, "grad_norm": 2.6842564114615577, "learning_rate": 1.368100330862161e-06, "loss": 0.3414, "step": 33769 }, { "epoch": 2.509847640282423, "grad_norm": 2.2844745143263614, "learning_rate": 1.3676952595481784e-06, "loss": 0.2875, "step": 33770 }, { "epoch": 2.5099219620958753, "grad_norm": 2.8913915761408817, "learning_rate": 1.3672902438085244e-06, "loss": 0.2639, "step": 33771 }, { "epoch": 2.5099962839093273, "grad_norm": 2.413298015842603, "learning_rate": 1.3668852836458112e-06, "loss": 0.3255, "step": 33772 }, { "epoch": 2.5100706057227797, "grad_norm": 2.64562474869342, "learning_rate": 1.3664803790626413e-06, "loss": 0.2848, "step": 33773 }, { "epoch": 2.5101449275362318, "grad_norm": 2.4492810995083247, "learning_rate": 1.3660755300616234e-06, "loss": 0.3308, "step": 33774 }, { "epoch": 2.510219249349684, "grad_norm": 2.32332694147719, "learning_rate": 1.3656707366453625e-06, "loss": 0.256, "step": 33775 }, { "epoch": 2.5102935711631362, "grad_norm": 2.4562527713738165, "learning_rate": 1.3652659988164641e-06, "loss": 0.3189, "step": 33776 }, { "epoch": 2.5103678929765887, "grad_norm": 2.3654948133609293, "learning_rate": 1.364861316577536e-06, "loss": 0.2632, "step": 33777 }, { "epoch": 2.510442214790041, "grad_norm": 2.340792434314844, "learning_rate": 1.3644566899311818e-06, "loss": 0.306, "step": 33778 }, { "epoch": 2.510516536603493, "grad_norm": 2.205344555395581, "learning_rate": 1.3640521188800094e-06, "loss": 0.2389, "step": 33779 }, { "epoch": 2.510590858416945, "grad_norm": 2.1407810711665416, "learning_rate": 1.363647603426621e-06, "loss": 0.2759, "step": 33780 }, { "epoch": 2.5106651802303976, "grad_norm": 2.1126419973329758, "learning_rate": 1.3632431435736192e-06, "loss": 0.2591, "step": 33781 }, { "epoch": 2.51073950204385, "grad_norm": 2.0170912093220132, "learning_rate": 1.362838739323612e-06, "loss": 0.26, "step": 33782 }, { "epoch": 2.510813823857302, "grad_norm": 2.187621064041952, "learning_rate": 1.3624343906791981e-06, "loss": 0.2112, "step": 33783 }, { "epoch": 2.510888145670754, "grad_norm": 2.8448377080879954, "learning_rate": 1.3620300976429868e-06, "loss": 0.294, "step": 33784 }, { "epoch": 2.5109624674842066, "grad_norm": 2.336731228478947, "learning_rate": 1.361625860217577e-06, "loss": 0.2635, "step": 33785 }, { "epoch": 2.511036789297659, "grad_norm": 2.934482749463099, "learning_rate": 1.3612216784055709e-06, "loss": 0.2386, "step": 33786 }, { "epoch": 2.511111111111111, "grad_norm": 2.603651514225424, "learning_rate": 1.3608175522095724e-06, "loss": 0.4203, "step": 33787 }, { "epoch": 2.511185432924563, "grad_norm": 3.6343690472996446, "learning_rate": 1.36041348163218e-06, "loss": 0.2761, "step": 33788 }, { "epoch": 2.5112597547380155, "grad_norm": 2.0791510473167185, "learning_rate": 1.360009466676e-06, "loss": 0.2782, "step": 33789 }, { "epoch": 2.511334076551468, "grad_norm": 1.6886024337649153, "learning_rate": 1.3596055073436275e-06, "loss": 0.1933, "step": 33790 }, { "epoch": 2.51140839836492, "grad_norm": 2.6892464596981185, "learning_rate": 1.3592016036376687e-06, "loss": 0.3237, "step": 33791 }, { "epoch": 2.5114827201783725, "grad_norm": 2.4180545011570653, "learning_rate": 1.3587977555607223e-06, "loss": 0.2569, "step": 33792 }, { "epoch": 2.5115570419918245, "grad_norm": 1.9116415124334671, "learning_rate": 1.358393963115384e-06, "loss": 0.2204, "step": 33793 }, { "epoch": 2.511631363805277, "grad_norm": 2.2166311150327025, "learning_rate": 1.357990226304261e-06, "loss": 0.281, "step": 33794 }, { "epoch": 2.511705685618729, "grad_norm": 2.4032443875106133, "learning_rate": 1.3575865451299475e-06, "loss": 0.2929, "step": 33795 }, { "epoch": 2.5117800074321814, "grad_norm": 2.4157430629030547, "learning_rate": 1.3571829195950414e-06, "loss": 0.2823, "step": 33796 }, { "epoch": 2.5118543292456335, "grad_norm": 2.5349221124626617, "learning_rate": 1.3567793497021476e-06, "loss": 0.3565, "step": 33797 }, { "epoch": 2.511928651059086, "grad_norm": 2.017943846061548, "learning_rate": 1.3563758354538559e-06, "loss": 0.2323, "step": 33798 }, { "epoch": 2.512002972872538, "grad_norm": 2.0945073627299107, "learning_rate": 1.3559723768527699e-06, "loss": 0.2672, "step": 33799 }, { "epoch": 2.5120772946859904, "grad_norm": 2.4761353702786093, "learning_rate": 1.3555689739014832e-06, "loss": 0.268, "step": 33800 }, { "epoch": 2.512151616499443, "grad_norm": 2.3783118222813227, "learning_rate": 1.355165626602597e-06, "loss": 0.2659, "step": 33801 }, { "epoch": 2.512225938312895, "grad_norm": 2.0936279191029405, "learning_rate": 1.3547623349587059e-06, "loss": 0.2629, "step": 33802 }, { "epoch": 2.512300260126347, "grad_norm": 2.309280388414325, "learning_rate": 1.3543590989724031e-06, "loss": 0.2429, "step": 33803 }, { "epoch": 2.5123745819397993, "grad_norm": 2.433873965296054, "learning_rate": 1.3539559186462915e-06, "loss": 0.2763, "step": 33804 }, { "epoch": 2.512448903753252, "grad_norm": 2.6594218671380907, "learning_rate": 1.3535527939829608e-06, "loss": 0.3211, "step": 33805 }, { "epoch": 2.512523225566704, "grad_norm": 3.3525710347802473, "learning_rate": 1.3531497249850101e-06, "loss": 0.3827, "step": 33806 }, { "epoch": 2.512597547380156, "grad_norm": 2.0817690694396305, "learning_rate": 1.352746711655033e-06, "loss": 0.2772, "step": 33807 }, { "epoch": 2.5126718691936083, "grad_norm": 1.9601761104292095, "learning_rate": 1.352343753995624e-06, "loss": 0.2243, "step": 33808 }, { "epoch": 2.5127461910070608, "grad_norm": 2.961303891088135, "learning_rate": 1.351940852009378e-06, "loss": 0.3387, "step": 33809 }, { "epoch": 2.5128205128205128, "grad_norm": 2.194360435959817, "learning_rate": 1.3515380056988848e-06, "loss": 0.27, "step": 33810 }, { "epoch": 2.512894834633965, "grad_norm": 3.0746250713240846, "learning_rate": 1.3511352150667435e-06, "loss": 0.3593, "step": 33811 }, { "epoch": 2.5129691564474173, "grad_norm": 2.7893206765480545, "learning_rate": 1.350732480115543e-06, "loss": 0.2932, "step": 33812 }, { "epoch": 2.5130434782608697, "grad_norm": 2.1489685645243934, "learning_rate": 1.3503298008478805e-06, "loss": 0.2568, "step": 33813 }, { "epoch": 2.5131178000743217, "grad_norm": 2.873033567194949, "learning_rate": 1.3499271772663446e-06, "loss": 0.3133, "step": 33814 }, { "epoch": 2.513192121887774, "grad_norm": 2.3445786218812485, "learning_rate": 1.3495246093735282e-06, "loss": 0.2123, "step": 33815 }, { "epoch": 2.513266443701226, "grad_norm": 2.358505165196617, "learning_rate": 1.3491220971720243e-06, "loss": 0.2711, "step": 33816 }, { "epoch": 2.5133407655146787, "grad_norm": 2.569215269078127, "learning_rate": 1.3487196406644243e-06, "loss": 0.2637, "step": 33817 }, { "epoch": 2.5134150873281307, "grad_norm": 2.3163101224555374, "learning_rate": 1.3483172398533163e-06, "loss": 0.2533, "step": 33818 }, { "epoch": 2.513489409141583, "grad_norm": 2.8178901802134027, "learning_rate": 1.3479148947412968e-06, "loss": 0.2991, "step": 33819 }, { "epoch": 2.513563730955035, "grad_norm": 1.906566429546262, "learning_rate": 1.3475126053309484e-06, "loss": 0.2259, "step": 33820 }, { "epoch": 2.5136380527684876, "grad_norm": 2.5878786975573207, "learning_rate": 1.3471103716248668e-06, "loss": 0.2854, "step": 33821 }, { "epoch": 2.5137123745819396, "grad_norm": 2.721710103653822, "learning_rate": 1.3467081936256377e-06, "loss": 0.2615, "step": 33822 }, { "epoch": 2.513786696395392, "grad_norm": 2.0259777190260135, "learning_rate": 1.3463060713358545e-06, "loss": 0.2688, "step": 33823 }, { "epoch": 2.5138610182088446, "grad_norm": 2.4415807078518927, "learning_rate": 1.345904004758104e-06, "loss": 0.2801, "step": 33824 }, { "epoch": 2.5139353400222966, "grad_norm": 2.024169300598974, "learning_rate": 1.345501993894972e-06, "loss": 0.2182, "step": 33825 }, { "epoch": 2.5140096618357486, "grad_norm": 3.7162268921602624, "learning_rate": 1.345100038749051e-06, "loss": 0.3796, "step": 33826 }, { "epoch": 2.514083983649201, "grad_norm": 2.4287040443222194, "learning_rate": 1.3446981393229253e-06, "loss": 0.3271, "step": 33827 }, { "epoch": 2.5141583054626535, "grad_norm": 2.958814609719622, "learning_rate": 1.344296295619185e-06, "loss": 0.2645, "step": 33828 }, { "epoch": 2.5142326272761055, "grad_norm": 2.597404514043551, "learning_rate": 1.3438945076404175e-06, "loss": 0.2865, "step": 33829 }, { "epoch": 2.5143069490895575, "grad_norm": 2.3183154053732404, "learning_rate": 1.3434927753892069e-06, "loss": 0.331, "step": 33830 }, { "epoch": 2.51438127090301, "grad_norm": 2.6178194461217394, "learning_rate": 1.3430910988681412e-06, "loss": 0.2423, "step": 33831 }, { "epoch": 2.5144555927164625, "grad_norm": 2.4604154825646014, "learning_rate": 1.3426894780798028e-06, "loss": 0.2849, "step": 33832 }, { "epoch": 2.5145299145299145, "grad_norm": 2.928473441086626, "learning_rate": 1.3422879130267842e-06, "loss": 0.3466, "step": 33833 }, { "epoch": 2.514604236343367, "grad_norm": 2.6483829927766345, "learning_rate": 1.341886403711663e-06, "loss": 0.2392, "step": 33834 }, { "epoch": 2.514678558156819, "grad_norm": 2.382504831415951, "learning_rate": 1.341484950137031e-06, "loss": 0.2995, "step": 33835 }, { "epoch": 2.5147528799702714, "grad_norm": 2.884440269422364, "learning_rate": 1.3410835523054689e-06, "loss": 0.2639, "step": 33836 }, { "epoch": 2.5148272017837234, "grad_norm": 2.31874422720388, "learning_rate": 1.3406822102195605e-06, "loss": 0.2755, "step": 33837 }, { "epoch": 2.514901523597176, "grad_norm": 2.688982987043903, "learning_rate": 1.3402809238818914e-06, "loss": 0.2463, "step": 33838 }, { "epoch": 2.514975845410628, "grad_norm": 2.210117777718939, "learning_rate": 1.3398796932950453e-06, "loss": 0.2661, "step": 33839 }, { "epoch": 2.5150501672240804, "grad_norm": 2.190245713812064, "learning_rate": 1.339478518461602e-06, "loss": 0.224, "step": 33840 }, { "epoch": 2.5151244890375324, "grad_norm": 2.366557434341913, "learning_rate": 1.3390773993841488e-06, "loss": 0.241, "step": 33841 }, { "epoch": 2.515198810850985, "grad_norm": 2.326343917902723, "learning_rate": 1.3386763360652666e-06, "loss": 0.2761, "step": 33842 }, { "epoch": 2.515273132664437, "grad_norm": 2.2918538110598714, "learning_rate": 1.3382753285075367e-06, "loss": 0.3442, "step": 33843 }, { "epoch": 2.5153474544778893, "grad_norm": 2.1640051502811346, "learning_rate": 1.3378743767135382e-06, "loss": 0.2359, "step": 33844 }, { "epoch": 2.5154217762913413, "grad_norm": 2.5086433201131, "learning_rate": 1.3374734806858568e-06, "loss": 0.3214, "step": 33845 }, { "epoch": 2.515496098104794, "grad_norm": 2.3405687392866508, "learning_rate": 1.3370726404270718e-06, "loss": 0.1886, "step": 33846 }, { "epoch": 2.5155704199182463, "grad_norm": 2.063100999221459, "learning_rate": 1.3366718559397618e-06, "loss": 0.2296, "step": 33847 }, { "epoch": 2.5156447417316983, "grad_norm": 2.188513774504043, "learning_rate": 1.3362711272265106e-06, "loss": 0.2041, "step": 33848 }, { "epoch": 2.5157190635451503, "grad_norm": 2.2799979956271508, "learning_rate": 1.3358704542898949e-06, "loss": 0.3297, "step": 33849 }, { "epoch": 2.5157933853586028, "grad_norm": 2.28104117103361, "learning_rate": 1.335469837132497e-06, "loss": 0.2521, "step": 33850 }, { "epoch": 2.515867707172055, "grad_norm": 2.353133994996115, "learning_rate": 1.3350692757568961e-06, "loss": 0.2301, "step": 33851 }, { "epoch": 2.5159420289855072, "grad_norm": 2.588087010795702, "learning_rate": 1.3346687701656658e-06, "loss": 0.2821, "step": 33852 }, { "epoch": 2.5160163507989592, "grad_norm": 2.5850095838376204, "learning_rate": 1.3342683203613937e-06, "loss": 0.2319, "step": 33853 }, { "epoch": 2.5160906726124117, "grad_norm": 1.67210510294899, "learning_rate": 1.333867926346648e-06, "loss": 0.1778, "step": 33854 }, { "epoch": 2.516164994425864, "grad_norm": 2.4370645888210247, "learning_rate": 1.3334675881240134e-06, "loss": 0.2951, "step": 33855 }, { "epoch": 2.516239316239316, "grad_norm": 2.538468624962599, "learning_rate": 1.333067305696062e-06, "loss": 0.2972, "step": 33856 }, { "epoch": 2.5163136380527686, "grad_norm": 2.699330475702698, "learning_rate": 1.3326670790653762e-06, "loss": 0.3281, "step": 33857 }, { "epoch": 2.5163879598662207, "grad_norm": 2.2635022035507886, "learning_rate": 1.3322669082345296e-06, "loss": 0.2601, "step": 33858 }, { "epoch": 2.516462281679673, "grad_norm": 2.6397647726107674, "learning_rate": 1.331866793206097e-06, "loss": 0.2368, "step": 33859 }, { "epoch": 2.516536603493125, "grad_norm": 2.3256268604472354, "learning_rate": 1.3314667339826582e-06, "loss": 0.2831, "step": 33860 }, { "epoch": 2.5166109253065776, "grad_norm": 3.4138981472510355, "learning_rate": 1.3310667305667869e-06, "loss": 0.3068, "step": 33861 }, { "epoch": 2.5166852471200296, "grad_norm": 2.4682585310180274, "learning_rate": 1.3306667829610552e-06, "loss": 0.371, "step": 33862 }, { "epoch": 2.516759568933482, "grad_norm": 3.349487295117817, "learning_rate": 1.330266891168045e-06, "loss": 0.3201, "step": 33863 }, { "epoch": 2.516833890746934, "grad_norm": 1.8853515223646378, "learning_rate": 1.3298670551903247e-06, "loss": 0.2117, "step": 33864 }, { "epoch": 2.5169082125603865, "grad_norm": 2.492372494398259, "learning_rate": 1.3294672750304715e-06, "loss": 0.2593, "step": 33865 }, { "epoch": 2.5169825343738386, "grad_norm": 2.405982063700943, "learning_rate": 1.3290675506910566e-06, "loss": 0.2639, "step": 33866 }, { "epoch": 2.517056856187291, "grad_norm": 1.8616232691080288, "learning_rate": 1.3286678821746557e-06, "loss": 0.1948, "step": 33867 }, { "epoch": 2.517131178000743, "grad_norm": 2.5017102047718525, "learning_rate": 1.3282682694838422e-06, "loss": 0.2477, "step": 33868 }, { "epoch": 2.5172054998141955, "grad_norm": 3.5948557163305908, "learning_rate": 1.3278687126211841e-06, "loss": 0.3236, "step": 33869 }, { "epoch": 2.517279821627648, "grad_norm": 2.103038162300619, "learning_rate": 1.3274692115892607e-06, "loss": 0.3088, "step": 33870 }, { "epoch": 2.5173541434411, "grad_norm": 2.0030258799890572, "learning_rate": 1.3270697663906384e-06, "loss": 0.2302, "step": 33871 }, { "epoch": 2.517428465254552, "grad_norm": 2.6725190233753224, "learning_rate": 1.326670377027892e-06, "loss": 0.3516, "step": 33872 }, { "epoch": 2.5175027870680045, "grad_norm": 1.8272590837934923, "learning_rate": 1.326271043503593e-06, "loss": 0.2136, "step": 33873 }, { "epoch": 2.517577108881457, "grad_norm": 2.295802014463373, "learning_rate": 1.325871765820308e-06, "loss": 0.2514, "step": 33874 }, { "epoch": 2.517651430694909, "grad_norm": 2.2839312128858573, "learning_rate": 1.3254725439806127e-06, "loss": 0.2889, "step": 33875 }, { "epoch": 2.517725752508361, "grad_norm": 2.0144093593458843, "learning_rate": 1.325073377987075e-06, "loss": 0.2051, "step": 33876 }, { "epoch": 2.5178000743218134, "grad_norm": 2.739337923584635, "learning_rate": 1.3246742678422654e-06, "loss": 0.3061, "step": 33877 }, { "epoch": 2.517874396135266, "grad_norm": 2.627277789427389, "learning_rate": 1.3242752135487492e-06, "loss": 0.288, "step": 33878 }, { "epoch": 2.517948717948718, "grad_norm": 1.9172769054234644, "learning_rate": 1.3238762151091022e-06, "loss": 0.1872, "step": 33879 }, { "epoch": 2.5180230397621703, "grad_norm": 2.8377310074839937, "learning_rate": 1.3234772725258904e-06, "loss": 0.3082, "step": 33880 }, { "epoch": 2.5180973615756224, "grad_norm": 2.6850686621170565, "learning_rate": 1.3230783858016783e-06, "loss": 0.2695, "step": 33881 }, { "epoch": 2.518171683389075, "grad_norm": 2.433520345279175, "learning_rate": 1.3226795549390404e-06, "loss": 0.3019, "step": 33882 }, { "epoch": 2.518246005202527, "grad_norm": 1.9364871154767966, "learning_rate": 1.3222807799405412e-06, "loss": 0.2462, "step": 33883 }, { "epoch": 2.5183203270159793, "grad_norm": 2.4844856096405565, "learning_rate": 1.3218820608087457e-06, "loss": 0.2352, "step": 33884 }, { "epoch": 2.5183946488294313, "grad_norm": 2.261759612032263, "learning_rate": 1.3214833975462249e-06, "loss": 0.2183, "step": 33885 }, { "epoch": 2.5184689706428838, "grad_norm": 2.415654067777021, "learning_rate": 1.3210847901555424e-06, "loss": 0.2582, "step": 33886 }, { "epoch": 2.518543292456336, "grad_norm": 2.9355990040959354, "learning_rate": 1.3206862386392705e-06, "loss": 0.3184, "step": 33887 }, { "epoch": 2.5186176142697883, "grad_norm": 2.3917702515335697, "learning_rate": 1.320287742999965e-06, "loss": 0.2546, "step": 33888 }, { "epoch": 2.5186919360832403, "grad_norm": 2.743679657087613, "learning_rate": 1.3198893032401983e-06, "loss": 0.2229, "step": 33889 }, { "epoch": 2.5187662578966927, "grad_norm": 2.76262400768183, "learning_rate": 1.319490919362535e-06, "loss": 0.3106, "step": 33890 }, { "epoch": 2.5188405797101447, "grad_norm": 2.239184009514314, "learning_rate": 1.3190925913695362e-06, "loss": 0.1984, "step": 33891 }, { "epoch": 2.518914901523597, "grad_norm": 2.075082049667875, "learning_rate": 1.3186943192637714e-06, "loss": 0.2441, "step": 33892 }, { "epoch": 2.5189892233370497, "grad_norm": 2.1397018152100618, "learning_rate": 1.3182961030477992e-06, "loss": 0.2774, "step": 33893 }, { "epoch": 2.5190635451505017, "grad_norm": 2.182745377651313, "learning_rate": 1.3178979427241889e-06, "loss": 0.2116, "step": 33894 }, { "epoch": 2.5191378669639537, "grad_norm": 1.82944841516998, "learning_rate": 1.3174998382955007e-06, "loss": 0.1991, "step": 33895 }, { "epoch": 2.519212188777406, "grad_norm": 2.4195870583973833, "learning_rate": 1.3171017897642957e-06, "loss": 0.2522, "step": 33896 }, { "epoch": 2.5192865105908586, "grad_norm": 2.8799080436631437, "learning_rate": 1.316703797133142e-06, "loss": 0.377, "step": 33897 }, { "epoch": 2.5193608324043106, "grad_norm": 2.1993892065577, "learning_rate": 1.3163058604045987e-06, "loss": 0.2917, "step": 33898 }, { "epoch": 2.5194351542177627, "grad_norm": 2.3632717711149116, "learning_rate": 1.3159079795812268e-06, "loss": 0.3311, "step": 33899 }, { "epoch": 2.519509476031215, "grad_norm": 2.693152126929166, "learning_rate": 1.315510154665589e-06, "loss": 0.2978, "step": 33900 }, { "epoch": 2.5195837978446676, "grad_norm": 3.4177109846331435, "learning_rate": 1.3151123856602444e-06, "loss": 0.4031, "step": 33901 }, { "epoch": 2.5196581196581196, "grad_norm": 2.3140720328142868, "learning_rate": 1.3147146725677572e-06, "loss": 0.2311, "step": 33902 }, { "epoch": 2.519732441471572, "grad_norm": 1.9597994670280727, "learning_rate": 1.3143170153906847e-06, "loss": 0.2015, "step": 33903 }, { "epoch": 2.519806763285024, "grad_norm": 2.72589185027386, "learning_rate": 1.3139194141315903e-06, "loss": 0.2466, "step": 33904 }, { "epoch": 2.5198810850984765, "grad_norm": 2.0516990403526814, "learning_rate": 1.3135218687930328e-06, "loss": 0.2635, "step": 33905 }, { "epoch": 2.5199554069119285, "grad_norm": 1.8045674297472571, "learning_rate": 1.3131243793775683e-06, "loss": 0.2307, "step": 33906 }, { "epoch": 2.520029728725381, "grad_norm": 3.125203250346455, "learning_rate": 1.3127269458877602e-06, "loss": 0.3907, "step": 33907 }, { "epoch": 2.520104050538833, "grad_norm": 2.368960451139745, "learning_rate": 1.3123295683261628e-06, "loss": 0.253, "step": 33908 }, { "epoch": 2.5201783723522855, "grad_norm": 2.107932913205448, "learning_rate": 1.3119322466953422e-06, "loss": 0.2126, "step": 33909 }, { "epoch": 2.5202526941657375, "grad_norm": 2.053030248871463, "learning_rate": 1.3115349809978462e-06, "loss": 0.2074, "step": 33910 }, { "epoch": 2.52032701597919, "grad_norm": 2.496502278880606, "learning_rate": 1.3111377712362383e-06, "loss": 0.2261, "step": 33911 }, { "epoch": 2.5204013377926424, "grad_norm": 2.6251331937999183, "learning_rate": 1.3107406174130766e-06, "loss": 0.2738, "step": 33912 }, { "epoch": 2.5204756596060944, "grad_norm": 2.195303677533243, "learning_rate": 1.3103435195309122e-06, "loss": 0.3031, "step": 33913 }, { "epoch": 2.5205499814195464, "grad_norm": 2.0826582633768886, "learning_rate": 1.3099464775923076e-06, "loss": 0.2431, "step": 33914 }, { "epoch": 2.520624303232999, "grad_norm": 2.851585971242406, "learning_rate": 1.3095494915998153e-06, "loss": 0.2987, "step": 33915 }, { "epoch": 2.5206986250464514, "grad_norm": 2.199653862009643, "learning_rate": 1.3091525615559942e-06, "loss": 0.2522, "step": 33916 }, { "epoch": 2.5207729468599034, "grad_norm": 2.4813046780658916, "learning_rate": 1.3087556874633977e-06, "loss": 0.2908, "step": 33917 }, { "epoch": 2.5208472686733554, "grad_norm": 2.7486290246011644, "learning_rate": 1.3083588693245797e-06, "loss": 0.3286, "step": 33918 }, { "epoch": 2.520921590486808, "grad_norm": 2.508103528323391, "learning_rate": 1.307962107142098e-06, "loss": 0.3256, "step": 33919 }, { "epoch": 2.5209959123002603, "grad_norm": 2.23262796886175, "learning_rate": 1.3075654009185056e-06, "loss": 0.3483, "step": 33920 }, { "epoch": 2.5210702341137123, "grad_norm": 2.0778016931379955, "learning_rate": 1.3071687506563558e-06, "loss": 0.2048, "step": 33921 }, { "epoch": 2.5211445559271644, "grad_norm": 2.0975048426189815, "learning_rate": 1.3067721563582025e-06, "loss": 0.3122, "step": 33922 }, { "epoch": 2.521218877740617, "grad_norm": 2.331582599714291, "learning_rate": 1.3063756180265974e-06, "loss": 0.2415, "step": 33923 }, { "epoch": 2.5212931995540693, "grad_norm": 2.6412539789580185, "learning_rate": 1.3059791356640962e-06, "loss": 0.2821, "step": 33924 }, { "epoch": 2.5213675213675213, "grad_norm": 1.966754566692395, "learning_rate": 1.3055827092732487e-06, "loss": 0.2222, "step": 33925 }, { "epoch": 2.5214418431809738, "grad_norm": 2.6559511930170943, "learning_rate": 1.30518633885661e-06, "loss": 0.2943, "step": 33926 }, { "epoch": 2.5215161649944258, "grad_norm": 2.1668147711237493, "learning_rate": 1.3047900244167311e-06, "loss": 0.2373, "step": 33927 }, { "epoch": 2.5215904868078782, "grad_norm": 2.2634528506118015, "learning_rate": 1.3043937659561612e-06, "loss": 0.2766, "step": 33928 }, { "epoch": 2.5216648086213302, "grad_norm": 2.535591831178596, "learning_rate": 1.303997563477455e-06, "loss": 0.2698, "step": 33929 }, { "epoch": 2.5217391304347827, "grad_norm": 2.2070270648996773, "learning_rate": 1.303601416983159e-06, "loss": 0.1959, "step": 33930 }, { "epoch": 2.5218134522482347, "grad_norm": 2.592770922808445, "learning_rate": 1.303205326475827e-06, "loss": 0.346, "step": 33931 }, { "epoch": 2.521887774061687, "grad_norm": 3.1785655859345074, "learning_rate": 1.3028092919580093e-06, "loss": 0.3226, "step": 33932 }, { "epoch": 2.521962095875139, "grad_norm": 2.93437239186168, "learning_rate": 1.3024133134322535e-06, "loss": 0.2784, "step": 33933 }, { "epoch": 2.5220364176885917, "grad_norm": 2.434099766383548, "learning_rate": 1.3020173909011092e-06, "loss": 0.2221, "step": 33934 }, { "epoch": 2.522110739502044, "grad_norm": 2.2008547402150804, "learning_rate": 1.3016215243671237e-06, "loss": 0.3202, "step": 33935 }, { "epoch": 2.522185061315496, "grad_norm": 2.1519668962548453, "learning_rate": 1.30122571383285e-06, "loss": 0.2482, "step": 33936 }, { "epoch": 2.522259383128948, "grad_norm": 2.6765179115440785, "learning_rate": 1.3008299593008312e-06, "loss": 0.3106, "step": 33937 }, { "epoch": 2.5223337049424006, "grad_norm": 2.6580944017207067, "learning_rate": 1.3004342607736209e-06, "loss": 0.2965, "step": 33938 }, { "epoch": 2.522408026755853, "grad_norm": 2.606391323837975, "learning_rate": 1.3000386182537617e-06, "loss": 0.2774, "step": 33939 }, { "epoch": 2.522482348569305, "grad_norm": 2.5885459406947784, "learning_rate": 1.2996430317438014e-06, "loss": 0.26, "step": 33940 }, { "epoch": 2.522556670382757, "grad_norm": 2.8621770754891886, "learning_rate": 1.2992475012462901e-06, "loss": 0.3046, "step": 33941 }, { "epoch": 2.5226309921962096, "grad_norm": 2.066752443525737, "learning_rate": 1.2988520267637717e-06, "loss": 0.2419, "step": 33942 }, { "epoch": 2.522705314009662, "grad_norm": 2.5042438221341254, "learning_rate": 1.2984566082987926e-06, "loss": 0.3165, "step": 33943 }, { "epoch": 2.522779635823114, "grad_norm": 2.41326052683856, "learning_rate": 1.2980612458538978e-06, "loss": 0.2574, "step": 33944 }, { "epoch": 2.522853957636566, "grad_norm": 2.218977056065936, "learning_rate": 1.2976659394316316e-06, "loss": 0.2808, "step": 33945 }, { "epoch": 2.5229282794500185, "grad_norm": 2.524418058587597, "learning_rate": 1.2972706890345432e-06, "loss": 0.3235, "step": 33946 }, { "epoch": 2.523002601263471, "grad_norm": 2.572484163713016, "learning_rate": 1.2968754946651719e-06, "loss": 0.2856, "step": 33947 }, { "epoch": 2.523076923076923, "grad_norm": 2.9472207836522952, "learning_rate": 1.2964803563260665e-06, "loss": 0.3955, "step": 33948 }, { "epoch": 2.5231512448903755, "grad_norm": 2.301787223843996, "learning_rate": 1.2960852740197682e-06, "loss": 0.2297, "step": 33949 }, { "epoch": 2.5232255667038275, "grad_norm": 2.6507841717549767, "learning_rate": 1.295690247748821e-06, "loss": 0.2886, "step": 33950 }, { "epoch": 2.52329988851728, "grad_norm": 3.8470120701511803, "learning_rate": 1.295295277515769e-06, "loss": 0.2536, "step": 33951 }, { "epoch": 2.523374210330732, "grad_norm": 2.296760001873771, "learning_rate": 1.2949003633231526e-06, "loss": 0.264, "step": 33952 }, { "epoch": 2.5234485321441844, "grad_norm": 2.607949454601656, "learning_rate": 1.2945055051735179e-06, "loss": 0.2877, "step": 33953 }, { "epoch": 2.5235228539576364, "grad_norm": 2.595689265440611, "learning_rate": 1.2941107030694057e-06, "loss": 0.325, "step": 33954 }, { "epoch": 2.523597175771089, "grad_norm": 4.48047150133773, "learning_rate": 1.2937159570133573e-06, "loss": 0.2265, "step": 33955 }, { "epoch": 2.523671497584541, "grad_norm": 1.9472720786504363, "learning_rate": 1.2933212670079131e-06, "loss": 0.2636, "step": 33956 }, { "epoch": 2.5237458193979934, "grad_norm": 2.341858115595804, "learning_rate": 1.2929266330556123e-06, "loss": 0.2436, "step": 33957 }, { "epoch": 2.523820141211446, "grad_norm": 2.3749104015332976, "learning_rate": 1.2925320551590015e-06, "loss": 0.2621, "step": 33958 }, { "epoch": 2.523894463024898, "grad_norm": 2.580136600421176, "learning_rate": 1.2921375333206142e-06, "loss": 0.3346, "step": 33959 }, { "epoch": 2.52396878483835, "grad_norm": 2.075716938435377, "learning_rate": 1.2917430675429965e-06, "loss": 0.3043, "step": 33960 }, { "epoch": 2.5240431066518023, "grad_norm": 2.459294619757437, "learning_rate": 1.291348657828685e-06, "loss": 0.3116, "step": 33961 }, { "epoch": 2.5241174284652548, "grad_norm": 2.0116427550323546, "learning_rate": 1.2909543041802165e-06, "loss": 0.2615, "step": 33962 }, { "epoch": 2.524191750278707, "grad_norm": 2.7123446409760015, "learning_rate": 1.2905600066001344e-06, "loss": 0.2907, "step": 33963 }, { "epoch": 2.524266072092159, "grad_norm": 2.3846808227495524, "learning_rate": 1.290165765090976e-06, "loss": 0.286, "step": 33964 }, { "epoch": 2.5243403939056113, "grad_norm": 2.5353953790504113, "learning_rate": 1.2897715796552758e-06, "loss": 0.279, "step": 33965 }, { "epoch": 2.5244147157190637, "grad_norm": 2.2711133860817583, "learning_rate": 1.2893774502955781e-06, "loss": 0.2364, "step": 33966 }, { "epoch": 2.5244890375325157, "grad_norm": 2.1668765267595735, "learning_rate": 1.2889833770144123e-06, "loss": 0.2582, "step": 33967 }, { "epoch": 2.524563359345968, "grad_norm": 2.2933463883169067, "learning_rate": 1.2885893598143217e-06, "loss": 0.263, "step": 33968 }, { "epoch": 2.52463768115942, "grad_norm": 2.713154925865709, "learning_rate": 1.2881953986978391e-06, "loss": 0.3289, "step": 33969 }, { "epoch": 2.5247120029728727, "grad_norm": 2.6605237242560333, "learning_rate": 1.2878014936675043e-06, "loss": 0.2008, "step": 33970 }, { "epoch": 2.5247863247863247, "grad_norm": 1.9553127867412803, "learning_rate": 1.287407644725851e-06, "loss": 0.2437, "step": 33971 }, { "epoch": 2.524860646599777, "grad_norm": 2.3556036209140396, "learning_rate": 1.287013851875414e-06, "loss": 0.2263, "step": 33972 }, { "epoch": 2.524934968413229, "grad_norm": 2.758720573397723, "learning_rate": 1.2866201151187308e-06, "loss": 0.3076, "step": 33973 }, { "epoch": 2.5250092902266816, "grad_norm": 2.191657668752951, "learning_rate": 1.2862264344583332e-06, "loss": 0.2419, "step": 33974 }, { "epoch": 2.5250836120401337, "grad_norm": 2.2127883620530415, "learning_rate": 1.285832809896761e-06, "loss": 0.2313, "step": 33975 }, { "epoch": 2.525157933853586, "grad_norm": 1.7718479559658837, "learning_rate": 1.2854392414365436e-06, "loss": 0.2017, "step": 33976 }, { "epoch": 2.525232255667038, "grad_norm": 2.832978071315632, "learning_rate": 1.2850457290802176e-06, "loss": 0.3041, "step": 33977 }, { "epoch": 2.5253065774804906, "grad_norm": 2.138281642526633, "learning_rate": 1.2846522728303134e-06, "loss": 0.2555, "step": 33978 }, { "epoch": 2.5253808992939426, "grad_norm": 1.9321384866834248, "learning_rate": 1.2842588726893646e-06, "loss": 0.19, "step": 33979 }, { "epoch": 2.525455221107395, "grad_norm": 2.464912602988446, "learning_rate": 1.2838655286599066e-06, "loss": 0.2434, "step": 33980 }, { "epoch": 2.5255295429208475, "grad_norm": 1.978194170575179, "learning_rate": 1.2834722407444689e-06, "loss": 0.1979, "step": 33981 }, { "epoch": 2.5256038647342995, "grad_norm": 2.355042446218202, "learning_rate": 1.2830790089455858e-06, "loss": 0.281, "step": 33982 }, { "epoch": 2.5256781865477516, "grad_norm": 2.400679166480182, "learning_rate": 1.2826858332657877e-06, "loss": 0.3153, "step": 33983 }, { "epoch": 2.525752508361204, "grad_norm": 2.687704456969089, "learning_rate": 1.2822927137076047e-06, "loss": 0.2397, "step": 33984 }, { "epoch": 2.5258268301746565, "grad_norm": 2.192965910237649, "learning_rate": 1.2818996502735704e-06, "loss": 0.2686, "step": 33985 }, { "epoch": 2.5259011519881085, "grad_norm": 2.113143542659035, "learning_rate": 1.2815066429662138e-06, "loss": 0.263, "step": 33986 }, { "epoch": 2.5259754738015605, "grad_norm": 2.3659148458502353, "learning_rate": 1.2811136917880628e-06, "loss": 0.2258, "step": 33987 }, { "epoch": 2.526049795615013, "grad_norm": 2.6427095463971986, "learning_rate": 1.2807207967416514e-06, "loss": 0.2773, "step": 33988 }, { "epoch": 2.5261241174284654, "grad_norm": 1.9110997519423443, "learning_rate": 1.280327957829508e-06, "loss": 0.2448, "step": 33989 }, { "epoch": 2.5261984392419174, "grad_norm": 1.9447851506128973, "learning_rate": 1.27993517505416e-06, "loss": 0.1804, "step": 33990 }, { "epoch": 2.52627276105537, "grad_norm": 2.601946233663938, "learning_rate": 1.279542448418135e-06, "loss": 0.3614, "step": 33991 }, { "epoch": 2.526347082868822, "grad_norm": 2.533522492483763, "learning_rate": 1.2791497779239647e-06, "loss": 0.2574, "step": 33992 }, { "epoch": 2.5264214046822744, "grad_norm": 2.204467523864389, "learning_rate": 1.2787571635741768e-06, "loss": 0.2979, "step": 33993 }, { "epoch": 2.5264957264957264, "grad_norm": 2.144062132167588, "learning_rate": 1.278364605371295e-06, "loss": 0.2601, "step": 33994 }, { "epoch": 2.526570048309179, "grad_norm": 2.4000400731426357, "learning_rate": 1.2779721033178517e-06, "loss": 0.2679, "step": 33995 }, { "epoch": 2.526644370122631, "grad_norm": 2.271065362324071, "learning_rate": 1.277579657416369e-06, "loss": 0.274, "step": 33996 }, { "epoch": 2.5267186919360833, "grad_norm": 2.086698130841635, "learning_rate": 1.2771872676693787e-06, "loss": 0.2114, "step": 33997 }, { "epoch": 2.5267930137495354, "grad_norm": 2.3878740963373155, "learning_rate": 1.2767949340794039e-06, "loss": 0.2983, "step": 33998 }, { "epoch": 2.526867335562988, "grad_norm": 2.6579131414276755, "learning_rate": 1.2764026566489673e-06, "loss": 0.2347, "step": 33999 }, { "epoch": 2.52694165737644, "grad_norm": 2.8723572703174, "learning_rate": 1.2760104353806034e-06, "loss": 0.3152, "step": 34000 }, { "epoch": 2.5270159791898923, "grad_norm": 2.9156574490149914, "learning_rate": 1.2756182702768272e-06, "loss": 0.2774, "step": 34001 }, { "epoch": 2.5270903010033443, "grad_norm": 2.350894998401526, "learning_rate": 1.2752261613401695e-06, "loss": 0.2824, "step": 34002 }, { "epoch": 2.5271646228167968, "grad_norm": 2.6580372349441186, "learning_rate": 1.274834108573152e-06, "loss": 0.3884, "step": 34003 }, { "epoch": 2.5272389446302492, "grad_norm": 2.400351828292081, "learning_rate": 1.2744421119783012e-06, "loss": 0.2876, "step": 34004 }, { "epoch": 2.5273132664437012, "grad_norm": 2.5020419862678245, "learning_rate": 1.2740501715581388e-06, "loss": 0.3276, "step": 34005 }, { "epoch": 2.5273875882571533, "grad_norm": 2.7234135004227107, "learning_rate": 1.2736582873151881e-06, "loss": 0.2503, "step": 34006 }, { "epoch": 2.5274619100706057, "grad_norm": 2.323011421840315, "learning_rate": 1.2732664592519728e-06, "loss": 0.2828, "step": 34007 }, { "epoch": 2.527536231884058, "grad_norm": 3.0071522682742606, "learning_rate": 1.2728746873710162e-06, "loss": 0.3018, "step": 34008 }, { "epoch": 2.52761055369751, "grad_norm": 3.817135737541416, "learning_rate": 1.2724829716748378e-06, "loss": 0.3196, "step": 34009 }, { "epoch": 2.527684875510962, "grad_norm": 2.19077276502896, "learning_rate": 1.2720913121659629e-06, "loss": 0.2673, "step": 34010 }, { "epoch": 2.5277591973244147, "grad_norm": 2.2197462276347384, "learning_rate": 1.2716997088469108e-06, "loss": 0.2697, "step": 34011 }, { "epoch": 2.527833519137867, "grad_norm": 2.2883572761832114, "learning_rate": 1.2713081617202038e-06, "loss": 0.2739, "step": 34012 }, { "epoch": 2.527907840951319, "grad_norm": 2.7935438830622363, "learning_rate": 1.2709166707883601e-06, "loss": 0.2198, "step": 34013 }, { "epoch": 2.5279821627647716, "grad_norm": 3.1495451114480195, "learning_rate": 1.2705252360539033e-06, "loss": 0.3581, "step": 34014 }, { "epoch": 2.5280564845782236, "grad_norm": 2.3993296080754756, "learning_rate": 1.2701338575193523e-06, "loss": 0.2427, "step": 34015 }, { "epoch": 2.528130806391676, "grad_norm": 2.4259507669962446, "learning_rate": 1.2697425351872238e-06, "loss": 0.3434, "step": 34016 }, { "epoch": 2.528205128205128, "grad_norm": 1.780196934305744, "learning_rate": 1.269351269060043e-06, "loss": 0.2081, "step": 34017 }, { "epoch": 2.5282794500185806, "grad_norm": 2.4101898360865373, "learning_rate": 1.268960059140324e-06, "loss": 0.2628, "step": 34018 }, { "epoch": 2.5283537718320326, "grad_norm": 1.8724569316402464, "learning_rate": 1.2685689054305883e-06, "loss": 0.2227, "step": 34019 }, { "epoch": 2.528428093645485, "grad_norm": 2.4584007967746304, "learning_rate": 1.2681778079333529e-06, "loss": 0.3026, "step": 34020 }, { "epoch": 2.528502415458937, "grad_norm": 2.5568661876729317, "learning_rate": 1.2677867666511335e-06, "loss": 0.3283, "step": 34021 }, { "epoch": 2.5285767372723895, "grad_norm": 1.846489842536836, "learning_rate": 1.2673957815864547e-06, "loss": 0.2484, "step": 34022 }, { "epoch": 2.5286510590858415, "grad_norm": 1.8383062525097427, "learning_rate": 1.2670048527418244e-06, "loss": 0.2316, "step": 34023 }, { "epoch": 2.528725380899294, "grad_norm": 1.6475722289179435, "learning_rate": 1.2666139801197651e-06, "loss": 0.1852, "step": 34024 }, { "epoch": 2.528799702712746, "grad_norm": 2.6097211624606134, "learning_rate": 1.2662231637227928e-06, "loss": 0.3103, "step": 34025 }, { "epoch": 2.5288740245261985, "grad_norm": 2.8076802499834193, "learning_rate": 1.2658324035534197e-06, "loss": 0.3092, "step": 34026 }, { "epoch": 2.528948346339651, "grad_norm": 2.0744019088296253, "learning_rate": 1.2654416996141672e-06, "loss": 0.2916, "step": 34027 }, { "epoch": 2.529022668153103, "grad_norm": 1.7538868180343756, "learning_rate": 1.2650510519075454e-06, "loss": 0.2553, "step": 34028 }, { "epoch": 2.529096989966555, "grad_norm": 2.3420464895682525, "learning_rate": 1.2646604604360747e-06, "loss": 0.2789, "step": 34029 }, { "epoch": 2.5291713117800074, "grad_norm": 2.0079617986083025, "learning_rate": 1.264269925202265e-06, "loss": 0.2154, "step": 34030 }, { "epoch": 2.52924563359346, "grad_norm": 2.02640406887416, "learning_rate": 1.2638794462086313e-06, "loss": 0.2156, "step": 34031 }, { "epoch": 2.529319955406912, "grad_norm": 2.017520735637788, "learning_rate": 1.2634890234576902e-06, "loss": 0.2416, "step": 34032 }, { "epoch": 2.529394277220364, "grad_norm": 2.247310447746261, "learning_rate": 1.2630986569519522e-06, "loss": 0.2593, "step": 34033 }, { "epoch": 2.5294685990338164, "grad_norm": 4.159925779054456, "learning_rate": 1.262708346693935e-06, "loss": 0.2983, "step": 34034 }, { "epoch": 2.529542920847269, "grad_norm": 2.3493746044852206, "learning_rate": 1.2623180926861434e-06, "loss": 0.2695, "step": 34035 }, { "epoch": 2.529617242660721, "grad_norm": 2.0085303064931757, "learning_rate": 1.2619278949310975e-06, "loss": 0.2041, "step": 34036 }, { "epoch": 2.5296915644741733, "grad_norm": 2.58362774589594, "learning_rate": 1.2615377534313066e-06, "loss": 0.3208, "step": 34037 }, { "epoch": 2.5297658862876253, "grad_norm": 2.2215010090373273, "learning_rate": 1.2611476681892799e-06, "loss": 0.2954, "step": 34038 }, { "epoch": 2.529840208101078, "grad_norm": 1.9718153475696802, "learning_rate": 1.2607576392075326e-06, "loss": 0.2043, "step": 34039 }, { "epoch": 2.52991452991453, "grad_norm": 2.448913706713465, "learning_rate": 1.2603676664885723e-06, "loss": 0.2795, "step": 34040 }, { "epoch": 2.5299888517279823, "grad_norm": 2.6919328840511696, "learning_rate": 1.2599777500349142e-06, "loss": 0.3265, "step": 34041 }, { "epoch": 2.5300631735414343, "grad_norm": 2.1359549266817566, "learning_rate": 1.2595878898490654e-06, "loss": 0.2201, "step": 34042 }, { "epoch": 2.5301374953548867, "grad_norm": 2.7520401988408336, "learning_rate": 1.259198085933534e-06, "loss": 0.2415, "step": 34043 }, { "epoch": 2.5302118171683388, "grad_norm": 2.623714977963694, "learning_rate": 1.2588083382908346e-06, "loss": 0.2821, "step": 34044 }, { "epoch": 2.530286138981791, "grad_norm": 2.3692740679015865, "learning_rate": 1.2584186469234728e-06, "loss": 0.3187, "step": 34045 }, { "epoch": 2.5303604607952432, "grad_norm": 2.2564203320856637, "learning_rate": 1.258029011833959e-06, "loss": 0.2258, "step": 34046 }, { "epoch": 2.5304347826086957, "grad_norm": 2.381649518026238, "learning_rate": 1.2576394330247999e-06, "loss": 0.3314, "step": 34047 }, { "epoch": 2.5305091044221477, "grad_norm": 2.17205491919524, "learning_rate": 1.2572499104985026e-06, "loss": 0.1739, "step": 34048 }, { "epoch": 2.5305834262356, "grad_norm": 2.280886336767562, "learning_rate": 1.2568604442575794e-06, "loss": 0.2672, "step": 34049 }, { "epoch": 2.5306577480490526, "grad_norm": 2.8644946312978146, "learning_rate": 1.2564710343045328e-06, "loss": 0.3778, "step": 34050 }, { "epoch": 2.5307320698625047, "grad_norm": 2.598479324730602, "learning_rate": 1.2560816806418741e-06, "loss": 0.3465, "step": 34051 }, { "epoch": 2.5308063916759567, "grad_norm": 1.8489183574935772, "learning_rate": 1.255692383272107e-06, "loss": 0.1793, "step": 34052 }, { "epoch": 2.530880713489409, "grad_norm": 2.213645444974425, "learning_rate": 1.255303142197737e-06, "loss": 0.2757, "step": 34053 }, { "epoch": 2.5309550353028616, "grad_norm": 3.1247432743270434, "learning_rate": 1.2549139574212743e-06, "loss": 0.3128, "step": 34054 }, { "epoch": 2.5310293571163136, "grad_norm": 2.268559528086246, "learning_rate": 1.2545248289452194e-06, "loss": 0.3229, "step": 34055 }, { "epoch": 2.5311036789297656, "grad_norm": 2.057324619567677, "learning_rate": 1.2541357567720835e-06, "loss": 0.2305, "step": 34056 }, { "epoch": 2.531178000743218, "grad_norm": 2.170328062494681, "learning_rate": 1.2537467409043647e-06, "loss": 0.267, "step": 34057 }, { "epoch": 2.5312523225566705, "grad_norm": 1.9816150343123313, "learning_rate": 1.253357781344572e-06, "loss": 0.2639, "step": 34058 }, { "epoch": 2.5313266443701226, "grad_norm": 2.572097869809312, "learning_rate": 1.2529688780952077e-06, "loss": 0.2331, "step": 34059 }, { "epoch": 2.531400966183575, "grad_norm": 2.7546520882239456, "learning_rate": 1.2525800311587744e-06, "loss": 0.3569, "step": 34060 }, { "epoch": 2.531475287997027, "grad_norm": 2.7507432913391496, "learning_rate": 1.252191240537779e-06, "loss": 0.3222, "step": 34061 }, { "epoch": 2.5315496098104795, "grad_norm": 1.7834416931665047, "learning_rate": 1.2518025062347205e-06, "loss": 0.2217, "step": 34062 }, { "epoch": 2.5316239316239315, "grad_norm": 2.5678797293037956, "learning_rate": 1.251413828252105e-06, "loss": 0.256, "step": 34063 }, { "epoch": 2.531698253437384, "grad_norm": 2.3466320790476747, "learning_rate": 1.2510252065924345e-06, "loss": 0.299, "step": 34064 }, { "epoch": 2.531772575250836, "grad_norm": 2.0816297809820017, "learning_rate": 1.2506366412582061e-06, "loss": 0.2165, "step": 34065 }, { "epoch": 2.5318468970642884, "grad_norm": 1.8498969516211254, "learning_rate": 1.2502481322519288e-06, "loss": 0.1976, "step": 34066 }, { "epoch": 2.5319212188777405, "grad_norm": 1.6883013525057329, "learning_rate": 1.2498596795760987e-06, "loss": 0.1631, "step": 34067 }, { "epoch": 2.531995540691193, "grad_norm": 3.4583483711076686, "learning_rate": 1.249471283233219e-06, "loss": 0.3355, "step": 34068 }, { "epoch": 2.5320698625046454, "grad_norm": 2.5210499509338247, "learning_rate": 1.249082943225789e-06, "loss": 0.2602, "step": 34069 }, { "epoch": 2.5321441843180974, "grad_norm": 2.614075048100208, "learning_rate": 1.2486946595563054e-06, "loss": 0.3573, "step": 34070 }, { "epoch": 2.5322185061315494, "grad_norm": 2.112701931703926, "learning_rate": 1.2483064322272753e-06, "loss": 0.2719, "step": 34071 }, { "epoch": 2.532292827945002, "grad_norm": 2.857247212583961, "learning_rate": 1.2479182612411912e-06, "loss": 0.3058, "step": 34072 }, { "epoch": 2.5323671497584543, "grad_norm": 1.9797399683399115, "learning_rate": 1.2475301466005584e-06, "loss": 0.2031, "step": 34073 }, { "epoch": 2.5324414715719064, "grad_norm": 4.487073169414894, "learning_rate": 1.2471420883078712e-06, "loss": 0.2854, "step": 34074 }, { "epoch": 2.5325157933853584, "grad_norm": 2.3321088474115643, "learning_rate": 1.246754086365628e-06, "loss": 0.2943, "step": 34075 }, { "epoch": 2.532590115198811, "grad_norm": 2.305613668861282, "learning_rate": 1.2463661407763294e-06, "loss": 0.2597, "step": 34076 }, { "epoch": 2.5326644370122633, "grad_norm": 2.6772980498904384, "learning_rate": 1.245978251542469e-06, "loss": 0.3145, "step": 34077 }, { "epoch": 2.5327387588257153, "grad_norm": 3.053534879972341, "learning_rate": 1.245590418666549e-06, "loss": 0.3678, "step": 34078 }, { "epoch": 2.5328130806391673, "grad_norm": 2.376292282709216, "learning_rate": 1.2452026421510633e-06, "loss": 0.2683, "step": 34079 }, { "epoch": 2.53288740245262, "grad_norm": 2.1770860191397623, "learning_rate": 1.2448149219985084e-06, "loss": 0.2666, "step": 34080 }, { "epoch": 2.5329617242660722, "grad_norm": 2.3114891226469942, "learning_rate": 1.2444272582113814e-06, "loss": 0.2403, "step": 34081 }, { "epoch": 2.5330360460795243, "grad_norm": 2.4469802565984717, "learning_rate": 1.2440396507921758e-06, "loss": 0.2712, "step": 34082 }, { "epoch": 2.5331103678929767, "grad_norm": 2.3923775035974626, "learning_rate": 1.2436520997433898e-06, "loss": 0.3695, "step": 34083 }, { "epoch": 2.5331846897064287, "grad_norm": 2.124632963819222, "learning_rate": 1.2432646050675156e-06, "loss": 0.2774, "step": 34084 }, { "epoch": 2.533259011519881, "grad_norm": 2.7295793739029297, "learning_rate": 1.2428771667670514e-06, "loss": 0.3116, "step": 34085 }, { "epoch": 2.533333333333333, "grad_norm": 2.5537072408400947, "learning_rate": 1.2424897848444895e-06, "loss": 0.2493, "step": 34086 }, { "epoch": 2.5334076551467857, "grad_norm": 2.686383489297081, "learning_rate": 1.2421024593023222e-06, "loss": 0.3593, "step": 34087 }, { "epoch": 2.5334819769602377, "grad_norm": 2.115799436326965, "learning_rate": 1.2417151901430469e-06, "loss": 0.2692, "step": 34088 }, { "epoch": 2.53355629877369, "grad_norm": 2.898782933240634, "learning_rate": 1.2413279773691544e-06, "loss": 0.3077, "step": 34089 }, { "epoch": 2.533630620587142, "grad_norm": 1.7602312746042441, "learning_rate": 1.2409408209831385e-06, "loss": 0.2172, "step": 34090 }, { "epoch": 2.5337049424005946, "grad_norm": 2.446819131836663, "learning_rate": 1.2405537209874908e-06, "loss": 0.2547, "step": 34091 }, { "epoch": 2.533779264214047, "grad_norm": 2.4793348490403297, "learning_rate": 1.240166677384701e-06, "loss": 0.1613, "step": 34092 }, { "epoch": 2.533853586027499, "grad_norm": 1.9296831359576077, "learning_rate": 1.2397796901772662e-06, "loss": 0.2038, "step": 34093 }, { "epoch": 2.533927907840951, "grad_norm": 1.843141778176588, "learning_rate": 1.2393927593676735e-06, "loss": 0.2315, "step": 34094 }, { "epoch": 2.5340022296544036, "grad_norm": 2.0006202332109293, "learning_rate": 1.2390058849584174e-06, "loss": 0.2629, "step": 34095 }, { "epoch": 2.534076551467856, "grad_norm": 1.6687992585902607, "learning_rate": 1.2386190669519871e-06, "loss": 0.2234, "step": 34096 }, { "epoch": 2.534150873281308, "grad_norm": 3.149178487251402, "learning_rate": 1.2382323053508705e-06, "loss": 0.3409, "step": 34097 }, { "epoch": 2.53422519509476, "grad_norm": 2.3279965111083745, "learning_rate": 1.2378456001575624e-06, "loss": 0.1938, "step": 34098 }, { "epoch": 2.5342995169082125, "grad_norm": 2.0840440605148407, "learning_rate": 1.237458951374546e-06, "loss": 0.249, "step": 34099 }, { "epoch": 2.534373838721665, "grad_norm": 1.9893922397516832, "learning_rate": 1.2370723590043177e-06, "loss": 0.2406, "step": 34100 }, { "epoch": 2.534448160535117, "grad_norm": 2.2225914665934625, "learning_rate": 1.2366858230493628e-06, "loss": 0.2305, "step": 34101 }, { "epoch": 2.534522482348569, "grad_norm": 1.9383527130911666, "learning_rate": 1.2362993435121695e-06, "loss": 0.2296, "step": 34102 }, { "epoch": 2.5345968041620215, "grad_norm": 1.910125645640971, "learning_rate": 1.2359129203952257e-06, "loss": 0.2021, "step": 34103 }, { "epoch": 2.534671125975474, "grad_norm": 3.2344460453704906, "learning_rate": 1.2355265537010186e-06, "loss": 0.317, "step": 34104 }, { "epoch": 2.534745447788926, "grad_norm": 1.686302503672277, "learning_rate": 1.235140243432038e-06, "loss": 0.2339, "step": 34105 }, { "epoch": 2.5348197696023784, "grad_norm": 2.3277443640357163, "learning_rate": 1.234753989590769e-06, "loss": 0.3233, "step": 34106 }, { "epoch": 2.5348940914158304, "grad_norm": 2.0493377134092556, "learning_rate": 1.2343677921796994e-06, "loss": 0.2136, "step": 34107 }, { "epoch": 2.534968413229283, "grad_norm": 2.019437795230652, "learning_rate": 1.2339816512013158e-06, "loss": 0.2258, "step": 34108 }, { "epoch": 2.535042735042735, "grad_norm": 2.0541331463890775, "learning_rate": 1.2335955666581024e-06, "loss": 0.2705, "step": 34109 }, { "epoch": 2.5351170568561874, "grad_norm": 2.1869400429727532, "learning_rate": 1.2332095385525467e-06, "loss": 0.3082, "step": 34110 }, { "epoch": 2.5351913786696394, "grad_norm": 2.3413645175412516, "learning_rate": 1.2328235668871336e-06, "loss": 0.2515, "step": 34111 }, { "epoch": 2.535265700483092, "grad_norm": 2.390182291346051, "learning_rate": 1.2324376516643455e-06, "loss": 0.317, "step": 34112 }, { "epoch": 2.535340022296544, "grad_norm": 1.8594459948365378, "learning_rate": 1.2320517928866737e-06, "loss": 0.1755, "step": 34113 }, { "epoch": 2.5354143441099963, "grad_norm": 1.962313250439994, "learning_rate": 1.231665990556592e-06, "loss": 0.2213, "step": 34114 }, { "epoch": 2.535488665923449, "grad_norm": 4.944559955435565, "learning_rate": 1.2312802446765926e-06, "loss": 0.2753, "step": 34115 }, { "epoch": 2.535562987736901, "grad_norm": 2.6345161971128395, "learning_rate": 1.2308945552491537e-06, "loss": 0.2645, "step": 34116 }, { "epoch": 2.535637309550353, "grad_norm": 2.233506274325396, "learning_rate": 1.230508922276763e-06, "loss": 0.2004, "step": 34117 }, { "epoch": 2.5357116313638053, "grad_norm": 2.6772186222319676, "learning_rate": 1.2301233457619e-06, "loss": 0.2703, "step": 34118 }, { "epoch": 2.5357859531772577, "grad_norm": 2.5854275324564435, "learning_rate": 1.229737825707047e-06, "loss": 0.3548, "step": 34119 }, { "epoch": 2.5358602749907098, "grad_norm": 2.4210324916756303, "learning_rate": 1.2293523621146885e-06, "loss": 0.2839, "step": 34120 }, { "epoch": 2.535934596804162, "grad_norm": 2.849547318884326, "learning_rate": 1.2289669549873018e-06, "loss": 0.2811, "step": 34121 }, { "epoch": 2.5360089186176142, "grad_norm": 2.4665394382829087, "learning_rate": 1.2285816043273735e-06, "loss": 0.3156, "step": 34122 }, { "epoch": 2.5360832404310667, "grad_norm": 2.1646827369902244, "learning_rate": 1.2281963101373806e-06, "loss": 0.2645, "step": 34123 }, { "epoch": 2.5361575622445187, "grad_norm": 3.1003372631627175, "learning_rate": 1.2278110724198055e-06, "loss": 0.2689, "step": 34124 }, { "epoch": 2.536231884057971, "grad_norm": 2.5774380918884803, "learning_rate": 1.2274258911771276e-06, "loss": 0.3067, "step": 34125 }, { "epoch": 2.536306205871423, "grad_norm": 2.6032611977630795, "learning_rate": 1.2270407664118244e-06, "loss": 0.2892, "step": 34126 }, { "epoch": 2.5363805276848757, "grad_norm": 1.8801179694450816, "learning_rate": 1.22665569812638e-06, "loss": 0.2053, "step": 34127 }, { "epoch": 2.5364548494983277, "grad_norm": 2.4576518033555024, "learning_rate": 1.2262706863232688e-06, "loss": 0.3249, "step": 34128 }, { "epoch": 2.53652917131178, "grad_norm": 2.9951427706936635, "learning_rate": 1.2258857310049731e-06, "loss": 0.275, "step": 34129 }, { "epoch": 2.536603493125232, "grad_norm": 2.4797630016118166, "learning_rate": 1.2255008321739704e-06, "loss": 0.253, "step": 34130 }, { "epoch": 2.5366778149386846, "grad_norm": 2.5131978643877475, "learning_rate": 1.2251159898327359e-06, "loss": 0.2279, "step": 34131 }, { "epoch": 2.5367521367521366, "grad_norm": 2.0298181826155415, "learning_rate": 1.2247312039837512e-06, "loss": 0.2198, "step": 34132 }, { "epoch": 2.536826458565589, "grad_norm": 3.4816926001016886, "learning_rate": 1.224346474629492e-06, "loss": 0.3695, "step": 34133 }, { "epoch": 2.536900780379041, "grad_norm": 2.361877072121844, "learning_rate": 1.2239618017724331e-06, "loss": 0.2762, "step": 34134 }, { "epoch": 2.5369751021924936, "grad_norm": 2.0740581322123908, "learning_rate": 1.223577185415057e-06, "loss": 0.2553, "step": 34135 }, { "epoch": 2.5370494240059456, "grad_norm": 2.6318219421536413, "learning_rate": 1.2231926255598304e-06, "loss": 0.2931, "step": 34136 }, { "epoch": 2.537123745819398, "grad_norm": 2.155454019391681, "learning_rate": 1.222808122209237e-06, "loss": 0.2414, "step": 34137 }, { "epoch": 2.5371980676328505, "grad_norm": 2.4003326886049, "learning_rate": 1.222423675365747e-06, "loss": 0.3755, "step": 34138 }, { "epoch": 2.5372723894463025, "grad_norm": 2.7782247182892355, "learning_rate": 1.2220392850318408e-06, "loss": 0.2944, "step": 34139 }, { "epoch": 2.5373467112597545, "grad_norm": 2.151685273672056, "learning_rate": 1.2216549512099895e-06, "loss": 0.2595, "step": 34140 }, { "epoch": 2.537421033073207, "grad_norm": 2.0043595812872135, "learning_rate": 1.221270673902666e-06, "loss": 0.2243, "step": 34141 }, { "epoch": 2.5374953548866594, "grad_norm": 2.5037191102284457, "learning_rate": 1.2208864531123487e-06, "loss": 0.2442, "step": 34142 }, { "epoch": 2.5375696767001115, "grad_norm": 2.6974484497292606, "learning_rate": 1.2205022888415074e-06, "loss": 0.2726, "step": 34143 }, { "epoch": 2.5376439985135635, "grad_norm": 2.7424573975190722, "learning_rate": 1.2201181810926188e-06, "loss": 0.3435, "step": 34144 }, { "epoch": 2.537718320327016, "grad_norm": 2.2046745640087213, "learning_rate": 1.219734129868153e-06, "loss": 0.2981, "step": 34145 }, { "epoch": 2.5377926421404684, "grad_norm": 1.9414293492266672, "learning_rate": 1.2193501351705816e-06, "loss": 0.1647, "step": 34146 }, { "epoch": 2.5378669639539204, "grad_norm": 2.439759491278303, "learning_rate": 1.2189661970023825e-06, "loss": 0.2953, "step": 34147 }, { "epoch": 2.537941285767373, "grad_norm": 2.4013365162724716, "learning_rate": 1.2185823153660192e-06, "loss": 0.2869, "step": 34148 }, { "epoch": 2.538015607580825, "grad_norm": 2.0034953944204794, "learning_rate": 1.2181984902639698e-06, "loss": 0.1857, "step": 34149 }, { "epoch": 2.5380899293942774, "grad_norm": 2.0296207572714495, "learning_rate": 1.2178147216987034e-06, "loss": 0.231, "step": 34150 }, { "epoch": 2.5381642512077294, "grad_norm": 2.196583050930527, "learning_rate": 1.2174310096726872e-06, "loss": 0.2574, "step": 34151 }, { "epoch": 2.538238573021182, "grad_norm": 2.295606690698612, "learning_rate": 1.2170473541883964e-06, "loss": 0.2973, "step": 34152 }, { "epoch": 2.538312894834634, "grad_norm": 2.080751744028464, "learning_rate": 1.2166637552482974e-06, "loss": 0.2376, "step": 34153 }, { "epoch": 2.5383872166480863, "grad_norm": 2.082036426999345, "learning_rate": 1.2162802128548635e-06, "loss": 0.2326, "step": 34154 }, { "epoch": 2.5384615384615383, "grad_norm": 2.225826229679838, "learning_rate": 1.2158967270105614e-06, "loss": 0.3074, "step": 34155 }, { "epoch": 2.538535860274991, "grad_norm": 2.531635853128494, "learning_rate": 1.2155132977178586e-06, "loss": 0.274, "step": 34156 }, { "epoch": 2.538610182088443, "grad_norm": 2.2361464874200565, "learning_rate": 1.215129924979227e-06, "loss": 0.2581, "step": 34157 }, { "epoch": 2.5386845039018953, "grad_norm": 2.672692004070523, "learning_rate": 1.2147466087971339e-06, "loss": 0.289, "step": 34158 }, { "epoch": 2.5387588257153473, "grad_norm": 2.337231775172477, "learning_rate": 1.214363349174046e-06, "loss": 0.2692, "step": 34159 }, { "epoch": 2.5388331475287997, "grad_norm": 2.02843611093987, "learning_rate": 1.213980146112429e-06, "loss": 0.2741, "step": 34160 }, { "epoch": 2.538907469342252, "grad_norm": 2.0092465954345347, "learning_rate": 1.2135969996147534e-06, "loss": 0.1972, "step": 34161 }, { "epoch": 2.538981791155704, "grad_norm": 2.7289240872669827, "learning_rate": 1.213213909683485e-06, "loss": 0.3551, "step": 34162 }, { "epoch": 2.5390561129691562, "grad_norm": 2.2136217487509806, "learning_rate": 1.2128308763210872e-06, "loss": 0.26, "step": 34163 }, { "epoch": 2.5391304347826087, "grad_norm": 2.319840012235467, "learning_rate": 1.2124478995300305e-06, "loss": 0.2929, "step": 34164 }, { "epoch": 2.539204756596061, "grad_norm": 2.5313999380546, "learning_rate": 1.212064979312776e-06, "loss": 0.2571, "step": 34165 }, { "epoch": 2.539279078409513, "grad_norm": 1.822675723978807, "learning_rate": 1.211682115671794e-06, "loss": 0.2358, "step": 34166 }, { "epoch": 2.539353400222965, "grad_norm": 2.6783802292499965, "learning_rate": 1.2112993086095459e-06, "loss": 0.3245, "step": 34167 }, { "epoch": 2.5394277220364176, "grad_norm": 3.0545853017937517, "learning_rate": 1.2109165581284943e-06, "loss": 0.3057, "step": 34168 }, { "epoch": 2.53950204384987, "grad_norm": 2.742331038845459, "learning_rate": 1.2105338642311116e-06, "loss": 0.2971, "step": 34169 }, { "epoch": 2.539576365663322, "grad_norm": 2.7650688307721136, "learning_rate": 1.21015122691985e-06, "loss": 0.2987, "step": 34170 }, { "epoch": 2.5396506874767746, "grad_norm": 2.4573474827877018, "learning_rate": 1.2097686461971815e-06, "loss": 0.3418, "step": 34171 }, { "epoch": 2.5397250092902266, "grad_norm": 2.0373486549566984, "learning_rate": 1.2093861220655668e-06, "loss": 0.2016, "step": 34172 }, { "epoch": 2.539799331103679, "grad_norm": 2.343193506740478, "learning_rate": 1.2090036545274653e-06, "loss": 0.3772, "step": 34173 }, { "epoch": 2.539873652917131, "grad_norm": 1.7341955233153814, "learning_rate": 1.2086212435853451e-06, "loss": 0.1827, "step": 34174 }, { "epoch": 2.5399479747305835, "grad_norm": 2.653453751683641, "learning_rate": 1.208238889241662e-06, "loss": 0.2512, "step": 34175 }, { "epoch": 2.5400222965440356, "grad_norm": 1.9233015316434958, "learning_rate": 1.2078565914988839e-06, "loss": 0.2049, "step": 34176 }, { "epoch": 2.540096618357488, "grad_norm": 2.598441103830202, "learning_rate": 1.2074743503594677e-06, "loss": 0.2912, "step": 34177 }, { "epoch": 2.54017094017094, "grad_norm": 2.1721930338614053, "learning_rate": 1.2070921658258738e-06, "loss": 0.2424, "step": 34178 }, { "epoch": 2.5402452619843925, "grad_norm": 2.469318588465969, "learning_rate": 1.206710037900567e-06, "loss": 0.2805, "step": 34179 }, { "epoch": 2.5403195837978445, "grad_norm": 2.2812595406323504, "learning_rate": 1.2063279665860018e-06, "loss": 0.2629, "step": 34180 }, { "epoch": 2.540393905611297, "grad_norm": 2.8066112161158556, "learning_rate": 1.2059459518846451e-06, "loss": 0.3469, "step": 34181 }, { "epoch": 2.540468227424749, "grad_norm": 2.7714803332189537, "learning_rate": 1.2055639937989482e-06, "loss": 0.2882, "step": 34182 }, { "epoch": 2.5405425492382014, "grad_norm": 1.9716211723505719, "learning_rate": 1.2051820923313762e-06, "loss": 0.3034, "step": 34183 }, { "epoch": 2.540616871051654, "grad_norm": 3.507065532827699, "learning_rate": 1.2048002474843856e-06, "loss": 0.2538, "step": 34184 }, { "epoch": 2.540691192865106, "grad_norm": 1.9603008579704726, "learning_rate": 1.2044184592604324e-06, "loss": 0.2686, "step": 34185 }, { "epoch": 2.540765514678558, "grad_norm": 1.9773536146420385, "learning_rate": 1.204036727661979e-06, "loss": 0.278, "step": 34186 }, { "epoch": 2.5408398364920104, "grad_norm": 2.2285762765913737, "learning_rate": 1.2036550526914792e-06, "loss": 0.2945, "step": 34187 }, { "epoch": 2.540914158305463, "grad_norm": 2.790811867288256, "learning_rate": 1.203273434351393e-06, "loss": 0.3793, "step": 34188 }, { "epoch": 2.540988480118915, "grad_norm": 3.8156830547549787, "learning_rate": 1.2028918726441763e-06, "loss": 0.2708, "step": 34189 }, { "epoch": 2.541062801932367, "grad_norm": 2.648877269356259, "learning_rate": 1.2025103675722827e-06, "loss": 0.3417, "step": 34190 }, { "epoch": 2.5411371237458193, "grad_norm": 1.9356429189914497, "learning_rate": 1.2021289191381736e-06, "loss": 0.2265, "step": 34191 }, { "epoch": 2.541211445559272, "grad_norm": 2.5532474880415754, "learning_rate": 1.2017475273443013e-06, "loss": 0.3058, "step": 34192 }, { "epoch": 2.541285767372724, "grad_norm": 1.7025874032229995, "learning_rate": 1.2013661921931231e-06, "loss": 0.217, "step": 34193 }, { "epoch": 2.5413600891861763, "grad_norm": 2.184370070301477, "learning_rate": 1.2009849136870922e-06, "loss": 0.2194, "step": 34194 }, { "epoch": 2.5414344109996283, "grad_norm": 2.6225486367172066, "learning_rate": 1.2006036918286612e-06, "loss": 0.3168, "step": 34195 }, { "epoch": 2.5415087328130808, "grad_norm": 2.3085546124603566, "learning_rate": 1.2002225266202894e-06, "loss": 0.2301, "step": 34196 }, { "epoch": 2.541583054626533, "grad_norm": 2.227639626094627, "learning_rate": 1.1998414180644268e-06, "loss": 0.2954, "step": 34197 }, { "epoch": 2.5416573764399852, "grad_norm": 2.4232904640758637, "learning_rate": 1.1994603661635306e-06, "loss": 0.2777, "step": 34198 }, { "epoch": 2.5417316982534373, "grad_norm": 2.415317819082204, "learning_rate": 1.199079370920052e-06, "loss": 0.3123, "step": 34199 }, { "epoch": 2.5418060200668897, "grad_norm": 2.53543200913982, "learning_rate": 1.1986984323364414e-06, "loss": 0.3304, "step": 34200 }, { "epoch": 2.5418803418803417, "grad_norm": 2.5100194356823504, "learning_rate": 1.1983175504151556e-06, "loss": 0.3068, "step": 34201 }, { "epoch": 2.541954663693794, "grad_norm": 1.9960179646266831, "learning_rate": 1.1979367251586426e-06, "loss": 0.2014, "step": 34202 }, { "epoch": 2.5420289855072467, "grad_norm": 2.4456037261998573, "learning_rate": 1.1975559565693606e-06, "loss": 0.2841, "step": 34203 }, { "epoch": 2.5421033073206987, "grad_norm": 2.221697429422892, "learning_rate": 1.197175244649752e-06, "loss": 0.2709, "step": 34204 }, { "epoch": 2.5421776291341507, "grad_norm": 2.4252799440263932, "learning_rate": 1.196794589402276e-06, "loss": 0.3047, "step": 34205 }, { "epoch": 2.542251950947603, "grad_norm": 2.0899549010789147, "learning_rate": 1.196413990829378e-06, "loss": 0.2517, "step": 34206 }, { "epoch": 2.5423262727610556, "grad_norm": 2.439736449152164, "learning_rate": 1.196033448933509e-06, "loss": 0.2825, "step": 34207 }, { "epoch": 2.5424005945745076, "grad_norm": 1.9614984914483442, "learning_rate": 1.195652963717121e-06, "loss": 0.1802, "step": 34208 }, { "epoch": 2.5424749163879596, "grad_norm": 2.930968040805667, "learning_rate": 1.195272535182661e-06, "loss": 0.3694, "step": 34209 }, { "epoch": 2.542549238201412, "grad_norm": 1.9488508591170879, "learning_rate": 1.1948921633325816e-06, "loss": 0.2216, "step": 34210 }, { "epoch": 2.5426235600148646, "grad_norm": 2.2291609212190053, "learning_rate": 1.1945118481693286e-06, "loss": 0.2566, "step": 34211 }, { "epoch": 2.5426978818283166, "grad_norm": 2.034803240899741, "learning_rate": 1.1941315896953497e-06, "loss": 0.2236, "step": 34212 }, { "epoch": 2.5427722036417686, "grad_norm": 2.046449211643728, "learning_rate": 1.193751387913098e-06, "loss": 0.279, "step": 34213 }, { "epoch": 2.542846525455221, "grad_norm": 2.1289904130112727, "learning_rate": 1.1933712428250166e-06, "loss": 0.1985, "step": 34214 }, { "epoch": 2.5429208472686735, "grad_norm": 2.199495818382167, "learning_rate": 1.1929911544335548e-06, "loss": 0.2867, "step": 34215 }, { "epoch": 2.5429951690821255, "grad_norm": 2.109775803704655, "learning_rate": 1.1926111227411597e-06, "loss": 0.2813, "step": 34216 }, { "epoch": 2.543069490895578, "grad_norm": 2.1274246121768763, "learning_rate": 1.1922311477502734e-06, "loss": 0.1909, "step": 34217 }, { "epoch": 2.54314381270903, "grad_norm": 2.1706922743830837, "learning_rate": 1.1918512294633499e-06, "loss": 0.2361, "step": 34218 }, { "epoch": 2.5432181345224825, "grad_norm": 2.3634195760524523, "learning_rate": 1.1914713678828283e-06, "loss": 0.3047, "step": 34219 }, { "epoch": 2.5432924563359345, "grad_norm": 1.6123567186160193, "learning_rate": 1.1910915630111585e-06, "loss": 0.1367, "step": 34220 }, { "epoch": 2.543366778149387, "grad_norm": 2.0923116647873528, "learning_rate": 1.1907118148507846e-06, "loss": 0.2282, "step": 34221 }, { "epoch": 2.543441099962839, "grad_norm": 1.9762948727869831, "learning_rate": 1.1903321234041487e-06, "loss": 0.2398, "step": 34222 }, { "epoch": 2.5435154217762914, "grad_norm": 1.5135818681204618, "learning_rate": 1.1899524886737002e-06, "loss": 0.1504, "step": 34223 }, { "epoch": 2.5435897435897434, "grad_norm": 2.3230792052942597, "learning_rate": 1.1895729106618781e-06, "loss": 0.2585, "step": 34224 }, { "epoch": 2.543664065403196, "grad_norm": 2.5625900974077154, "learning_rate": 1.1891933893711294e-06, "loss": 0.3449, "step": 34225 }, { "epoch": 2.5437383872166484, "grad_norm": 2.2060016983405726, "learning_rate": 1.1888139248038976e-06, "loss": 0.2272, "step": 34226 }, { "epoch": 2.5438127090301004, "grad_norm": 2.2902565594342237, "learning_rate": 1.1884345169626243e-06, "loss": 0.2817, "step": 34227 }, { "epoch": 2.5438870308435524, "grad_norm": 2.3580921384016276, "learning_rate": 1.188055165849753e-06, "loss": 0.2479, "step": 34228 }, { "epoch": 2.543961352657005, "grad_norm": 2.7406674211962483, "learning_rate": 1.1876758714677228e-06, "loss": 0.3672, "step": 34229 }, { "epoch": 2.5440356744704573, "grad_norm": 2.635517706302011, "learning_rate": 1.1872966338189807e-06, "loss": 0.2282, "step": 34230 }, { "epoch": 2.5441099962839093, "grad_norm": 3.408207994548851, "learning_rate": 1.1869174529059623e-06, "loss": 0.288, "step": 34231 }, { "epoch": 2.5441843180973613, "grad_norm": 2.337426214534933, "learning_rate": 1.186538328731115e-06, "loss": 0.2147, "step": 34232 }, { "epoch": 2.544258639910814, "grad_norm": 2.322320889447433, "learning_rate": 1.1861592612968765e-06, "loss": 0.2443, "step": 34233 }, { "epoch": 2.5443329617242663, "grad_norm": 2.7162848508344473, "learning_rate": 1.1857802506056848e-06, "loss": 0.3117, "step": 34234 }, { "epoch": 2.5444072835377183, "grad_norm": 2.09372719137502, "learning_rate": 1.1854012966599847e-06, "loss": 0.2414, "step": 34235 }, { "epoch": 2.5444816053511703, "grad_norm": 2.039769012660997, "learning_rate": 1.1850223994622146e-06, "loss": 0.2532, "step": 34236 }, { "epoch": 2.5445559271646228, "grad_norm": 2.7330918693252046, "learning_rate": 1.184643559014812e-06, "loss": 0.3133, "step": 34237 }, { "epoch": 2.544630248978075, "grad_norm": 2.370780766389562, "learning_rate": 1.1842647753202175e-06, "loss": 0.3045, "step": 34238 }, { "epoch": 2.5447045707915272, "grad_norm": 2.036749991232049, "learning_rate": 1.1838860483808667e-06, "loss": 0.2238, "step": 34239 }, { "epoch": 2.5447788926049797, "grad_norm": 2.4042997469611436, "learning_rate": 1.1835073781992012e-06, "loss": 0.4305, "step": 34240 }, { "epoch": 2.5448532144184317, "grad_norm": 2.272950582248779, "learning_rate": 1.1831287647776568e-06, "loss": 0.304, "step": 34241 }, { "epoch": 2.544927536231884, "grad_norm": 2.180820627769801, "learning_rate": 1.1827502081186737e-06, "loss": 0.2474, "step": 34242 }, { "epoch": 2.545001858045336, "grad_norm": 2.349152147164014, "learning_rate": 1.1823717082246867e-06, "loss": 0.2558, "step": 34243 }, { "epoch": 2.5450761798587886, "grad_norm": 2.74479139272306, "learning_rate": 1.1819932650981314e-06, "loss": 0.201, "step": 34244 }, { "epoch": 2.5451505016722407, "grad_norm": 2.468506978640949, "learning_rate": 1.1816148787414484e-06, "loss": 0.391, "step": 34245 }, { "epoch": 2.545224823485693, "grad_norm": 2.135607818938244, "learning_rate": 1.1812365491570688e-06, "loss": 0.2559, "step": 34246 }, { "epoch": 2.545299145299145, "grad_norm": 2.564027356490191, "learning_rate": 1.180858276347433e-06, "loss": 0.2518, "step": 34247 }, { "epoch": 2.5453734671125976, "grad_norm": 2.5902155437202334, "learning_rate": 1.1804800603149735e-06, "loss": 0.2272, "step": 34248 }, { "epoch": 2.54544778892605, "grad_norm": 2.2943038631795822, "learning_rate": 1.1801019010621273e-06, "loss": 0.2495, "step": 34249 }, { "epoch": 2.545522110739502, "grad_norm": 5.464667651218303, "learning_rate": 1.1797237985913256e-06, "loss": 0.2973, "step": 34250 }, { "epoch": 2.545596432552954, "grad_norm": 1.927780651534151, "learning_rate": 1.1793457529050035e-06, "loss": 0.2482, "step": 34251 }, { "epoch": 2.5456707543664066, "grad_norm": 2.1635955519488697, "learning_rate": 1.1789677640055974e-06, "loss": 0.187, "step": 34252 }, { "epoch": 2.545745076179859, "grad_norm": 2.505203003431117, "learning_rate": 1.178589831895537e-06, "loss": 0.3118, "step": 34253 }, { "epoch": 2.545819397993311, "grad_norm": 2.717728535418156, "learning_rate": 1.17821195657726e-06, "loss": 0.2572, "step": 34254 }, { "epoch": 2.545893719806763, "grad_norm": 2.461592172603569, "learning_rate": 1.1778341380531955e-06, "loss": 0.2381, "step": 34255 }, { "epoch": 2.5459680416202155, "grad_norm": 2.1630377943191754, "learning_rate": 1.1774563763257762e-06, "loss": 0.2597, "step": 34256 }, { "epoch": 2.546042363433668, "grad_norm": 2.4769451066229218, "learning_rate": 1.177078671397437e-06, "loss": 0.2734, "step": 34257 }, { "epoch": 2.54611668524712, "grad_norm": 2.055309768629364, "learning_rate": 1.1767010232706066e-06, "loss": 0.2593, "step": 34258 }, { "epoch": 2.5461910070605724, "grad_norm": 2.4642491604228587, "learning_rate": 1.1763234319477168e-06, "loss": 0.31, "step": 34259 }, { "epoch": 2.5462653288740245, "grad_norm": 2.4054928944718315, "learning_rate": 1.175945897431201e-06, "loss": 0.3473, "step": 34260 }, { "epoch": 2.546339650687477, "grad_norm": 2.5357856740979856, "learning_rate": 1.175568419723485e-06, "loss": 0.337, "step": 34261 }, { "epoch": 2.546413972500929, "grad_norm": 2.0950557833873162, "learning_rate": 1.1751909988270037e-06, "loss": 0.2342, "step": 34262 }, { "epoch": 2.5464882943143814, "grad_norm": 2.8476997648234144, "learning_rate": 1.1748136347441818e-06, "loss": 0.2894, "step": 34263 }, { "epoch": 2.5465626161278334, "grad_norm": 1.9513162684346697, "learning_rate": 1.174436327477455e-06, "loss": 0.2428, "step": 34264 }, { "epoch": 2.546636937941286, "grad_norm": 2.4967158808823786, "learning_rate": 1.174059077029248e-06, "loss": 0.3706, "step": 34265 }, { "epoch": 2.546711259754738, "grad_norm": 2.3652664399526135, "learning_rate": 1.1736818834019893e-06, "loss": 0.2386, "step": 34266 }, { "epoch": 2.5467855815681903, "grad_norm": 2.7664859189216364, "learning_rate": 1.1733047465981106e-06, "loss": 0.3093, "step": 34267 }, { "epoch": 2.5468599033816424, "grad_norm": 3.009546110026237, "learning_rate": 1.1729276666200362e-06, "loss": 0.3145, "step": 34268 }, { "epoch": 2.546934225195095, "grad_norm": 2.3249646799420995, "learning_rate": 1.172550643470197e-06, "loss": 0.1503, "step": 34269 }, { "epoch": 2.547008547008547, "grad_norm": 2.347794012097528, "learning_rate": 1.1721736771510195e-06, "loss": 0.2212, "step": 34270 }, { "epoch": 2.5470828688219993, "grad_norm": 1.931658952532965, "learning_rate": 1.17179676766493e-06, "loss": 0.2519, "step": 34271 }, { "epoch": 2.5471571906354518, "grad_norm": 2.716978427437987, "learning_rate": 1.1714199150143534e-06, "loss": 0.3232, "step": 34272 }, { "epoch": 2.547231512448904, "grad_norm": 2.495109337630686, "learning_rate": 1.1710431192017168e-06, "loss": 0.2661, "step": 34273 }, { "epoch": 2.547305834262356, "grad_norm": 1.8579062005735572, "learning_rate": 1.1706663802294482e-06, "loss": 0.2329, "step": 34274 }, { "epoch": 2.5473801560758083, "grad_norm": 2.6249032115149005, "learning_rate": 1.1702896980999702e-06, "loss": 0.3716, "step": 34275 }, { "epoch": 2.5474544778892607, "grad_norm": 2.9981575975897523, "learning_rate": 1.1699130728157082e-06, "loss": 0.3106, "step": 34276 }, { "epoch": 2.5475287997027127, "grad_norm": 2.1707701661450898, "learning_rate": 1.1695365043790896e-06, "loss": 0.2671, "step": 34277 }, { "epoch": 2.5476031215161647, "grad_norm": 1.869126174940684, "learning_rate": 1.1691599927925335e-06, "loss": 0.1779, "step": 34278 }, { "epoch": 2.547677443329617, "grad_norm": 2.4776327626008734, "learning_rate": 1.16878353805847e-06, "loss": 0.2924, "step": 34279 }, { "epoch": 2.5477517651430697, "grad_norm": 3.0094299759591934, "learning_rate": 1.1684071401793207e-06, "loss": 0.3059, "step": 34280 }, { "epoch": 2.5478260869565217, "grad_norm": 2.15589252158782, "learning_rate": 1.1680307991575046e-06, "loss": 0.2786, "step": 34281 }, { "epoch": 2.547900408769974, "grad_norm": 2.6016859632149187, "learning_rate": 1.1676545149954533e-06, "loss": 0.2895, "step": 34282 }, { "epoch": 2.547974730583426, "grad_norm": 2.5164956446537308, "learning_rate": 1.167278287695579e-06, "loss": 0.302, "step": 34283 }, { "epoch": 2.5480490523968786, "grad_norm": 2.403087702736065, "learning_rate": 1.1669021172603112e-06, "loss": 0.1727, "step": 34284 }, { "epoch": 2.5481233742103306, "grad_norm": 2.3072764890749604, "learning_rate": 1.1665260036920667e-06, "loss": 0.2996, "step": 34285 }, { "epoch": 2.548197696023783, "grad_norm": 2.3070153637052355, "learning_rate": 1.1661499469932724e-06, "loss": 0.2474, "step": 34286 }, { "epoch": 2.548272017837235, "grad_norm": 2.5364145068988613, "learning_rate": 1.1657739471663454e-06, "loss": 0.2851, "step": 34287 }, { "epoch": 2.5483463396506876, "grad_norm": 2.242900314330644, "learning_rate": 1.1653980042137058e-06, "loss": 0.2805, "step": 34288 }, { "epoch": 2.5484206614641396, "grad_norm": 7.644758895309597, "learning_rate": 1.1650221181377774e-06, "loss": 0.4321, "step": 34289 }, { "epoch": 2.548494983277592, "grad_norm": 2.378536548086892, "learning_rate": 1.164646288940976e-06, "loss": 0.2931, "step": 34290 }, { "epoch": 2.548569305091044, "grad_norm": 2.4978809556898356, "learning_rate": 1.1642705166257261e-06, "loss": 0.2768, "step": 34291 }, { "epoch": 2.5486436269044965, "grad_norm": 2.5187319524725593, "learning_rate": 1.1638948011944428e-06, "loss": 0.342, "step": 34292 }, { "epoch": 2.5487179487179485, "grad_norm": 2.184268178531557, "learning_rate": 1.163519142649545e-06, "loss": 0.2416, "step": 34293 }, { "epoch": 2.548792270531401, "grad_norm": 3.15061138277034, "learning_rate": 1.1631435409934577e-06, "loss": 0.4032, "step": 34294 }, { "epoch": 2.5488665923448535, "grad_norm": 2.3119412830044577, "learning_rate": 1.1627679962285886e-06, "loss": 0.2448, "step": 34295 }, { "epoch": 2.5489409141583055, "grad_norm": 2.4465144023452683, "learning_rate": 1.1623925083573628e-06, "loss": 0.2803, "step": 34296 }, { "epoch": 2.5490152359717575, "grad_norm": 1.9424523117227324, "learning_rate": 1.1620170773821949e-06, "loss": 0.2429, "step": 34297 }, { "epoch": 2.54908955778521, "grad_norm": 2.1823181129444222, "learning_rate": 1.1616417033055004e-06, "loss": 0.2388, "step": 34298 }, { "epoch": 2.5491638795986624, "grad_norm": 2.5111761834061115, "learning_rate": 1.1612663861297013e-06, "loss": 0.2584, "step": 34299 }, { "epoch": 2.5492382014121144, "grad_norm": 2.0759962822634574, "learning_rate": 1.1608911258572076e-06, "loss": 0.2559, "step": 34300 }, { "epoch": 2.5493125232255665, "grad_norm": 2.9782310946266137, "learning_rate": 1.1605159224904404e-06, "loss": 0.3133, "step": 34301 }, { "epoch": 2.549386845039019, "grad_norm": 2.4259034084659925, "learning_rate": 1.1601407760318129e-06, "loss": 0.2536, "step": 34302 }, { "epoch": 2.5494611668524714, "grad_norm": 2.5472336192544716, "learning_rate": 1.1597656864837392e-06, "loss": 0.2746, "step": 34303 }, { "epoch": 2.5495354886659234, "grad_norm": 3.531159883253182, "learning_rate": 1.1593906538486365e-06, "loss": 0.3339, "step": 34304 }, { "epoch": 2.549609810479376, "grad_norm": 3.8103490451681417, "learning_rate": 1.1590156781289186e-06, "loss": 0.2846, "step": 34305 }, { "epoch": 2.549684132292828, "grad_norm": 2.0253172654165255, "learning_rate": 1.1586407593269987e-06, "loss": 0.234, "step": 34306 }, { "epoch": 2.5497584541062803, "grad_norm": 2.076135429062507, "learning_rate": 1.1582658974452887e-06, "loss": 0.254, "step": 34307 }, { "epoch": 2.5498327759197323, "grad_norm": 2.013776023310606, "learning_rate": 1.1578910924862063e-06, "loss": 0.2086, "step": 34308 }, { "epoch": 2.549907097733185, "grad_norm": 2.361384367855411, "learning_rate": 1.157516344452162e-06, "loss": 0.2758, "step": 34309 }, { "epoch": 2.549981419546637, "grad_norm": 2.551913979069554, "learning_rate": 1.1571416533455671e-06, "loss": 0.2904, "step": 34310 }, { "epoch": 2.5500557413600893, "grad_norm": 2.8736282991853, "learning_rate": 1.1567670191688374e-06, "loss": 0.2802, "step": 34311 }, { "epoch": 2.5501300631735413, "grad_norm": 2.40382335883868, "learning_rate": 1.15639244192438e-06, "loss": 0.2971, "step": 34312 }, { "epoch": 2.5502043849869938, "grad_norm": 2.4069484918993242, "learning_rate": 1.1560179216146117e-06, "loss": 0.3072, "step": 34313 }, { "epoch": 2.5502787068004458, "grad_norm": 2.7600235822056627, "learning_rate": 1.1556434582419417e-06, "loss": 0.2822, "step": 34314 }, { "epoch": 2.5503530286138982, "grad_norm": 2.3911331414080936, "learning_rate": 1.1552690518087772e-06, "loss": 0.2757, "step": 34315 }, { "epoch": 2.5504273504273502, "grad_norm": 2.332767832899477, "learning_rate": 1.1548947023175361e-06, "loss": 0.2548, "step": 34316 }, { "epoch": 2.5505016722408027, "grad_norm": 2.6713364590137947, "learning_rate": 1.1545204097706208e-06, "loss": 0.3344, "step": 34317 }, { "epoch": 2.550575994054255, "grad_norm": 2.1395635019590875, "learning_rate": 1.1541461741704451e-06, "loss": 0.2642, "step": 34318 }, { "epoch": 2.550650315867707, "grad_norm": 2.704426567076322, "learning_rate": 1.1537719955194171e-06, "loss": 0.2248, "step": 34319 }, { "epoch": 2.550724637681159, "grad_norm": 2.2312925463668374, "learning_rate": 1.1533978738199447e-06, "loss": 0.2978, "step": 34320 }, { "epoch": 2.5507989594946117, "grad_norm": 2.667932846152134, "learning_rate": 1.1530238090744395e-06, "loss": 0.2547, "step": 34321 }, { "epoch": 2.550873281308064, "grad_norm": 2.572284627754417, "learning_rate": 1.152649801285306e-06, "loss": 0.2947, "step": 34322 }, { "epoch": 2.550947603121516, "grad_norm": 2.058010390729115, "learning_rate": 1.1522758504549569e-06, "loss": 0.1726, "step": 34323 }, { "epoch": 2.551021924934968, "grad_norm": 2.365228246026159, "learning_rate": 1.1519019565857957e-06, "loss": 0.2561, "step": 34324 }, { "epoch": 2.5510962467484206, "grad_norm": 2.7603051853766916, "learning_rate": 1.1515281196802286e-06, "loss": 0.3284, "step": 34325 }, { "epoch": 2.551170568561873, "grad_norm": 1.8680859036970574, "learning_rate": 1.1511543397406665e-06, "loss": 0.2264, "step": 34326 }, { "epoch": 2.551244890375325, "grad_norm": 3.003081370098969, "learning_rate": 1.1507806167695134e-06, "loss": 0.3581, "step": 34327 }, { "epoch": 2.5513192121887776, "grad_norm": 2.6404171417385665, "learning_rate": 1.1504069507691763e-06, "loss": 0.2993, "step": 34328 }, { "epoch": 2.5513935340022296, "grad_norm": 2.6200170330084207, "learning_rate": 1.1500333417420573e-06, "loss": 0.2811, "step": 34329 }, { "epoch": 2.551467855815682, "grad_norm": 2.324782480572834, "learning_rate": 1.149659789690567e-06, "loss": 0.2539, "step": 34330 }, { "epoch": 2.551542177629134, "grad_norm": 2.876859949883212, "learning_rate": 1.149286294617108e-06, "loss": 0.3474, "step": 34331 }, { "epoch": 2.5516164994425865, "grad_norm": 2.815042013325251, "learning_rate": 1.1489128565240825e-06, "loss": 0.287, "step": 34332 }, { "epoch": 2.5516908212560385, "grad_norm": 2.9772977200584423, "learning_rate": 1.1485394754138978e-06, "loss": 0.2756, "step": 34333 }, { "epoch": 2.551765143069491, "grad_norm": 2.1580601495443172, "learning_rate": 1.1481661512889553e-06, "loss": 0.2504, "step": 34334 }, { "epoch": 2.551839464882943, "grad_norm": 2.1222492901169074, "learning_rate": 1.1477928841516617e-06, "loss": 0.1919, "step": 34335 }, { "epoch": 2.5519137866963955, "grad_norm": 1.8283475753874034, "learning_rate": 1.1474196740044186e-06, "loss": 0.2045, "step": 34336 }, { "epoch": 2.5519881085098475, "grad_norm": 2.6271113221494375, "learning_rate": 1.147046520849626e-06, "loss": 0.3256, "step": 34337 }, { "epoch": 2.5520624303233, "grad_norm": 1.941221073801934, "learning_rate": 1.1466734246896903e-06, "loss": 0.2576, "step": 34338 }, { "epoch": 2.552136752136752, "grad_norm": 2.1991824529789064, "learning_rate": 1.1463003855270126e-06, "loss": 0.2191, "step": 34339 }, { "epoch": 2.5522110739502044, "grad_norm": 2.1074627413711666, "learning_rate": 1.1459274033639932e-06, "loss": 0.2206, "step": 34340 }, { "epoch": 2.552285395763657, "grad_norm": 2.3900860637613497, "learning_rate": 1.1455544782030337e-06, "loss": 0.2697, "step": 34341 }, { "epoch": 2.552359717577109, "grad_norm": 2.335755442169142, "learning_rate": 1.1451816100465329e-06, "loss": 0.2861, "step": 34342 }, { "epoch": 2.552434039390561, "grad_norm": 2.1241762289276394, "learning_rate": 1.1448087988968958e-06, "loss": 0.2652, "step": 34343 }, { "epoch": 2.5525083612040134, "grad_norm": 2.313907924046668, "learning_rate": 1.1444360447565172e-06, "loss": 0.2748, "step": 34344 }, { "epoch": 2.552582683017466, "grad_norm": 2.670119343722304, "learning_rate": 1.144063347627803e-06, "loss": 0.2169, "step": 34345 }, { "epoch": 2.552657004830918, "grad_norm": 1.858815378796052, "learning_rate": 1.1436907075131497e-06, "loss": 0.1525, "step": 34346 }, { "epoch": 2.55273132664437, "grad_norm": 2.981053133214607, "learning_rate": 1.1433181244149528e-06, "loss": 0.3146, "step": 34347 }, { "epoch": 2.5528056484578223, "grad_norm": 2.7337892779176176, "learning_rate": 1.1429455983356174e-06, "loss": 0.3417, "step": 34348 }, { "epoch": 2.552879970271275, "grad_norm": 2.3234668181596962, "learning_rate": 1.1425731292775355e-06, "loss": 0.2176, "step": 34349 }, { "epoch": 2.552954292084727, "grad_norm": 2.380827047604016, "learning_rate": 1.1422007172431137e-06, "loss": 0.2711, "step": 34350 }, { "epoch": 2.5530286138981793, "grad_norm": 2.2407858906136453, "learning_rate": 1.1418283622347393e-06, "loss": 0.2782, "step": 34351 }, { "epoch": 2.5531029357116313, "grad_norm": 2.2737524853877527, "learning_rate": 1.1414560642548155e-06, "loss": 0.258, "step": 34352 }, { "epoch": 2.5531772575250837, "grad_norm": 3.0244671966467815, "learning_rate": 1.1410838233057398e-06, "loss": 0.2436, "step": 34353 }, { "epoch": 2.5532515793385357, "grad_norm": 1.6961207864638312, "learning_rate": 1.1407116393899032e-06, "loss": 0.1936, "step": 34354 }, { "epoch": 2.553325901151988, "grad_norm": 2.362663452410586, "learning_rate": 1.1403395125097083e-06, "loss": 0.2333, "step": 34355 }, { "epoch": 2.5534002229654402, "grad_norm": 2.5860269391187876, "learning_rate": 1.1399674426675455e-06, "loss": 0.2614, "step": 34356 }, { "epoch": 2.5534745447788927, "grad_norm": 1.9830895232285042, "learning_rate": 1.139595429865814e-06, "loss": 0.2474, "step": 34357 }, { "epoch": 2.5535488665923447, "grad_norm": 2.431173486213692, "learning_rate": 1.1392234741069085e-06, "loss": 0.2939, "step": 34358 }, { "epoch": 2.553623188405797, "grad_norm": 2.270391330889458, "learning_rate": 1.1388515753932194e-06, "loss": 0.2066, "step": 34359 }, { "epoch": 2.5536975102192496, "grad_norm": 2.3220781480675847, "learning_rate": 1.138479733727147e-06, "loss": 0.2997, "step": 34360 }, { "epoch": 2.5537718320327016, "grad_norm": 1.6751448887468712, "learning_rate": 1.1381079491110825e-06, "loss": 0.1746, "step": 34361 }, { "epoch": 2.5538461538461537, "grad_norm": 2.4093157702305144, "learning_rate": 1.1377362215474187e-06, "loss": 0.2386, "step": 34362 }, { "epoch": 2.553920475659606, "grad_norm": 1.960854351806797, "learning_rate": 1.1373645510385488e-06, "loss": 0.2031, "step": 34363 }, { "epoch": 2.5539947974730586, "grad_norm": 2.2490672040880013, "learning_rate": 1.1369929375868649e-06, "loss": 0.2841, "step": 34364 }, { "epoch": 2.5540691192865106, "grad_norm": 2.1617704831791205, "learning_rate": 1.1366213811947613e-06, "loss": 0.2229, "step": 34365 }, { "epoch": 2.5541434410999626, "grad_norm": 1.9865990852803748, "learning_rate": 1.1362498818646284e-06, "loss": 0.2497, "step": 34366 }, { "epoch": 2.554217762913415, "grad_norm": 1.8065193412027847, "learning_rate": 1.135878439598861e-06, "loss": 0.2043, "step": 34367 }, { "epoch": 2.5542920847268675, "grad_norm": 2.377485171504991, "learning_rate": 1.1355070543998482e-06, "loss": 0.2707, "step": 34368 }, { "epoch": 2.5543664065403195, "grad_norm": 2.478211786107045, "learning_rate": 1.1351357262699792e-06, "loss": 0.3016, "step": 34369 }, { "epoch": 2.5544407283537716, "grad_norm": 2.937532651255902, "learning_rate": 1.1347644552116477e-06, "loss": 0.3484, "step": 34370 }, { "epoch": 2.554515050167224, "grad_norm": 2.334366862854445, "learning_rate": 1.1343932412272408e-06, "loss": 0.2281, "step": 34371 }, { "epoch": 2.5545893719806765, "grad_norm": 2.1408031127662515, "learning_rate": 1.134022084319153e-06, "loss": 0.2672, "step": 34372 }, { "epoch": 2.5546636937941285, "grad_norm": 2.4072269589758424, "learning_rate": 1.1336509844897702e-06, "loss": 0.2691, "step": 34373 }, { "epoch": 2.554738015607581, "grad_norm": 2.701075625237042, "learning_rate": 1.133279941741483e-06, "loss": 0.2649, "step": 34374 }, { "epoch": 2.554812337421033, "grad_norm": 1.9821352865171453, "learning_rate": 1.1329089560766782e-06, "loss": 0.2032, "step": 34375 }, { "epoch": 2.5548866592344854, "grad_norm": 2.548627598916648, "learning_rate": 1.1325380274977449e-06, "loss": 0.2968, "step": 34376 }, { "epoch": 2.5549609810479375, "grad_norm": 3.1793912653880607, "learning_rate": 1.1321671560070723e-06, "loss": 0.3436, "step": 34377 }, { "epoch": 2.55503530286139, "grad_norm": 2.8488404391528417, "learning_rate": 1.1317963416070466e-06, "loss": 0.3145, "step": 34378 }, { "epoch": 2.555109624674842, "grad_norm": 5.4560368502473935, "learning_rate": 1.1314255843000578e-06, "loss": 0.3239, "step": 34379 }, { "epoch": 2.5551839464882944, "grad_norm": 2.1255870650516875, "learning_rate": 1.1310548840884905e-06, "loss": 0.2271, "step": 34380 }, { "epoch": 2.5552582683017464, "grad_norm": 2.393093167969888, "learning_rate": 1.1306842409747298e-06, "loss": 0.3147, "step": 34381 }, { "epoch": 2.555332590115199, "grad_norm": 2.256264515888273, "learning_rate": 1.130313654961166e-06, "loss": 0.2174, "step": 34382 }, { "epoch": 2.5554069119286513, "grad_norm": 2.819260397543073, "learning_rate": 1.1299431260501826e-06, "loss": 0.337, "step": 34383 }, { "epoch": 2.5554812337421033, "grad_norm": 2.210371713078661, "learning_rate": 1.1295726542441643e-06, "loss": 0.2162, "step": 34384 }, { "epoch": 2.5555555555555554, "grad_norm": 2.0876111318389, "learning_rate": 1.1292022395454982e-06, "loss": 0.2514, "step": 34385 }, { "epoch": 2.555629877369008, "grad_norm": 2.2234900140132994, "learning_rate": 1.1288318819565647e-06, "loss": 0.2605, "step": 34386 }, { "epoch": 2.5557041991824603, "grad_norm": 2.366958826116377, "learning_rate": 1.128461581479754e-06, "loss": 0.2992, "step": 34387 }, { "epoch": 2.5557785209959123, "grad_norm": 2.477183997728847, "learning_rate": 1.1280913381174451e-06, "loss": 0.238, "step": 34388 }, { "epoch": 2.5558528428093643, "grad_norm": 2.3161436713657277, "learning_rate": 1.1277211518720255e-06, "loss": 0.1914, "step": 34389 }, { "epoch": 2.5559271646228168, "grad_norm": 2.2059138815265986, "learning_rate": 1.1273510227458773e-06, "loss": 0.3015, "step": 34390 }, { "epoch": 2.5560014864362692, "grad_norm": 2.755516679157389, "learning_rate": 1.126980950741381e-06, "loss": 0.3085, "step": 34391 }, { "epoch": 2.5560758082497212, "grad_norm": 2.2451037468371915, "learning_rate": 1.1266109358609223e-06, "loss": 0.2501, "step": 34392 }, { "epoch": 2.5561501300631733, "grad_norm": 2.6335437575752048, "learning_rate": 1.1262409781068806e-06, "loss": 0.3292, "step": 34393 }, { "epoch": 2.5562244518766257, "grad_norm": 1.855335198473422, "learning_rate": 1.1258710774816406e-06, "loss": 0.1738, "step": 34394 }, { "epoch": 2.556298773690078, "grad_norm": 3.1777593461414613, "learning_rate": 1.1255012339875826e-06, "loss": 0.3294, "step": 34395 }, { "epoch": 2.55637309550353, "grad_norm": 2.2541851059295697, "learning_rate": 1.125131447627087e-06, "loss": 0.2999, "step": 34396 }, { "epoch": 2.5564474173169827, "grad_norm": 2.1454566498427554, "learning_rate": 1.124761718402535e-06, "loss": 0.2461, "step": 34397 }, { "epoch": 2.5565217391304347, "grad_norm": 2.233142363906002, "learning_rate": 1.1243920463163038e-06, "loss": 0.2302, "step": 34398 }, { "epoch": 2.556596060943887, "grad_norm": 2.555611045445705, "learning_rate": 1.1240224313707782e-06, "loss": 0.2082, "step": 34399 }, { "epoch": 2.556670382757339, "grad_norm": 2.2531140219938597, "learning_rate": 1.1236528735683339e-06, "loss": 0.3398, "step": 34400 }, { "epoch": 2.5567447045707916, "grad_norm": 3.4060095872113467, "learning_rate": 1.1232833729113534e-06, "loss": 0.3122, "step": 34401 }, { "epoch": 2.5568190263842436, "grad_norm": 2.564451997713235, "learning_rate": 1.1229139294022152e-06, "loss": 0.3461, "step": 34402 }, { "epoch": 2.556893348197696, "grad_norm": 2.764405324664163, "learning_rate": 1.1225445430432935e-06, "loss": 0.2615, "step": 34403 }, { "epoch": 2.556967670011148, "grad_norm": 2.379592417569522, "learning_rate": 1.1221752138369713e-06, "loss": 0.248, "step": 34404 }, { "epoch": 2.5570419918246006, "grad_norm": 2.2591436791517374, "learning_rate": 1.1218059417856252e-06, "loss": 0.2292, "step": 34405 }, { "epoch": 2.557116313638053, "grad_norm": 1.9708447760540864, "learning_rate": 1.1214367268916292e-06, "loss": 0.2162, "step": 34406 }, { "epoch": 2.557190635451505, "grad_norm": 2.332080866789414, "learning_rate": 1.121067569157367e-06, "loss": 0.2701, "step": 34407 }, { "epoch": 2.557264957264957, "grad_norm": 2.372035121339648, "learning_rate": 1.1206984685852073e-06, "loss": 0.2758, "step": 34408 }, { "epoch": 2.5573392790784095, "grad_norm": 2.7350725851398634, "learning_rate": 1.120329425177532e-06, "loss": 0.3601, "step": 34409 }, { "epoch": 2.557413600891862, "grad_norm": 2.2347349672806835, "learning_rate": 1.1199604389367125e-06, "loss": 0.2971, "step": 34410 }, { "epoch": 2.557487922705314, "grad_norm": 1.7996342424895302, "learning_rate": 1.11959150986513e-06, "loss": 0.2308, "step": 34411 }, { "epoch": 2.557562244518766, "grad_norm": 2.5541765223073574, "learning_rate": 1.119222637965156e-06, "loss": 0.337, "step": 34412 }, { "epoch": 2.5576365663322185, "grad_norm": 2.815576433117435, "learning_rate": 1.1188538232391632e-06, "loss": 0.3183, "step": 34413 }, { "epoch": 2.557710888145671, "grad_norm": 2.1710241441672737, "learning_rate": 1.118485065689532e-06, "loss": 0.2257, "step": 34414 }, { "epoch": 2.557785209959123, "grad_norm": 2.0597755824140456, "learning_rate": 1.11811636531863e-06, "loss": 0.2079, "step": 34415 }, { "epoch": 2.5578595317725754, "grad_norm": 1.896161352511445, "learning_rate": 1.117747722128837e-06, "loss": 0.2379, "step": 34416 }, { "epoch": 2.5579338535860274, "grad_norm": 2.403309308755223, "learning_rate": 1.117379136122523e-06, "loss": 0.3281, "step": 34417 }, { "epoch": 2.55800817539948, "grad_norm": 2.947843457556806, "learning_rate": 1.1170106073020603e-06, "loss": 0.3046, "step": 34418 }, { "epoch": 2.558082497212932, "grad_norm": 2.059307660422239, "learning_rate": 1.1166421356698242e-06, "loss": 0.2291, "step": 34419 }, { "epoch": 2.5581568190263844, "grad_norm": 2.456929801629259, "learning_rate": 1.1162737212281817e-06, "loss": 0.2097, "step": 34420 }, { "epoch": 2.5582311408398364, "grad_norm": 1.9383333614360276, "learning_rate": 1.1159053639795102e-06, "loss": 0.2125, "step": 34421 }, { "epoch": 2.558305462653289, "grad_norm": 2.4351513149594655, "learning_rate": 1.1155370639261797e-06, "loss": 0.2319, "step": 34422 }, { "epoch": 2.558379784466741, "grad_norm": 2.630079604493399, "learning_rate": 1.115168821070558e-06, "loss": 0.2763, "step": 34423 }, { "epoch": 2.5584541062801933, "grad_norm": 1.9388931684234862, "learning_rate": 1.1148006354150199e-06, "loss": 0.2589, "step": 34424 }, { "epoch": 2.5585284280936453, "grad_norm": 2.3143967273459802, "learning_rate": 1.1144325069619332e-06, "loss": 0.2564, "step": 34425 }, { "epoch": 2.558602749907098, "grad_norm": 2.742232304961886, "learning_rate": 1.114064435713671e-06, "loss": 0.291, "step": 34426 }, { "epoch": 2.55867707172055, "grad_norm": 2.994536922951177, "learning_rate": 1.113696421672601e-06, "loss": 0.3424, "step": 34427 }, { "epoch": 2.5587513935340023, "grad_norm": 2.1725829761691586, "learning_rate": 1.1133284648410892e-06, "loss": 0.2421, "step": 34428 }, { "epoch": 2.5588257153474547, "grad_norm": 2.3372197349661605, "learning_rate": 1.1129605652215124e-06, "loss": 0.2382, "step": 34429 }, { "epoch": 2.5589000371609067, "grad_norm": 2.6364316731823387, "learning_rate": 1.1125927228162304e-06, "loss": 0.3083, "step": 34430 }, { "epoch": 2.5589743589743588, "grad_norm": 2.7767917216803033, "learning_rate": 1.1122249376276174e-06, "loss": 0.3123, "step": 34431 }, { "epoch": 2.5590486807878112, "grad_norm": 2.189833872303607, "learning_rate": 1.111857209658037e-06, "loss": 0.3347, "step": 34432 }, { "epoch": 2.5591230026012637, "grad_norm": 1.9183455930355138, "learning_rate": 1.1114895389098612e-06, "loss": 0.2348, "step": 34433 }, { "epoch": 2.5591973244147157, "grad_norm": 2.4007642885224505, "learning_rate": 1.1111219253854532e-06, "loss": 0.3346, "step": 34434 }, { "epoch": 2.5592716462281677, "grad_norm": 2.3124812204060614, "learning_rate": 1.1107543690871802e-06, "loss": 0.2452, "step": 34435 }, { "epoch": 2.55934596804162, "grad_norm": 2.334588274423863, "learning_rate": 1.1103868700174114e-06, "loss": 0.2848, "step": 34436 }, { "epoch": 2.5594202898550726, "grad_norm": 2.302733413275079, "learning_rate": 1.110019428178508e-06, "loss": 0.2111, "step": 34437 }, { "epoch": 2.5594946116685247, "grad_norm": 3.0485897998784637, "learning_rate": 1.109652043572841e-06, "loss": 0.2357, "step": 34438 }, { "epoch": 2.559568933481977, "grad_norm": 2.633814130416832, "learning_rate": 1.109284716202772e-06, "loss": 0.2958, "step": 34439 }, { "epoch": 2.559643255295429, "grad_norm": 2.3866691388513672, "learning_rate": 1.1089174460706676e-06, "loss": 0.2629, "step": 34440 }, { "epoch": 2.5597175771088816, "grad_norm": 2.7508998064493344, "learning_rate": 1.10855023317889e-06, "loss": 0.3039, "step": 34441 }, { "epoch": 2.5597918989223336, "grad_norm": 2.1573233260759563, "learning_rate": 1.1081830775298042e-06, "loss": 0.2653, "step": 34442 }, { "epoch": 2.559866220735786, "grad_norm": 2.384370208433925, "learning_rate": 1.1078159791257747e-06, "loss": 0.3167, "step": 34443 }, { "epoch": 2.559940542549238, "grad_norm": 2.2375544396570786, "learning_rate": 1.1074489379691666e-06, "loss": 0.2269, "step": 34444 }, { "epoch": 2.5600148643626905, "grad_norm": 2.150938617948793, "learning_rate": 1.1070819540623367e-06, "loss": 0.3036, "step": 34445 }, { "epoch": 2.5600891861761426, "grad_norm": 2.384367749207301, "learning_rate": 1.1067150274076555e-06, "loss": 0.2138, "step": 34446 }, { "epoch": 2.560163507989595, "grad_norm": 2.0128113184427767, "learning_rate": 1.1063481580074786e-06, "loss": 0.2833, "step": 34447 }, { "epoch": 2.560237829803047, "grad_norm": 2.160064524117419, "learning_rate": 1.1059813458641734e-06, "loss": 0.2162, "step": 34448 }, { "epoch": 2.5603121516164995, "grad_norm": 2.395335516888113, "learning_rate": 1.1056145909800987e-06, "loss": 0.2994, "step": 34449 }, { "epoch": 2.5603864734299515, "grad_norm": 3.206339610188479, "learning_rate": 1.1052478933576127e-06, "loss": 0.2956, "step": 34450 }, { "epoch": 2.560460795243404, "grad_norm": 2.373144281635782, "learning_rate": 1.1048812529990827e-06, "loss": 0.2877, "step": 34451 }, { "epoch": 2.5605351170568564, "grad_norm": 2.4591250008253547, "learning_rate": 1.1045146699068643e-06, "loss": 0.3425, "step": 34452 }, { "epoch": 2.5606094388703085, "grad_norm": 2.2202378825012152, "learning_rate": 1.1041481440833202e-06, "loss": 0.2173, "step": 34453 }, { "epoch": 2.5606837606837605, "grad_norm": 1.9778395742441766, "learning_rate": 1.1037816755308062e-06, "loss": 0.196, "step": 34454 }, { "epoch": 2.560758082497213, "grad_norm": 2.4105861398230424, "learning_rate": 1.103415264251686e-06, "loss": 0.2862, "step": 34455 }, { "epoch": 2.5608324043106654, "grad_norm": 1.953233054703584, "learning_rate": 1.1030489102483166e-06, "loss": 0.2342, "step": 34456 }, { "epoch": 2.5609067261241174, "grad_norm": 2.240926347344192, "learning_rate": 1.102682613523055e-06, "loss": 0.3473, "step": 34457 }, { "epoch": 2.5609810479375694, "grad_norm": 2.0027153180937063, "learning_rate": 1.1023163740782638e-06, "loss": 0.2199, "step": 34458 }, { "epoch": 2.561055369751022, "grad_norm": 1.9575033107602733, "learning_rate": 1.1019501919162945e-06, "loss": 0.1761, "step": 34459 }, { "epoch": 2.5611296915644743, "grad_norm": 2.370622941670141, "learning_rate": 1.1015840670395116e-06, "loss": 0.3131, "step": 34460 }, { "epoch": 2.5612040133779264, "grad_norm": 2.2308569172510535, "learning_rate": 1.101217999450268e-06, "loss": 0.3143, "step": 34461 }, { "epoch": 2.561278335191379, "grad_norm": 2.1632406728364297, "learning_rate": 1.1008519891509196e-06, "loss": 0.2659, "step": 34462 }, { "epoch": 2.561352657004831, "grad_norm": 2.524818764856661, "learning_rate": 1.1004860361438286e-06, "loss": 0.281, "step": 34463 }, { "epoch": 2.5614269788182833, "grad_norm": 1.7508637544525016, "learning_rate": 1.1001201404313423e-06, "loss": 0.2117, "step": 34464 }, { "epoch": 2.5615013006317353, "grad_norm": 2.46938845041156, "learning_rate": 1.0997543020158218e-06, "loss": 0.2939, "step": 34465 }, { "epoch": 2.5615756224451878, "grad_norm": 2.2987020364162527, "learning_rate": 1.0993885208996223e-06, "loss": 0.2327, "step": 34466 }, { "epoch": 2.56164994425864, "grad_norm": 2.1009831803831553, "learning_rate": 1.0990227970850963e-06, "loss": 0.237, "step": 34467 }, { "epoch": 2.5617242660720922, "grad_norm": 2.8869361120278016, "learning_rate": 1.0986571305746008e-06, "loss": 0.3444, "step": 34468 }, { "epoch": 2.5617985878855443, "grad_norm": 1.5904336291049543, "learning_rate": 1.098291521370487e-06, "loss": 0.1908, "step": 34469 }, { "epoch": 2.5618729096989967, "grad_norm": 2.2809657405503625, "learning_rate": 1.0979259694751122e-06, "loss": 0.2597, "step": 34470 }, { "epoch": 2.5619472315124487, "grad_norm": 2.369473208959746, "learning_rate": 1.0975604748908286e-06, "loss": 0.1854, "step": 34471 }, { "epoch": 2.562021553325901, "grad_norm": 3.013656020080951, "learning_rate": 1.097195037619987e-06, "loss": 0.2913, "step": 34472 }, { "epoch": 2.562095875139353, "grad_norm": 2.5238618684546106, "learning_rate": 1.0968296576649428e-06, "loss": 0.2924, "step": 34473 }, { "epoch": 2.5621701969528057, "grad_norm": 2.659302455319984, "learning_rate": 1.096464335028048e-06, "loss": 0.2651, "step": 34474 }, { "epoch": 2.562244518766258, "grad_norm": 2.1244562329974603, "learning_rate": 1.0960990697116537e-06, "loss": 0.2164, "step": 34475 }, { "epoch": 2.56231884057971, "grad_norm": 2.2101275010718378, "learning_rate": 1.0957338617181101e-06, "loss": 0.2721, "step": 34476 }, { "epoch": 2.562393162393162, "grad_norm": 2.3911919258759067, "learning_rate": 1.0953687110497713e-06, "loss": 0.3158, "step": 34477 }, { "epoch": 2.5624674842066146, "grad_norm": 1.9528085264737827, "learning_rate": 1.0950036177089873e-06, "loss": 0.2391, "step": 34478 }, { "epoch": 2.562541806020067, "grad_norm": 1.9221182319222343, "learning_rate": 1.0946385816981052e-06, "loss": 0.1889, "step": 34479 }, { "epoch": 2.562616127833519, "grad_norm": 2.686706507588382, "learning_rate": 1.09427360301948e-06, "loss": 0.2921, "step": 34480 }, { "epoch": 2.562690449646971, "grad_norm": 2.8395539052993115, "learning_rate": 1.0939086816754574e-06, "loss": 0.305, "step": 34481 }, { "epoch": 2.5627647714604236, "grad_norm": 2.562004613358968, "learning_rate": 1.093543817668391e-06, "loss": 0.3154, "step": 34482 }, { "epoch": 2.562839093273876, "grad_norm": 2.4775968152354633, "learning_rate": 1.0931790110006268e-06, "loss": 0.2684, "step": 34483 }, { "epoch": 2.562913415087328, "grad_norm": 2.1368604622434884, "learning_rate": 1.092814261674512e-06, "loss": 0.2834, "step": 34484 }, { "epoch": 2.5629877369007805, "grad_norm": 2.176813501032711, "learning_rate": 1.0924495696923997e-06, "loss": 0.2552, "step": 34485 }, { "epoch": 2.5630620587142325, "grad_norm": 2.1816150261745744, "learning_rate": 1.0920849350566343e-06, "loss": 0.3205, "step": 34486 }, { "epoch": 2.563136380527685, "grad_norm": 2.023313565706526, "learning_rate": 1.0917203577695634e-06, "loss": 0.2337, "step": 34487 }, { "epoch": 2.563210702341137, "grad_norm": 2.431093373228839, "learning_rate": 1.0913558378335353e-06, "loss": 0.2732, "step": 34488 }, { "epoch": 2.5632850241545895, "grad_norm": 2.793089080120808, "learning_rate": 1.0909913752508939e-06, "loss": 0.3456, "step": 34489 }, { "epoch": 2.5633593459680415, "grad_norm": 2.5727306342471326, "learning_rate": 1.090626970023989e-06, "loss": 0.2807, "step": 34490 }, { "epoch": 2.563433667781494, "grad_norm": 1.9705625763317947, "learning_rate": 1.0902626221551638e-06, "loss": 0.2629, "step": 34491 }, { "epoch": 2.563507989594946, "grad_norm": 2.400682957123789, "learning_rate": 1.0898983316467682e-06, "loss": 0.3407, "step": 34492 }, { "epoch": 2.5635823114083984, "grad_norm": 2.2799827284483363, "learning_rate": 1.0895340985011449e-06, "loss": 0.2419, "step": 34493 }, { "epoch": 2.563656633221851, "grad_norm": 2.147317189101893, "learning_rate": 1.0891699227206353e-06, "loss": 0.2196, "step": 34494 }, { "epoch": 2.563730955035303, "grad_norm": 2.002409292648252, "learning_rate": 1.088805804307591e-06, "loss": 0.2471, "step": 34495 }, { "epoch": 2.563805276848755, "grad_norm": 2.94218222416373, "learning_rate": 1.0884417432643502e-06, "loss": 0.2755, "step": 34496 }, { "epoch": 2.5638795986622074, "grad_norm": 2.290653996029118, "learning_rate": 1.0880777395932617e-06, "loss": 0.3053, "step": 34497 }, { "epoch": 2.56395392047566, "grad_norm": 1.8874400947488503, "learning_rate": 1.0877137932966641e-06, "loss": 0.1964, "step": 34498 }, { "epoch": 2.564028242289112, "grad_norm": 2.849167647876129, "learning_rate": 1.0873499043769042e-06, "loss": 0.3338, "step": 34499 }, { "epoch": 2.564102564102564, "grad_norm": 2.469384609423458, "learning_rate": 1.0869860728363224e-06, "loss": 0.257, "step": 34500 }, { "epoch": 2.5641768859160163, "grad_norm": 1.941885505335759, "learning_rate": 1.0866222986772601e-06, "loss": 0.2676, "step": 34501 }, { "epoch": 2.564251207729469, "grad_norm": 3.08171204951334, "learning_rate": 1.0862585819020633e-06, "loss": 0.36, "step": 34502 }, { "epoch": 2.564325529542921, "grad_norm": 2.9896936189924816, "learning_rate": 1.0858949225130689e-06, "loss": 0.2992, "step": 34503 }, { "epoch": 2.564399851356373, "grad_norm": 2.452922202711075, "learning_rate": 1.0855313205126228e-06, "loss": 0.3038, "step": 34504 }, { "epoch": 2.5644741731698253, "grad_norm": 2.305614116242324, "learning_rate": 1.0851677759030632e-06, "loss": 0.342, "step": 34505 }, { "epoch": 2.5645484949832777, "grad_norm": 2.184420270154338, "learning_rate": 1.0848042886867293e-06, "loss": 0.2463, "step": 34506 }, { "epoch": 2.5646228167967298, "grad_norm": 2.3616765753417064, "learning_rate": 1.084440858865965e-06, "loss": 0.289, "step": 34507 }, { "epoch": 2.5646971386101822, "grad_norm": 2.3284479665294975, "learning_rate": 1.0840774864431069e-06, "loss": 0.2934, "step": 34508 }, { "epoch": 2.5647714604236342, "grad_norm": 1.9568173878673953, "learning_rate": 1.0837141714204967e-06, "loss": 0.2658, "step": 34509 }, { "epoch": 2.5648457822370867, "grad_norm": 2.66006792382546, "learning_rate": 1.0833509138004705e-06, "loss": 0.3363, "step": 34510 }, { "epoch": 2.5649201040505387, "grad_norm": 2.6579237063940075, "learning_rate": 1.0829877135853672e-06, "loss": 0.307, "step": 34511 }, { "epoch": 2.564994425863991, "grad_norm": 2.109166022052299, "learning_rate": 1.0826245707775284e-06, "loss": 0.1948, "step": 34512 }, { "epoch": 2.565068747677443, "grad_norm": 2.2809923450862155, "learning_rate": 1.082261485379288e-06, "loss": 0.2311, "step": 34513 }, { "epoch": 2.5651430694908957, "grad_norm": 2.1972974062451254, "learning_rate": 1.0818984573929881e-06, "loss": 0.2394, "step": 34514 }, { "epoch": 2.5652173913043477, "grad_norm": 2.575417397423866, "learning_rate": 1.0815354868209616e-06, "loss": 0.3285, "step": 34515 }, { "epoch": 2.5652917131178, "grad_norm": 2.3268356205673566, "learning_rate": 1.0811725736655454e-06, "loss": 0.2908, "step": 34516 }, { "epoch": 2.5653660349312526, "grad_norm": 2.046044767867672, "learning_rate": 1.08080971792908e-06, "loss": 0.2595, "step": 34517 }, { "epoch": 2.5654403567447046, "grad_norm": 2.396603932410344, "learning_rate": 1.0804469196138968e-06, "loss": 0.2413, "step": 34518 }, { "epoch": 2.5655146785581566, "grad_norm": 2.600978008477636, "learning_rate": 1.080084178722335e-06, "loss": 0.2684, "step": 34519 }, { "epoch": 2.565589000371609, "grad_norm": 2.528096345371182, "learning_rate": 1.0797214952567293e-06, "loss": 0.3194, "step": 34520 }, { "epoch": 2.5656633221850615, "grad_norm": 1.9340683705395512, "learning_rate": 1.0793588692194124e-06, "loss": 0.2065, "step": 34521 }, { "epoch": 2.5657376439985136, "grad_norm": 2.0405994224304647, "learning_rate": 1.0789963006127213e-06, "loss": 0.2101, "step": 34522 }, { "epoch": 2.5658119658119656, "grad_norm": 2.4778331945117738, "learning_rate": 1.0786337894389865e-06, "loss": 0.2952, "step": 34523 }, { "epoch": 2.565886287625418, "grad_norm": 1.9897983658246825, "learning_rate": 1.078271335700547e-06, "loss": 0.2428, "step": 34524 }, { "epoch": 2.5659606094388705, "grad_norm": 3.0947712287140483, "learning_rate": 1.0779089393997322e-06, "loss": 0.3758, "step": 34525 }, { "epoch": 2.5660349312523225, "grad_norm": 2.6454383891800206, "learning_rate": 1.077546600538878e-06, "loss": 0.324, "step": 34526 }, { "epoch": 2.5661092530657745, "grad_norm": 2.4290081504408856, "learning_rate": 1.0771843191203157e-06, "loss": 0.261, "step": 34527 }, { "epoch": 2.566183574879227, "grad_norm": 1.6193493236092196, "learning_rate": 1.076822095146376e-06, "loss": 0.1716, "step": 34528 }, { "epoch": 2.5662578966926795, "grad_norm": 2.207787791369013, "learning_rate": 1.0764599286193944e-06, "loss": 0.3123, "step": 34529 }, { "epoch": 2.5663322185061315, "grad_norm": 2.578688376726703, "learning_rate": 1.0760978195417004e-06, "loss": 0.2588, "step": 34530 }, { "epoch": 2.566406540319584, "grad_norm": 2.1949489945349416, "learning_rate": 1.0757357679156267e-06, "loss": 0.2522, "step": 34531 }, { "epoch": 2.566480862133036, "grad_norm": 2.2223645355810993, "learning_rate": 1.0753737737435022e-06, "loss": 0.252, "step": 34532 }, { "epoch": 2.5665551839464884, "grad_norm": 2.419114056265797, "learning_rate": 1.0750118370276575e-06, "loss": 0.2782, "step": 34533 }, { "epoch": 2.5666295057599404, "grad_norm": 2.4290221068975915, "learning_rate": 1.074649957770424e-06, "loss": 0.3125, "step": 34534 }, { "epoch": 2.566703827573393, "grad_norm": 2.0964385452982675, "learning_rate": 1.0742881359741297e-06, "loss": 0.2567, "step": 34535 }, { "epoch": 2.566778149386845, "grad_norm": 1.788769539164811, "learning_rate": 1.0739263716411074e-06, "loss": 0.1976, "step": 34536 }, { "epoch": 2.5668524712002974, "grad_norm": 2.1990284627006287, "learning_rate": 1.0735646647736843e-06, "loss": 0.2598, "step": 34537 }, { "epoch": 2.5669267930137494, "grad_norm": 2.117599732675451, "learning_rate": 1.0732030153741857e-06, "loss": 0.3142, "step": 34538 }, { "epoch": 2.567001114827202, "grad_norm": 2.268670076121522, "learning_rate": 1.0728414234449457e-06, "loss": 0.315, "step": 34539 }, { "epoch": 2.5670754366406543, "grad_norm": 2.6181149501400562, "learning_rate": 1.0724798889882871e-06, "loss": 0.3246, "step": 34540 }, { "epoch": 2.5671497584541063, "grad_norm": 2.4808882688847285, "learning_rate": 1.072118412006541e-06, "loss": 0.3161, "step": 34541 }, { "epoch": 2.5672240802675583, "grad_norm": 2.0686171121915877, "learning_rate": 1.0717569925020344e-06, "loss": 0.2219, "step": 34542 }, { "epoch": 2.567298402081011, "grad_norm": 2.030474692171015, "learning_rate": 1.0713956304770922e-06, "loss": 0.2555, "step": 34543 }, { "epoch": 2.5673727238944632, "grad_norm": 2.4281359970022476, "learning_rate": 1.0710343259340428e-06, "loss": 0.265, "step": 34544 }, { "epoch": 2.5674470457079153, "grad_norm": 1.7300348080349057, "learning_rate": 1.0706730788752084e-06, "loss": 0.1942, "step": 34545 }, { "epoch": 2.5675213675213673, "grad_norm": 2.1317253187346945, "learning_rate": 1.0703118893029197e-06, "loss": 0.2735, "step": 34546 }, { "epoch": 2.5675956893348197, "grad_norm": 2.4912772806238865, "learning_rate": 1.069950757219499e-06, "loss": 0.26, "step": 34547 }, { "epoch": 2.567670011148272, "grad_norm": 2.1349646109391656, "learning_rate": 1.069589682627271e-06, "loss": 0.254, "step": 34548 }, { "epoch": 2.567744332961724, "grad_norm": 1.9370190778159342, "learning_rate": 1.0692286655285623e-06, "loss": 0.1829, "step": 34549 }, { "epoch": 2.5678186547751767, "grad_norm": 2.1151742939979075, "learning_rate": 1.0688677059256947e-06, "loss": 0.2302, "step": 34550 }, { "epoch": 2.5678929765886287, "grad_norm": 1.7055217363796644, "learning_rate": 1.0685068038209956e-06, "loss": 0.1492, "step": 34551 }, { "epoch": 2.567967298402081, "grad_norm": 2.158670127490763, "learning_rate": 1.0681459592167853e-06, "loss": 0.2603, "step": 34552 }, { "epoch": 2.568041620215533, "grad_norm": 2.476393854637289, "learning_rate": 1.0677851721153887e-06, "loss": 0.2607, "step": 34553 }, { "epoch": 2.5681159420289856, "grad_norm": 2.8751670722636713, "learning_rate": 1.067424442519127e-06, "loss": 0.3412, "step": 34554 }, { "epoch": 2.5681902638424376, "grad_norm": 2.7998425214288294, "learning_rate": 1.0670637704303222e-06, "loss": 0.237, "step": 34555 }, { "epoch": 2.56826458565589, "grad_norm": 4.381512872536132, "learning_rate": 1.0667031558512986e-06, "loss": 0.3112, "step": 34556 }, { "epoch": 2.568338907469342, "grad_norm": 2.3784600572456314, "learning_rate": 1.0663425987843756e-06, "loss": 0.2705, "step": 34557 }, { "epoch": 2.5684132292827946, "grad_norm": 2.665486370604807, "learning_rate": 1.065982099231876e-06, "loss": 0.2651, "step": 34558 }, { "epoch": 2.5684875510962466, "grad_norm": 2.8750436916104447, "learning_rate": 1.0656216571961209e-06, "loss": 0.3139, "step": 34559 }, { "epoch": 2.568561872909699, "grad_norm": 2.066293431690723, "learning_rate": 1.0652612726794277e-06, "loss": 0.2243, "step": 34560 }, { "epoch": 2.568636194723151, "grad_norm": 1.8658274761378237, "learning_rate": 1.0649009456841209e-06, "loss": 0.2505, "step": 34561 }, { "epoch": 2.5687105165366035, "grad_norm": 2.4742874698333477, "learning_rate": 1.0645406762125166e-06, "loss": 0.3275, "step": 34562 }, { "epoch": 2.568784838350056, "grad_norm": 2.227362052686128, "learning_rate": 1.0641804642669373e-06, "loss": 0.2108, "step": 34563 }, { "epoch": 2.568859160163508, "grad_norm": 3.3379257536056164, "learning_rate": 1.0638203098497013e-06, "loss": 0.3047, "step": 34564 }, { "epoch": 2.56893348197696, "grad_norm": 2.0956996399882293, "learning_rate": 1.0634602129631255e-06, "loss": 0.2629, "step": 34565 }, { "epoch": 2.5690078037904125, "grad_norm": 2.1347001282781375, "learning_rate": 1.0631001736095293e-06, "loss": 0.2231, "step": 34566 }, { "epoch": 2.569082125603865, "grad_norm": 2.9943825072971, "learning_rate": 1.0627401917912294e-06, "loss": 0.2515, "step": 34567 }, { "epoch": 2.569156447417317, "grad_norm": 2.169596071248127, "learning_rate": 1.0623802675105454e-06, "loss": 0.216, "step": 34568 }, { "epoch": 2.569230769230769, "grad_norm": 2.1185963951140048, "learning_rate": 1.0620204007697944e-06, "loss": 0.287, "step": 34569 }, { "epoch": 2.5693050910442214, "grad_norm": 3.4173254484985627, "learning_rate": 1.0616605915712896e-06, "loss": 0.3619, "step": 34570 }, { "epoch": 2.569379412857674, "grad_norm": 2.047564260785699, "learning_rate": 1.0613008399173518e-06, "loss": 0.2591, "step": 34571 }, { "epoch": 2.569453734671126, "grad_norm": 2.614989607026296, "learning_rate": 1.0609411458102937e-06, "loss": 0.3076, "step": 34572 }, { "epoch": 2.5695280564845784, "grad_norm": 2.5636861383859197, "learning_rate": 1.060581509252435e-06, "loss": 0.3683, "step": 34573 }, { "epoch": 2.5696023782980304, "grad_norm": 2.2069949618193814, "learning_rate": 1.060221930246088e-06, "loss": 0.3618, "step": 34574 }, { "epoch": 2.569676700111483, "grad_norm": 2.26698172001478, "learning_rate": 1.0598624087935672e-06, "loss": 0.2878, "step": 34575 }, { "epoch": 2.569751021924935, "grad_norm": 2.493561252828438, "learning_rate": 1.0595029448971928e-06, "loss": 0.2992, "step": 34576 }, { "epoch": 2.5698253437383873, "grad_norm": 2.7289497789647816, "learning_rate": 1.0591435385592697e-06, "loss": 0.3156, "step": 34577 }, { "epoch": 2.5698996655518394, "grad_norm": 2.67331074570371, "learning_rate": 1.0587841897821183e-06, "loss": 0.3092, "step": 34578 }, { "epoch": 2.569973987365292, "grad_norm": 2.1841452008658337, "learning_rate": 1.0584248985680489e-06, "loss": 0.2111, "step": 34579 }, { "epoch": 2.570048309178744, "grad_norm": 1.6139117442346897, "learning_rate": 1.0580656649193777e-06, "loss": 0.2128, "step": 34580 }, { "epoch": 2.5701226309921963, "grad_norm": 2.0031279099343005, "learning_rate": 1.057706488838416e-06, "loss": 0.1868, "step": 34581 }, { "epoch": 2.5701969528056483, "grad_norm": 2.562712045035338, "learning_rate": 1.0573473703274739e-06, "loss": 0.3586, "step": 34582 }, { "epoch": 2.5702712746191008, "grad_norm": 2.0137896633510106, "learning_rate": 1.0569883093888678e-06, "loss": 0.2344, "step": 34583 }, { "epoch": 2.570345596432553, "grad_norm": 2.0545242814977143, "learning_rate": 1.0566293060249044e-06, "loss": 0.259, "step": 34584 }, { "epoch": 2.5704199182460052, "grad_norm": 2.3631395596256763, "learning_rate": 1.0562703602378998e-06, "loss": 0.2923, "step": 34585 }, { "epoch": 2.5704942400594577, "grad_norm": 2.10995753027265, "learning_rate": 1.0559114720301622e-06, "loss": 0.2363, "step": 34586 }, { "epoch": 2.5705685618729097, "grad_norm": 2.0600521825430067, "learning_rate": 1.055552641404003e-06, "loss": 0.2493, "step": 34587 }, { "epoch": 2.5706428836863617, "grad_norm": 3.1334121677713225, "learning_rate": 1.0551938683617314e-06, "loss": 0.3514, "step": 34588 }, { "epoch": 2.570717205499814, "grad_norm": 2.094099512030991, "learning_rate": 1.0548351529056555e-06, "loss": 0.1933, "step": 34589 }, { "epoch": 2.5707915273132667, "grad_norm": 2.3957639377270468, "learning_rate": 1.0544764950380892e-06, "loss": 0.3056, "step": 34590 }, { "epoch": 2.5708658491267187, "grad_norm": 2.162558617305089, "learning_rate": 1.0541178947613383e-06, "loss": 0.2706, "step": 34591 }, { "epoch": 2.5709401709401707, "grad_norm": 1.9090983821132066, "learning_rate": 1.0537593520777112e-06, "loss": 0.1574, "step": 34592 }, { "epoch": 2.571014492753623, "grad_norm": 2.1963031824627315, "learning_rate": 1.053400866989519e-06, "loss": 0.289, "step": 34593 }, { "epoch": 2.5710888145670756, "grad_norm": 3.4689776747937944, "learning_rate": 1.0530424394990657e-06, "loss": 0.3205, "step": 34594 }, { "epoch": 2.5711631363805276, "grad_norm": 2.2174003016436763, "learning_rate": 1.0526840696086626e-06, "loss": 0.1879, "step": 34595 }, { "epoch": 2.57123745819398, "grad_norm": 2.499708119870637, "learning_rate": 1.0523257573206157e-06, "loss": 0.2651, "step": 34596 }, { "epoch": 2.571311780007432, "grad_norm": 2.0635406853175082, "learning_rate": 1.051967502637229e-06, "loss": 0.2514, "step": 34597 }, { "epoch": 2.5713861018208846, "grad_norm": 3.4209531554272834, "learning_rate": 1.0516093055608135e-06, "loss": 0.2589, "step": 34598 }, { "epoch": 2.5714604236343366, "grad_norm": 2.117229677025401, "learning_rate": 1.051251166093672e-06, "loss": 0.1916, "step": 34599 }, { "epoch": 2.571534745447789, "grad_norm": 2.4551086079001356, "learning_rate": 1.0508930842381127e-06, "loss": 0.2475, "step": 34600 }, { "epoch": 2.571609067261241, "grad_norm": 2.618480132394344, "learning_rate": 1.050535059996436e-06, "loss": 0.282, "step": 34601 }, { "epoch": 2.5716833890746935, "grad_norm": 2.4037686522456343, "learning_rate": 1.0501770933709531e-06, "loss": 0.3086, "step": 34602 }, { "epoch": 2.5717577108881455, "grad_norm": 2.4419789764271815, "learning_rate": 1.049819184363965e-06, "loss": 0.2183, "step": 34603 }, { "epoch": 2.571832032701598, "grad_norm": 2.060851548133517, "learning_rate": 1.0494613329777748e-06, "loss": 0.2189, "step": 34604 }, { "epoch": 2.57190635451505, "grad_norm": 1.9623634126458935, "learning_rate": 1.049103539214691e-06, "loss": 0.2088, "step": 34605 }, { "epoch": 2.5719806763285025, "grad_norm": 2.4131419428092, "learning_rate": 1.0487458030770114e-06, "loss": 0.2377, "step": 34606 }, { "epoch": 2.5720549981419545, "grad_norm": 2.8240207929989896, "learning_rate": 1.0483881245670436e-06, "loss": 0.2832, "step": 34607 }, { "epoch": 2.572129319955407, "grad_norm": 2.842952401760323, "learning_rate": 1.0480305036870898e-06, "loss": 0.2732, "step": 34608 }, { "epoch": 2.5722036417688594, "grad_norm": 2.7841291491098126, "learning_rate": 1.0476729404394481e-06, "loss": 0.2908, "step": 34609 }, { "epoch": 2.5722779635823114, "grad_norm": 2.520224189526158, "learning_rate": 1.047315434826428e-06, "loss": 0.2908, "step": 34610 }, { "epoch": 2.5723522853957634, "grad_norm": 2.091219023436876, "learning_rate": 1.0469579868503221e-06, "loss": 0.2574, "step": 34611 }, { "epoch": 2.572426607209216, "grad_norm": 3.777169867828923, "learning_rate": 1.0466005965134385e-06, "loss": 0.3671, "step": 34612 }, { "epoch": 2.5725009290226684, "grad_norm": 5.869816844065283, "learning_rate": 1.0462432638180754e-06, "loss": 0.2824, "step": 34613 }, { "epoch": 2.5725752508361204, "grad_norm": 3.0057845410695494, "learning_rate": 1.0458859887665318e-06, "loss": 0.3798, "step": 34614 }, { "epoch": 2.5726495726495724, "grad_norm": 2.784161331166561, "learning_rate": 1.0455287713611118e-06, "loss": 0.2086, "step": 34615 }, { "epoch": 2.572723894463025, "grad_norm": 1.8890779481687143, "learning_rate": 1.04517161160411e-06, "loss": 0.2662, "step": 34616 }, { "epoch": 2.5727982162764773, "grad_norm": 2.405337822881284, "learning_rate": 1.0448145094978323e-06, "loss": 0.2793, "step": 34617 }, { "epoch": 2.5728725380899293, "grad_norm": 2.6998126257159565, "learning_rate": 1.0444574650445727e-06, "loss": 0.2999, "step": 34618 }, { "epoch": 2.572946859903382, "grad_norm": 2.695277191794752, "learning_rate": 1.0441004782466303e-06, "loss": 0.3484, "step": 34619 }, { "epoch": 2.573021181716834, "grad_norm": 2.592231675426806, "learning_rate": 1.0437435491063053e-06, "loss": 0.3095, "step": 34620 }, { "epoch": 2.5730955035302863, "grad_norm": 2.6357256656232995, "learning_rate": 1.043386677625895e-06, "loss": 0.3489, "step": 34621 }, { "epoch": 2.5731698253437383, "grad_norm": 2.753972933706008, "learning_rate": 1.0430298638076974e-06, "loss": 0.2663, "step": 34622 }, { "epoch": 2.5732441471571907, "grad_norm": 2.597298367449942, "learning_rate": 1.0426731076540064e-06, "loss": 0.2532, "step": 34623 }, { "epoch": 2.5733184689706428, "grad_norm": 3.006882046669523, "learning_rate": 1.0423164091671222e-06, "loss": 0.3109, "step": 34624 }, { "epoch": 2.573392790784095, "grad_norm": 2.1070733819187057, "learning_rate": 1.041959768349341e-06, "loss": 0.2216, "step": 34625 }, { "epoch": 2.5734671125975472, "grad_norm": 2.5691909417868986, "learning_rate": 1.0416031852029562e-06, "loss": 0.2754, "step": 34626 }, { "epoch": 2.5735414344109997, "grad_norm": 2.304302526837006, "learning_rate": 1.0412466597302674e-06, "loss": 0.2087, "step": 34627 }, { "epoch": 2.5736157562244517, "grad_norm": 1.9638188812711093, "learning_rate": 1.0408901919335645e-06, "loss": 0.2129, "step": 34628 }, { "epoch": 2.573690078037904, "grad_norm": 1.9063390227420887, "learning_rate": 1.0405337818151484e-06, "loss": 0.2758, "step": 34629 }, { "epoch": 2.573764399851356, "grad_norm": 2.336133291705492, "learning_rate": 1.0401774293773114e-06, "loss": 0.2016, "step": 34630 }, { "epoch": 2.5738387216648086, "grad_norm": 2.40435810662469, "learning_rate": 1.0398211346223452e-06, "loss": 0.2756, "step": 34631 }, { "epoch": 2.573913043478261, "grad_norm": 2.3620852492819306, "learning_rate": 1.039464897552549e-06, "loss": 0.1938, "step": 34632 }, { "epoch": 2.573987365291713, "grad_norm": 2.5947336246434602, "learning_rate": 1.0391087181702098e-06, "loss": 0.2369, "step": 34633 }, { "epoch": 2.574061687105165, "grad_norm": 2.7173782216654807, "learning_rate": 1.0387525964776246e-06, "loss": 0.2591, "step": 34634 }, { "epoch": 2.5741360089186176, "grad_norm": 2.313495108136634, "learning_rate": 1.0383965324770862e-06, "loss": 0.2795, "step": 34635 }, { "epoch": 2.57421033073207, "grad_norm": 2.962843332713174, "learning_rate": 1.0380405261708826e-06, "loss": 0.2946, "step": 34636 }, { "epoch": 2.574284652545522, "grad_norm": 2.6121343783572453, "learning_rate": 1.037684577561312e-06, "loss": 0.2676, "step": 34637 }, { "epoch": 2.574358974358974, "grad_norm": 2.676014860463238, "learning_rate": 1.0373286866506616e-06, "loss": 0.2657, "step": 34638 }, { "epoch": 2.5744332961724266, "grad_norm": 2.047051652504918, "learning_rate": 1.0369728534412248e-06, "loss": 0.1926, "step": 34639 }, { "epoch": 2.574507617985879, "grad_norm": 2.3455964001496548, "learning_rate": 1.0366170779352925e-06, "loss": 0.334, "step": 34640 }, { "epoch": 2.574581939799331, "grad_norm": 2.656579563060554, "learning_rate": 1.0362613601351523e-06, "loss": 0.3313, "step": 34641 }, { "epoch": 2.5746562616127835, "grad_norm": 2.440727983099602, "learning_rate": 1.0359057000430983e-06, "loss": 0.2238, "step": 34642 }, { "epoch": 2.5747305834262355, "grad_norm": 2.725083864654755, "learning_rate": 1.0355500976614163e-06, "loss": 0.3393, "step": 34643 }, { "epoch": 2.574804905239688, "grad_norm": 2.699058386288412, "learning_rate": 1.0351945529924024e-06, "loss": 0.3204, "step": 34644 }, { "epoch": 2.57487922705314, "grad_norm": 2.468584833038069, "learning_rate": 1.0348390660383355e-06, "loss": 0.2857, "step": 34645 }, { "epoch": 2.5749535488665924, "grad_norm": 2.550208928577588, "learning_rate": 1.034483636801512e-06, "loss": 0.256, "step": 34646 }, { "epoch": 2.5750278706800445, "grad_norm": 2.26513232240091, "learning_rate": 1.0341282652842178e-06, "loss": 0.2625, "step": 34647 }, { "epoch": 2.575102192493497, "grad_norm": 2.146984497647347, "learning_rate": 1.0337729514887384e-06, "loss": 0.2802, "step": 34648 }, { "epoch": 2.575176514306949, "grad_norm": 1.9634257516502485, "learning_rate": 1.0334176954173658e-06, "loss": 0.2064, "step": 34649 }, { "epoch": 2.5752508361204014, "grad_norm": 2.152881961044937, "learning_rate": 1.0330624970723835e-06, "loss": 0.2492, "step": 34650 }, { "epoch": 2.575325157933854, "grad_norm": 2.8845353972957803, "learning_rate": 1.0327073564560807e-06, "loss": 0.3682, "step": 34651 }, { "epoch": 2.575399479747306, "grad_norm": 2.058461602979481, "learning_rate": 1.0323522735707425e-06, "loss": 0.2597, "step": 34652 }, { "epoch": 2.575473801560758, "grad_norm": 3.016496666996688, "learning_rate": 1.0319972484186537e-06, "loss": 0.4334, "step": 34653 }, { "epoch": 2.5755481233742104, "grad_norm": 2.2574949681732743, "learning_rate": 1.031642281002102e-06, "loss": 0.2229, "step": 34654 }, { "epoch": 2.575622445187663, "grad_norm": 2.492787010800361, "learning_rate": 1.0312873713233728e-06, "loss": 0.337, "step": 34655 }, { "epoch": 2.575696767001115, "grad_norm": 2.018867736555913, "learning_rate": 1.0309325193847508e-06, "loss": 0.245, "step": 34656 }, { "epoch": 2.575771088814567, "grad_norm": 2.1140260560741493, "learning_rate": 1.0305777251885196e-06, "loss": 0.284, "step": 34657 }, { "epoch": 2.5758454106280193, "grad_norm": 2.383510428734463, "learning_rate": 1.0302229887369609e-06, "loss": 0.306, "step": 34658 }, { "epoch": 2.5759197324414718, "grad_norm": 1.9349376520309918, "learning_rate": 1.0298683100323635e-06, "loss": 0.232, "step": 34659 }, { "epoch": 2.575994054254924, "grad_norm": 2.71041750438445, "learning_rate": 1.0295136890770064e-06, "loss": 0.2917, "step": 34660 }, { "epoch": 2.576068376068376, "grad_norm": 2.285028496399465, "learning_rate": 1.029159125873177e-06, "loss": 0.2628, "step": 34661 }, { "epoch": 2.5761426978818283, "grad_norm": 3.24910523852556, "learning_rate": 1.028804620423156e-06, "loss": 0.3223, "step": 34662 }, { "epoch": 2.5762170196952807, "grad_norm": 2.0380808435480784, "learning_rate": 1.0284501727292228e-06, "loss": 0.1858, "step": 34663 }, { "epoch": 2.5762913415087327, "grad_norm": 2.2354319267199267, "learning_rate": 1.0280957827936645e-06, "loss": 0.3243, "step": 34664 }, { "epoch": 2.576365663322185, "grad_norm": 1.9192181130851218, "learning_rate": 1.0277414506187567e-06, "loss": 0.1953, "step": 34665 }, { "epoch": 2.576439985135637, "grad_norm": 2.633384798588721, "learning_rate": 1.0273871762067889e-06, "loss": 0.318, "step": 34666 }, { "epoch": 2.5765143069490897, "grad_norm": 2.8044277015228776, "learning_rate": 1.0270329595600327e-06, "loss": 0.3056, "step": 34667 }, { "epoch": 2.5765886287625417, "grad_norm": 2.046180235533025, "learning_rate": 1.0266788006807748e-06, "loss": 0.2469, "step": 34668 }, { "epoch": 2.576662950575994, "grad_norm": 2.6453527654511637, "learning_rate": 1.0263246995712938e-06, "loss": 0.3229, "step": 34669 }, { "epoch": 2.576737272389446, "grad_norm": 2.2692222424328787, "learning_rate": 1.0259706562338657e-06, "loss": 0.2708, "step": 34670 }, { "epoch": 2.5768115942028986, "grad_norm": 3.2538380434581264, "learning_rate": 1.025616670670776e-06, "loss": 0.296, "step": 34671 }, { "epoch": 2.5768859160163506, "grad_norm": 2.4291869260168917, "learning_rate": 1.0252627428842998e-06, "loss": 0.2645, "step": 34672 }, { "epoch": 2.576960237829803, "grad_norm": 1.9070466268101967, "learning_rate": 1.0249088728767142e-06, "loss": 0.2695, "step": 34673 }, { "epoch": 2.5770345596432556, "grad_norm": 2.0871009723920095, "learning_rate": 1.0245550606503019e-06, "loss": 0.2812, "step": 34674 }, { "epoch": 2.5771088814567076, "grad_norm": 2.1514982702456784, "learning_rate": 1.0242013062073365e-06, "loss": 0.2642, "step": 34675 }, { "epoch": 2.5771832032701596, "grad_norm": 2.513134988756921, "learning_rate": 1.0238476095500993e-06, "loss": 0.293, "step": 34676 }, { "epoch": 2.577257525083612, "grad_norm": 2.4042956919517144, "learning_rate": 1.0234939706808656e-06, "loss": 0.2438, "step": 34677 }, { "epoch": 2.5773318468970645, "grad_norm": 2.533305943724391, "learning_rate": 1.023140389601911e-06, "loss": 0.3241, "step": 34678 }, { "epoch": 2.5774061687105165, "grad_norm": 2.077464971823901, "learning_rate": 1.0227868663155138e-06, "loss": 0.2379, "step": 34679 }, { "epoch": 2.5774804905239685, "grad_norm": 2.084234055502447, "learning_rate": 1.0224334008239466e-06, "loss": 0.2383, "step": 34680 }, { "epoch": 2.577554812337421, "grad_norm": 2.704001220540276, "learning_rate": 1.02207999312949e-06, "loss": 0.2551, "step": 34681 }, { "epoch": 2.5776291341508735, "grad_norm": 2.262380297833306, "learning_rate": 1.021726643234413e-06, "loss": 0.243, "step": 34682 }, { "epoch": 2.5777034559643255, "grad_norm": 1.7820858203831418, "learning_rate": 1.0213733511409974e-06, "loss": 0.2156, "step": 34683 }, { "epoch": 2.5777777777777775, "grad_norm": 2.5267003083681843, "learning_rate": 1.0210201168515144e-06, "loss": 0.2565, "step": 34684 }, { "epoch": 2.57785209959123, "grad_norm": 2.4454804567752517, "learning_rate": 1.0206669403682357e-06, "loss": 0.3247, "step": 34685 }, { "epoch": 2.5779264214046824, "grad_norm": 2.4884427731846683, "learning_rate": 1.0203138216934382e-06, "loss": 0.304, "step": 34686 }, { "epoch": 2.5780007432181344, "grad_norm": 2.2114440840455516, "learning_rate": 1.0199607608293938e-06, "loss": 0.2108, "step": 34687 }, { "epoch": 2.578075065031587, "grad_norm": 2.4455795798537605, "learning_rate": 1.019607757778378e-06, "loss": 0.2879, "step": 34688 }, { "epoch": 2.578149386845039, "grad_norm": 2.0637733592936494, "learning_rate": 1.0192548125426605e-06, "loss": 0.2194, "step": 34689 }, { "epoch": 2.5782237086584914, "grad_norm": 3.3486428143655105, "learning_rate": 1.0189019251245157e-06, "loss": 0.3184, "step": 34690 }, { "epoch": 2.5782980304719434, "grad_norm": 2.0256111672652723, "learning_rate": 1.0185490955262135e-06, "loss": 0.2188, "step": 34691 }, { "epoch": 2.578372352285396, "grad_norm": 2.5540189162209423, "learning_rate": 1.0181963237500248e-06, "loss": 0.2564, "step": 34692 }, { "epoch": 2.578446674098848, "grad_norm": 2.192733283014628, "learning_rate": 1.0178436097982237e-06, "loss": 0.2852, "step": 34693 }, { "epoch": 2.5785209959123003, "grad_norm": 2.5186512189654366, "learning_rate": 1.0174909536730792e-06, "loss": 0.3655, "step": 34694 }, { "epoch": 2.5785953177257523, "grad_norm": 1.877573919594764, "learning_rate": 1.017138355376861e-06, "loss": 0.1881, "step": 34695 }, { "epoch": 2.578669639539205, "grad_norm": 2.307475922435462, "learning_rate": 1.0167858149118404e-06, "loss": 0.3276, "step": 34696 }, { "epoch": 2.5787439613526573, "grad_norm": 2.398546828769579, "learning_rate": 1.0164333322802854e-06, "loss": 0.2663, "step": 34697 }, { "epoch": 2.5788182831661093, "grad_norm": 1.9542138828784366, "learning_rate": 1.0160809074844692e-06, "loss": 0.23, "step": 34698 }, { "epoch": 2.5788926049795613, "grad_norm": 2.8434544311345196, "learning_rate": 1.015728540526657e-06, "loss": 0.3717, "step": 34699 }, { "epoch": 2.5789669267930138, "grad_norm": 2.4188222085900013, "learning_rate": 1.0153762314091187e-06, "loss": 0.3165, "step": 34700 }, { "epoch": 2.579041248606466, "grad_norm": 1.6712451552877678, "learning_rate": 1.015023980134121e-06, "loss": 0.1818, "step": 34701 }, { "epoch": 2.5791155704199182, "grad_norm": 3.311075246863549, "learning_rate": 1.0146717867039324e-06, "loss": 0.333, "step": 34702 }, { "epoch": 2.5791898922333703, "grad_norm": 2.1584749841628095, "learning_rate": 1.014319651120822e-06, "loss": 0.2928, "step": 34703 }, { "epoch": 2.5792642140468227, "grad_norm": 2.2162258920779423, "learning_rate": 1.0139675733870525e-06, "loss": 0.2551, "step": 34704 }, { "epoch": 2.579338535860275, "grad_norm": 2.106315012079047, "learning_rate": 1.0136155535048963e-06, "loss": 0.2709, "step": 34705 }, { "epoch": 2.579412857673727, "grad_norm": 1.85320467587166, "learning_rate": 1.0132635914766177e-06, "loss": 0.2017, "step": 34706 }, { "epoch": 2.5794871794871796, "grad_norm": 2.6288425196132534, "learning_rate": 1.0129116873044787e-06, "loss": 0.2664, "step": 34707 }, { "epoch": 2.5795615013006317, "grad_norm": 2.5316900314778255, "learning_rate": 1.01255984099075e-06, "loss": 0.3162, "step": 34708 }, { "epoch": 2.579635823114084, "grad_norm": 2.218686636092861, "learning_rate": 1.0122080525376931e-06, "loss": 0.2892, "step": 34709 }, { "epoch": 2.579710144927536, "grad_norm": 2.453193231666554, "learning_rate": 1.0118563219475762e-06, "loss": 0.2537, "step": 34710 }, { "epoch": 2.5797844667409886, "grad_norm": 3.041962629710973, "learning_rate": 1.011504649222662e-06, "loss": 0.3341, "step": 34711 }, { "epoch": 2.5798587885544406, "grad_norm": 2.6275606590424405, "learning_rate": 1.0111530343652142e-06, "loss": 0.325, "step": 34712 }, { "epoch": 2.579933110367893, "grad_norm": 2.2312399060177897, "learning_rate": 1.0108014773774966e-06, "loss": 0.2456, "step": 34713 }, { "epoch": 2.580007432181345, "grad_norm": 2.2097916981517227, "learning_rate": 1.0104499782617716e-06, "loss": 0.2831, "step": 34714 }, { "epoch": 2.5800817539947976, "grad_norm": 2.249154588285475, "learning_rate": 1.0100985370203042e-06, "loss": 0.2606, "step": 34715 }, { "epoch": 2.5801560758082496, "grad_norm": 1.97952432758273, "learning_rate": 1.0097471536553571e-06, "loss": 0.2573, "step": 34716 }, { "epoch": 2.580230397621702, "grad_norm": 2.522456923253111, "learning_rate": 1.0093958281691884e-06, "loss": 0.29, "step": 34717 }, { "epoch": 2.580304719435154, "grad_norm": 3.377027792354337, "learning_rate": 1.0090445605640654e-06, "loss": 0.3542, "step": 34718 }, { "epoch": 2.5803790412486065, "grad_norm": 2.157052441245383, "learning_rate": 1.0086933508422447e-06, "loss": 0.274, "step": 34719 }, { "epoch": 2.580453363062059, "grad_norm": 2.5965027020005382, "learning_rate": 1.0083421990059915e-06, "loss": 0.2627, "step": 34720 }, { "epoch": 2.580527684875511, "grad_norm": 1.910303856198097, "learning_rate": 1.0079911050575652e-06, "loss": 0.2487, "step": 34721 }, { "epoch": 2.580602006688963, "grad_norm": 2.225596214381377, "learning_rate": 1.0076400689992238e-06, "loss": 0.2414, "step": 34722 }, { "epoch": 2.5806763285024155, "grad_norm": 2.517724396189715, "learning_rate": 1.0072890908332321e-06, "loss": 0.2388, "step": 34723 }, { "epoch": 2.580750650315868, "grad_norm": 2.532126495581022, "learning_rate": 1.0069381705618431e-06, "loss": 0.2424, "step": 34724 }, { "epoch": 2.58082497212932, "grad_norm": 2.3743622720493245, "learning_rate": 1.0065873081873223e-06, "loss": 0.2403, "step": 34725 }, { "epoch": 2.580899293942772, "grad_norm": 2.229079338047244, "learning_rate": 1.0062365037119227e-06, "loss": 0.2527, "step": 34726 }, { "epoch": 2.5809736157562244, "grad_norm": 3.28315978756059, "learning_rate": 1.0058857571379076e-06, "loss": 0.3987, "step": 34727 }, { "epoch": 2.581047937569677, "grad_norm": 2.988016724577357, "learning_rate": 1.0055350684675348e-06, "loss": 0.2844, "step": 34728 }, { "epoch": 2.581122259383129, "grad_norm": 2.6471257289417727, "learning_rate": 1.0051844377030574e-06, "loss": 0.3177, "step": 34729 }, { "epoch": 2.5811965811965814, "grad_norm": 3.222285881793054, "learning_rate": 1.004833864846738e-06, "loss": 0.27, "step": 34730 }, { "epoch": 2.5812709030100334, "grad_norm": 1.9654119408648083, "learning_rate": 1.00448334990083e-06, "loss": 0.2444, "step": 34731 }, { "epoch": 2.581345224823486, "grad_norm": 1.9153047707438586, "learning_rate": 1.0041328928675931e-06, "loss": 0.2699, "step": 34732 }, { "epoch": 2.581419546636938, "grad_norm": 2.081230845907072, "learning_rate": 1.0037824937492813e-06, "loss": 0.2624, "step": 34733 }, { "epoch": 2.5814938684503903, "grad_norm": 2.7416460617034244, "learning_rate": 1.0034321525481516e-06, "loss": 0.3021, "step": 34734 }, { "epoch": 2.5815681902638423, "grad_norm": 2.973793414896003, "learning_rate": 1.003081869266459e-06, "loss": 0.2849, "step": 34735 }, { "epoch": 2.581642512077295, "grad_norm": 2.542750791586385, "learning_rate": 1.002731643906456e-06, "loss": 0.2959, "step": 34736 }, { "epoch": 2.581716833890747, "grad_norm": 4.033716898022228, "learning_rate": 1.0023814764704021e-06, "loss": 0.3934, "step": 34737 }, { "epoch": 2.5817911557041993, "grad_norm": 2.076821317520988, "learning_rate": 1.0020313669605497e-06, "loss": 0.2184, "step": 34738 }, { "epoch": 2.5818654775176513, "grad_norm": 3.1601629434443157, "learning_rate": 1.0016813153791505e-06, "loss": 0.2415, "step": 34739 }, { "epoch": 2.5819397993311037, "grad_norm": 2.5782424280146925, "learning_rate": 1.0013313217284615e-06, "loss": 0.2628, "step": 34740 }, { "epoch": 2.5820141211445558, "grad_norm": 2.6202653668935816, "learning_rate": 1.0009813860107332e-06, "loss": 0.3513, "step": 34741 }, { "epoch": 2.582088442958008, "grad_norm": 3.256746132147741, "learning_rate": 1.0006315082282213e-06, "loss": 0.3516, "step": 34742 }, { "epoch": 2.5821627647714607, "grad_norm": 1.8392746415689099, "learning_rate": 1.0002816883831778e-06, "loss": 0.2373, "step": 34743 }, { "epoch": 2.5822370865849127, "grad_norm": 2.7063614404695158, "learning_rate": 9.999319264778506e-07, "loss": 0.2644, "step": 34744 }, { "epoch": 2.5823114083983647, "grad_norm": 2.007648233523334, "learning_rate": 9.99582222514499e-07, "loss": 0.2265, "step": 34745 }, { "epoch": 2.582385730211817, "grad_norm": 2.3656845337088397, "learning_rate": 9.99232576495366e-07, "loss": 0.1952, "step": 34746 }, { "epoch": 2.5824600520252696, "grad_norm": 3.1057865244436926, "learning_rate": 9.988829884227092e-07, "loss": 0.3702, "step": 34747 }, { "epoch": 2.5825343738387216, "grad_norm": 3.1159397396337627, "learning_rate": 9.985334582987738e-07, "loss": 0.2988, "step": 34748 }, { "epoch": 2.5826086956521737, "grad_norm": 1.819985703653067, "learning_rate": 9.981839861258157e-07, "loss": 0.2269, "step": 34749 }, { "epoch": 2.582683017465626, "grad_norm": 2.644408143025516, "learning_rate": 9.978345719060823e-07, "loss": 0.3249, "step": 34750 }, { "epoch": 2.5827573392790786, "grad_norm": 1.9254866245573798, "learning_rate": 9.974852156418202e-07, "loss": 0.2603, "step": 34751 }, { "epoch": 2.5828316610925306, "grad_norm": 1.9611943017407367, "learning_rate": 9.971359173352835e-07, "loss": 0.2093, "step": 34752 }, { "epoch": 2.582905982905983, "grad_norm": 2.658878504139311, "learning_rate": 9.967866769887158e-07, "loss": 0.3018, "step": 34753 }, { "epoch": 2.582980304719435, "grad_norm": 2.8682499265655195, "learning_rate": 9.964374946043709e-07, "loss": 0.3126, "step": 34754 }, { "epoch": 2.5830546265328875, "grad_norm": 1.7205980415367874, "learning_rate": 9.960883701844948e-07, "loss": 0.21, "step": 34755 }, { "epoch": 2.5831289483463395, "grad_norm": 2.9097130472397894, "learning_rate": 9.957393037313323e-07, "loss": 0.3314, "step": 34756 }, { "epoch": 2.583203270159792, "grad_norm": 2.0359178284400707, "learning_rate": 9.953902952471361e-07, "loss": 0.2104, "step": 34757 }, { "epoch": 2.583277591973244, "grad_norm": 2.251215726452006, "learning_rate": 9.950413447341468e-07, "loss": 0.2814, "step": 34758 }, { "epoch": 2.5833519137866965, "grad_norm": 2.0038121285200337, "learning_rate": 9.946924521946155e-07, "loss": 0.264, "step": 34759 }, { "epoch": 2.5834262356001485, "grad_norm": 1.9270162765230066, "learning_rate": 9.943436176307874e-07, "loss": 0.26, "step": 34760 }, { "epoch": 2.583500557413601, "grad_norm": 2.854048501621331, "learning_rate": 9.939948410449063e-07, "loss": 0.2772, "step": 34761 }, { "epoch": 2.583574879227053, "grad_norm": 2.474082525013077, "learning_rate": 9.936461224392202e-07, "loss": 0.2864, "step": 34762 }, { "epoch": 2.5836492010405054, "grad_norm": 2.4531933738069633, "learning_rate": 9.93297461815972e-07, "loss": 0.2607, "step": 34763 }, { "epoch": 2.5837235228539575, "grad_norm": 2.8291895530824305, "learning_rate": 9.929488591774085e-07, "loss": 0.315, "step": 34764 }, { "epoch": 2.58379784466741, "grad_norm": 2.2019182135990616, "learning_rate": 9.926003145257734e-07, "loss": 0.3152, "step": 34765 }, { "epoch": 2.5838721664808624, "grad_norm": 2.1037883550522696, "learning_rate": 9.922518278633075e-07, "loss": 0.245, "step": 34766 }, { "epoch": 2.5839464882943144, "grad_norm": 2.320085049838799, "learning_rate": 9.919033991922599e-07, "loss": 0.2557, "step": 34767 }, { "epoch": 2.5840208101077664, "grad_norm": 2.8967880795887253, "learning_rate": 9.915550285148701e-07, "loss": 0.3115, "step": 34768 }, { "epoch": 2.584095131921219, "grad_norm": 2.589491996586837, "learning_rate": 9.912067158333816e-07, "loss": 0.2866, "step": 34769 }, { "epoch": 2.5841694537346713, "grad_norm": 2.254898107199674, "learning_rate": 9.908584611500361e-07, "loss": 0.2746, "step": 34770 }, { "epoch": 2.5842437755481233, "grad_norm": 2.232398121678808, "learning_rate": 9.905102644670773e-07, "loss": 0.2761, "step": 34771 }, { "epoch": 2.5843180973615754, "grad_norm": 2.5431350423222603, "learning_rate": 9.901621257867456e-07, "loss": 0.3208, "step": 34772 }, { "epoch": 2.584392419175028, "grad_norm": 2.4220359966009584, "learning_rate": 9.898140451112813e-07, "loss": 0.1968, "step": 34773 }, { "epoch": 2.5844667409884803, "grad_norm": 1.9127170572186545, "learning_rate": 9.894660224429286e-07, "loss": 0.1706, "step": 34774 }, { "epoch": 2.5845410628019323, "grad_norm": 2.893027471730655, "learning_rate": 9.891180577839232e-07, "loss": 0.3477, "step": 34775 }, { "epoch": 2.5846153846153848, "grad_norm": 2.8241374451200976, "learning_rate": 9.88770151136511e-07, "loss": 0.3511, "step": 34776 }, { "epoch": 2.5846897064288368, "grad_norm": 2.7233741749926783, "learning_rate": 9.884223025029294e-07, "loss": 0.2847, "step": 34777 }, { "epoch": 2.5847640282422892, "grad_norm": 2.6115625316859625, "learning_rate": 9.88074511885415e-07, "loss": 0.3433, "step": 34778 }, { "epoch": 2.5848383500557413, "grad_norm": 2.2906976128932834, "learning_rate": 9.877267792862132e-07, "loss": 0.2946, "step": 34779 }, { "epoch": 2.5849126718691937, "grad_norm": 2.9397956945733696, "learning_rate": 9.873791047075542e-07, "loss": 0.3011, "step": 34780 }, { "epoch": 2.5849869936826457, "grad_norm": 2.598912725069278, "learning_rate": 9.87031488151684e-07, "loss": 0.2514, "step": 34781 }, { "epoch": 2.585061315496098, "grad_norm": 2.1214584524502356, "learning_rate": 9.866839296208375e-07, "loss": 0.2758, "step": 34782 }, { "epoch": 2.58513563730955, "grad_norm": 2.440193413672343, "learning_rate": 9.863364291172496e-07, "loss": 0.2463, "step": 34783 }, { "epoch": 2.5852099591230027, "grad_norm": 2.720067464709714, "learning_rate": 9.859889866431615e-07, "loss": 0.3092, "step": 34784 }, { "epoch": 2.585284280936455, "grad_norm": 2.151884002687547, "learning_rate": 9.856416022008074e-07, "loss": 0.2756, "step": 34785 }, { "epoch": 2.585358602749907, "grad_norm": 1.945902814682722, "learning_rate": 9.852942757924266e-07, "loss": 0.2257, "step": 34786 }, { "epoch": 2.585432924563359, "grad_norm": 2.3502331577975935, "learning_rate": 9.849470074202537e-07, "loss": 0.236, "step": 34787 }, { "epoch": 2.5855072463768116, "grad_norm": 2.116957129871398, "learning_rate": 9.845997970865228e-07, "loss": 0.2284, "step": 34788 }, { "epoch": 2.585581568190264, "grad_norm": 2.393358937494803, "learning_rate": 9.842526447934719e-07, "loss": 0.2512, "step": 34789 }, { "epoch": 2.585655890003716, "grad_norm": 3.4686981132223202, "learning_rate": 9.839055505433325e-07, "loss": 0.3262, "step": 34790 }, { "epoch": 2.585730211817168, "grad_norm": 2.9037902109312297, "learning_rate": 9.835585143383463e-07, "loss": 0.2483, "step": 34791 }, { "epoch": 2.5858045336306206, "grad_norm": 2.0088762366009014, "learning_rate": 9.83211536180738e-07, "loss": 0.2164, "step": 34792 }, { "epoch": 2.585878855444073, "grad_norm": 2.367602336778651, "learning_rate": 9.828646160727473e-07, "loss": 0.3029, "step": 34793 }, { "epoch": 2.585953177257525, "grad_norm": 2.1003064581549604, "learning_rate": 9.825177540166075e-07, "loss": 0.2306, "step": 34794 }, { "epoch": 2.586027499070977, "grad_norm": 2.272329417218162, "learning_rate": 9.821709500145482e-07, "loss": 0.2177, "step": 34795 }, { "epoch": 2.5861018208844295, "grad_norm": 2.1525042005815314, "learning_rate": 9.818242040688053e-07, "loss": 0.2642, "step": 34796 }, { "epoch": 2.586176142697882, "grad_norm": 2.4766116010365806, "learning_rate": 9.814775161816115e-07, "loss": 0.3481, "step": 34797 }, { "epoch": 2.586250464511334, "grad_norm": 2.0480077901584886, "learning_rate": 9.811308863551948e-07, "loss": 0.1845, "step": 34798 }, { "epoch": 2.5863247863247865, "grad_norm": 1.8010290799963855, "learning_rate": 9.807843145917917e-07, "loss": 0.248, "step": 34799 }, { "epoch": 2.5863991081382385, "grad_norm": 2.1847737153344005, "learning_rate": 9.8043780089363e-07, "loss": 0.2407, "step": 34800 }, { "epoch": 2.586473429951691, "grad_norm": 2.2516362607182603, "learning_rate": 9.800913452629434e-07, "loss": 0.2553, "step": 34801 }, { "epoch": 2.586547751765143, "grad_norm": 2.8189919021446137, "learning_rate": 9.797449477019593e-07, "loss": 0.2545, "step": 34802 }, { "epoch": 2.5866220735785954, "grad_norm": 2.760092563425597, "learning_rate": 9.793986082129103e-07, "loss": 0.2516, "step": 34803 }, { "epoch": 2.5866963953920474, "grad_norm": 2.664644134745788, "learning_rate": 9.790523267980257e-07, "loss": 0.3257, "step": 34804 }, { "epoch": 2.5867707172055, "grad_norm": 2.765499987779998, "learning_rate": 9.787061034595313e-07, "loss": 0.2364, "step": 34805 }, { "epoch": 2.586845039018952, "grad_norm": 2.117188652608792, "learning_rate": 9.78359938199661e-07, "loss": 0.2402, "step": 34806 }, { "epoch": 2.5869193608324044, "grad_norm": 3.0097730341144615, "learning_rate": 9.7801383102064e-07, "loss": 0.2995, "step": 34807 }, { "epoch": 2.586993682645857, "grad_norm": 1.9662093883933813, "learning_rate": 9.776677819246994e-07, "loss": 0.2195, "step": 34808 }, { "epoch": 2.587068004459309, "grad_norm": 2.370880133318675, "learning_rate": 9.773217909140642e-07, "loss": 0.2616, "step": 34809 }, { "epoch": 2.587142326272761, "grad_norm": 1.8193286385620835, "learning_rate": 9.769758579909628e-07, "loss": 0.1579, "step": 34810 }, { "epoch": 2.5872166480862133, "grad_norm": 2.0846286415053057, "learning_rate": 9.766299831576243e-07, "loss": 0.3195, "step": 34811 }, { "epoch": 2.587290969899666, "grad_norm": 3.9773646278966335, "learning_rate": 9.762841664162714e-07, "loss": 0.302, "step": 34812 }, { "epoch": 2.587365291713118, "grad_norm": 2.479092929160717, "learning_rate": 9.75938407769137e-07, "loss": 0.2945, "step": 34813 }, { "epoch": 2.58743961352657, "grad_norm": 2.032193555324283, "learning_rate": 9.75592707218439e-07, "loss": 0.2688, "step": 34814 }, { "epoch": 2.5875139353400223, "grad_norm": 2.2771824307155253, "learning_rate": 9.752470647664092e-07, "loss": 0.2475, "step": 34815 }, { "epoch": 2.5875882571534747, "grad_norm": 2.6434151265784838, "learning_rate": 9.7490148041527e-07, "loss": 0.2608, "step": 34816 }, { "epoch": 2.5876625789669268, "grad_norm": 2.3395395971163024, "learning_rate": 9.745559541672444e-07, "loss": 0.255, "step": 34817 }, { "epoch": 2.5877369007803788, "grad_norm": 2.5248797891403902, "learning_rate": 9.742104860245627e-07, "loss": 0.2869, "step": 34818 }, { "epoch": 2.5878112225938312, "grad_norm": 2.2321946321803163, "learning_rate": 9.738650759894441e-07, "loss": 0.2603, "step": 34819 }, { "epoch": 2.5878855444072837, "grad_norm": 2.7314440697186795, "learning_rate": 9.735197240641126e-07, "loss": 0.312, "step": 34820 }, { "epoch": 2.5879598662207357, "grad_norm": 1.8429445110937788, "learning_rate": 9.73174430250795e-07, "loss": 0.2278, "step": 34821 }, { "epoch": 2.588034188034188, "grad_norm": 2.4252484532663914, "learning_rate": 9.728291945517099e-07, "loss": 0.2763, "step": 34822 }, { "epoch": 2.58810850984764, "grad_norm": 1.8934251662643466, "learning_rate": 9.72484016969083e-07, "loss": 0.1743, "step": 34823 }, { "epoch": 2.5881828316610926, "grad_norm": 3.928285655368322, "learning_rate": 9.72138897505136e-07, "loss": 0.3231, "step": 34824 }, { "epoch": 2.5882571534745447, "grad_norm": 2.2835822846338973, "learning_rate": 9.717938361620917e-07, "loss": 0.266, "step": 34825 }, { "epoch": 2.588331475287997, "grad_norm": 2.738304462555067, "learning_rate": 9.71448832942169e-07, "loss": 0.2801, "step": 34826 }, { "epoch": 2.588405797101449, "grad_norm": 2.1727355840026283, "learning_rate": 9.711038878475887e-07, "loss": 0.3769, "step": 34827 }, { "epoch": 2.5884801189149016, "grad_norm": 2.544730341080863, "learning_rate": 9.707590008805744e-07, "loss": 0.2365, "step": 34828 }, { "epoch": 2.5885544407283536, "grad_norm": 2.57974425187026, "learning_rate": 9.704141720433435e-07, "loss": 0.2776, "step": 34829 }, { "epoch": 2.588628762541806, "grad_norm": 1.9204464954737837, "learning_rate": 9.700694013381206e-07, "loss": 0.1951, "step": 34830 }, { "epoch": 2.5887030843552585, "grad_norm": 1.9315782785593507, "learning_rate": 9.697246887671209e-07, "loss": 0.2057, "step": 34831 }, { "epoch": 2.5887774061687105, "grad_norm": 2.9026591307694276, "learning_rate": 9.693800343325644e-07, "loss": 0.3328, "step": 34832 }, { "epoch": 2.5888517279821626, "grad_norm": 2.458387784708367, "learning_rate": 9.690354380366718e-07, "loss": 0.2832, "step": 34833 }, { "epoch": 2.588926049795615, "grad_norm": 2.404828119600157, "learning_rate": 9.68690899881659e-07, "loss": 0.3758, "step": 34834 }, { "epoch": 2.5890003716090675, "grad_norm": 2.4193788116438486, "learning_rate": 9.683464198697468e-07, "loss": 0.3115, "step": 34835 }, { "epoch": 2.5890746934225195, "grad_norm": 2.477512749847137, "learning_rate": 9.68001998003153e-07, "loss": 0.2864, "step": 34836 }, { "epoch": 2.5891490152359715, "grad_norm": 2.442360756975267, "learning_rate": 9.676576342840926e-07, "loss": 0.3222, "step": 34837 }, { "epoch": 2.589223337049424, "grad_norm": 1.8002647294067597, "learning_rate": 9.67313328714784e-07, "loss": 0.1533, "step": 34838 }, { "epoch": 2.5892976588628764, "grad_norm": 2.5523622449618903, "learning_rate": 9.66969081297442e-07, "loss": 0.2824, "step": 34839 }, { "epoch": 2.5893719806763285, "grad_norm": 2.9853269761627494, "learning_rate": 9.666248920342858e-07, "loss": 0.2991, "step": 34840 }, { "epoch": 2.589446302489781, "grad_norm": 2.139154415343283, "learning_rate": 9.662807609275293e-07, "loss": 0.2026, "step": 34841 }, { "epoch": 2.589520624303233, "grad_norm": 2.681234681528467, "learning_rate": 9.659366879793875e-07, "loss": 0.3182, "step": 34842 }, { "epoch": 2.5895949461166854, "grad_norm": 2.338511302922696, "learning_rate": 9.655926731920784e-07, "loss": 0.272, "step": 34843 }, { "epoch": 2.5896692679301374, "grad_norm": 2.470650516945916, "learning_rate": 9.652487165678126e-07, "loss": 0.3341, "step": 34844 }, { "epoch": 2.58974358974359, "grad_norm": 1.9923945513400128, "learning_rate": 9.649048181088094e-07, "loss": 0.2581, "step": 34845 }, { "epoch": 2.589817911557042, "grad_norm": 2.092555994250381, "learning_rate": 9.645609778172793e-07, "loss": 0.2754, "step": 34846 }, { "epoch": 2.5898922333704943, "grad_norm": 2.6953647949937305, "learning_rate": 9.642171956954372e-07, "loss": 0.2911, "step": 34847 }, { "epoch": 2.5899665551839464, "grad_norm": 2.5863690169094866, "learning_rate": 9.638734717454955e-07, "loss": 0.2754, "step": 34848 }, { "epoch": 2.590040876997399, "grad_norm": 2.3185527219710926, "learning_rate": 9.635298059696664e-07, "loss": 0.2685, "step": 34849 }, { "epoch": 2.590115198810851, "grad_norm": 2.237571317754509, "learning_rate": 9.631861983701651e-07, "loss": 0.2341, "step": 34850 }, { "epoch": 2.5901895206243033, "grad_norm": 2.6648707112918255, "learning_rate": 9.628426489492004e-07, "loss": 0.2397, "step": 34851 }, { "epoch": 2.5902638424377553, "grad_norm": 2.6532249368885195, "learning_rate": 9.62499157708987e-07, "loss": 0.3076, "step": 34852 }, { "epoch": 2.5903381642512078, "grad_norm": 2.3885811100231873, "learning_rate": 9.621557246517366e-07, "loss": 0.2881, "step": 34853 }, { "epoch": 2.5904124860646602, "grad_norm": 2.1886202030005943, "learning_rate": 9.61812349779655e-07, "loss": 0.2446, "step": 34854 }, { "epoch": 2.5904868078781123, "grad_norm": 2.232215721370723, "learning_rate": 9.614690330949594e-07, "loss": 0.2791, "step": 34855 }, { "epoch": 2.5905611296915643, "grad_norm": 3.3743212289997815, "learning_rate": 9.61125774599856e-07, "loss": 0.3902, "step": 34856 }, { "epoch": 2.5906354515050167, "grad_norm": 2.3575438994995963, "learning_rate": 9.607825742965571e-07, "loss": 0.3127, "step": 34857 }, { "epoch": 2.590709773318469, "grad_norm": 4.58012021772043, "learning_rate": 9.604394321872701e-07, "loss": 0.2868, "step": 34858 }, { "epoch": 2.590784095131921, "grad_norm": 2.6712270218046035, "learning_rate": 9.600963482742066e-07, "loss": 0.2911, "step": 34859 }, { "epoch": 2.590858416945373, "grad_norm": 2.5697749867103807, "learning_rate": 9.597533225595724e-07, "loss": 0.2883, "step": 34860 }, { "epoch": 2.5909327387588257, "grad_norm": 1.7966118974775243, "learning_rate": 9.594103550455757e-07, "loss": 0.2651, "step": 34861 }, { "epoch": 2.591007060572278, "grad_norm": 3.1494762383887416, "learning_rate": 9.590674457344284e-07, "loss": 0.3301, "step": 34862 }, { "epoch": 2.59108138238573, "grad_norm": 1.880352311639328, "learning_rate": 9.587245946283353e-07, "loss": 0.2219, "step": 34863 }, { "epoch": 2.5911557041991826, "grad_norm": 2.5621261313802774, "learning_rate": 9.583818017295022e-07, "loss": 0.2944, "step": 34864 }, { "epoch": 2.5912300260126346, "grad_norm": 2.84039184649979, "learning_rate": 9.580390670401408e-07, "loss": 0.3311, "step": 34865 }, { "epoch": 2.591304347826087, "grad_norm": 2.187758280238471, "learning_rate": 9.576963905624515e-07, "loss": 0.2642, "step": 34866 }, { "epoch": 2.591378669639539, "grad_norm": 2.6669190617108094, "learning_rate": 9.57353772298647e-07, "loss": 0.3185, "step": 34867 }, { "epoch": 2.5914529914529916, "grad_norm": 2.523453314141298, "learning_rate": 9.57011212250929e-07, "loss": 0.3227, "step": 34868 }, { "epoch": 2.5915273132664436, "grad_norm": 5.095027584217358, "learning_rate": 9.56668710421501e-07, "loss": 0.2511, "step": 34869 }, { "epoch": 2.591601635079896, "grad_norm": 2.4185566251369157, "learning_rate": 9.56326266812575e-07, "loss": 0.3204, "step": 34870 }, { "epoch": 2.591675956893348, "grad_norm": 2.9894934898920886, "learning_rate": 9.559838814263478e-07, "loss": 0.3281, "step": 34871 }, { "epoch": 2.5917502787068005, "grad_norm": 2.0078208286341908, "learning_rate": 9.556415542650287e-07, "loss": 0.2236, "step": 34872 }, { "epoch": 2.5918246005202525, "grad_norm": 2.1297732852841627, "learning_rate": 9.552992853308174e-07, "loss": 0.2717, "step": 34873 }, { "epoch": 2.591898922333705, "grad_norm": 2.2783084618055014, "learning_rate": 9.54957074625923e-07, "loss": 0.2839, "step": 34874 }, { "epoch": 2.591973244147157, "grad_norm": 2.7477990126323126, "learning_rate": 9.546149221525448e-07, "loss": 0.3495, "step": 34875 }, { "epoch": 2.5920475659606095, "grad_norm": 2.3576129989417147, "learning_rate": 9.542728279128854e-07, "loss": 0.2912, "step": 34876 }, { "epoch": 2.592121887774062, "grad_norm": 2.2235928833056176, "learning_rate": 9.53930791909149e-07, "loss": 0.2335, "step": 34877 }, { "epoch": 2.592196209587514, "grad_norm": 2.3109976426068584, "learning_rate": 9.535888141435357e-07, "loss": 0.2854, "step": 34878 }, { "epoch": 2.592270531400966, "grad_norm": 2.0910483021128985, "learning_rate": 9.532468946182505e-07, "loss": 0.2593, "step": 34879 }, { "epoch": 2.5923448532144184, "grad_norm": 2.9557218230350686, "learning_rate": 9.529050333354917e-07, "loss": 0.3057, "step": 34880 }, { "epoch": 2.592419175027871, "grad_norm": 2.561938755741005, "learning_rate": 9.525632302974607e-07, "loss": 0.2974, "step": 34881 }, { "epoch": 2.592493496841323, "grad_norm": 2.2971814363821017, "learning_rate": 9.522214855063583e-07, "loss": 0.2906, "step": 34882 }, { "epoch": 2.592567818654775, "grad_norm": 2.370420245033138, "learning_rate": 9.518797989643824e-07, "loss": 0.2129, "step": 34883 }, { "epoch": 2.5926421404682274, "grad_norm": 2.6468173702944773, "learning_rate": 9.515381706737381e-07, "loss": 0.3143, "step": 34884 }, { "epoch": 2.59271646228168, "grad_norm": 2.1761966532666532, "learning_rate": 9.511966006366203e-07, "loss": 0.2746, "step": 34885 }, { "epoch": 2.592790784095132, "grad_norm": 3.112609934224094, "learning_rate": 9.508550888552281e-07, "loss": 0.2964, "step": 34886 }, { "epoch": 2.5928651059085843, "grad_norm": 1.9627792117334706, "learning_rate": 9.505136353317623e-07, "loss": 0.2967, "step": 34887 }, { "epoch": 2.5929394277220363, "grad_norm": 2.2047292219314163, "learning_rate": 9.501722400684188e-07, "loss": 0.3114, "step": 34888 }, { "epoch": 2.593013749535489, "grad_norm": 2.006905360626385, "learning_rate": 9.498309030673991e-07, "loss": 0.2117, "step": 34889 }, { "epoch": 2.593088071348941, "grad_norm": 3.261907001906741, "learning_rate": 9.494896243308982e-07, "loss": 0.2728, "step": 34890 }, { "epoch": 2.5931623931623933, "grad_norm": 2.4328822285176623, "learning_rate": 9.491484038611109e-07, "loss": 0.3388, "step": 34891 }, { "epoch": 2.5932367149758453, "grad_norm": 2.0943717573189096, "learning_rate": 9.488072416602411e-07, "loss": 0.2803, "step": 34892 }, { "epoch": 2.5933110367892978, "grad_norm": 2.1496853848359727, "learning_rate": 9.484661377304772e-07, "loss": 0.2392, "step": 34893 }, { "epoch": 2.5933853586027498, "grad_norm": 2.7126001092471106, "learning_rate": 9.481250920740193e-07, "loss": 0.2578, "step": 34894 }, { "epoch": 2.5934596804162022, "grad_norm": 2.5207894025293003, "learning_rate": 9.477841046930614e-07, "loss": 0.2702, "step": 34895 }, { "epoch": 2.5935340022296542, "grad_norm": 2.395333515549522, "learning_rate": 9.474431755898017e-07, "loss": 0.3061, "step": 34896 }, { "epoch": 2.5936083240431067, "grad_norm": 2.465347359455775, "learning_rate": 9.47102304766433e-07, "loss": 0.2849, "step": 34897 }, { "epoch": 2.5936826458565587, "grad_norm": 3.0164549589455634, "learning_rate": 9.467614922251478e-07, "loss": 0.3468, "step": 34898 }, { "epoch": 2.593756967670011, "grad_norm": 2.604946715858056, "learning_rate": 9.464207379681445e-07, "loss": 0.2413, "step": 34899 }, { "epoch": 2.5938312894834636, "grad_norm": 1.7166424088639025, "learning_rate": 9.460800419976124e-07, "loss": 0.261, "step": 34900 }, { "epoch": 2.5939056112969157, "grad_norm": 1.9911431583930945, "learning_rate": 9.457394043157497e-07, "loss": 0.2549, "step": 34901 }, { "epoch": 2.5939799331103677, "grad_norm": 2.29839669196157, "learning_rate": 9.45398824924747e-07, "loss": 0.3156, "step": 34902 }, { "epoch": 2.59405425492382, "grad_norm": 2.5024581145888507, "learning_rate": 9.450583038267947e-07, "loss": 0.2294, "step": 34903 }, { "epoch": 2.5941285767372726, "grad_norm": 2.399612450395173, "learning_rate": 9.44717841024091e-07, "loss": 0.3158, "step": 34904 }, { "epoch": 2.5942028985507246, "grad_norm": 2.6555893276282685, "learning_rate": 9.443774365188197e-07, "loss": 0.311, "step": 34905 }, { "epoch": 2.5942772203641766, "grad_norm": 1.9764030197533475, "learning_rate": 9.440370903131791e-07, "loss": 0.2384, "step": 34906 }, { "epoch": 2.594351542177629, "grad_norm": 2.4460077182340005, "learning_rate": 9.436968024093574e-07, "loss": 0.2435, "step": 34907 }, { "epoch": 2.5944258639910815, "grad_norm": 2.431770735864188, "learning_rate": 9.43356572809544e-07, "loss": 0.2232, "step": 34908 }, { "epoch": 2.5945001858045336, "grad_norm": 2.3331526691570033, "learning_rate": 9.430164015159338e-07, "loss": 0.2152, "step": 34909 }, { "epoch": 2.594574507617986, "grad_norm": 2.229769773430786, "learning_rate": 9.426762885307117e-07, "loss": 0.2763, "step": 34910 }, { "epoch": 2.594648829431438, "grad_norm": 2.425667592034835, "learning_rate": 9.423362338560705e-07, "loss": 0.2539, "step": 34911 }, { "epoch": 2.5947231512448905, "grad_norm": 2.462874873496166, "learning_rate": 9.419962374942005e-07, "loss": 0.2881, "step": 34912 }, { "epoch": 2.5947974730583425, "grad_norm": 1.8561601525250873, "learning_rate": 9.416562994472855e-07, "loss": 0.2392, "step": 34913 }, { "epoch": 2.594871794871795, "grad_norm": 3.1996744134002575, "learning_rate": 9.413164197175195e-07, "loss": 0.3345, "step": 34914 }, { "epoch": 2.594946116685247, "grad_norm": 2.4049462272501128, "learning_rate": 9.409765983070884e-07, "loss": 0.3012, "step": 34915 }, { "epoch": 2.5950204384986995, "grad_norm": 1.9263421671154628, "learning_rate": 9.406368352181794e-07, "loss": 0.2197, "step": 34916 }, { "epoch": 2.5950947603121515, "grad_norm": 1.880368229983249, "learning_rate": 9.402971304529795e-07, "loss": 0.1885, "step": 34917 }, { "epoch": 2.595169082125604, "grad_norm": 2.4718181345713135, "learning_rate": 9.399574840136782e-07, "loss": 0.2646, "step": 34918 }, { "epoch": 2.595243403939056, "grad_norm": 2.180649216947532, "learning_rate": 9.396178959024604e-07, "loss": 0.2844, "step": 34919 }, { "epoch": 2.5953177257525084, "grad_norm": 2.114812814096507, "learning_rate": 9.392783661215099e-07, "loss": 0.266, "step": 34920 }, { "epoch": 2.5953920475659604, "grad_norm": 2.8177430747107293, "learning_rate": 9.389388946730183e-07, "loss": 0.3015, "step": 34921 }, { "epoch": 2.595466369379413, "grad_norm": 2.5954024054899167, "learning_rate": 9.385994815591648e-07, "loss": 0.2712, "step": 34922 }, { "epoch": 2.5955406911928653, "grad_norm": 2.373353201722806, "learning_rate": 9.3826012678214e-07, "loss": 0.3251, "step": 34923 }, { "epoch": 2.5956150130063174, "grad_norm": 3.0560779684277066, "learning_rate": 9.379208303441267e-07, "loss": 0.3635, "step": 34924 }, { "epoch": 2.5956893348197694, "grad_norm": 3.0496181670739984, "learning_rate": 9.375815922473075e-07, "loss": 0.3503, "step": 34925 }, { "epoch": 2.595763656633222, "grad_norm": 2.9699089277114816, "learning_rate": 9.372424124938706e-07, "loss": 0.3508, "step": 34926 }, { "epoch": 2.5958379784466743, "grad_norm": 2.224806416219052, "learning_rate": 9.369032910859932e-07, "loss": 0.2773, "step": 34927 }, { "epoch": 2.5959123002601263, "grad_norm": 2.4061636061158067, "learning_rate": 9.365642280258647e-07, "loss": 0.31, "step": 34928 }, { "epoch": 2.5959866220735783, "grad_norm": 2.8404749687572726, "learning_rate": 9.362252233156643e-07, "loss": 0.2919, "step": 34929 }, { "epoch": 2.596060943887031, "grad_norm": 2.2390276947538243, "learning_rate": 9.35886276957575e-07, "loss": 0.3216, "step": 34930 }, { "epoch": 2.5961352657004833, "grad_norm": 2.9620644722085343, "learning_rate": 9.355473889537803e-07, "loss": 0.3609, "step": 34931 }, { "epoch": 2.5962095875139353, "grad_norm": 2.2181373449565855, "learning_rate": 9.352085593064597e-07, "loss": 0.2188, "step": 34932 }, { "epoch": 2.5962839093273877, "grad_norm": 2.678353336313975, "learning_rate": 9.348697880177981e-07, "loss": 0.3544, "step": 34933 }, { "epoch": 2.5963582311408397, "grad_norm": 3.027226109608056, "learning_rate": 9.345310750899738e-07, "loss": 0.2853, "step": 34934 }, { "epoch": 2.596432552954292, "grad_norm": 2.157917403738217, "learning_rate": 9.341924205251673e-07, "loss": 0.2165, "step": 34935 }, { "epoch": 2.596506874767744, "grad_norm": 2.614319337331932, "learning_rate": 9.338538243255613e-07, "loss": 0.2695, "step": 34936 }, { "epoch": 2.5965811965811967, "grad_norm": 2.8694799633289643, "learning_rate": 9.335152864933328e-07, "loss": 0.3483, "step": 34937 }, { "epoch": 2.5966555183946487, "grad_norm": 2.085379012553377, "learning_rate": 9.331768070306635e-07, "loss": 0.2187, "step": 34938 }, { "epoch": 2.596729840208101, "grad_norm": 2.479310218303244, "learning_rate": 9.328383859397294e-07, "loss": 0.1708, "step": 34939 }, { "epoch": 2.596804162021553, "grad_norm": 2.616003508069906, "learning_rate": 9.325000232227133e-07, "loss": 0.2357, "step": 34940 }, { "epoch": 2.5968784838350056, "grad_norm": 2.8858600001789134, "learning_rate": 9.32161718881791e-07, "loss": 0.3502, "step": 34941 }, { "epoch": 2.596952805648458, "grad_norm": 2.320988949146476, "learning_rate": 9.318234729191399e-07, "loss": 0.2431, "step": 34942 }, { "epoch": 2.59702712746191, "grad_norm": 2.626269550948347, "learning_rate": 9.314852853369405e-07, "loss": 0.3014, "step": 34943 }, { "epoch": 2.597101449275362, "grad_norm": 2.0194061420569813, "learning_rate": 9.311471561373675e-07, "loss": 0.2192, "step": 34944 }, { "epoch": 2.5971757710888146, "grad_norm": 3.085397578656336, "learning_rate": 9.308090853225982e-07, "loss": 0.2426, "step": 34945 }, { "epoch": 2.597250092902267, "grad_norm": 2.0881867917834933, "learning_rate": 9.304710728948096e-07, "loss": 0.2202, "step": 34946 }, { "epoch": 2.597324414715719, "grad_norm": 1.9916849465135453, "learning_rate": 9.301331188561768e-07, "loss": 0.2334, "step": 34947 }, { "epoch": 2.597398736529171, "grad_norm": 2.323210456943666, "learning_rate": 9.29795223208878e-07, "loss": 0.2821, "step": 34948 }, { "epoch": 2.5974730583426235, "grad_norm": 2.3003798927285883, "learning_rate": 9.294573859550871e-07, "loss": 0.2678, "step": 34949 }, { "epoch": 2.597547380156076, "grad_norm": 1.8732259109263265, "learning_rate": 9.291196070969788e-07, "loss": 0.2016, "step": 34950 }, { "epoch": 2.597621701969528, "grad_norm": 1.990977827606332, "learning_rate": 9.287818866367282e-07, "loss": 0.2185, "step": 34951 }, { "epoch": 2.59769602378298, "grad_norm": 1.9915732472297887, "learning_rate": 9.284442245765068e-07, "loss": 0.1836, "step": 34952 }, { "epoch": 2.5977703455964325, "grad_norm": 2.6038771451999496, "learning_rate": 9.281066209184919e-07, "loss": 0.349, "step": 34953 }, { "epoch": 2.597844667409885, "grad_norm": 2.299272673268467, "learning_rate": 9.27769075664855e-07, "loss": 0.2709, "step": 34954 }, { "epoch": 2.597918989223337, "grad_norm": 2.3615191704553316, "learning_rate": 9.274315888177721e-07, "loss": 0.2138, "step": 34955 }, { "epoch": 2.5979933110367894, "grad_norm": 2.5643745921632997, "learning_rate": 9.270941603794137e-07, "loss": 0.2594, "step": 34956 }, { "epoch": 2.5980676328502414, "grad_norm": 2.3088943516116234, "learning_rate": 9.267567903519504e-07, "loss": 0.2189, "step": 34957 }, { "epoch": 2.598141954663694, "grad_norm": 2.6369728169426465, "learning_rate": 9.26419478737558e-07, "loss": 0.3253, "step": 34958 }, { "epoch": 2.598216276477146, "grad_norm": 2.38878397509414, "learning_rate": 9.260822255384039e-07, "loss": 0.2771, "step": 34959 }, { "epoch": 2.5982905982905984, "grad_norm": 2.5402691584524524, "learning_rate": 9.257450307566651e-07, "loss": 0.3511, "step": 34960 }, { "epoch": 2.5983649201040504, "grad_norm": 2.7958970152701212, "learning_rate": 9.254078943945067e-07, "loss": 0.3227, "step": 34961 }, { "epoch": 2.598439241917503, "grad_norm": 2.9245944860853155, "learning_rate": 9.250708164541023e-07, "loss": 0.3404, "step": 34962 }, { "epoch": 2.598513563730955, "grad_norm": 1.840931712347563, "learning_rate": 9.247337969376213e-07, "loss": 0.1844, "step": 34963 }, { "epoch": 2.5985878855444073, "grad_norm": 2.477258989172909, "learning_rate": 9.243968358472311e-07, "loss": 0.2635, "step": 34964 }, { "epoch": 2.59866220735786, "grad_norm": 2.5572400933142236, "learning_rate": 9.240599331851052e-07, "loss": 0.2616, "step": 34965 }, { "epoch": 2.598736529171312, "grad_norm": 2.3265254979457533, "learning_rate": 9.237230889534099e-07, "loss": 0.267, "step": 34966 }, { "epoch": 2.598810850984764, "grad_norm": 2.263622387277337, "learning_rate": 9.233863031543133e-07, "loss": 0.2835, "step": 34967 }, { "epoch": 2.5988851727982163, "grad_norm": 2.400811791429037, "learning_rate": 9.230495757899871e-07, "loss": 0.3083, "step": 34968 }, { "epoch": 2.5989594946116688, "grad_norm": 2.1226898349347434, "learning_rate": 9.22712906862594e-07, "loss": 0.2277, "step": 34969 }, { "epoch": 2.5990338164251208, "grad_norm": 1.9311997830385523, "learning_rate": 9.223762963743065e-07, "loss": 0.1943, "step": 34970 }, { "epoch": 2.599108138238573, "grad_norm": 2.9036259385710905, "learning_rate": 9.220397443272889e-07, "loss": 0.3175, "step": 34971 }, { "epoch": 2.5991824600520252, "grad_norm": 2.302050067747375, "learning_rate": 9.217032507237089e-07, "loss": 0.2211, "step": 34972 }, { "epoch": 2.5992567818654777, "grad_norm": 2.7924662132512874, "learning_rate": 9.213668155657318e-07, "loss": 0.3524, "step": 34973 }, { "epoch": 2.5993311036789297, "grad_norm": 2.5911836990920527, "learning_rate": 9.210304388555235e-07, "loss": 0.2971, "step": 34974 }, { "epoch": 2.5994054254923817, "grad_norm": 2.4089935531030573, "learning_rate": 9.206941205952502e-07, "loss": 0.2649, "step": 34975 }, { "epoch": 2.599479747305834, "grad_norm": 2.8820299443940645, "learning_rate": 9.203578607870767e-07, "loss": 0.3096, "step": 34976 }, { "epoch": 2.5995540691192867, "grad_norm": 2.853629247509377, "learning_rate": 9.20021659433169e-07, "loss": 0.3358, "step": 34977 }, { "epoch": 2.5996283909327387, "grad_norm": 3.25008085246873, "learning_rate": 9.196855165356922e-07, "loss": 0.2951, "step": 34978 }, { "epoch": 2.599702712746191, "grad_norm": 2.4220043594134815, "learning_rate": 9.193494320968055e-07, "loss": 0.2677, "step": 34979 }, { "epoch": 2.599777034559643, "grad_norm": 2.540642739966115, "learning_rate": 9.190134061186784e-07, "loss": 0.2848, "step": 34980 }, { "epoch": 2.5998513563730956, "grad_norm": 7.614769160743434, "learning_rate": 9.186774386034702e-07, "loss": 0.2526, "step": 34981 }, { "epoch": 2.5999256781865476, "grad_norm": 2.0485342116163143, "learning_rate": 9.183415295533471e-07, "loss": 0.2799, "step": 34982 }, { "epoch": 2.6, "grad_norm": 2.1967938397860336, "learning_rate": 9.180056789704695e-07, "loss": 0.2717, "step": 34983 }, { "epoch": 2.600074321813452, "grad_norm": 2.2192135760963154, "learning_rate": 9.176698868570011e-07, "loss": 0.2973, "step": 34984 }, { "epoch": 2.6001486436269046, "grad_norm": 2.361494543651935, "learning_rate": 9.173341532151015e-07, "loss": 0.2701, "step": 34985 }, { "epoch": 2.6002229654403566, "grad_norm": 2.3357330180274554, "learning_rate": 9.169984780469321e-07, "loss": 0.271, "step": 34986 }, { "epoch": 2.600297287253809, "grad_norm": 2.55990820129046, "learning_rate": 9.16662861354658e-07, "loss": 0.2323, "step": 34987 }, { "epoch": 2.6003716090672615, "grad_norm": 2.534434036390727, "learning_rate": 9.163273031404363e-07, "loss": 0.2662, "step": 34988 }, { "epoch": 2.6004459308807135, "grad_norm": 2.7955469493894576, "learning_rate": 9.159918034064253e-07, "loss": 0.3513, "step": 34989 }, { "epoch": 2.6005202526941655, "grad_norm": 2.0586513503174633, "learning_rate": 9.156563621547909e-07, "loss": 0.2659, "step": 34990 }, { "epoch": 2.600594574507618, "grad_norm": 2.497513647285185, "learning_rate": 9.153209793876871e-07, "loss": 0.2505, "step": 34991 }, { "epoch": 2.6006688963210705, "grad_norm": 2.5821630377585145, "learning_rate": 9.149856551072778e-07, "loss": 0.2961, "step": 34992 }, { "epoch": 2.6007432181345225, "grad_norm": 2.867524098655173, "learning_rate": 9.146503893157189e-07, "loss": 0.3343, "step": 34993 }, { "epoch": 2.6008175399479745, "grad_norm": 6.3221994219703115, "learning_rate": 9.143151820151697e-07, "loss": 0.2717, "step": 34994 }, { "epoch": 2.600891861761427, "grad_norm": 2.036212604205227, "learning_rate": 9.139800332077875e-07, "loss": 0.2278, "step": 34995 }, { "epoch": 2.6009661835748794, "grad_norm": 2.1492645190954796, "learning_rate": 9.136449428957295e-07, "loss": 0.2349, "step": 34996 }, { "epoch": 2.6010405053883314, "grad_norm": 2.501101456869169, "learning_rate": 9.133099110811549e-07, "loss": 0.2889, "step": 34997 }, { "epoch": 2.601114827201784, "grad_norm": 1.791912018968365, "learning_rate": 9.129749377662179e-07, "loss": 0.2179, "step": 34998 }, { "epoch": 2.601189149015236, "grad_norm": 2.2258823917292023, "learning_rate": 9.126400229530785e-07, "loss": 0.2397, "step": 34999 }, { "epoch": 2.6012634708286884, "grad_norm": 2.191517151750339, "learning_rate": 9.12305166643892e-07, "loss": 0.2615, "step": 35000 }, { "epoch": 2.6013377926421404, "grad_norm": 2.834768000793722, "learning_rate": 9.119703688408099e-07, "loss": 0.2885, "step": 35001 }, { "epoch": 2.601412114455593, "grad_norm": 2.018235235316509, "learning_rate": 9.116356295459938e-07, "loss": 0.2644, "step": 35002 }, { "epoch": 2.601486436269045, "grad_norm": 2.3519239601830844, "learning_rate": 9.113009487615942e-07, "loss": 0.3119, "step": 35003 }, { "epoch": 2.6015607580824973, "grad_norm": 2.1498726178106837, "learning_rate": 9.109663264897695e-07, "loss": 0.2598, "step": 35004 }, { "epoch": 2.6016350798959493, "grad_norm": 2.5878907197521466, "learning_rate": 9.106317627326722e-07, "loss": 0.2806, "step": 35005 }, { "epoch": 2.601709401709402, "grad_norm": 2.449723051893394, "learning_rate": 9.102972574924551e-07, "loss": 0.2552, "step": 35006 }, { "epoch": 2.601783723522854, "grad_norm": 2.112970267760033, "learning_rate": 9.099628107712732e-07, "loss": 0.1775, "step": 35007 }, { "epoch": 2.6018580453363063, "grad_norm": 2.4579138355364445, "learning_rate": 9.09628422571277e-07, "loss": 0.3765, "step": 35008 }, { "epoch": 2.6019323671497583, "grad_norm": 2.8435764888799886, "learning_rate": 9.092940928946225e-07, "loss": 0.3184, "step": 35009 }, { "epoch": 2.6020066889632107, "grad_norm": 2.225139721333286, "learning_rate": 9.089598217434626e-07, "loss": 0.282, "step": 35010 }, { "epoch": 2.602081010776663, "grad_norm": 2.473363049067544, "learning_rate": 9.086256091199441e-07, "loss": 0.2258, "step": 35011 }, { "epoch": 2.6021553325901152, "grad_norm": 2.7734518129720134, "learning_rate": 9.082914550262256e-07, "loss": 0.2992, "step": 35012 }, { "epoch": 2.6022296544035672, "grad_norm": 2.2586842213250864, "learning_rate": 9.079573594644519e-07, "loss": 0.2659, "step": 35013 }, { "epoch": 2.6023039762170197, "grad_norm": 2.1060994553741885, "learning_rate": 9.076233224367792e-07, "loss": 0.1563, "step": 35014 }, { "epoch": 2.602378298030472, "grad_norm": 1.985159064694183, "learning_rate": 9.072893439453567e-07, "loss": 0.2021, "step": 35015 }, { "epoch": 2.602452619843924, "grad_norm": 2.5854610437311374, "learning_rate": 9.069554239923306e-07, "loss": 0.3268, "step": 35016 }, { "epoch": 2.602526941657376, "grad_norm": 1.9844133275904434, "learning_rate": 9.06621562579858e-07, "loss": 0.1903, "step": 35017 }, { "epoch": 2.6026012634708287, "grad_norm": 2.670265041836185, "learning_rate": 9.062877597100794e-07, "loss": 0.3053, "step": 35018 }, { "epoch": 2.602675585284281, "grad_norm": 1.9380091640813737, "learning_rate": 9.059540153851509e-07, "loss": 0.2465, "step": 35019 }, { "epoch": 2.602749907097733, "grad_norm": 2.1924248794198693, "learning_rate": 9.056203296072163e-07, "loss": 0.297, "step": 35020 }, { "epoch": 2.6028242289111856, "grad_norm": 2.8967744744903245, "learning_rate": 9.052867023784285e-07, "loss": 0.3387, "step": 35021 }, { "epoch": 2.6028985507246376, "grad_norm": 2.0592337289998293, "learning_rate": 9.049531337009332e-07, "loss": 0.2568, "step": 35022 }, { "epoch": 2.60297287253809, "grad_norm": 2.4573617357011317, "learning_rate": 9.046196235768756e-07, "loss": 0.3034, "step": 35023 }, { "epoch": 2.603047194351542, "grad_norm": 1.5989030220979314, "learning_rate": 9.042861720084062e-07, "loss": 0.1932, "step": 35024 }, { "epoch": 2.6031215161649945, "grad_norm": 2.1038200665771196, "learning_rate": 9.039527789976699e-07, "loss": 0.2355, "step": 35025 }, { "epoch": 2.6031958379784466, "grad_norm": 1.953630434914079, "learning_rate": 9.03619444546815e-07, "loss": 0.2212, "step": 35026 }, { "epoch": 2.603270159791899, "grad_norm": 2.4625791602200153, "learning_rate": 9.032861686579853e-07, "loss": 0.3066, "step": 35027 }, { "epoch": 2.603344481605351, "grad_norm": 2.384160746749922, "learning_rate": 9.02952951333328e-07, "loss": 0.2986, "step": 35028 }, { "epoch": 2.6034188034188035, "grad_norm": 2.8117648022240838, "learning_rate": 9.026197925749869e-07, "loss": 0.317, "step": 35029 }, { "epoch": 2.6034931252322555, "grad_norm": 2.9624071079645655, "learning_rate": 9.022866923851071e-07, "loss": 0.2381, "step": 35030 }, { "epoch": 2.603567447045708, "grad_norm": 1.6662684648657782, "learning_rate": 9.019536507658344e-07, "loss": 0.1813, "step": 35031 }, { "epoch": 2.60364176885916, "grad_norm": 2.3967475502758897, "learning_rate": 9.016206677193118e-07, "loss": 0.279, "step": 35032 }, { "epoch": 2.6037160906726124, "grad_norm": 2.864300373144023, "learning_rate": 9.012877432476818e-07, "loss": 0.2724, "step": 35033 }, { "epoch": 2.603790412486065, "grad_norm": 2.966309280042391, "learning_rate": 9.009548773530907e-07, "loss": 0.3395, "step": 35034 }, { "epoch": 2.603864734299517, "grad_norm": 2.5601056793293915, "learning_rate": 9.006220700376789e-07, "loss": 0.3189, "step": 35035 }, { "epoch": 2.603939056112969, "grad_norm": 2.443094668820821, "learning_rate": 9.002893213035912e-07, "loss": 0.2951, "step": 35036 }, { "epoch": 2.6040133779264214, "grad_norm": 1.881297852928135, "learning_rate": 8.999566311529684e-07, "loss": 0.2086, "step": 35037 }, { "epoch": 2.604087699739874, "grad_norm": 2.1147190348678873, "learning_rate": 8.996239995879519e-07, "loss": 0.2358, "step": 35038 }, { "epoch": 2.604162021553326, "grad_norm": 2.034761457694765, "learning_rate": 8.992914266106867e-07, "loss": 0.2512, "step": 35039 }, { "epoch": 2.604236343366778, "grad_norm": 1.8834307059800721, "learning_rate": 8.989589122233078e-07, "loss": 0.243, "step": 35040 }, { "epoch": 2.6043106651802304, "grad_norm": 2.1618090692425405, "learning_rate": 8.986264564279612e-07, "loss": 0.2847, "step": 35041 }, { "epoch": 2.604384986993683, "grad_norm": 3.289073347478892, "learning_rate": 8.98294059226783e-07, "loss": 0.3205, "step": 35042 }, { "epoch": 2.604459308807135, "grad_norm": 1.993902331902815, "learning_rate": 8.979617206219171e-07, "loss": 0.1828, "step": 35043 }, { "epoch": 2.6045336306205873, "grad_norm": 2.485398209080152, "learning_rate": 8.976294406155017e-07, "loss": 0.3485, "step": 35044 }, { "epoch": 2.6046079524340393, "grad_norm": 3.235236407086617, "learning_rate": 8.97297219209674e-07, "loss": 0.368, "step": 35045 }, { "epoch": 2.6046822742474918, "grad_norm": 3.0166547285550687, "learning_rate": 8.969650564065757e-07, "loss": 0.3449, "step": 35046 }, { "epoch": 2.604756596060944, "grad_norm": 2.1498933082838336, "learning_rate": 8.966329522083428e-07, "loss": 0.2792, "step": 35047 }, { "epoch": 2.6048309178743962, "grad_norm": 2.240895262478033, "learning_rate": 8.963009066171169e-07, "loss": 0.2631, "step": 35048 }, { "epoch": 2.6049052396878483, "grad_norm": 2.0108298554272674, "learning_rate": 8.959689196350318e-07, "loss": 0.2163, "step": 35049 }, { "epoch": 2.6049795615013007, "grad_norm": 1.894460371831528, "learning_rate": 8.956369912642282e-07, "loss": 0.234, "step": 35050 }, { "epoch": 2.6050538833147527, "grad_norm": 2.3808958227686725, "learning_rate": 8.953051215068398e-07, "loss": 0.2892, "step": 35051 }, { "epoch": 2.605128205128205, "grad_norm": 2.535592046254671, "learning_rate": 8.949733103650038e-07, "loss": 0.2614, "step": 35052 }, { "epoch": 2.605202526941657, "grad_norm": 2.551825267405394, "learning_rate": 8.946415578408585e-07, "loss": 0.289, "step": 35053 }, { "epoch": 2.6052768487551097, "grad_norm": 2.4533609106029557, "learning_rate": 8.943098639365389e-07, "loss": 0.3456, "step": 35054 }, { "epoch": 2.6053511705685617, "grad_norm": 2.3869181688218046, "learning_rate": 8.939782286541776e-07, "loss": 0.1656, "step": 35055 }, { "epoch": 2.605425492382014, "grad_norm": 2.2249443610978963, "learning_rate": 8.936466519959142e-07, "loss": 0.3061, "step": 35056 }, { "epoch": 2.6054998141954666, "grad_norm": 2.51992949930524, "learning_rate": 8.933151339638791e-07, "loss": 0.2661, "step": 35057 }, { "epoch": 2.6055741360089186, "grad_norm": 2.499654735846245, "learning_rate": 8.929836745602105e-07, "loss": 0.2946, "step": 35058 }, { "epoch": 2.6056484578223706, "grad_norm": 1.8526649634195567, "learning_rate": 8.926522737870402e-07, "loss": 0.237, "step": 35059 }, { "epoch": 2.605722779635823, "grad_norm": 2.5373475185897862, "learning_rate": 8.923209316465009e-07, "loss": 0.3135, "step": 35060 }, { "epoch": 2.6057971014492756, "grad_norm": 2.3145603080311217, "learning_rate": 8.919896481407286e-07, "loss": 0.268, "step": 35061 }, { "epoch": 2.6058714232627276, "grad_norm": 2.894169831378859, "learning_rate": 8.916584232718539e-07, "loss": 0.337, "step": 35062 }, { "epoch": 2.6059457450761796, "grad_norm": 3.4780098178158614, "learning_rate": 8.913272570420095e-07, "loss": 0.4187, "step": 35063 }, { "epoch": 2.606020066889632, "grad_norm": 2.269392115152983, "learning_rate": 8.909961494533259e-07, "loss": 0.3249, "step": 35064 }, { "epoch": 2.6060943887030845, "grad_norm": 1.9267380483961634, "learning_rate": 8.906651005079392e-07, "loss": 0.1925, "step": 35065 }, { "epoch": 2.6061687105165365, "grad_norm": 2.9660740567962023, "learning_rate": 8.903341102079766e-07, "loss": 0.4284, "step": 35066 }, { "epoch": 2.606243032329989, "grad_norm": 2.10757142567785, "learning_rate": 8.900031785555696e-07, "loss": 0.2625, "step": 35067 }, { "epoch": 2.606317354143441, "grad_norm": 1.8936538244355068, "learning_rate": 8.8967230555285e-07, "loss": 0.2774, "step": 35068 }, { "epoch": 2.6063916759568935, "grad_norm": 2.3182363555365875, "learning_rate": 8.893414912019483e-07, "loss": 0.2388, "step": 35069 }, { "epoch": 2.6064659977703455, "grad_norm": 2.5444697889329153, "learning_rate": 8.890107355049915e-07, "loss": 0.3697, "step": 35070 }, { "epoch": 2.606540319583798, "grad_norm": 3.0227974301031812, "learning_rate": 8.886800384641125e-07, "loss": 0.272, "step": 35071 }, { "epoch": 2.60661464139725, "grad_norm": 2.2935326849332154, "learning_rate": 8.883494000814374e-07, "loss": 0.2885, "step": 35072 }, { "epoch": 2.6066889632107024, "grad_norm": 2.5827408164024837, "learning_rate": 8.88018820359099e-07, "loss": 0.2686, "step": 35073 }, { "epoch": 2.6067632850241544, "grad_norm": 2.5027064469977756, "learning_rate": 8.876882992992197e-07, "loss": 0.2843, "step": 35074 }, { "epoch": 2.606837606837607, "grad_norm": 2.0902066359622706, "learning_rate": 8.873578369039315e-07, "loss": 0.2284, "step": 35075 }, { "epoch": 2.6069119286510594, "grad_norm": 2.2259251422654716, "learning_rate": 8.870274331753603e-07, "loss": 0.3285, "step": 35076 }, { "epoch": 2.6069862504645114, "grad_norm": 2.439233459277591, "learning_rate": 8.866970881156334e-07, "loss": 0.2555, "step": 35077 }, { "epoch": 2.6070605722779634, "grad_norm": 2.6743324866568123, "learning_rate": 8.863668017268779e-07, "loss": 0.2319, "step": 35078 }, { "epoch": 2.607134894091416, "grad_norm": 2.204909776904469, "learning_rate": 8.860365740112198e-07, "loss": 0.2628, "step": 35079 }, { "epoch": 2.6072092159048683, "grad_norm": 2.2057039933136537, "learning_rate": 8.857064049707853e-07, "loss": 0.2492, "step": 35080 }, { "epoch": 2.6072835377183203, "grad_norm": 3.829524065772754, "learning_rate": 8.853762946077016e-07, "loss": 0.3238, "step": 35081 }, { "epoch": 2.6073578595317723, "grad_norm": 3.1723256868438003, "learning_rate": 8.850462429240902e-07, "loss": 0.3208, "step": 35082 }, { "epoch": 2.607432181345225, "grad_norm": 2.625824949879365, "learning_rate": 8.847162499220796e-07, "loss": 0.3374, "step": 35083 }, { "epoch": 2.6075065031586773, "grad_norm": 1.857199689593165, "learning_rate": 8.843863156037935e-07, "loss": 0.2328, "step": 35084 }, { "epoch": 2.6075808249721293, "grad_norm": 2.3867806385787125, "learning_rate": 8.840564399713547e-07, "loss": 0.2674, "step": 35085 }, { "epoch": 2.6076551467855813, "grad_norm": 2.640588183167325, "learning_rate": 8.837266230268859e-07, "loss": 0.3363, "step": 35086 }, { "epoch": 2.6077294685990338, "grad_norm": 2.1028291506588324, "learning_rate": 8.833968647725144e-07, "loss": 0.2879, "step": 35087 }, { "epoch": 2.6078037904124862, "grad_norm": 2.3271509634851903, "learning_rate": 8.830671652103595e-07, "loss": 0.2644, "step": 35088 }, { "epoch": 2.6078781122259382, "grad_norm": 2.1446346775581984, "learning_rate": 8.827375243425451e-07, "loss": 0.268, "step": 35089 }, { "epoch": 2.6079524340393907, "grad_norm": 2.4901970171479717, "learning_rate": 8.82407942171194e-07, "loss": 0.3227, "step": 35090 }, { "epoch": 2.6080267558528427, "grad_norm": 2.5640263214946284, "learning_rate": 8.820784186984277e-07, "loss": 0.2744, "step": 35091 }, { "epoch": 2.608101077666295, "grad_norm": 1.9942954238839563, "learning_rate": 8.817489539263657e-07, "loss": 0.222, "step": 35092 }, { "epoch": 2.608175399479747, "grad_norm": 1.7790694155168405, "learning_rate": 8.81419547857133e-07, "loss": 0.2086, "step": 35093 }, { "epoch": 2.6082497212931997, "grad_norm": 2.5588976858790327, "learning_rate": 8.810902004928456e-07, "loss": 0.2966, "step": 35094 }, { "epoch": 2.6083240431066517, "grad_norm": 2.2359965530636265, "learning_rate": 8.807609118356286e-07, "loss": 0.2323, "step": 35095 }, { "epoch": 2.608398364920104, "grad_norm": 2.5246324475612942, "learning_rate": 8.804316818876002e-07, "loss": 0.2338, "step": 35096 }, { "epoch": 2.608472686733556, "grad_norm": 2.2073158468750878, "learning_rate": 8.801025106508787e-07, "loss": 0.2838, "step": 35097 }, { "epoch": 2.6085470085470086, "grad_norm": 2.3234194102558705, "learning_rate": 8.797733981275846e-07, "loss": 0.2603, "step": 35098 }, { "epoch": 2.608621330360461, "grad_norm": 2.47419898499162, "learning_rate": 8.794443443198342e-07, "loss": 0.2619, "step": 35099 }, { "epoch": 2.608695652173913, "grad_norm": 2.211877196881191, "learning_rate": 8.791153492297489e-07, "loss": 0.2428, "step": 35100 }, { "epoch": 2.608769973987365, "grad_norm": 2.804462756586929, "learning_rate": 8.787864128594447e-07, "loss": 0.2673, "step": 35101 }, { "epoch": 2.6088442958008176, "grad_norm": 2.771622921051092, "learning_rate": 8.784575352110424e-07, "loss": 0.4159, "step": 35102 }, { "epoch": 2.60891861761427, "grad_norm": 2.059503991662868, "learning_rate": 8.781287162866558e-07, "loss": 0.2731, "step": 35103 }, { "epoch": 2.608992939427722, "grad_norm": 2.237275976933373, "learning_rate": 8.777999560884032e-07, "loss": 0.2636, "step": 35104 }, { "epoch": 2.609067261241174, "grad_norm": 1.6828851642582425, "learning_rate": 8.774712546184017e-07, "loss": 0.2422, "step": 35105 }, { "epoch": 2.6091415830546265, "grad_norm": 2.3514654636708925, "learning_rate": 8.771426118787652e-07, "loss": 0.2307, "step": 35106 }, { "epoch": 2.609215904868079, "grad_norm": 1.8051572039143626, "learning_rate": 8.768140278716153e-07, "loss": 0.2302, "step": 35107 }, { "epoch": 2.609290226681531, "grad_norm": 1.8786607842525809, "learning_rate": 8.764855025990604e-07, "loss": 0.2149, "step": 35108 }, { "epoch": 2.609364548494983, "grad_norm": 2.4701934143735316, "learning_rate": 8.761570360632188e-07, "loss": 0.3225, "step": 35109 }, { "epoch": 2.6094388703084355, "grad_norm": 2.448631869185006, "learning_rate": 8.758286282662054e-07, "loss": 0.2807, "step": 35110 }, { "epoch": 2.609513192121888, "grad_norm": 2.5008840365526064, "learning_rate": 8.755002792101319e-07, "loss": 0.3126, "step": 35111 }, { "epoch": 2.60958751393534, "grad_norm": 1.6245290314815293, "learning_rate": 8.751719888971167e-07, "loss": 0.1747, "step": 35112 }, { "epoch": 2.6096618357487924, "grad_norm": 2.5918447661481308, "learning_rate": 8.748437573292701e-07, "loss": 0.2556, "step": 35113 }, { "epoch": 2.6097361575622444, "grad_norm": 2.1280940260897374, "learning_rate": 8.745155845087039e-07, "loss": 0.2525, "step": 35114 }, { "epoch": 2.609810479375697, "grad_norm": 2.0496563131433274, "learning_rate": 8.741874704375353e-07, "loss": 0.2287, "step": 35115 }, { "epoch": 2.609884801189149, "grad_norm": 2.276167548920845, "learning_rate": 8.738594151178725e-07, "loss": 0.4622, "step": 35116 }, { "epoch": 2.6099591230026014, "grad_norm": 2.7034938960049986, "learning_rate": 8.735314185518307e-07, "loss": 0.3165, "step": 35117 }, { "epoch": 2.6100334448160534, "grad_norm": 2.331003771852015, "learning_rate": 8.732034807415202e-07, "loss": 0.2347, "step": 35118 }, { "epoch": 2.610107766629506, "grad_norm": 2.5062825822693178, "learning_rate": 8.728756016890516e-07, "loss": 0.2489, "step": 35119 }, { "epoch": 2.610182088442958, "grad_norm": 2.340805225088731, "learning_rate": 8.725477813965366e-07, "loss": 0.3132, "step": 35120 }, { "epoch": 2.6102564102564103, "grad_norm": 2.155084516090977, "learning_rate": 8.722200198660846e-07, "loss": 0.2771, "step": 35121 }, { "epoch": 2.6103307320698628, "grad_norm": 2.4131692774821545, "learning_rate": 8.718923170998072e-07, "loss": 0.2126, "step": 35122 }, { "epoch": 2.610405053883315, "grad_norm": 1.831407630813356, "learning_rate": 8.715646730998117e-07, "loss": 0.1896, "step": 35123 }, { "epoch": 2.610479375696767, "grad_norm": 2.460066003340352, "learning_rate": 8.712370878682109e-07, "loss": 0.2662, "step": 35124 }, { "epoch": 2.6105536975102193, "grad_norm": 2.0711108911288534, "learning_rate": 8.709095614071128e-07, "loss": 0.181, "step": 35125 }, { "epoch": 2.6106280193236717, "grad_norm": 3.3092348581006035, "learning_rate": 8.705820937186227e-07, "loss": 0.33, "step": 35126 }, { "epoch": 2.6107023411371237, "grad_norm": 2.12147988810195, "learning_rate": 8.702546848048543e-07, "loss": 0.2399, "step": 35127 }, { "epoch": 2.6107766629505758, "grad_norm": 2.0046384294481725, "learning_rate": 8.699273346679094e-07, "loss": 0.2373, "step": 35128 }, { "epoch": 2.610850984764028, "grad_norm": 2.3999039960485935, "learning_rate": 8.696000433099005e-07, "loss": 0.2964, "step": 35129 }, { "epoch": 2.6109253065774807, "grad_norm": 2.592452411520492, "learning_rate": 8.69272810732934e-07, "loss": 0.2916, "step": 35130 }, { "epoch": 2.6109996283909327, "grad_norm": 2.37622280682941, "learning_rate": 8.689456369391148e-07, "loss": 0.2802, "step": 35131 }, { "epoch": 2.611073950204385, "grad_norm": 3.501344392330697, "learning_rate": 8.686185219305487e-07, "loss": 0.2499, "step": 35132 }, { "epoch": 2.611148272017837, "grad_norm": 1.9158162626960549, "learning_rate": 8.682914657093422e-07, "loss": 0.1473, "step": 35133 }, { "epoch": 2.6112225938312896, "grad_norm": 1.9794626977276923, "learning_rate": 8.679644682776034e-07, "loss": 0.2667, "step": 35134 }, { "epoch": 2.6112969156447416, "grad_norm": 3.127859041246441, "learning_rate": 8.676375296374351e-07, "loss": 0.315, "step": 35135 }, { "epoch": 2.611371237458194, "grad_norm": 2.0366172061147765, "learning_rate": 8.6731064979094e-07, "loss": 0.255, "step": 35136 }, { "epoch": 2.611445559271646, "grad_norm": 2.1797202695172087, "learning_rate": 8.669838287402277e-07, "loss": 0.3601, "step": 35137 }, { "epoch": 2.6115198810850986, "grad_norm": 2.3054676319411733, "learning_rate": 8.666570664873975e-07, "loss": 0.2604, "step": 35138 }, { "epoch": 2.6115942028985506, "grad_norm": 2.290869722172386, "learning_rate": 8.663303630345577e-07, "loss": 0.295, "step": 35139 }, { "epoch": 2.611668524712003, "grad_norm": 2.2542717006099067, "learning_rate": 8.660037183838088e-07, "loss": 0.2515, "step": 35140 }, { "epoch": 2.611742846525455, "grad_norm": 2.240388992241448, "learning_rate": 8.656771325372537e-07, "loss": 0.2917, "step": 35141 }, { "epoch": 2.6118171683389075, "grad_norm": 4.132543816776559, "learning_rate": 8.653506054969951e-07, "loss": 0.3771, "step": 35142 }, { "epoch": 2.6118914901523596, "grad_norm": 2.4257487804797377, "learning_rate": 8.650241372651336e-07, "loss": 0.1821, "step": 35143 }, { "epoch": 2.611965811965812, "grad_norm": 2.2835854107913516, "learning_rate": 8.646977278437751e-07, "loss": 0.2426, "step": 35144 }, { "epoch": 2.6120401337792645, "grad_norm": 2.0377419764472435, "learning_rate": 8.643713772350159e-07, "loss": 0.2374, "step": 35145 }, { "epoch": 2.6121144555927165, "grad_norm": 2.2467342279199713, "learning_rate": 8.640450854409621e-07, "loss": 0.2791, "step": 35146 }, { "epoch": 2.6121887774061685, "grad_norm": 1.673470327881683, "learning_rate": 8.63718852463713e-07, "loss": 0.2272, "step": 35147 }, { "epoch": 2.612263099219621, "grad_norm": 2.2549782563843923, "learning_rate": 8.633926783053648e-07, "loss": 0.1822, "step": 35148 }, { "epoch": 2.6123374210330734, "grad_norm": 1.8979444186055079, "learning_rate": 8.630665629680224e-07, "loss": 0.242, "step": 35149 }, { "epoch": 2.6124117428465254, "grad_norm": 3.18688987076952, "learning_rate": 8.627405064537819e-07, "loss": 0.2279, "step": 35150 }, { "epoch": 2.6124860646599775, "grad_norm": 2.632229688160231, "learning_rate": 8.624145087647451e-07, "loss": 0.3247, "step": 35151 }, { "epoch": 2.61256038647343, "grad_norm": 2.603834288706877, "learning_rate": 8.620885699030102e-07, "loss": 0.3928, "step": 35152 }, { "epoch": 2.6126347082868824, "grad_norm": 2.5202352402872634, "learning_rate": 8.617626898706755e-07, "loss": 0.2909, "step": 35153 }, { "epoch": 2.6127090301003344, "grad_norm": 2.6101126377086024, "learning_rate": 8.614368686698371e-07, "loss": 0.3384, "step": 35154 }, { "epoch": 2.612783351913787, "grad_norm": 1.987132730350882, "learning_rate": 8.611111063025935e-07, "loss": 0.2336, "step": 35155 }, { "epoch": 2.612857673727239, "grad_norm": 2.3010115715928614, "learning_rate": 8.607854027710438e-07, "loss": 0.2582, "step": 35156 }, { "epoch": 2.6129319955406913, "grad_norm": 1.7980201077523108, "learning_rate": 8.604597580772833e-07, "loss": 0.2139, "step": 35157 }, { "epoch": 2.6130063173541433, "grad_norm": 2.3175487364147043, "learning_rate": 8.601341722234058e-07, "loss": 0.2601, "step": 35158 }, { "epoch": 2.613080639167596, "grad_norm": 2.8944145431898907, "learning_rate": 8.59808645211514e-07, "loss": 0.2745, "step": 35159 }, { "epoch": 2.613154960981048, "grad_norm": 2.137930002700727, "learning_rate": 8.594831770436963e-07, "loss": 0.2748, "step": 35160 }, { "epoch": 2.6132292827945003, "grad_norm": 2.6497010349413705, "learning_rate": 8.591577677220553e-07, "loss": 0.2335, "step": 35161 }, { "epoch": 2.6133036046079523, "grad_norm": 2.0914489661936733, "learning_rate": 8.588324172486806e-07, "loss": 0.2889, "step": 35162 }, { "epoch": 2.6133779264214048, "grad_norm": 2.9142893667930876, "learning_rate": 8.585071256256694e-07, "loss": 0.2917, "step": 35163 }, { "epoch": 2.613452248234857, "grad_norm": 1.8942904434292245, "learning_rate": 8.581818928551144e-07, "loss": 0.1949, "step": 35164 }, { "epoch": 2.6135265700483092, "grad_norm": 2.791293132382446, "learning_rate": 8.578567189391085e-07, "loss": 0.361, "step": 35165 }, { "epoch": 2.6136008918617613, "grad_norm": 2.446524930942007, "learning_rate": 8.575316038797488e-07, "loss": 0.2945, "step": 35166 }, { "epoch": 2.6136752136752137, "grad_norm": 2.580931383526807, "learning_rate": 8.572065476791247e-07, "loss": 0.3376, "step": 35167 }, { "epoch": 2.613749535488666, "grad_norm": 1.782842373640981, "learning_rate": 8.568815503393313e-07, "loss": 0.2464, "step": 35168 }, { "epoch": 2.613823857302118, "grad_norm": 3.6579827114682533, "learning_rate": 8.565566118624613e-07, "loss": 0.265, "step": 35169 }, { "epoch": 2.61389817911557, "grad_norm": 2.5738841337781495, "learning_rate": 8.56231732250603e-07, "loss": 0.2572, "step": 35170 }, { "epoch": 2.6139725009290227, "grad_norm": 3.031251399129134, "learning_rate": 8.559069115058516e-07, "loss": 0.4148, "step": 35171 }, { "epoch": 2.614046822742475, "grad_norm": 2.143426831702801, "learning_rate": 8.555821496302963e-07, "loss": 0.2312, "step": 35172 }, { "epoch": 2.614121144555927, "grad_norm": 1.5609955268564175, "learning_rate": 8.5525744662603e-07, "loss": 0.1995, "step": 35173 }, { "epoch": 2.614195466369379, "grad_norm": 2.9624629738670945, "learning_rate": 8.549328024951409e-07, "loss": 0.3205, "step": 35174 }, { "epoch": 2.6142697881828316, "grad_norm": 2.635318281491249, "learning_rate": 8.546082172397208e-07, "loss": 0.2516, "step": 35175 }, { "epoch": 2.614344109996284, "grad_norm": 2.131791086585752, "learning_rate": 8.54283690861859e-07, "loss": 0.2841, "step": 35176 }, { "epoch": 2.614418431809736, "grad_norm": 1.7499441621756742, "learning_rate": 8.539592233636418e-07, "loss": 0.2038, "step": 35177 }, { "epoch": 2.6144927536231886, "grad_norm": 2.40840962962514, "learning_rate": 8.536348147471618e-07, "loss": 0.2936, "step": 35178 }, { "epoch": 2.6145670754366406, "grad_norm": 4.762152715796027, "learning_rate": 8.533104650145075e-07, "loss": 0.2342, "step": 35179 }, { "epoch": 2.614641397250093, "grad_norm": 2.491900469870969, "learning_rate": 8.529861741677626e-07, "loss": 0.3002, "step": 35180 }, { "epoch": 2.614715719063545, "grad_norm": 2.2374898720757015, "learning_rate": 8.52661942209021e-07, "loss": 0.2666, "step": 35181 }, { "epoch": 2.6147900408769975, "grad_norm": 2.2797878260596716, "learning_rate": 8.523377691403656e-07, "loss": 0.2663, "step": 35182 }, { "epoch": 2.6148643626904495, "grad_norm": 2.6646696883347576, "learning_rate": 8.520136549638857e-07, "loss": 0.3363, "step": 35183 }, { "epoch": 2.614938684503902, "grad_norm": 2.6246507397512624, "learning_rate": 8.516895996816677e-07, "loss": 0.3054, "step": 35184 }, { "epoch": 2.615013006317354, "grad_norm": 2.2418155271235176, "learning_rate": 8.513656032957963e-07, "loss": 0.2718, "step": 35185 }, { "epoch": 2.6150873281308065, "grad_norm": 2.2080146538556673, "learning_rate": 8.510416658083608e-07, "loss": 0.2377, "step": 35186 }, { "epoch": 2.6151616499442585, "grad_norm": 1.737514790702747, "learning_rate": 8.507177872214423e-07, "loss": 0.1879, "step": 35187 }, { "epoch": 2.615235971757711, "grad_norm": 2.209110324119731, "learning_rate": 8.503939675371287e-07, "loss": 0.2225, "step": 35188 }, { "epoch": 2.615310293571163, "grad_norm": 2.602022986606487, "learning_rate": 8.500702067575029e-07, "loss": 0.3533, "step": 35189 }, { "epoch": 2.6153846153846154, "grad_norm": 2.1105493437134135, "learning_rate": 8.49746504884651e-07, "loss": 0.2695, "step": 35190 }, { "epoch": 2.615458937198068, "grad_norm": 2.372571467305074, "learning_rate": 8.494228619206568e-07, "loss": 0.196, "step": 35191 }, { "epoch": 2.61553325901152, "grad_norm": 2.215999465845336, "learning_rate": 8.490992778676022e-07, "loss": 0.2696, "step": 35192 }, { "epoch": 2.615607580824972, "grad_norm": 1.8659416772213684, "learning_rate": 8.487757527275731e-07, "loss": 0.2203, "step": 35193 }, { "epoch": 2.6156819026384244, "grad_norm": 2.8265093044052514, "learning_rate": 8.484522865026501e-07, "loss": 0.2832, "step": 35194 }, { "epoch": 2.615756224451877, "grad_norm": 2.3153256895411203, "learning_rate": 8.481288791949161e-07, "loss": 0.2574, "step": 35195 }, { "epoch": 2.615830546265329, "grad_norm": 3.2980445839171653, "learning_rate": 8.478055308064548e-07, "loss": 0.4464, "step": 35196 }, { "epoch": 2.615904868078781, "grad_norm": 2.033494047557151, "learning_rate": 8.474822413393457e-07, "loss": 0.27, "step": 35197 }, { "epoch": 2.6159791898922333, "grad_norm": 2.923800763916541, "learning_rate": 8.471590107956717e-07, "loss": 0.3265, "step": 35198 }, { "epoch": 2.616053511705686, "grad_norm": 2.0590045697901513, "learning_rate": 8.468358391775111e-07, "loss": 0.227, "step": 35199 }, { "epoch": 2.616127833519138, "grad_norm": 2.505321490793796, "learning_rate": 8.465127264869488e-07, "loss": 0.2768, "step": 35200 }, { "epoch": 2.6162021553325903, "grad_norm": 2.489986925705274, "learning_rate": 8.461896727260621e-07, "loss": 0.3145, "step": 35201 }, { "epoch": 2.6162764771460423, "grad_norm": 2.5014024976008904, "learning_rate": 8.458666778969304e-07, "loss": 0.3143, "step": 35202 }, { "epoch": 2.6163507989594947, "grad_norm": 2.8379035660356, "learning_rate": 8.455437420016344e-07, "loss": 0.3434, "step": 35203 }, { "epoch": 2.6164251207729468, "grad_norm": 2.0583902009160386, "learning_rate": 8.452208650422522e-07, "loss": 0.2124, "step": 35204 }, { "epoch": 2.616499442586399, "grad_norm": 2.729433385239824, "learning_rate": 8.448980470208645e-07, "loss": 0.2825, "step": 35205 }, { "epoch": 2.6165737643998512, "grad_norm": 2.117166385214646, "learning_rate": 8.445752879395474e-07, "loss": 0.2556, "step": 35206 }, { "epoch": 2.6166480862133037, "grad_norm": 2.2674665545866293, "learning_rate": 8.442525878003793e-07, "loss": 0.2888, "step": 35207 }, { "epoch": 2.6167224080267557, "grad_norm": 1.952298764450829, "learning_rate": 8.439299466054385e-07, "loss": 0.2305, "step": 35208 }, { "epoch": 2.616796729840208, "grad_norm": 2.555069475007722, "learning_rate": 8.436073643568021e-07, "loss": 0.3367, "step": 35209 }, { "epoch": 2.61687105165366, "grad_norm": 2.4444828928982325, "learning_rate": 8.432848410565464e-07, "loss": 0.2578, "step": 35210 }, { "epoch": 2.6169453734671126, "grad_norm": 2.0543742973966532, "learning_rate": 8.429623767067451e-07, "loss": 0.2206, "step": 35211 }, { "epoch": 2.6170196952805647, "grad_norm": 2.1905698446445028, "learning_rate": 8.426399713094801e-07, "loss": 0.2475, "step": 35212 }, { "epoch": 2.617094017094017, "grad_norm": 1.938710494840884, "learning_rate": 8.42317624866823e-07, "loss": 0.2218, "step": 35213 }, { "epoch": 2.6171683389074696, "grad_norm": 2.1367199466449214, "learning_rate": 8.419953373808487e-07, "loss": 0.2217, "step": 35214 }, { "epoch": 2.6172426607209216, "grad_norm": 2.76022345279309, "learning_rate": 8.416731088536345e-07, "loss": 0.3642, "step": 35215 }, { "epoch": 2.6173169825343736, "grad_norm": 2.9502611356087134, "learning_rate": 8.413509392872532e-07, "loss": 0.3219, "step": 35216 }, { "epoch": 2.617391304347826, "grad_norm": 2.3504757472947064, "learning_rate": 8.410288286837786e-07, "loss": 0.2547, "step": 35217 }, { "epoch": 2.6174656261612785, "grad_norm": 2.814498625201702, "learning_rate": 8.40706777045287e-07, "loss": 0.2823, "step": 35218 }, { "epoch": 2.6175399479747306, "grad_norm": 3.277624528991114, "learning_rate": 8.403847843738467e-07, "loss": 0.3726, "step": 35219 }, { "epoch": 2.6176142697881826, "grad_norm": 2.737774862153296, "learning_rate": 8.400628506715392e-07, "loss": 0.2909, "step": 35220 }, { "epoch": 2.617688591601635, "grad_norm": 3.338820831891384, "learning_rate": 8.397409759404274e-07, "loss": 0.3698, "step": 35221 }, { "epoch": 2.6177629134150875, "grad_norm": 2.5735215855691735, "learning_rate": 8.394191601825896e-07, "loss": 0.3026, "step": 35222 }, { "epoch": 2.6178372352285395, "grad_norm": 2.480885980837005, "learning_rate": 8.390974034000954e-07, "loss": 0.2895, "step": 35223 }, { "epoch": 2.617911557041992, "grad_norm": 2.559444641825365, "learning_rate": 8.387757055950163e-07, "loss": 0.2714, "step": 35224 }, { "epoch": 2.617985878855444, "grad_norm": 1.8805511610884438, "learning_rate": 8.384540667694241e-07, "loss": 0.1968, "step": 35225 }, { "epoch": 2.6180602006688964, "grad_norm": 2.0451101890652374, "learning_rate": 8.381324869253893e-07, "loss": 0.2187, "step": 35226 }, { "epoch": 2.6181345224823485, "grad_norm": 2.871939985276144, "learning_rate": 8.378109660649825e-07, "loss": 0.3539, "step": 35227 }, { "epoch": 2.618208844295801, "grad_norm": 1.9732412284136212, "learning_rate": 8.374895041902742e-07, "loss": 0.2569, "step": 35228 }, { "epoch": 2.618283166109253, "grad_norm": 2.3533261578868094, "learning_rate": 8.371681013033306e-07, "loss": 0.3235, "step": 35229 }, { "epoch": 2.6183574879227054, "grad_norm": 2.024479585694187, "learning_rate": 8.368467574062255e-07, "loss": 0.3001, "step": 35230 }, { "epoch": 2.6184318097361574, "grad_norm": 2.0950471678418503, "learning_rate": 8.365254725010252e-07, "loss": 0.2548, "step": 35231 }, { "epoch": 2.61850613154961, "grad_norm": 1.982301980123058, "learning_rate": 8.362042465897991e-07, "loss": 0.2445, "step": 35232 }, { "epoch": 2.6185804533630623, "grad_norm": 2.2979746871471107, "learning_rate": 8.35883079674612e-07, "loss": 0.2123, "step": 35233 }, { "epoch": 2.6186547751765143, "grad_norm": 2.4858702358525226, "learning_rate": 8.355619717575369e-07, "loss": 0.2528, "step": 35234 }, { "epoch": 2.6187290969899664, "grad_norm": 2.411370518140484, "learning_rate": 8.352409228406377e-07, "loss": 0.253, "step": 35235 }, { "epoch": 2.618803418803419, "grad_norm": 2.4043440134659066, "learning_rate": 8.349199329259794e-07, "loss": 0.2688, "step": 35236 }, { "epoch": 2.6188777406168713, "grad_norm": 2.832836494437822, "learning_rate": 8.345990020156336e-07, "loss": 0.3118, "step": 35237 }, { "epoch": 2.6189520624303233, "grad_norm": 2.7910175771092627, "learning_rate": 8.342781301116642e-07, "loss": 0.3507, "step": 35238 }, { "epoch": 2.6190263842437753, "grad_norm": 1.8439386081975664, "learning_rate": 8.339573172161342e-07, "loss": 0.1949, "step": 35239 }, { "epoch": 2.619100706057228, "grad_norm": 2.3182946353355867, "learning_rate": 8.336365633311139e-07, "loss": 0.2044, "step": 35240 }, { "epoch": 2.6191750278706802, "grad_norm": 2.236996088415423, "learning_rate": 8.333158684586628e-07, "loss": 0.2349, "step": 35241 }, { "epoch": 2.6192493496841323, "grad_norm": 2.2770580438050154, "learning_rate": 8.329952326008539e-07, "loss": 0.307, "step": 35242 }, { "epoch": 2.6193236714975843, "grad_norm": 2.379989041221036, "learning_rate": 8.32674655759741e-07, "loss": 0.2521, "step": 35243 }, { "epoch": 2.6193979933110367, "grad_norm": 2.350904150314708, "learning_rate": 8.323541379373956e-07, "loss": 0.288, "step": 35244 }, { "epoch": 2.619472315124489, "grad_norm": 2.4787754538311844, "learning_rate": 8.320336791358774e-07, "loss": 0.2488, "step": 35245 }, { "epoch": 2.619546636937941, "grad_norm": 2.55677396334775, "learning_rate": 8.317132793572502e-07, "loss": 0.287, "step": 35246 }, { "epoch": 2.6196209587513937, "grad_norm": 2.114077203058429, "learning_rate": 8.31392938603578e-07, "loss": 0.2124, "step": 35247 }, { "epoch": 2.6196952805648457, "grad_norm": 2.8531057318645927, "learning_rate": 8.310726568769211e-07, "loss": 0.2978, "step": 35248 }, { "epoch": 2.619769602378298, "grad_norm": 2.587575808341771, "learning_rate": 8.307524341793449e-07, "loss": 0.2714, "step": 35249 }, { "epoch": 2.61984392419175, "grad_norm": 1.9801254760321236, "learning_rate": 8.304322705129075e-07, "loss": 0.2636, "step": 35250 }, { "epoch": 2.6199182460052026, "grad_norm": 2.372840889815269, "learning_rate": 8.301121658796707e-07, "loss": 0.2942, "step": 35251 }, { "epoch": 2.6199925678186546, "grad_norm": 2.233309104869661, "learning_rate": 8.297921202816972e-07, "loss": 0.2261, "step": 35252 }, { "epoch": 2.620066889632107, "grad_norm": 2.151917307236356, "learning_rate": 8.294721337210454e-07, "loss": 0.21, "step": 35253 }, { "epoch": 2.620141211445559, "grad_norm": 2.8286055736422804, "learning_rate": 8.291522061997792e-07, "loss": 0.2607, "step": 35254 }, { "epoch": 2.6202155332590116, "grad_norm": 2.466621203702247, "learning_rate": 8.288323377199514e-07, "loss": 0.3417, "step": 35255 }, { "epoch": 2.620289855072464, "grad_norm": 2.0705091246084772, "learning_rate": 8.285125282836271e-07, "loss": 0.2563, "step": 35256 }, { "epoch": 2.620364176885916, "grad_norm": 2.5823060385915775, "learning_rate": 8.281927778928622e-07, "loss": 0.3441, "step": 35257 }, { "epoch": 2.620438498699368, "grad_norm": 2.71906924389632, "learning_rate": 8.278730865497165e-07, "loss": 0.3183, "step": 35258 }, { "epoch": 2.6205128205128205, "grad_norm": 1.8244501783520564, "learning_rate": 8.27553454256248e-07, "loss": 0.1952, "step": 35259 }, { "epoch": 2.620587142326273, "grad_norm": 2.1081765470258946, "learning_rate": 8.272338810145153e-07, "loss": 0.2611, "step": 35260 }, { "epoch": 2.620661464139725, "grad_norm": 2.4222753348533406, "learning_rate": 8.269143668265733e-07, "loss": 0.2742, "step": 35261 }, { "epoch": 2.620735785953177, "grad_norm": 2.5157752181864343, "learning_rate": 8.265949116944816e-07, "loss": 0.3173, "step": 35262 }, { "epoch": 2.6208101077666295, "grad_norm": 2.4293538123064042, "learning_rate": 8.262755156202951e-07, "loss": 0.2708, "step": 35263 }, { "epoch": 2.620884429580082, "grad_norm": 3.131411819903803, "learning_rate": 8.259561786060722e-07, "loss": 0.2936, "step": 35264 }, { "epoch": 2.620958751393534, "grad_norm": 1.9916435691599286, "learning_rate": 8.256369006538678e-07, "loss": 0.2656, "step": 35265 }, { "epoch": 2.621033073206986, "grad_norm": 2.8280287233722454, "learning_rate": 8.25317681765736e-07, "loss": 0.3534, "step": 35266 }, { "epoch": 2.6211073950204384, "grad_norm": 1.8382371330891216, "learning_rate": 8.24998521943734e-07, "loss": 0.2528, "step": 35267 }, { "epoch": 2.621181716833891, "grad_norm": 3.4795539565212037, "learning_rate": 8.246794211899134e-07, "loss": 0.3412, "step": 35268 }, { "epoch": 2.621256038647343, "grad_norm": 2.804939445544414, "learning_rate": 8.243603795063315e-07, "loss": 0.31, "step": 35269 }, { "epoch": 2.6213303604607954, "grad_norm": 1.988942814378449, "learning_rate": 8.240413968950412e-07, "loss": 0.205, "step": 35270 }, { "epoch": 2.6214046822742474, "grad_norm": 2.815908914348136, "learning_rate": 8.237224733580961e-07, "loss": 0.2906, "step": 35271 }, { "epoch": 2.6214790040877, "grad_norm": 2.3000800076945804, "learning_rate": 8.234036088975517e-07, "loss": 0.3063, "step": 35272 }, { "epoch": 2.621553325901152, "grad_norm": 2.2232179078572596, "learning_rate": 8.230848035154548e-07, "loss": 0.185, "step": 35273 }, { "epoch": 2.6216276477146043, "grad_norm": 2.1787774789903924, "learning_rate": 8.227660572138651e-07, "loss": 0.2133, "step": 35274 }, { "epoch": 2.6217019695280563, "grad_norm": 2.267188558868765, "learning_rate": 8.224473699948288e-07, "loss": 0.2351, "step": 35275 }, { "epoch": 2.621776291341509, "grad_norm": 1.9488700255486988, "learning_rate": 8.22128741860404e-07, "loss": 0.1992, "step": 35276 }, { "epoch": 2.621850613154961, "grad_norm": 2.5362930931451864, "learning_rate": 8.218101728126349e-07, "loss": 0.2927, "step": 35277 }, { "epoch": 2.6219249349684133, "grad_norm": 2.9638046741344897, "learning_rate": 8.214916628535774e-07, "loss": 0.3783, "step": 35278 }, { "epoch": 2.6219992567818657, "grad_norm": 2.1405782855396227, "learning_rate": 8.2117321198528e-07, "loss": 0.1712, "step": 35279 }, { "epoch": 2.6220735785953178, "grad_norm": 2.0299069806204484, "learning_rate": 8.208548202097921e-07, "loss": 0.2843, "step": 35280 }, { "epoch": 2.6221479004087698, "grad_norm": 1.9013661032381812, "learning_rate": 8.205364875291654e-07, "loss": 0.2017, "step": 35281 }, { "epoch": 2.6222222222222222, "grad_norm": 2.4140084814850846, "learning_rate": 8.202182139454484e-07, "loss": 0.3126, "step": 35282 }, { "epoch": 2.6222965440356747, "grad_norm": 3.9221618223445844, "learning_rate": 8.198999994606893e-07, "loss": 0.3683, "step": 35283 }, { "epoch": 2.6223708658491267, "grad_norm": 2.627549138671326, "learning_rate": 8.195818440769399e-07, "loss": 0.33, "step": 35284 }, { "epoch": 2.6224451876625787, "grad_norm": 1.9896800426351788, "learning_rate": 8.192637477962429e-07, "loss": 0.2559, "step": 35285 }, { "epoch": 2.622519509476031, "grad_norm": 2.3801626079960436, "learning_rate": 8.189457106206522e-07, "loss": 0.316, "step": 35286 }, { "epoch": 2.6225938312894836, "grad_norm": 2.21748900839936, "learning_rate": 8.18627732552213e-07, "loss": 0.194, "step": 35287 }, { "epoch": 2.6226681531029357, "grad_norm": 2.359358493735988, "learning_rate": 8.183098135929712e-07, "loss": 0.3001, "step": 35288 }, { "epoch": 2.622742474916388, "grad_norm": 2.1159569760093833, "learning_rate": 8.179919537449754e-07, "loss": 0.2042, "step": 35289 }, { "epoch": 2.62281679672984, "grad_norm": 2.9098229035272314, "learning_rate": 8.176741530102683e-07, "loss": 0.2958, "step": 35290 }, { "epoch": 2.6228911185432926, "grad_norm": 1.7043858070576083, "learning_rate": 8.173564113909005e-07, "loss": 0.2325, "step": 35291 }, { "epoch": 2.6229654403567446, "grad_norm": 3.542989768284081, "learning_rate": 8.170387288889137e-07, "loss": 0.3181, "step": 35292 }, { "epoch": 2.623039762170197, "grad_norm": 2.1638078589565395, "learning_rate": 8.167211055063573e-07, "loss": 0.2436, "step": 35293 }, { "epoch": 2.623114083983649, "grad_norm": 3.1285451969379565, "learning_rate": 8.164035412452742e-07, "loss": 0.2959, "step": 35294 }, { "epoch": 2.6231884057971016, "grad_norm": 2.1111020732221397, "learning_rate": 8.160860361077061e-07, "loss": 0.2398, "step": 35295 }, { "epoch": 2.6232627276105536, "grad_norm": 1.8992895538767354, "learning_rate": 8.157685900957024e-07, "loss": 0.2262, "step": 35296 }, { "epoch": 2.623337049424006, "grad_norm": 1.9230063808967908, "learning_rate": 8.154512032113016e-07, "loss": 0.2293, "step": 35297 }, { "epoch": 2.623411371237458, "grad_norm": 3.337321846207115, "learning_rate": 8.151338754565508e-07, "loss": 0.2834, "step": 35298 }, { "epoch": 2.6234856930509105, "grad_norm": 2.5386676928117757, "learning_rate": 8.148166068334918e-07, "loss": 0.3731, "step": 35299 }, { "epoch": 2.6235600148643625, "grad_norm": 2.3703905862268075, "learning_rate": 8.144993973441673e-07, "loss": 0.3343, "step": 35300 }, { "epoch": 2.623634336677815, "grad_norm": 1.5007269327784443, "learning_rate": 8.141822469906191e-07, "loss": 0.1619, "step": 35301 }, { "epoch": 2.6237086584912674, "grad_norm": 2.0823230653693248, "learning_rate": 8.138651557748867e-07, "loss": 0.2082, "step": 35302 }, { "epoch": 2.6237829803047195, "grad_norm": 2.156890420831941, "learning_rate": 8.13548123699015e-07, "loss": 0.2589, "step": 35303 }, { "epoch": 2.6238573021181715, "grad_norm": 2.3776191680065115, "learning_rate": 8.132311507650447e-07, "loss": 0.2781, "step": 35304 }, { "epoch": 2.623931623931624, "grad_norm": 2.9907700243505984, "learning_rate": 8.129142369750143e-07, "loss": 0.2964, "step": 35305 }, { "epoch": 2.6240059457450764, "grad_norm": 2.219068153267005, "learning_rate": 8.125973823309663e-07, "loss": 0.1437, "step": 35306 }, { "epoch": 2.6240802675585284, "grad_norm": 2.15519377143857, "learning_rate": 8.122805868349382e-07, "loss": 0.2416, "step": 35307 }, { "epoch": 2.6241545893719804, "grad_norm": 2.4150347737370947, "learning_rate": 8.119638504889726e-07, "loss": 0.2199, "step": 35308 }, { "epoch": 2.624228911185433, "grad_norm": 3.162500263685533, "learning_rate": 8.11647173295107e-07, "loss": 0.333, "step": 35309 }, { "epoch": 2.6243032329988853, "grad_norm": 3.0976108502398034, "learning_rate": 8.113305552553796e-07, "loss": 0.3218, "step": 35310 }, { "epoch": 2.6243775548123374, "grad_norm": 2.3916033999835937, "learning_rate": 8.1101399637183e-07, "loss": 0.3401, "step": 35311 }, { "epoch": 2.62445187662579, "grad_norm": 2.6820963879081967, "learning_rate": 8.106974966464942e-07, "loss": 0.2898, "step": 35312 }, { "epoch": 2.624526198439242, "grad_norm": 2.827048021483833, "learning_rate": 8.103810560814118e-07, "loss": 0.2456, "step": 35313 }, { "epoch": 2.6246005202526943, "grad_norm": 2.0581939797115325, "learning_rate": 8.100646746786179e-07, "loss": 0.2584, "step": 35314 }, { "epoch": 2.6246748420661463, "grad_norm": 2.4627945116933274, "learning_rate": 8.097483524401528e-07, "loss": 0.2764, "step": 35315 }, { "epoch": 2.624749163879599, "grad_norm": 3.091256815821172, "learning_rate": 8.094320893680507e-07, "loss": 0.3332, "step": 35316 }, { "epoch": 2.624823485693051, "grad_norm": 1.9438680593417004, "learning_rate": 8.091158854643466e-07, "loss": 0.2029, "step": 35317 }, { "epoch": 2.6248978075065033, "grad_norm": 1.9560154060562243, "learning_rate": 8.087997407310788e-07, "loss": 0.2097, "step": 35318 }, { "epoch": 2.6249721293199553, "grad_norm": 2.270697047022901, "learning_rate": 8.084836551702824e-07, "loss": 0.2691, "step": 35319 }, { "epoch": 2.6250464511334077, "grad_norm": 2.2620713573893565, "learning_rate": 8.081676287839879e-07, "loss": 0.281, "step": 35320 }, { "epoch": 2.6251207729468597, "grad_norm": 1.8789223738749372, "learning_rate": 8.07851661574236e-07, "loss": 0.2302, "step": 35321 }, { "epoch": 2.625195094760312, "grad_norm": 2.1296536373143806, "learning_rate": 8.075357535430583e-07, "loss": 0.2334, "step": 35322 }, { "epoch": 2.6252694165737642, "grad_norm": 2.112656035080698, "learning_rate": 8.072199046924878e-07, "loss": 0.2321, "step": 35323 }, { "epoch": 2.6253437383872167, "grad_norm": 2.0875851708609536, "learning_rate": 8.069041150245582e-07, "loss": 0.2534, "step": 35324 }, { "epoch": 2.625418060200669, "grad_norm": 2.6071643585128443, "learning_rate": 8.065883845413036e-07, "loss": 0.266, "step": 35325 }, { "epoch": 2.625492382014121, "grad_norm": 2.245972927292885, "learning_rate": 8.062727132447556e-07, "loss": 0.2688, "step": 35326 }, { "epoch": 2.625566703827573, "grad_norm": 2.5541948467247435, "learning_rate": 8.059571011369461e-07, "loss": 0.3126, "step": 35327 }, { "epoch": 2.6256410256410256, "grad_norm": 2.575802200293699, "learning_rate": 8.056415482199087e-07, "loss": 0.2504, "step": 35328 }, { "epoch": 2.625715347454478, "grad_norm": 2.924644266303143, "learning_rate": 8.05326054495672e-07, "loss": 0.2883, "step": 35329 }, { "epoch": 2.62578966926793, "grad_norm": 3.157867539488439, "learning_rate": 8.05010619966271e-07, "loss": 0.3042, "step": 35330 }, { "epoch": 2.625863991081382, "grad_norm": 2.2237558788203367, "learning_rate": 8.046952446337352e-07, "loss": 0.2314, "step": 35331 }, { "epoch": 2.6259383128948346, "grad_norm": 2.2641572854507945, "learning_rate": 8.04379928500092e-07, "loss": 0.2735, "step": 35332 }, { "epoch": 2.626012634708287, "grad_norm": 2.9893497577783394, "learning_rate": 8.040646715673772e-07, "loss": 0.2873, "step": 35333 }, { "epoch": 2.626086956521739, "grad_norm": 3.431730599071004, "learning_rate": 8.03749473837614e-07, "loss": 0.3685, "step": 35334 }, { "epoch": 2.6261612783351915, "grad_norm": 2.0323567715003312, "learning_rate": 8.03434335312836e-07, "loss": 0.1997, "step": 35335 }, { "epoch": 2.6262356001486435, "grad_norm": 2.364323189132826, "learning_rate": 8.031192559950684e-07, "loss": 0.2782, "step": 35336 }, { "epoch": 2.626309921962096, "grad_norm": 1.997331726665436, "learning_rate": 8.028042358863441e-07, "loss": 0.2659, "step": 35337 }, { "epoch": 2.626384243775548, "grad_norm": 2.20870332634509, "learning_rate": 8.024892749886881e-07, "loss": 0.2821, "step": 35338 }, { "epoch": 2.6264585655890005, "grad_norm": 3.2097038628526895, "learning_rate": 8.021743733041276e-07, "loss": 0.3254, "step": 35339 }, { "epoch": 2.6265328874024525, "grad_norm": 1.9777291716554313, "learning_rate": 8.018595308346932e-07, "loss": 0.2511, "step": 35340 }, { "epoch": 2.626607209215905, "grad_norm": 2.090732451143124, "learning_rate": 8.015447475824101e-07, "loss": 0.2288, "step": 35341 }, { "epoch": 2.626681531029357, "grad_norm": 2.349609572525877, "learning_rate": 8.012300235493031e-07, "loss": 0.2241, "step": 35342 }, { "epoch": 2.6267558528428094, "grad_norm": 2.4688864863947586, "learning_rate": 8.009153587374008e-07, "loss": 0.31, "step": 35343 }, { "epoch": 2.6268301746562615, "grad_norm": 2.490340507102301, "learning_rate": 8.006007531487281e-07, "loss": 0.2919, "step": 35344 }, { "epoch": 2.626904496469714, "grad_norm": 2.3812242263995578, "learning_rate": 8.002862067853112e-07, "loss": 0.2826, "step": 35345 }, { "epoch": 2.626978818283166, "grad_norm": 2.6288020251455677, "learning_rate": 7.999717196491718e-07, "loss": 0.3308, "step": 35346 }, { "epoch": 2.6270531400966184, "grad_norm": 2.1260888685269426, "learning_rate": 7.996572917423395e-07, "loss": 0.3126, "step": 35347 }, { "epoch": 2.627127461910071, "grad_norm": 2.4634571717200098, "learning_rate": 7.99342923066836e-07, "loss": 0.2165, "step": 35348 }, { "epoch": 2.627201783723523, "grad_norm": 2.6045146660730856, "learning_rate": 7.990286136246839e-07, "loss": 0.2631, "step": 35349 }, { "epoch": 2.627276105536975, "grad_norm": 2.4844614046245126, "learning_rate": 7.987143634179095e-07, "loss": 0.2575, "step": 35350 }, { "epoch": 2.6273504273504273, "grad_norm": 3.7452315420622697, "learning_rate": 7.984001724485324e-07, "loss": 0.4259, "step": 35351 }, { "epoch": 2.62742474916388, "grad_norm": 1.9819483678318297, "learning_rate": 7.980860407185808e-07, "loss": 0.1975, "step": 35352 }, { "epoch": 2.627499070977332, "grad_norm": 2.4337187405976652, "learning_rate": 7.977719682300722e-07, "loss": 0.2478, "step": 35353 }, { "epoch": 2.627573392790784, "grad_norm": 2.407480304146728, "learning_rate": 7.974579549850292e-07, "loss": 0.2867, "step": 35354 }, { "epoch": 2.6276477146042363, "grad_norm": 3.1100224408062007, "learning_rate": 7.971440009854781e-07, "loss": 0.3482, "step": 35355 }, { "epoch": 2.6277220364176888, "grad_norm": 2.6276541875823973, "learning_rate": 7.968301062334327e-07, "loss": 0.2878, "step": 35356 }, { "epoch": 2.6277963582311408, "grad_norm": 2.5963962732694488, "learning_rate": 7.965162707309193e-07, "loss": 0.3091, "step": 35357 }, { "epoch": 2.6278706800445932, "grad_norm": 2.1554713016258304, "learning_rate": 7.962024944799552e-07, "loss": 0.2626, "step": 35358 }, { "epoch": 2.6279450018580452, "grad_norm": 2.39992050709054, "learning_rate": 7.95888777482563e-07, "loss": 0.2944, "step": 35359 }, { "epoch": 2.6280193236714977, "grad_norm": 2.3819661126085463, "learning_rate": 7.955751197407613e-07, "loss": 0.2827, "step": 35360 }, { "epoch": 2.6280936454849497, "grad_norm": 2.7455779058806526, "learning_rate": 7.952615212565673e-07, "loss": 0.2948, "step": 35361 }, { "epoch": 2.628167967298402, "grad_norm": 2.345013251424263, "learning_rate": 7.949479820320049e-07, "loss": 0.2719, "step": 35362 }, { "epoch": 2.628242289111854, "grad_norm": 2.209706406442121, "learning_rate": 7.946345020690893e-07, "loss": 0.2362, "step": 35363 }, { "epoch": 2.6283166109253067, "grad_norm": 1.9205393144244511, "learning_rate": 7.943210813698377e-07, "loss": 0.2692, "step": 35364 }, { "epoch": 2.6283909327387587, "grad_norm": 2.053253295016972, "learning_rate": 7.940077199362706e-07, "loss": 0.2358, "step": 35365 }, { "epoch": 2.628465254552211, "grad_norm": 1.8311918379805279, "learning_rate": 7.93694417770402e-07, "loss": 0.2619, "step": 35366 }, { "epoch": 2.6285395763656636, "grad_norm": 2.47103697089257, "learning_rate": 7.933811748742548e-07, "loss": 0.2802, "step": 35367 }, { "epoch": 2.6286138981791156, "grad_norm": 2.995105374327032, "learning_rate": 7.930679912498384e-07, "loss": 0.2904, "step": 35368 }, { "epoch": 2.6286882199925676, "grad_norm": 2.7883185508643535, "learning_rate": 7.927548668991747e-07, "loss": 0.2669, "step": 35369 }, { "epoch": 2.62876254180602, "grad_norm": 1.99772560380131, "learning_rate": 7.924418018242774e-07, "loss": 0.1806, "step": 35370 }, { "epoch": 2.6288368636194726, "grad_norm": 2.6784621447485666, "learning_rate": 7.921287960271606e-07, "loss": 0.3641, "step": 35371 }, { "epoch": 2.6289111854329246, "grad_norm": 2.2744569731610476, "learning_rate": 7.918158495098416e-07, "loss": 0.3373, "step": 35372 }, { "epoch": 2.6289855072463766, "grad_norm": 3.021248327846046, "learning_rate": 7.915029622743331e-07, "loss": 0.3673, "step": 35373 }, { "epoch": 2.629059829059829, "grad_norm": 2.077562504358351, "learning_rate": 7.911901343226535e-07, "loss": 0.2397, "step": 35374 }, { "epoch": 2.6291341508732815, "grad_norm": 1.9769614117934862, "learning_rate": 7.908773656568125e-07, "loss": 0.2133, "step": 35375 }, { "epoch": 2.6292084726867335, "grad_norm": 2.3251316713707135, "learning_rate": 7.905646562788239e-07, "loss": 0.2517, "step": 35376 }, { "epoch": 2.6292827945001855, "grad_norm": 2.655817240327476, "learning_rate": 7.902520061907037e-07, "loss": 0.2487, "step": 35377 }, { "epoch": 2.629357116313638, "grad_norm": 1.9911192063684748, "learning_rate": 7.89939415394464e-07, "loss": 0.255, "step": 35378 }, { "epoch": 2.6294314381270905, "grad_norm": 2.519591443769326, "learning_rate": 7.896268838921162e-07, "loss": 0.2886, "step": 35379 }, { "epoch": 2.6295057599405425, "grad_norm": 2.4176876541372696, "learning_rate": 7.89314411685671e-07, "loss": 0.2643, "step": 35380 }, { "epoch": 2.629580081753995, "grad_norm": 2.4434207690163596, "learning_rate": 7.890019987771425e-07, "loss": 0.272, "step": 35381 }, { "epoch": 2.629654403567447, "grad_norm": 2.0055781797160677, "learning_rate": 7.886896451685422e-07, "loss": 0.2392, "step": 35382 }, { "epoch": 2.6297287253808994, "grad_norm": 3.096674439814539, "learning_rate": 7.883773508618786e-07, "loss": 0.3635, "step": 35383 }, { "epoch": 2.6298030471943514, "grad_norm": 3.006553630529984, "learning_rate": 7.880651158591646e-07, "loss": 0.3234, "step": 35384 }, { "epoch": 2.629877369007804, "grad_norm": 2.884404181915737, "learning_rate": 7.877529401624095e-07, "loss": 0.2522, "step": 35385 }, { "epoch": 2.629951690821256, "grad_norm": 2.275324609111064, "learning_rate": 7.874408237736208e-07, "loss": 0.3057, "step": 35386 }, { "epoch": 2.6300260126347084, "grad_norm": 2.024077189185632, "learning_rate": 7.871287666948124e-07, "loss": 0.2591, "step": 35387 }, { "epoch": 2.6301003344481604, "grad_norm": 2.108753213559413, "learning_rate": 7.868167689279893e-07, "loss": 0.2261, "step": 35388 }, { "epoch": 2.630174656261613, "grad_norm": 2.321898932104619, "learning_rate": 7.865048304751654e-07, "loss": 0.2193, "step": 35389 }, { "epoch": 2.6302489780750653, "grad_norm": 2.2340483786740575, "learning_rate": 7.861929513383415e-07, "loss": 0.2323, "step": 35390 }, { "epoch": 2.6303232998885173, "grad_norm": 2.5505258279646683, "learning_rate": 7.858811315195314e-07, "loss": 0.3158, "step": 35391 }, { "epoch": 2.6303976217019693, "grad_norm": 1.7157594175285458, "learning_rate": 7.855693710207401e-07, "loss": 0.2136, "step": 35392 }, { "epoch": 2.630471943515422, "grad_norm": 2.9073983189727723, "learning_rate": 7.852576698439729e-07, "loss": 0.2618, "step": 35393 }, { "epoch": 2.6305462653288743, "grad_norm": 3.0637332983078407, "learning_rate": 7.849460279912414e-07, "loss": 0.3179, "step": 35394 }, { "epoch": 2.6306205871423263, "grad_norm": 2.178838324269606, "learning_rate": 7.846344454645472e-07, "loss": 0.2393, "step": 35395 }, { "epoch": 2.6306949089557783, "grad_norm": 2.0873734398258748, "learning_rate": 7.843229222658999e-07, "loss": 0.2223, "step": 35396 }, { "epoch": 2.6307692307692307, "grad_norm": 2.3004447134629, "learning_rate": 7.840114583973035e-07, "loss": 0.3402, "step": 35397 }, { "epoch": 2.630843552582683, "grad_norm": 2.4486533875663867, "learning_rate": 7.837000538607609e-07, "loss": 0.2509, "step": 35398 }, { "epoch": 2.6309178743961352, "grad_norm": 1.944668130669782, "learning_rate": 7.833887086582814e-07, "loss": 0.2528, "step": 35399 }, { "epoch": 2.6309921962095872, "grad_norm": 4.064997863887861, "learning_rate": 7.830774227918658e-07, "loss": 0.2816, "step": 35400 }, { "epoch": 2.6310665180230397, "grad_norm": 2.270555056895954, "learning_rate": 7.827661962635225e-07, "loss": 0.2615, "step": 35401 }, { "epoch": 2.631140839836492, "grad_norm": 3.1026227168453384, "learning_rate": 7.824550290752487e-07, "loss": 0.2728, "step": 35402 }, { "epoch": 2.631215161649944, "grad_norm": 2.2075647406031225, "learning_rate": 7.821439212290538e-07, "loss": 0.2174, "step": 35403 }, { "epoch": 2.6312894834633966, "grad_norm": 3.1863550061571178, "learning_rate": 7.818328727269364e-07, "loss": 0.3337, "step": 35404 }, { "epoch": 2.6313638052768487, "grad_norm": 2.016345577658078, "learning_rate": 7.815218835709004e-07, "loss": 0.2338, "step": 35405 }, { "epoch": 2.631438127090301, "grad_norm": 2.4907033810540073, "learning_rate": 7.812109537629497e-07, "loss": 0.2775, "step": 35406 }, { "epoch": 2.631512448903753, "grad_norm": 2.381674007630268, "learning_rate": 7.809000833050839e-07, "loss": 0.2614, "step": 35407 }, { "epoch": 2.6315867707172056, "grad_norm": 2.409971972918312, "learning_rate": 7.805892721993036e-07, "loss": 0.278, "step": 35408 }, { "epoch": 2.6316610925306576, "grad_norm": 2.1440603879815585, "learning_rate": 7.802785204476126e-07, "loss": 0.2279, "step": 35409 }, { "epoch": 2.63173541434411, "grad_norm": 2.8865691651349747, "learning_rate": 7.799678280520085e-07, "loss": 0.3202, "step": 35410 }, { "epoch": 2.631809736157562, "grad_norm": 2.3333183110989446, "learning_rate": 7.79657195014496e-07, "loss": 0.2789, "step": 35411 }, { "epoch": 2.6318840579710145, "grad_norm": 2.622165743801223, "learning_rate": 7.793466213370704e-07, "loss": 0.2777, "step": 35412 }, { "epoch": 2.631958379784467, "grad_norm": 2.380690101253334, "learning_rate": 7.790361070217334e-07, "loss": 0.2533, "step": 35413 }, { "epoch": 2.632032701597919, "grad_norm": 2.0817339510116204, "learning_rate": 7.787256520704844e-07, "loss": 0.2854, "step": 35414 }, { "epoch": 2.632107023411371, "grad_norm": 2.5300344477302743, "learning_rate": 7.784152564853186e-07, "loss": 0.2446, "step": 35415 }, { "epoch": 2.6321813452248235, "grad_norm": 1.980817106362056, "learning_rate": 7.781049202682389e-07, "loss": 0.2121, "step": 35416 }, { "epoch": 2.632255667038276, "grad_norm": 2.212244117309681, "learning_rate": 7.777946434212392e-07, "loss": 0.3184, "step": 35417 }, { "epoch": 2.632329988851728, "grad_norm": 2.62656793986391, "learning_rate": 7.77484425946321e-07, "loss": 0.2925, "step": 35418 }, { "epoch": 2.63240431066518, "grad_norm": 2.826021510865806, "learning_rate": 7.771742678454797e-07, "loss": 0.3588, "step": 35419 }, { "epoch": 2.6324786324786325, "grad_norm": 2.2376069122252193, "learning_rate": 7.76864169120709e-07, "loss": 0.3131, "step": 35420 }, { "epoch": 2.632552954292085, "grad_norm": 2.9636826160018575, "learning_rate": 7.765541297740109e-07, "loss": 0.379, "step": 35421 }, { "epoch": 2.632627276105537, "grad_norm": 2.4582548489252267, "learning_rate": 7.762441498073781e-07, "loss": 0.2186, "step": 35422 }, { "epoch": 2.6327015979189894, "grad_norm": 2.0036804197220435, "learning_rate": 7.759342292228089e-07, "loss": 0.1863, "step": 35423 }, { "epoch": 2.6327759197324414, "grad_norm": 3.1785695124595787, "learning_rate": 7.75624368022293e-07, "loss": 0.3625, "step": 35424 }, { "epoch": 2.632850241545894, "grad_norm": 2.407208863781515, "learning_rate": 7.75314566207831e-07, "loss": 0.2637, "step": 35425 }, { "epoch": 2.632924563359346, "grad_norm": 3.031418079139868, "learning_rate": 7.750048237814157e-07, "loss": 0.3437, "step": 35426 }, { "epoch": 2.6329988851727983, "grad_norm": 2.819391294123151, "learning_rate": 7.746951407450387e-07, "loss": 0.3667, "step": 35427 }, { "epoch": 2.6330732069862504, "grad_norm": 2.685210488416222, "learning_rate": 7.743855171006965e-07, "loss": 0.3246, "step": 35428 }, { "epoch": 2.633147528799703, "grad_norm": 2.5283190829173843, "learning_rate": 7.740759528503827e-07, "loss": 0.2823, "step": 35429 }, { "epoch": 2.633221850613155, "grad_norm": 2.070375645493983, "learning_rate": 7.737664479960871e-07, "loss": 0.2334, "step": 35430 }, { "epoch": 2.6332961724266073, "grad_norm": 2.465182210764477, "learning_rate": 7.734570025398058e-07, "loss": 0.3061, "step": 35431 }, { "epoch": 2.6333704942400593, "grad_norm": 2.127701574328397, "learning_rate": 7.731476164835283e-07, "loss": 0.281, "step": 35432 }, { "epoch": 2.6334448160535118, "grad_norm": 2.149136557251652, "learning_rate": 7.728382898292497e-07, "loss": 0.2644, "step": 35433 }, { "epoch": 2.633519137866964, "grad_norm": 2.8446031180728366, "learning_rate": 7.725290225789583e-07, "loss": 0.275, "step": 35434 }, { "epoch": 2.6335934596804162, "grad_norm": 2.1712781908174605, "learning_rate": 7.722198147346471e-07, "loss": 0.2722, "step": 35435 }, { "epoch": 2.6336677814938687, "grad_norm": 1.9618442813172583, "learning_rate": 7.719106662983067e-07, "loss": 0.1956, "step": 35436 }, { "epoch": 2.6337421033073207, "grad_norm": 1.9311828676406178, "learning_rate": 7.716015772719231e-07, "loss": 0.2548, "step": 35437 }, { "epoch": 2.6338164251207727, "grad_norm": 2.2181048156947796, "learning_rate": 7.712925476574928e-07, "loss": 0.2545, "step": 35438 }, { "epoch": 2.633890746934225, "grad_norm": 2.6762683277577155, "learning_rate": 7.709835774569996e-07, "loss": 0.1672, "step": 35439 }, { "epoch": 2.6339650687476777, "grad_norm": 3.201931783189994, "learning_rate": 7.706746666724385e-07, "loss": 0.3162, "step": 35440 }, { "epoch": 2.6340393905611297, "grad_norm": 2.2072497022490096, "learning_rate": 7.703658153057936e-07, "loss": 0.2428, "step": 35441 }, { "epoch": 2.6341137123745817, "grad_norm": 3.174339964780814, "learning_rate": 7.700570233590531e-07, "loss": 0.2992, "step": 35442 }, { "epoch": 2.634188034188034, "grad_norm": 1.996367507196648, "learning_rate": 7.697482908342091e-07, "loss": 0.2121, "step": 35443 }, { "epoch": 2.6342623560014866, "grad_norm": 2.7229743521576744, "learning_rate": 7.694396177332442e-07, "loss": 0.3245, "step": 35444 }, { "epoch": 2.6343366778149386, "grad_norm": 2.7558656367635956, "learning_rate": 7.691310040581501e-07, "loss": 0.2919, "step": 35445 }, { "epoch": 2.634410999628391, "grad_norm": 2.027818496164705, "learning_rate": 7.688224498109131e-07, "loss": 0.2824, "step": 35446 }, { "epoch": 2.634485321441843, "grad_norm": 2.599378660875427, "learning_rate": 7.68513954993515e-07, "loss": 0.2778, "step": 35447 }, { "epoch": 2.6345596432552956, "grad_norm": 2.6451651952141177, "learning_rate": 7.682055196079463e-07, "loss": 0.3, "step": 35448 }, { "epoch": 2.6346339650687476, "grad_norm": 1.8291007452802288, "learning_rate": 7.67897143656191e-07, "loss": 0.236, "step": 35449 }, { "epoch": 2.6347082868822, "grad_norm": 3.20761750112477, "learning_rate": 7.675888271402354e-07, "loss": 0.3176, "step": 35450 }, { "epoch": 2.634782608695652, "grad_norm": 2.402281756875424, "learning_rate": 7.672805700620645e-07, "loss": 0.3603, "step": 35451 }, { "epoch": 2.6348569305091045, "grad_norm": 2.5749708193621452, "learning_rate": 7.6697237242366e-07, "loss": 0.3175, "step": 35452 }, { "epoch": 2.6349312523225565, "grad_norm": 2.4768613837430253, "learning_rate": 7.666642342270103e-07, "loss": 0.318, "step": 35453 }, { "epoch": 2.635005574136009, "grad_norm": 2.18985681082103, "learning_rate": 7.66356155474095e-07, "loss": 0.2564, "step": 35454 }, { "epoch": 2.635079895949461, "grad_norm": 2.4710026502303943, "learning_rate": 7.660481361669025e-07, "loss": 0.2862, "step": 35455 }, { "epoch": 2.6351542177629135, "grad_norm": 2.061308777283302, "learning_rate": 7.657401763074124e-07, "loss": 0.2551, "step": 35456 }, { "epoch": 2.6352285395763655, "grad_norm": 1.8580202346235133, "learning_rate": 7.654322758976074e-07, "loss": 0.2238, "step": 35457 }, { "epoch": 2.635302861389818, "grad_norm": 4.377205529714668, "learning_rate": 7.651244349394704e-07, "loss": 0.3155, "step": 35458 }, { "epoch": 2.6353771832032704, "grad_norm": 3.1029666323251592, "learning_rate": 7.648166534349821e-07, "loss": 0.3741, "step": 35459 }, { "epoch": 2.6354515050167224, "grad_norm": 2.966922628442757, "learning_rate": 7.645089313861265e-07, "loss": 0.3439, "step": 35460 }, { "epoch": 2.6355258268301744, "grad_norm": 2.8828140166404217, "learning_rate": 7.642012687948808e-07, "loss": 0.3332, "step": 35461 }, { "epoch": 2.635600148643627, "grad_norm": 1.7708027984075663, "learning_rate": 7.638936656632312e-07, "loss": 0.2526, "step": 35462 }, { "epoch": 2.6356744704570794, "grad_norm": 2.868161860897737, "learning_rate": 7.63586121993154e-07, "loss": 0.3073, "step": 35463 }, { "epoch": 2.6357487922705314, "grad_norm": 2.2154408958598686, "learning_rate": 7.632786377866286e-07, "loss": 0.299, "step": 35464 }, { "epoch": 2.6358231140839834, "grad_norm": 2.6700811717722144, "learning_rate": 7.629712130456368e-07, "loss": 0.3197, "step": 35465 }, { "epoch": 2.635897435897436, "grad_norm": 2.36025382011309, "learning_rate": 7.626638477721582e-07, "loss": 0.242, "step": 35466 }, { "epoch": 2.6359717577108883, "grad_norm": 3.0619360762350945, "learning_rate": 7.623565419681689e-07, "loss": 0.36, "step": 35467 }, { "epoch": 2.6360460795243403, "grad_norm": 2.425233037437341, "learning_rate": 7.620492956356518e-07, "loss": 0.2982, "step": 35468 }, { "epoch": 2.636120401337793, "grad_norm": 3.0038254532548523, "learning_rate": 7.617421087765787e-07, "loss": 0.307, "step": 35469 }, { "epoch": 2.636194723151245, "grad_norm": 3.538634594932152, "learning_rate": 7.614349813929323e-07, "loss": 0.361, "step": 35470 }, { "epoch": 2.6362690449646973, "grad_norm": 2.464872116108929, "learning_rate": 7.611279134866856e-07, "loss": 0.3193, "step": 35471 }, { "epoch": 2.6363433667781493, "grad_norm": 1.9430986788152758, "learning_rate": 7.608209050598214e-07, "loss": 0.2034, "step": 35472 }, { "epoch": 2.6364176885916017, "grad_norm": 2.655591575228716, "learning_rate": 7.605139561143115e-07, "loss": 0.2409, "step": 35473 }, { "epoch": 2.6364920104050538, "grad_norm": 2.4064117629489314, "learning_rate": 7.602070666521322e-07, "loss": 0.2801, "step": 35474 }, { "epoch": 2.6365663322185062, "grad_norm": 3.2076147018914525, "learning_rate": 7.599002366752617e-07, "loss": 0.3395, "step": 35475 }, { "epoch": 2.6366406540319582, "grad_norm": 2.12868364628172, "learning_rate": 7.59593466185673e-07, "loss": 0.1936, "step": 35476 }, { "epoch": 2.6367149758454107, "grad_norm": 2.1988060582905984, "learning_rate": 7.592867551853433e-07, "loss": 0.2242, "step": 35477 }, { "epoch": 2.6367892976588627, "grad_norm": 2.4231452172552377, "learning_rate": 7.589801036762457e-07, "loss": 0.2539, "step": 35478 }, { "epoch": 2.636863619472315, "grad_norm": 2.1039761384072535, "learning_rate": 7.586735116603539e-07, "loss": 0.2406, "step": 35479 }, { "epoch": 2.636937941285767, "grad_norm": 2.560180044835158, "learning_rate": 7.583669791396453e-07, "loss": 0.3561, "step": 35480 }, { "epoch": 2.6370122630992197, "grad_norm": 2.9335385875073303, "learning_rate": 7.580605061160873e-07, "loss": 0.3753, "step": 35481 }, { "epoch": 2.637086584912672, "grad_norm": 3.3162669823092994, "learning_rate": 7.577540925916572e-07, "loss": 0.443, "step": 35482 }, { "epoch": 2.637160906726124, "grad_norm": 2.138664784554956, "learning_rate": 7.574477385683265e-07, "loss": 0.2274, "step": 35483 }, { "epoch": 2.637235228539576, "grad_norm": 2.06898403796852, "learning_rate": 7.571414440480684e-07, "loss": 0.28, "step": 35484 }, { "epoch": 2.6373095503530286, "grad_norm": 2.8872725208818686, "learning_rate": 7.568352090328545e-07, "loss": 0.3041, "step": 35485 }, { "epoch": 2.637383872166481, "grad_norm": 1.9040631089965228, "learning_rate": 7.565290335246533e-07, "loss": 0.2536, "step": 35486 }, { "epoch": 2.637458193979933, "grad_norm": 2.513919661500862, "learning_rate": 7.56222917525441e-07, "loss": 0.2902, "step": 35487 }, { "epoch": 2.637532515793385, "grad_norm": 1.9680520550879912, "learning_rate": 7.559168610371859e-07, "loss": 0.2092, "step": 35488 }, { "epoch": 2.6376068376068376, "grad_norm": 3.001551695910561, "learning_rate": 7.556108640618576e-07, "loss": 0.3043, "step": 35489 }, { "epoch": 2.63768115942029, "grad_norm": 3.427073320859768, "learning_rate": 7.553049266014279e-07, "loss": 0.2604, "step": 35490 }, { "epoch": 2.637755481233742, "grad_norm": 3.3788810802339615, "learning_rate": 7.549990486578651e-07, "loss": 0.3477, "step": 35491 }, { "epoch": 2.6378298030471945, "grad_norm": 2.4024552592965898, "learning_rate": 7.546932302331389e-07, "loss": 0.275, "step": 35492 }, { "epoch": 2.6379041248606465, "grad_norm": 2.200878000008438, "learning_rate": 7.543874713292155e-07, "loss": 0.2787, "step": 35493 }, { "epoch": 2.637978446674099, "grad_norm": 2.4421245006984225, "learning_rate": 7.540817719480687e-07, "loss": 0.293, "step": 35494 }, { "epoch": 2.638052768487551, "grad_norm": 2.789264276961138, "learning_rate": 7.537761320916637e-07, "loss": 0.3187, "step": 35495 }, { "epoch": 2.6381270903010035, "grad_norm": 2.4562672761708835, "learning_rate": 7.534705517619656e-07, "loss": 0.2573, "step": 35496 }, { "epoch": 2.6382014121144555, "grad_norm": 2.628878907087068, "learning_rate": 7.531650309609473e-07, "loss": 0.3028, "step": 35497 }, { "epoch": 2.638275733927908, "grad_norm": 2.464833822716805, "learning_rate": 7.528595696905693e-07, "loss": 0.2945, "step": 35498 }, { "epoch": 2.63835005574136, "grad_norm": 2.8215362760146827, "learning_rate": 7.525541679528048e-07, "loss": 0.3807, "step": 35499 }, { "epoch": 2.6384243775548124, "grad_norm": 2.1401966214391863, "learning_rate": 7.522488257496164e-07, "loss": 0.3361, "step": 35500 }, { "epoch": 2.6384986993682644, "grad_norm": 2.3304682535119157, "learning_rate": 7.51943543082968e-07, "loss": 0.2906, "step": 35501 }, { "epoch": 2.638573021181717, "grad_norm": 2.057842563290035, "learning_rate": 7.516383199548316e-07, "loss": 0.2385, "step": 35502 }, { "epoch": 2.638647342995169, "grad_norm": 2.2951349372323784, "learning_rate": 7.513331563671633e-07, "loss": 0.2994, "step": 35503 }, { "epoch": 2.6387216648086214, "grad_norm": 1.9275242037584464, "learning_rate": 7.510280523219349e-07, "loss": 0.2554, "step": 35504 }, { "epoch": 2.638795986622074, "grad_norm": 2.2764412858935987, "learning_rate": 7.507230078211059e-07, "loss": 0.2505, "step": 35505 }, { "epoch": 2.638870308435526, "grad_norm": 2.3906260164643314, "learning_rate": 7.504180228666436e-07, "loss": 0.2403, "step": 35506 }, { "epoch": 2.638944630248978, "grad_norm": 2.2708453040013774, "learning_rate": 7.501130974605108e-07, "loss": 0.2404, "step": 35507 }, { "epoch": 2.6390189520624303, "grad_norm": 2.422083265543751, "learning_rate": 7.498082316046673e-07, "loss": 0.3379, "step": 35508 }, { "epoch": 2.6390932738758828, "grad_norm": 2.542502094721913, "learning_rate": 7.495034253010814e-07, "loss": 0.2398, "step": 35509 }, { "epoch": 2.639167595689335, "grad_norm": 2.384251503580922, "learning_rate": 7.491986785517113e-07, "loss": 0.2527, "step": 35510 }, { "epoch": 2.639241917502787, "grad_norm": 2.2285285696186783, "learning_rate": 7.488939913585191e-07, "loss": 0.2366, "step": 35511 }, { "epoch": 2.6393162393162393, "grad_norm": 1.8728251347097276, "learning_rate": 7.485893637234687e-07, "loss": 0.1938, "step": 35512 }, { "epoch": 2.6393905611296917, "grad_norm": 1.8364153947437478, "learning_rate": 7.482847956485184e-07, "loss": 0.2105, "step": 35513 }, { "epoch": 2.6394648829431437, "grad_norm": 2.2917673068334237, "learning_rate": 7.479802871356346e-07, "loss": 0.3187, "step": 35514 }, { "epoch": 2.639539204756596, "grad_norm": 2.173558907204991, "learning_rate": 7.476758381867699e-07, "loss": 0.2394, "step": 35515 }, { "epoch": 2.639613526570048, "grad_norm": 2.115594160878138, "learning_rate": 7.473714488038897e-07, "loss": 0.2465, "step": 35516 }, { "epoch": 2.6396878483835007, "grad_norm": 2.523423773658125, "learning_rate": 7.470671189889512e-07, "loss": 0.3068, "step": 35517 }, { "epoch": 2.6397621701969527, "grad_norm": 2.896308872934538, "learning_rate": 7.467628487439138e-07, "loss": 0.2531, "step": 35518 }, { "epoch": 2.639836492010405, "grad_norm": 1.7329213761726436, "learning_rate": 7.464586380707383e-07, "loss": 0.2256, "step": 35519 }, { "epoch": 2.639910813823857, "grad_norm": 2.406508172086413, "learning_rate": 7.46154486971381e-07, "loss": 0.302, "step": 35520 }, { "epoch": 2.6399851356373096, "grad_norm": 1.904333697395163, "learning_rate": 7.458503954478013e-07, "loss": 0.2284, "step": 35521 }, { "epoch": 2.6400594574507616, "grad_norm": 2.5236560526392986, "learning_rate": 7.455463635019577e-07, "loss": 0.2446, "step": 35522 }, { "epoch": 2.640133779264214, "grad_norm": 2.5941417757361958, "learning_rate": 7.452423911358042e-07, "loss": 0.3187, "step": 35523 }, { "epoch": 2.6402081010776666, "grad_norm": 2.46436407553771, "learning_rate": 7.449384783513026e-07, "loss": 0.3538, "step": 35524 }, { "epoch": 2.6402824228911186, "grad_norm": 2.378298561538276, "learning_rate": 7.446346251504055e-07, "loss": 0.25, "step": 35525 }, { "epoch": 2.6403567447045706, "grad_norm": 2.191011583307272, "learning_rate": 7.443308315350705e-07, "loss": 0.276, "step": 35526 }, { "epoch": 2.640431066518023, "grad_norm": 2.607835856357725, "learning_rate": 7.440270975072528e-07, "loss": 0.3562, "step": 35527 }, { "epoch": 2.6405053883314755, "grad_norm": 2.8213530831110645, "learning_rate": 7.437234230689094e-07, "loss": 0.2972, "step": 35528 }, { "epoch": 2.6405797101449275, "grad_norm": 1.8235117971349997, "learning_rate": 7.434198082219945e-07, "loss": 0.2087, "step": 35529 }, { "epoch": 2.6406540319583796, "grad_norm": 2.5527568854344977, "learning_rate": 7.431162529684611e-07, "loss": 0.2007, "step": 35530 }, { "epoch": 2.640728353771832, "grad_norm": 2.1743287546995638, "learning_rate": 7.428127573102662e-07, "loss": 0.2379, "step": 35531 }, { "epoch": 2.6408026755852845, "grad_norm": 2.158424792674828, "learning_rate": 7.425093212493628e-07, "loss": 0.2229, "step": 35532 }, { "epoch": 2.6408769973987365, "grad_norm": 2.352724835639836, "learning_rate": 7.422059447877017e-07, "loss": 0.3087, "step": 35533 }, { "epoch": 2.6409513192121885, "grad_norm": 2.5043608313320367, "learning_rate": 7.419026279272412e-07, "loss": 0.2727, "step": 35534 }, { "epoch": 2.641025641025641, "grad_norm": 2.459132382553622, "learning_rate": 7.415993706699287e-07, "loss": 0.2966, "step": 35535 }, { "epoch": 2.6410999628390934, "grad_norm": 2.502167591263226, "learning_rate": 7.412961730177226e-07, "loss": 0.2884, "step": 35536 }, { "epoch": 2.6411742846525454, "grad_norm": 1.9267152661631517, "learning_rate": 7.40993034972568e-07, "loss": 0.2352, "step": 35537 }, { "epoch": 2.641248606465998, "grad_norm": 2.6170831901032376, "learning_rate": 7.406899565364211e-07, "loss": 0.2722, "step": 35538 }, { "epoch": 2.64132292827945, "grad_norm": 2.2233542585736092, "learning_rate": 7.403869377112327e-07, "loss": 0.2065, "step": 35539 }, { "epoch": 2.6413972500929024, "grad_norm": 2.7478050568231303, "learning_rate": 7.400839784989511e-07, "loss": 0.3096, "step": 35540 }, { "epoch": 2.6414715719063544, "grad_norm": 2.4897848371561135, "learning_rate": 7.397810789015292e-07, "loss": 0.3351, "step": 35541 }, { "epoch": 2.641545893719807, "grad_norm": 2.356592512390875, "learning_rate": 7.394782389209154e-07, "loss": 0.2441, "step": 35542 }, { "epoch": 2.641620215533259, "grad_norm": 2.7634723334335596, "learning_rate": 7.391754585590605e-07, "loss": 0.2731, "step": 35543 }, { "epoch": 2.6416945373467113, "grad_norm": 2.7236730513596603, "learning_rate": 7.38872737817915e-07, "loss": 0.2816, "step": 35544 }, { "epoch": 2.6417688591601634, "grad_norm": 2.1179868550653995, "learning_rate": 7.385700766994241e-07, "loss": 0.2718, "step": 35545 }, { "epoch": 2.641843180973616, "grad_norm": 2.5233214474758547, "learning_rate": 7.382674752055397e-07, "loss": 0.2685, "step": 35546 }, { "epoch": 2.6419175027870683, "grad_norm": 2.1760533887308706, "learning_rate": 7.379649333382077e-07, "loss": 0.202, "step": 35547 }, { "epoch": 2.6419918246005203, "grad_norm": 2.145637222889915, "learning_rate": 7.376624510993802e-07, "loss": 0.3287, "step": 35548 }, { "epoch": 2.6420661464139723, "grad_norm": 2.2689638839576163, "learning_rate": 7.373600284909977e-07, "loss": 0.2619, "step": 35549 }, { "epoch": 2.6421404682274248, "grad_norm": 3.345824806718608, "learning_rate": 7.370576655150119e-07, "loss": 0.2992, "step": 35550 }, { "epoch": 2.6422147900408772, "grad_norm": 2.23688431300326, "learning_rate": 7.367553621733692e-07, "loss": 0.2833, "step": 35551 }, { "epoch": 2.6422891118543292, "grad_norm": 2.299571891864618, "learning_rate": 7.364531184680124e-07, "loss": 0.2809, "step": 35552 }, { "epoch": 2.6423634336677813, "grad_norm": 2.2010110942927406, "learning_rate": 7.36150934400891e-07, "loss": 0.2587, "step": 35553 }, { "epoch": 2.6424377554812337, "grad_norm": 2.5211473665091093, "learning_rate": 7.358488099739503e-07, "loss": 0.3473, "step": 35554 }, { "epoch": 2.642512077294686, "grad_norm": 2.755254653069719, "learning_rate": 7.355467451891318e-07, "loss": 0.2772, "step": 35555 }, { "epoch": 2.642586399108138, "grad_norm": 2.4388152597845494, "learning_rate": 7.352447400483853e-07, "loss": 0.2486, "step": 35556 }, { "epoch": 2.64266072092159, "grad_norm": 2.345180288182776, "learning_rate": 7.349427945536502e-07, "loss": 0.3217, "step": 35557 }, { "epoch": 2.6427350427350427, "grad_norm": 1.9858073648930676, "learning_rate": 7.34640908706874e-07, "loss": 0.2466, "step": 35558 }, { "epoch": 2.642809364548495, "grad_norm": 1.9016227405115231, "learning_rate": 7.343390825099994e-07, "loss": 0.2308, "step": 35559 }, { "epoch": 2.642883686361947, "grad_norm": 2.765156602065385, "learning_rate": 7.340373159649694e-07, "loss": 0.2548, "step": 35560 }, { "epoch": 2.6429580081753996, "grad_norm": 2.4583979443448047, "learning_rate": 7.337356090737258e-07, "loss": 0.2956, "step": 35561 }, { "epoch": 2.6430323299888516, "grad_norm": 2.9715714386506207, "learning_rate": 7.334339618382102e-07, "loss": 0.2967, "step": 35562 }, { "epoch": 2.643106651802304, "grad_norm": 2.2203095591108344, "learning_rate": 7.331323742603668e-07, "loss": 0.2192, "step": 35563 }, { "epoch": 2.643180973615756, "grad_norm": 2.54527387600439, "learning_rate": 7.328308463421352e-07, "loss": 0.2341, "step": 35564 }, { "epoch": 2.6432552954292086, "grad_norm": 2.059038025576166, "learning_rate": 7.325293780854592e-07, "loss": 0.2595, "step": 35565 }, { "epoch": 2.6433296172426606, "grad_norm": 2.061476924961854, "learning_rate": 7.322279694922785e-07, "loss": 0.2266, "step": 35566 }, { "epoch": 2.643403939056113, "grad_norm": 2.27827617613935, "learning_rate": 7.319266205645326e-07, "loss": 0.2461, "step": 35567 }, { "epoch": 2.643478260869565, "grad_norm": 2.592325653025931, "learning_rate": 7.316253313041621e-07, "loss": 0.2381, "step": 35568 }, { "epoch": 2.6435525826830175, "grad_norm": 2.1343749274689476, "learning_rate": 7.313241017131057e-07, "loss": 0.2032, "step": 35569 }, { "epoch": 2.64362690449647, "grad_norm": 2.3111834483574376, "learning_rate": 7.310229317933082e-07, "loss": 0.2946, "step": 35570 }, { "epoch": 2.643701226309922, "grad_norm": 2.8495512416435163, "learning_rate": 7.307218215467015e-07, "loss": 0.3152, "step": 35571 }, { "epoch": 2.643775548123374, "grad_norm": 2.1855056806350475, "learning_rate": 7.304207709752253e-07, "loss": 0.2549, "step": 35572 }, { "epoch": 2.6438498699368265, "grad_norm": 2.653517864161813, "learning_rate": 7.301197800808202e-07, "loss": 0.2355, "step": 35573 }, { "epoch": 2.643924191750279, "grad_norm": 2.108342453728637, "learning_rate": 7.298188488654223e-07, "loss": 0.2437, "step": 35574 }, { "epoch": 2.643998513563731, "grad_norm": 2.3898573341447333, "learning_rate": 7.295179773309713e-07, "loss": 0.2719, "step": 35575 }, { "epoch": 2.644072835377183, "grad_norm": 2.562903610558792, "learning_rate": 7.292171654794011e-07, "loss": 0.2496, "step": 35576 }, { "epoch": 2.6441471571906354, "grad_norm": 2.1704215733425425, "learning_rate": 7.289164133126503e-07, "loss": 0.2249, "step": 35577 }, { "epoch": 2.644221479004088, "grad_norm": 2.5907541603672355, "learning_rate": 7.286157208326549e-07, "loss": 0.3722, "step": 35578 }, { "epoch": 2.64429580081754, "grad_norm": 2.526718880717869, "learning_rate": 7.283150880413492e-07, "loss": 0.202, "step": 35579 }, { "epoch": 2.6443701226309924, "grad_norm": 2.4878022271165845, "learning_rate": 7.280145149406703e-07, "loss": 0.2103, "step": 35580 }, { "epoch": 2.6444444444444444, "grad_norm": 1.7627857329364953, "learning_rate": 7.277140015325546e-07, "loss": 0.2291, "step": 35581 }, { "epoch": 2.644518766257897, "grad_norm": 2.779507102896876, "learning_rate": 7.274135478189337e-07, "loss": 0.2687, "step": 35582 }, { "epoch": 2.644593088071349, "grad_norm": 2.568408019602984, "learning_rate": 7.27113153801744e-07, "loss": 0.3163, "step": 35583 }, { "epoch": 2.6446674098848013, "grad_norm": 2.1598228423540116, "learning_rate": 7.268128194829172e-07, "loss": 0.276, "step": 35584 }, { "epoch": 2.6447417316982533, "grad_norm": 2.3371411168849248, "learning_rate": 7.265125448643895e-07, "loss": 0.2697, "step": 35585 }, { "epoch": 2.644816053511706, "grad_norm": 2.427361027432647, "learning_rate": 7.262123299480917e-07, "loss": 0.3181, "step": 35586 }, { "epoch": 2.644890375325158, "grad_norm": 6.792174691094302, "learning_rate": 7.259121747359588e-07, "loss": 0.3506, "step": 35587 }, { "epoch": 2.6449646971386103, "grad_norm": 2.4788594662222456, "learning_rate": 7.256120792299226e-07, "loss": 0.3041, "step": 35588 }, { "epoch": 2.6450390189520623, "grad_norm": 2.1557263605586634, "learning_rate": 7.253120434319127e-07, "loss": 0.3053, "step": 35589 }, { "epoch": 2.6451133407655147, "grad_norm": 2.3469358479018596, "learning_rate": 7.250120673438643e-07, "loss": 0.225, "step": 35590 }, { "epoch": 2.6451876625789668, "grad_norm": 1.9980289035366372, "learning_rate": 7.24712150967708e-07, "loss": 0.224, "step": 35591 }, { "epoch": 2.645261984392419, "grad_norm": 2.388955490960455, "learning_rate": 7.2441229430537e-07, "loss": 0.2594, "step": 35592 }, { "epoch": 2.6453363062058717, "grad_norm": 2.3475130499317394, "learning_rate": 7.241124973587888e-07, "loss": 0.2311, "step": 35593 }, { "epoch": 2.6454106280193237, "grad_norm": 2.1626408616898254, "learning_rate": 7.238127601298872e-07, "loss": 0.3221, "step": 35594 }, { "epoch": 2.6454849498327757, "grad_norm": 2.4402722150010745, "learning_rate": 7.235130826205982e-07, "loss": 0.3266, "step": 35595 }, { "epoch": 2.645559271646228, "grad_norm": 2.7069354927354605, "learning_rate": 7.232134648328504e-07, "loss": 0.2765, "step": 35596 }, { "epoch": 2.6456335934596806, "grad_norm": 2.072371908614087, "learning_rate": 7.229139067685731e-07, "loss": 0.2029, "step": 35597 }, { "epoch": 2.6457079152731326, "grad_norm": 2.1797028612517675, "learning_rate": 7.22614408429696e-07, "loss": 0.2136, "step": 35598 }, { "epoch": 2.6457822370865847, "grad_norm": 2.163497873943029, "learning_rate": 7.223149698181431e-07, "loss": 0.2527, "step": 35599 }, { "epoch": 2.645856558900037, "grad_norm": 2.2947170847562264, "learning_rate": 7.220155909358484e-07, "loss": 0.2394, "step": 35600 }, { "epoch": 2.6459308807134896, "grad_norm": 2.5640135172855807, "learning_rate": 7.217162717847326e-07, "loss": 0.3446, "step": 35601 }, { "epoch": 2.6460052025269416, "grad_norm": 2.527384038382626, "learning_rate": 7.214170123667286e-07, "loss": 0.2473, "step": 35602 }, { "epoch": 2.646079524340394, "grad_norm": 1.956509858642044, "learning_rate": 7.211178126837604e-07, "loss": 0.3022, "step": 35603 }, { "epoch": 2.646153846153846, "grad_norm": 2.7823360286112098, "learning_rate": 7.208186727377531e-07, "loss": 0.258, "step": 35604 }, { "epoch": 2.6462281679672985, "grad_norm": 2.2534228539681904, "learning_rate": 7.205195925306352e-07, "loss": 0.2777, "step": 35605 }, { "epoch": 2.6463024897807506, "grad_norm": 2.7917076344350193, "learning_rate": 7.202205720643285e-07, "loss": 0.355, "step": 35606 }, { "epoch": 2.646376811594203, "grad_norm": 2.2302443337569597, "learning_rate": 7.199216113407615e-07, "loss": 0.2186, "step": 35607 }, { "epoch": 2.646451133407655, "grad_norm": 2.205181201152955, "learning_rate": 7.196227103618569e-07, "loss": 0.2653, "step": 35608 }, { "epoch": 2.6465254552211075, "grad_norm": 2.2043875266233233, "learning_rate": 7.193238691295412e-07, "loss": 0.1944, "step": 35609 }, { "epoch": 2.6465997770345595, "grad_norm": 2.4338691479050265, "learning_rate": 7.190250876457361e-07, "loss": 0.3027, "step": 35610 }, { "epoch": 2.646674098848012, "grad_norm": 2.6012908356840048, "learning_rate": 7.187263659123644e-07, "loss": 0.3142, "step": 35611 }, { "epoch": 2.646748420661464, "grad_norm": 1.9746374386257024, "learning_rate": 7.184277039313537e-07, "loss": 0.2213, "step": 35612 }, { "epoch": 2.6468227424749164, "grad_norm": 3.0347088967025675, "learning_rate": 7.181291017046221e-07, "loss": 0.3095, "step": 35613 }, { "epoch": 2.6468970642883685, "grad_norm": 2.386879119815205, "learning_rate": 7.178305592340928e-07, "loss": 0.3028, "step": 35614 }, { "epoch": 2.646971386101821, "grad_norm": 2.485891933630002, "learning_rate": 7.175320765216931e-07, "loss": 0.2698, "step": 35615 }, { "epoch": 2.6470457079152734, "grad_norm": 2.731191739946848, "learning_rate": 7.172336535693358e-07, "loss": 0.3499, "step": 35616 }, { "epoch": 2.6471200297287254, "grad_norm": 2.119236651114805, "learning_rate": 7.169352903789483e-07, "loss": 0.1933, "step": 35617 }, { "epoch": 2.6471943515421774, "grad_norm": 4.23757036747322, "learning_rate": 7.166369869524481e-07, "loss": 0.3071, "step": 35618 }, { "epoch": 2.64726867335563, "grad_norm": 2.116309574532674, "learning_rate": 7.16338743291759e-07, "loss": 0.2151, "step": 35619 }, { "epoch": 2.6473429951690823, "grad_norm": 2.6239726649291706, "learning_rate": 7.160405593987996e-07, "loss": 0.3279, "step": 35620 }, { "epoch": 2.6474173169825344, "grad_norm": 2.3735997360405268, "learning_rate": 7.157424352754882e-07, "loss": 0.2766, "step": 35621 }, { "epoch": 2.6474916387959864, "grad_norm": 2.4723525649460054, "learning_rate": 7.154443709237457e-07, "loss": 0.2706, "step": 35622 }, { "epoch": 2.647565960609439, "grad_norm": 3.519449651085472, "learning_rate": 7.151463663454905e-07, "loss": 0.3144, "step": 35623 }, { "epoch": 2.6476402824228913, "grad_norm": 2.58841662429331, "learning_rate": 7.14848421542641e-07, "loss": 0.1808, "step": 35624 }, { "epoch": 2.6477146042363433, "grad_norm": 2.2942147509830626, "learning_rate": 7.145505365171168e-07, "loss": 0.2634, "step": 35625 }, { "epoch": 2.6477889260497958, "grad_norm": 2.3815162326672485, "learning_rate": 7.14252711270833e-07, "loss": 0.2949, "step": 35626 }, { "epoch": 2.647863247863248, "grad_norm": 2.2569698163948426, "learning_rate": 7.139549458057116e-07, "loss": 0.2855, "step": 35627 }, { "epoch": 2.6479375696767002, "grad_norm": 2.468756421286615, "learning_rate": 7.136572401236619e-07, "loss": 0.3297, "step": 35628 }, { "epoch": 2.6480118914901523, "grad_norm": 2.5151471105280794, "learning_rate": 7.133595942266081e-07, "loss": 0.246, "step": 35629 }, { "epoch": 2.6480862133036047, "grad_norm": 2.078248058787593, "learning_rate": 7.130620081164607e-07, "loss": 0.3212, "step": 35630 }, { "epoch": 2.6481605351170567, "grad_norm": 2.3910898470041517, "learning_rate": 7.127644817951407e-07, "loss": 0.2951, "step": 35631 }, { "epoch": 2.648234856930509, "grad_norm": 2.1267626057400593, "learning_rate": 7.124670152645607e-07, "loss": 0.2099, "step": 35632 }, { "epoch": 2.648309178743961, "grad_norm": 2.1561926703992835, "learning_rate": 7.12169608526635e-07, "loss": 0.2292, "step": 35633 }, { "epoch": 2.6483835005574137, "grad_norm": 2.192662316194513, "learning_rate": 7.118722615832807e-07, "loss": 0.2914, "step": 35634 }, { "epoch": 2.6484578223708657, "grad_norm": 2.866236499643703, "learning_rate": 7.115749744364109e-07, "loss": 0.338, "step": 35635 }, { "epoch": 2.648532144184318, "grad_norm": 2.1924560406133633, "learning_rate": 7.112777470879384e-07, "loss": 0.2459, "step": 35636 }, { "epoch": 2.64860646599777, "grad_norm": 2.1746201205262423, "learning_rate": 7.109805795397784e-07, "loss": 0.1917, "step": 35637 }, { "epoch": 2.6486807878112226, "grad_norm": 1.9603780835498008, "learning_rate": 7.106834717938438e-07, "loss": 0.2104, "step": 35638 }, { "epoch": 2.648755109624675, "grad_norm": 2.1787611989035116, "learning_rate": 7.103864238520475e-07, "loss": 0.2113, "step": 35639 }, { "epoch": 2.648829431438127, "grad_norm": 2.9218149611468878, "learning_rate": 7.100894357162991e-07, "loss": 0.3508, "step": 35640 }, { "epoch": 2.648903753251579, "grad_norm": 1.98862481601556, "learning_rate": 7.097925073885148e-07, "loss": 0.2674, "step": 35641 }, { "epoch": 2.6489780750650316, "grad_norm": 2.2138264355666113, "learning_rate": 7.094956388706043e-07, "loss": 0.1966, "step": 35642 }, { "epoch": 2.649052396878484, "grad_norm": 2.3489038426115636, "learning_rate": 7.09198830164477e-07, "loss": 0.2489, "step": 35643 }, { "epoch": 2.649126718691936, "grad_norm": 2.396870637752898, "learning_rate": 7.089020812720482e-07, "loss": 0.2317, "step": 35644 }, { "epoch": 2.649201040505388, "grad_norm": 2.505224896176236, "learning_rate": 7.086053921952229e-07, "loss": 0.272, "step": 35645 }, { "epoch": 2.6492753623188405, "grad_norm": 4.037338217093899, "learning_rate": 7.083087629359164e-07, "loss": 0.2444, "step": 35646 }, { "epoch": 2.649349684132293, "grad_norm": 2.3480515273198277, "learning_rate": 7.080121934960349e-07, "loss": 0.2809, "step": 35647 }, { "epoch": 2.649424005945745, "grad_norm": 2.1834357323864513, "learning_rate": 7.077156838774879e-07, "loss": 0.2952, "step": 35648 }, { "epoch": 2.6494983277591975, "grad_norm": 2.0799500236224895, "learning_rate": 7.074192340821883e-07, "loss": 0.2336, "step": 35649 }, { "epoch": 2.6495726495726495, "grad_norm": 1.9570323262876357, "learning_rate": 7.071228441120392e-07, "loss": 0.2373, "step": 35650 }, { "epoch": 2.649646971386102, "grad_norm": 1.4579883272785763, "learning_rate": 7.068265139689512e-07, "loss": 0.1893, "step": 35651 }, { "epoch": 2.649721293199554, "grad_norm": 2.729606224627669, "learning_rate": 7.065302436548316e-07, "loss": 0.2964, "step": 35652 }, { "epoch": 2.6497956150130064, "grad_norm": 2.0083095077385504, "learning_rate": 7.062340331715889e-07, "loss": 0.1963, "step": 35653 }, { "epoch": 2.6498699368264584, "grad_norm": 2.208257420273741, "learning_rate": 7.059378825211294e-07, "loss": 0.1976, "step": 35654 }, { "epoch": 2.649944258639911, "grad_norm": 2.18488020441108, "learning_rate": 7.056417917053571e-07, "loss": 0.2511, "step": 35655 }, { "epoch": 2.650018580453363, "grad_norm": 1.9682102764615075, "learning_rate": 7.053457607261838e-07, "loss": 0.2311, "step": 35656 }, { "epoch": 2.6500929022668154, "grad_norm": 2.0110317687676442, "learning_rate": 7.050497895855113e-07, "loss": 0.1887, "step": 35657 }, { "epoch": 2.650167224080268, "grad_norm": 2.6637303768142364, "learning_rate": 7.047538782852448e-07, "loss": 0.3159, "step": 35658 }, { "epoch": 2.65024154589372, "grad_norm": 1.9973990442202931, "learning_rate": 7.044580268272916e-07, "loss": 0.191, "step": 35659 }, { "epoch": 2.650315867707172, "grad_norm": 2.857036801290478, "learning_rate": 7.041622352135557e-07, "loss": 0.3494, "step": 35660 }, { "epoch": 2.6503901895206243, "grad_norm": 2.1581575245735145, "learning_rate": 7.038665034459413e-07, "loss": 0.3551, "step": 35661 }, { "epoch": 2.650464511334077, "grad_norm": 2.4162276961320486, "learning_rate": 7.035708315263501e-07, "loss": 0.2601, "step": 35662 }, { "epoch": 2.650538833147529, "grad_norm": 1.953224942166706, "learning_rate": 7.032752194566895e-07, "loss": 0.2034, "step": 35663 }, { "epoch": 2.650613154960981, "grad_norm": 2.125134599776265, "learning_rate": 7.0297966723886e-07, "loss": 0.2751, "step": 35664 }, { "epoch": 2.6506874767744333, "grad_norm": 2.0586984486711914, "learning_rate": 7.026841748747637e-07, "loss": 0.2115, "step": 35665 }, { "epoch": 2.6507617985878857, "grad_norm": 2.6338177010676866, "learning_rate": 7.023887423663056e-07, "loss": 0.3662, "step": 35666 }, { "epoch": 2.6508361204013378, "grad_norm": 2.8177888838164797, "learning_rate": 7.020933697153853e-07, "loss": 0.2697, "step": 35667 }, { "epoch": 2.6509104422147898, "grad_norm": 2.093491252605735, "learning_rate": 7.017980569239069e-07, "loss": 0.2011, "step": 35668 }, { "epoch": 2.6509847640282422, "grad_norm": 2.1251996684799526, "learning_rate": 7.0150280399377e-07, "loss": 0.2593, "step": 35669 }, { "epoch": 2.6510590858416947, "grad_norm": 2.212019499782766, "learning_rate": 7.01207610926874e-07, "loss": 0.2414, "step": 35670 }, { "epoch": 2.6511334076551467, "grad_norm": 2.252597511434114, "learning_rate": 7.009124777251231e-07, "loss": 0.2537, "step": 35671 }, { "epoch": 2.651207729468599, "grad_norm": 3.083450162291617, "learning_rate": 7.006174043904146e-07, "loss": 0.2874, "step": 35672 }, { "epoch": 2.651282051282051, "grad_norm": 2.6343849246211763, "learning_rate": 7.003223909246482e-07, "loss": 0.2601, "step": 35673 }, { "epoch": 2.6513563730955036, "grad_norm": 2.3523363995604174, "learning_rate": 7.000274373297223e-07, "loss": 0.2142, "step": 35674 }, { "epoch": 2.6514306949089557, "grad_norm": 2.423118135109308, "learning_rate": 6.997325436075397e-07, "loss": 0.2755, "step": 35675 }, { "epoch": 2.651505016722408, "grad_norm": 2.1840875599655396, "learning_rate": 6.994377097599958e-07, "loss": 0.2542, "step": 35676 }, { "epoch": 2.65157933853586, "grad_norm": 1.9740525746531732, "learning_rate": 6.991429357889878e-07, "loss": 0.1912, "step": 35677 }, { "epoch": 2.6516536603493126, "grad_norm": 2.148636413594875, "learning_rate": 6.988482216964165e-07, "loss": 0.2403, "step": 35678 }, { "epoch": 2.6517279821627646, "grad_norm": 2.709108557181773, "learning_rate": 6.985535674841781e-07, "loss": 0.3703, "step": 35679 }, { "epoch": 2.651802303976217, "grad_norm": 2.410719277601318, "learning_rate": 6.982589731541678e-07, "loss": 0.3372, "step": 35680 }, { "epoch": 2.6518766257896695, "grad_norm": 2.5712305796401704, "learning_rate": 6.97964438708284e-07, "loss": 0.3057, "step": 35681 }, { "epoch": 2.6519509476031216, "grad_norm": 2.3109133134463136, "learning_rate": 6.976699641484219e-07, "loss": 0.265, "step": 35682 }, { "epoch": 2.6520252694165736, "grad_norm": 1.85560378535787, "learning_rate": 6.973755494764811e-07, "loss": 0.22, "step": 35683 }, { "epoch": 2.652099591230026, "grad_norm": 2.02465465660407, "learning_rate": 6.970811946943501e-07, "loss": 0.2528, "step": 35684 }, { "epoch": 2.6521739130434785, "grad_norm": 1.8185665293355013, "learning_rate": 6.967868998039307e-07, "loss": 0.1605, "step": 35685 }, { "epoch": 2.6522482348569305, "grad_norm": 2.124472520914218, "learning_rate": 6.964926648071124e-07, "loss": 0.2406, "step": 35686 }, { "epoch": 2.6523225566703825, "grad_norm": 2.4582089188004037, "learning_rate": 6.961984897057916e-07, "loss": 0.272, "step": 35687 }, { "epoch": 2.652396878483835, "grad_norm": 2.274490645845137, "learning_rate": 6.959043745018634e-07, "loss": 0.246, "step": 35688 }, { "epoch": 2.6524712002972874, "grad_norm": 2.9107374274400177, "learning_rate": 6.956103191972185e-07, "loss": 0.277, "step": 35689 }, { "epoch": 2.6525455221107395, "grad_norm": 1.8576102259828053, "learning_rate": 6.953163237937521e-07, "loss": 0.2032, "step": 35690 }, { "epoch": 2.6526198439241915, "grad_norm": 2.1513950885942723, "learning_rate": 6.950223882933582e-07, "loss": 0.2176, "step": 35691 }, { "epoch": 2.652694165737644, "grad_norm": 1.6749501110253764, "learning_rate": 6.947285126979242e-07, "loss": 0.224, "step": 35692 }, { "epoch": 2.6527684875510964, "grad_norm": 2.4536195664028697, "learning_rate": 6.944346970093474e-07, "loss": 0.2636, "step": 35693 }, { "epoch": 2.6528428093645484, "grad_norm": 1.9167166928823522, "learning_rate": 6.941409412295175e-07, "loss": 0.2775, "step": 35694 }, { "epoch": 2.652917131178001, "grad_norm": 2.511681392460852, "learning_rate": 6.938472453603251e-07, "loss": 0.269, "step": 35695 }, { "epoch": 2.652991452991453, "grad_norm": 2.339638756906874, "learning_rate": 6.935536094036621e-07, "loss": 0.2567, "step": 35696 }, { "epoch": 2.6530657748049054, "grad_norm": 2.2478849079080527, "learning_rate": 6.932600333614147e-07, "loss": 0.2978, "step": 35697 }, { "epoch": 2.6531400966183574, "grad_norm": 2.3312630810001203, "learning_rate": 6.929665172354794e-07, "loss": 0.2803, "step": 35698 }, { "epoch": 2.65321441843181, "grad_norm": 2.0679277760873167, "learning_rate": 6.92673061027741e-07, "loss": 0.2644, "step": 35699 }, { "epoch": 2.653288740245262, "grad_norm": 2.2888467867422184, "learning_rate": 6.923796647400915e-07, "loss": 0.3493, "step": 35700 }, { "epoch": 2.6533630620587143, "grad_norm": 2.916731287814513, "learning_rate": 6.920863283744194e-07, "loss": 0.3739, "step": 35701 }, { "epoch": 2.6534373838721663, "grad_norm": 2.538729707937513, "learning_rate": 6.917930519326099e-07, "loss": 0.2754, "step": 35702 }, { "epoch": 2.653511705685619, "grad_norm": 2.4671128847906347, "learning_rate": 6.914998354165569e-07, "loss": 0.3253, "step": 35703 }, { "epoch": 2.6535860274990712, "grad_norm": 2.0444657192331466, "learning_rate": 6.912066788281424e-07, "loss": 0.1985, "step": 35704 }, { "epoch": 2.6536603493125233, "grad_norm": 2.3327364396289925, "learning_rate": 6.909135821692581e-07, "loss": 0.232, "step": 35705 }, { "epoch": 2.6537346711259753, "grad_norm": 2.221519381158733, "learning_rate": 6.906205454417892e-07, "loss": 0.2821, "step": 35706 }, { "epoch": 2.6538089929394277, "grad_norm": 2.2091690346323145, "learning_rate": 6.903275686476218e-07, "loss": 0.232, "step": 35707 }, { "epoch": 2.65388331475288, "grad_norm": 2.455626634041283, "learning_rate": 6.900346517886425e-07, "loss": 0.3587, "step": 35708 }, { "epoch": 2.653957636566332, "grad_norm": 2.1218909851280428, "learning_rate": 6.897417948667362e-07, "loss": 0.2627, "step": 35709 }, { "epoch": 2.6540319583797842, "grad_norm": 2.184967685948882, "learning_rate": 6.894489978837903e-07, "loss": 0.1984, "step": 35710 }, { "epoch": 2.6541062801932367, "grad_norm": 3.5021171231695996, "learning_rate": 6.891562608416868e-07, "loss": 0.2366, "step": 35711 }, { "epoch": 2.654180602006689, "grad_norm": 1.809742788217105, "learning_rate": 6.888635837423141e-07, "loss": 0.2052, "step": 35712 }, { "epoch": 2.654254923820141, "grad_norm": 3.1499008488310483, "learning_rate": 6.885709665875539e-07, "loss": 0.3761, "step": 35713 }, { "epoch": 2.6543292456335936, "grad_norm": 2.0791176512536347, "learning_rate": 6.882784093792905e-07, "loss": 0.2252, "step": 35714 }, { "epoch": 2.6544035674470456, "grad_norm": 2.4264135756038305, "learning_rate": 6.879859121194077e-07, "loss": 0.2976, "step": 35715 }, { "epoch": 2.654477889260498, "grad_norm": 2.131642082485224, "learning_rate": 6.876934748097897e-07, "loss": 0.201, "step": 35716 }, { "epoch": 2.65455221107395, "grad_norm": 2.093714164191672, "learning_rate": 6.874010974523171e-07, "loss": 0.2549, "step": 35717 }, { "epoch": 2.6546265328874026, "grad_norm": 2.162702936648201, "learning_rate": 6.871087800488729e-07, "loss": 0.1868, "step": 35718 }, { "epoch": 2.6547008547008546, "grad_norm": 2.8329497987473706, "learning_rate": 6.86816522601338e-07, "loss": 0.2314, "step": 35719 }, { "epoch": 2.654775176514307, "grad_norm": 2.593451100103277, "learning_rate": 6.86524325111596e-07, "loss": 0.2475, "step": 35720 }, { "epoch": 2.654849498327759, "grad_norm": 2.285554618201468, "learning_rate": 6.862321875815259e-07, "loss": 0.2835, "step": 35721 }, { "epoch": 2.6549238201412115, "grad_norm": 1.9182897910039183, "learning_rate": 6.859401100130125e-07, "loss": 0.2181, "step": 35722 }, { "epoch": 2.6549981419546635, "grad_norm": 2.150752478203949, "learning_rate": 6.856480924079322e-07, "loss": 0.2517, "step": 35723 }, { "epoch": 2.655072463768116, "grad_norm": 2.642999976120897, "learning_rate": 6.853561347681648e-07, "loss": 0.3173, "step": 35724 }, { "epoch": 2.655146785581568, "grad_norm": 2.490076245232978, "learning_rate": 6.850642370955928e-07, "loss": 0.3052, "step": 35725 }, { "epoch": 2.6552211073950205, "grad_norm": 2.3295869979176147, "learning_rate": 6.847723993920918e-07, "loss": 0.2839, "step": 35726 }, { "epoch": 2.655295429208473, "grad_norm": 2.1804636968336744, "learning_rate": 6.844806216595457e-07, "loss": 0.1999, "step": 35727 }, { "epoch": 2.655369751021925, "grad_norm": 2.144894288656786, "learning_rate": 6.841889038998295e-07, "loss": 0.2427, "step": 35728 }, { "epoch": 2.655444072835377, "grad_norm": 2.049267306312452, "learning_rate": 6.838972461148208e-07, "loss": 0.1924, "step": 35729 }, { "epoch": 2.6555183946488294, "grad_norm": 3.854344368538819, "learning_rate": 6.836056483063991e-07, "loss": 0.2153, "step": 35730 }, { "epoch": 2.655592716462282, "grad_norm": 2.078425054242271, "learning_rate": 6.833141104764395e-07, "loss": 0.2419, "step": 35731 }, { "epoch": 2.655667038275734, "grad_norm": 2.167367167142116, "learning_rate": 6.830226326268208e-07, "loss": 0.2457, "step": 35732 }, { "epoch": 2.655741360089186, "grad_norm": 1.748182789542641, "learning_rate": 6.827312147594189e-07, "loss": 0.2175, "step": 35733 }, { "epoch": 2.6558156819026384, "grad_norm": 2.4743284385651996, "learning_rate": 6.824398568761104e-07, "loss": 0.2634, "step": 35734 }, { "epoch": 2.655890003716091, "grad_norm": 2.682474101677857, "learning_rate": 6.821485589787713e-07, "loss": 0.332, "step": 35735 }, { "epoch": 2.655964325529543, "grad_norm": 2.3180298636770145, "learning_rate": 6.818573210692736e-07, "loss": 0.2422, "step": 35736 }, { "epoch": 2.6560386473429953, "grad_norm": 2.383351734202787, "learning_rate": 6.81566143149498e-07, "loss": 0.2283, "step": 35737 }, { "epoch": 2.6561129691564473, "grad_norm": 2.15784649792131, "learning_rate": 6.812750252213152e-07, "loss": 0.195, "step": 35738 }, { "epoch": 2.6561872909699, "grad_norm": 2.7388282881960486, "learning_rate": 6.809839672865992e-07, "loss": 0.2865, "step": 35739 }, { "epoch": 2.656261612783352, "grad_norm": 2.1635316608088884, "learning_rate": 6.806929693472275e-07, "loss": 0.2484, "step": 35740 }, { "epoch": 2.6563359345968043, "grad_norm": 2.20359505792643, "learning_rate": 6.804020314050686e-07, "loss": 0.2842, "step": 35741 }, { "epoch": 2.6564102564102563, "grad_norm": 2.25365326602032, "learning_rate": 6.801111534619986e-07, "loss": 0.2277, "step": 35742 }, { "epoch": 2.6564845782237088, "grad_norm": 2.1440160138796234, "learning_rate": 6.798203355198885e-07, "loss": 0.263, "step": 35743 }, { "epoch": 2.6565589000371608, "grad_norm": 2.5841042473750306, "learning_rate": 6.795295775806121e-07, "loss": 0.3432, "step": 35744 }, { "epoch": 2.6566332218506132, "grad_norm": 1.5683674266643952, "learning_rate": 6.792388796460414e-07, "loss": 0.1812, "step": 35745 }, { "epoch": 2.6567075436640653, "grad_norm": 2.242321119021543, "learning_rate": 6.78948241718046e-07, "loss": 0.2987, "step": 35746 }, { "epoch": 2.6567818654775177, "grad_norm": 2.1462249575626626, "learning_rate": 6.786576637984987e-07, "loss": 0.2201, "step": 35747 }, { "epoch": 2.6568561872909697, "grad_norm": 2.0699783646170284, "learning_rate": 6.783671458892682e-07, "loss": 0.1542, "step": 35748 }, { "epoch": 2.656930509104422, "grad_norm": 2.83595937823421, "learning_rate": 6.780766879922285e-07, "loss": 0.317, "step": 35749 }, { "epoch": 2.6570048309178746, "grad_norm": 2.1334480490352417, "learning_rate": 6.77786290109248e-07, "loss": 0.2765, "step": 35750 }, { "epoch": 2.6570791527313267, "grad_norm": 2.1206729953685577, "learning_rate": 6.774959522421943e-07, "loss": 0.1994, "step": 35751 }, { "epoch": 2.6571534745447787, "grad_norm": 2.1587160836223713, "learning_rate": 6.772056743929389e-07, "loss": 0.2362, "step": 35752 }, { "epoch": 2.657227796358231, "grad_norm": 2.263106912896771, "learning_rate": 6.769154565633473e-07, "loss": 0.2274, "step": 35753 }, { "epoch": 2.6573021181716836, "grad_norm": 2.1813927890954994, "learning_rate": 6.766252987552924e-07, "loss": 0.2705, "step": 35754 }, { "epoch": 2.6573764399851356, "grad_norm": 1.848926696497362, "learning_rate": 6.763352009706392e-07, "loss": 0.1707, "step": 35755 }, { "epoch": 2.6574507617985876, "grad_norm": 2.8146424424154035, "learning_rate": 6.760451632112564e-07, "loss": 0.2439, "step": 35756 }, { "epoch": 2.65752508361204, "grad_norm": 1.7879962651742427, "learning_rate": 6.757551854790112e-07, "loss": 0.1904, "step": 35757 }, { "epoch": 2.6575994054254926, "grad_norm": 1.783594087749668, "learning_rate": 6.754652677757701e-07, "loss": 0.2293, "step": 35758 }, { "epoch": 2.6576737272389446, "grad_norm": 2.285169862275225, "learning_rate": 6.751754101034002e-07, "loss": 0.3013, "step": 35759 }, { "epoch": 2.657748049052397, "grad_norm": 3.041767493279546, "learning_rate": 6.74885612463767e-07, "loss": 0.3329, "step": 35760 }, { "epoch": 2.657822370865849, "grad_norm": 2.5104050141386502, "learning_rate": 6.745958748587356e-07, "loss": 0.3792, "step": 35761 }, { "epoch": 2.6578966926793015, "grad_norm": 2.6977752282963077, "learning_rate": 6.743061972901743e-07, "loss": 0.2198, "step": 35762 }, { "epoch": 2.6579710144927535, "grad_norm": 2.4656935861961626, "learning_rate": 6.74016579759944e-07, "loss": 0.3136, "step": 35763 }, { "epoch": 2.658045336306206, "grad_norm": 2.6759455854347722, "learning_rate": 6.737270222699111e-07, "loss": 0.3048, "step": 35764 }, { "epoch": 2.658119658119658, "grad_norm": 2.565434347526044, "learning_rate": 6.734375248219393e-07, "loss": 0.308, "step": 35765 }, { "epoch": 2.6581939799331105, "grad_norm": 1.9400367924499382, "learning_rate": 6.731480874178942e-07, "loss": 0.2182, "step": 35766 }, { "epoch": 2.6582683017465625, "grad_norm": 2.1894626298315147, "learning_rate": 6.728587100596374e-07, "loss": 0.2185, "step": 35767 }, { "epoch": 2.658342623560015, "grad_norm": 2.165159880850585, "learning_rate": 6.725693927490307e-07, "loss": 0.274, "step": 35768 }, { "epoch": 2.658416945373467, "grad_norm": 2.4524933472386428, "learning_rate": 6.722801354879405e-07, "loss": 0.2978, "step": 35769 }, { "epoch": 2.6584912671869194, "grad_norm": 2.277496480604558, "learning_rate": 6.719909382782241e-07, "loss": 0.2694, "step": 35770 }, { "epoch": 2.6585655890003714, "grad_norm": 2.0234435011130163, "learning_rate": 6.71701801121748e-07, "loss": 0.1913, "step": 35771 }, { "epoch": 2.658639910813824, "grad_norm": 2.387075266303419, "learning_rate": 6.714127240203728e-07, "loss": 0.2896, "step": 35772 }, { "epoch": 2.6587142326272764, "grad_norm": 2.9129578617251712, "learning_rate": 6.711237069759569e-07, "loss": 0.2992, "step": 35773 }, { "epoch": 2.6587885544407284, "grad_norm": 2.939596146483652, "learning_rate": 6.708347499903634e-07, "loss": 0.3853, "step": 35774 }, { "epoch": 2.6588628762541804, "grad_norm": 2.339220559997055, "learning_rate": 6.705458530654496e-07, "loss": 0.2662, "step": 35775 }, { "epoch": 2.658937198067633, "grad_norm": 3.990940033496544, "learning_rate": 6.702570162030797e-07, "loss": 0.3575, "step": 35776 }, { "epoch": 2.6590115198810853, "grad_norm": 2.4796300229807144, "learning_rate": 6.699682394051099e-07, "loss": 0.329, "step": 35777 }, { "epoch": 2.6590858416945373, "grad_norm": 2.468367138016381, "learning_rate": 6.69679522673401e-07, "loss": 0.2824, "step": 35778 }, { "epoch": 2.6591601635079893, "grad_norm": 2.4199210288591724, "learning_rate": 6.693908660098114e-07, "loss": 0.2984, "step": 35779 }, { "epoch": 2.659234485321442, "grad_norm": 2.1843430403738417, "learning_rate": 6.691022694161975e-07, "loss": 0.2094, "step": 35780 }, { "epoch": 2.6593088071348943, "grad_norm": 2.2906950905994203, "learning_rate": 6.688137328944211e-07, "loss": 0.2557, "step": 35781 }, { "epoch": 2.6593831289483463, "grad_norm": 2.4770725106016225, "learning_rate": 6.685252564463385e-07, "loss": 0.2826, "step": 35782 }, { "epoch": 2.6594574507617987, "grad_norm": 2.3142811146435127, "learning_rate": 6.682368400738037e-07, "loss": 0.311, "step": 35783 }, { "epoch": 2.6595317725752508, "grad_norm": 2.4566287900746393, "learning_rate": 6.679484837786776e-07, "loss": 0.2301, "step": 35784 }, { "epoch": 2.659606094388703, "grad_norm": 2.311793908840339, "learning_rate": 6.676601875628142e-07, "loss": 0.2507, "step": 35785 }, { "epoch": 2.6596804162021552, "grad_norm": 2.045668227934238, "learning_rate": 6.673719514280707e-07, "loss": 0.2011, "step": 35786 }, { "epoch": 2.6597547380156077, "grad_norm": 1.8196598792292544, "learning_rate": 6.670837753763015e-07, "loss": 0.2229, "step": 35787 }, { "epoch": 2.6598290598290597, "grad_norm": 2.039401289372654, "learning_rate": 6.667956594093639e-07, "loss": 0.2131, "step": 35788 }, { "epoch": 2.659903381642512, "grad_norm": 1.8876472310983214, "learning_rate": 6.665076035291118e-07, "loss": 0.2042, "step": 35789 }, { "epoch": 2.659977703455964, "grad_norm": 3.0749528690251156, "learning_rate": 6.662196077373973e-07, "loss": 0.2553, "step": 35790 }, { "epoch": 2.6600520252694166, "grad_norm": 2.0522215847316008, "learning_rate": 6.659316720360787e-07, "loss": 0.2234, "step": 35791 }, { "epoch": 2.6601263470828687, "grad_norm": 2.2988531491547084, "learning_rate": 6.656437964270057e-07, "loss": 0.3017, "step": 35792 }, { "epoch": 2.660200668896321, "grad_norm": 2.5320427048664387, "learning_rate": 6.653559809120347e-07, "loss": 0.2567, "step": 35793 }, { "epoch": 2.660274990709773, "grad_norm": 2.760688034556629, "learning_rate": 6.650682254930185e-07, "loss": 0.3308, "step": 35794 }, { "epoch": 2.6603493125232256, "grad_norm": 2.1173478996138426, "learning_rate": 6.647805301718068e-07, "loss": 0.255, "step": 35795 }, { "epoch": 2.660423634336678, "grad_norm": 2.0881941517463565, "learning_rate": 6.644928949502572e-07, "loss": 0.2327, "step": 35796 }, { "epoch": 2.66049795615013, "grad_norm": 2.0638538703527076, "learning_rate": 6.642053198302145e-07, "loss": 0.1722, "step": 35797 }, { "epoch": 2.660572277963582, "grad_norm": 2.5129182450431333, "learning_rate": 6.639178048135353e-07, "loss": 0.3241, "step": 35798 }, { "epoch": 2.6606465997770345, "grad_norm": 2.205624556998083, "learning_rate": 6.636303499020669e-07, "loss": 0.2818, "step": 35799 }, { "epoch": 2.660720921590487, "grad_norm": 2.3368687747583077, "learning_rate": 6.633429550976634e-07, "loss": 0.2702, "step": 35800 }, { "epoch": 2.660795243403939, "grad_norm": 1.948860529673773, "learning_rate": 6.630556204021743e-07, "loss": 0.275, "step": 35801 }, { "epoch": 2.660869565217391, "grad_norm": 2.713161327094271, "learning_rate": 6.627683458174461e-07, "loss": 0.3517, "step": 35802 }, { "epoch": 2.6609438870308435, "grad_norm": 2.6903651445390353, "learning_rate": 6.624811313453328e-07, "loss": 0.3637, "step": 35803 }, { "epoch": 2.661018208844296, "grad_norm": 2.3433667981458703, "learning_rate": 6.621939769876818e-07, "loss": 0.2828, "step": 35804 }, { "epoch": 2.661092530657748, "grad_norm": 1.9557060037024139, "learning_rate": 6.619068827463393e-07, "loss": 0.2257, "step": 35805 }, { "epoch": 2.6611668524712004, "grad_norm": 1.9547345996661118, "learning_rate": 6.616198486231584e-07, "loss": 0.149, "step": 35806 }, { "epoch": 2.6612411742846525, "grad_norm": 1.8947839887861606, "learning_rate": 6.613328746199832e-07, "loss": 0.2114, "step": 35807 }, { "epoch": 2.661315496098105, "grad_norm": 2.1315435480219076, "learning_rate": 6.610459607386632e-07, "loss": 0.2338, "step": 35808 }, { "epoch": 2.661389817911557, "grad_norm": 2.6234022654757476, "learning_rate": 6.607591069810426e-07, "loss": 0.3516, "step": 35809 }, { "epoch": 2.6614641397250094, "grad_norm": 2.3365320014247435, "learning_rate": 6.604723133489721e-07, "loss": 0.2292, "step": 35810 }, { "epoch": 2.6615384615384614, "grad_norm": 2.3415851610328597, "learning_rate": 6.601855798442958e-07, "loss": 0.2903, "step": 35811 }, { "epoch": 2.661612783351914, "grad_norm": 2.808986568421201, "learning_rate": 6.598989064688599e-07, "loss": 0.2942, "step": 35812 }, { "epoch": 2.661687105165366, "grad_norm": 2.4526228709860725, "learning_rate": 6.596122932245108e-07, "loss": 0.1968, "step": 35813 }, { "epoch": 2.6617614269788183, "grad_norm": 1.9052465110333765, "learning_rate": 6.593257401130915e-07, "loss": 0.2099, "step": 35814 }, { "epoch": 2.661835748792271, "grad_norm": 2.353577406096894, "learning_rate": 6.590392471364493e-07, "loss": 0.3155, "step": 35815 }, { "epoch": 2.661910070605723, "grad_norm": 2.6679733000752113, "learning_rate": 6.587528142964294e-07, "loss": 0.3154, "step": 35816 }, { "epoch": 2.661984392419175, "grad_norm": 1.8938954962749361, "learning_rate": 6.584664415948716e-07, "loss": 0.2163, "step": 35817 }, { "epoch": 2.6620587142326273, "grad_norm": 2.465522929592057, "learning_rate": 6.581801290336243e-07, "loss": 0.2914, "step": 35818 }, { "epoch": 2.6621330360460798, "grad_norm": 2.055183738827978, "learning_rate": 6.578938766145282e-07, "loss": 0.2129, "step": 35819 }, { "epoch": 2.6622073578595318, "grad_norm": 2.5849112532623884, "learning_rate": 6.576076843394263e-07, "loss": 0.2819, "step": 35820 }, { "epoch": 2.662281679672984, "grad_norm": 2.1536059258417195, "learning_rate": 6.573215522101606e-07, "loss": 0.2099, "step": 35821 }, { "epoch": 2.6623560014864363, "grad_norm": 2.008489446364026, "learning_rate": 6.57035480228575e-07, "loss": 0.2695, "step": 35822 }, { "epoch": 2.6624303232998887, "grad_norm": 2.088304555359491, "learning_rate": 6.567494683965103e-07, "loss": 0.2791, "step": 35823 }, { "epoch": 2.6625046451133407, "grad_norm": 2.3879611416178763, "learning_rate": 6.564635167158062e-07, "loss": 0.2954, "step": 35824 }, { "epoch": 2.6625789669267927, "grad_norm": 1.9774248957881129, "learning_rate": 6.561776251883068e-07, "loss": 0.2232, "step": 35825 }, { "epoch": 2.662653288740245, "grad_norm": 1.9409405086534406, "learning_rate": 6.558917938158504e-07, "loss": 0.1817, "step": 35826 }, { "epoch": 2.6627276105536977, "grad_norm": 2.833458325859871, "learning_rate": 6.55606022600277e-07, "loss": 0.2885, "step": 35827 }, { "epoch": 2.6628019323671497, "grad_norm": 2.8941972241628187, "learning_rate": 6.553203115434282e-07, "loss": 0.3548, "step": 35828 }, { "epoch": 2.662876254180602, "grad_norm": 2.2903617532351332, "learning_rate": 6.550346606471414e-07, "loss": 0.2674, "step": 35829 }, { "epoch": 2.662950575994054, "grad_norm": 2.7413000099115328, "learning_rate": 6.547490699132597e-07, "loss": 0.3951, "step": 35830 }, { "epoch": 2.6630248978075066, "grad_norm": 2.8472030584926156, "learning_rate": 6.544635393436149e-07, "loss": 0.3529, "step": 35831 }, { "epoch": 2.6630992196209586, "grad_norm": 2.8980986471591454, "learning_rate": 6.541780689400512e-07, "loss": 0.265, "step": 35832 }, { "epoch": 2.663173541434411, "grad_norm": 2.4772214373563886, "learning_rate": 6.538926587044037e-07, "loss": 0.3157, "step": 35833 }, { "epoch": 2.663247863247863, "grad_norm": 1.8643372046548703, "learning_rate": 6.536073086385086e-07, "loss": 0.2379, "step": 35834 }, { "epoch": 2.6633221850613156, "grad_norm": 2.141921507338067, "learning_rate": 6.533220187442068e-07, "loss": 0.2526, "step": 35835 }, { "epoch": 2.6633965068747676, "grad_norm": 2.473947572629951, "learning_rate": 6.530367890233302e-07, "loss": 0.3054, "step": 35836 }, { "epoch": 2.66347082868822, "grad_norm": 2.2218265487063253, "learning_rate": 6.527516194777206e-07, "loss": 0.255, "step": 35837 }, { "epoch": 2.6635451505016725, "grad_norm": 2.6565282478751233, "learning_rate": 6.524665101092109e-07, "loss": 0.3792, "step": 35838 }, { "epoch": 2.6636194723151245, "grad_norm": 1.990960630798965, "learning_rate": 6.521814609196342e-07, "loss": 0.2061, "step": 35839 }, { "epoch": 2.6636937941285765, "grad_norm": 1.975217171756047, "learning_rate": 6.518964719108311e-07, "loss": 0.1818, "step": 35840 }, { "epoch": 2.663768115942029, "grad_norm": 2.773796116675785, "learning_rate": 6.516115430846325e-07, "loss": 0.2751, "step": 35841 }, { "epoch": 2.6638424377554815, "grad_norm": 3.5083806837627125, "learning_rate": 6.513266744428748e-07, "loss": 0.2869, "step": 35842 }, { "epoch": 2.6639167595689335, "grad_norm": 3.001371259064793, "learning_rate": 6.510418659873896e-07, "loss": 0.3177, "step": 35843 }, { "epoch": 2.6639910813823855, "grad_norm": 2.573016056708959, "learning_rate": 6.507571177200112e-07, "loss": 0.3699, "step": 35844 }, { "epoch": 2.664065403195838, "grad_norm": 2.386359217413982, "learning_rate": 6.504724296425746e-07, "loss": 0.2532, "step": 35845 }, { "epoch": 2.6641397250092904, "grad_norm": 2.1982948890548477, "learning_rate": 6.501878017569096e-07, "loss": 0.2309, "step": 35846 }, { "epoch": 2.6642140468227424, "grad_norm": 2.4712524711843975, "learning_rate": 6.499032340648525e-07, "loss": 0.2493, "step": 35847 }, { "epoch": 2.6642883686361944, "grad_norm": 2.1477350999678135, "learning_rate": 6.49618726568233e-07, "loss": 0.2091, "step": 35848 }, { "epoch": 2.664362690449647, "grad_norm": 2.295975127432952, "learning_rate": 6.493342792688806e-07, "loss": 0.2996, "step": 35849 }, { "epoch": 2.6644370122630994, "grad_norm": 2.2554379701053957, "learning_rate": 6.490498921686316e-07, "loss": 0.2523, "step": 35850 }, { "epoch": 2.6645113340765514, "grad_norm": 2.2627237096977986, "learning_rate": 6.487655652693126e-07, "loss": 0.2456, "step": 35851 }, { "epoch": 2.664585655890004, "grad_norm": 2.0828723345067415, "learning_rate": 6.484812985727574e-07, "loss": 0.2228, "step": 35852 }, { "epoch": 2.664659977703456, "grad_norm": 2.0255126386590208, "learning_rate": 6.481970920807934e-07, "loss": 0.2026, "step": 35853 }, { "epoch": 2.6647342995169083, "grad_norm": 1.8157401025320214, "learning_rate": 6.479129457952515e-07, "loss": 0.1762, "step": 35854 }, { "epoch": 2.6648086213303603, "grad_norm": 2.3009569351358343, "learning_rate": 6.476288597179615e-07, "loss": 0.2984, "step": 35855 }, { "epoch": 2.664882943143813, "grad_norm": 2.5707144296204723, "learning_rate": 6.473448338507493e-07, "loss": 0.3169, "step": 35856 }, { "epoch": 2.664957264957265, "grad_norm": 2.826605348437602, "learning_rate": 6.470608681954482e-07, "loss": 0.2363, "step": 35857 }, { "epoch": 2.6650315867707173, "grad_norm": 2.2795293464069073, "learning_rate": 6.467769627538823e-07, "loss": 0.2451, "step": 35858 }, { "epoch": 2.6651059085841693, "grad_norm": 2.1648729506087743, "learning_rate": 6.464931175278821e-07, "loss": 0.2775, "step": 35859 }, { "epoch": 2.6651802303976218, "grad_norm": 2.8117248383963007, "learning_rate": 6.46209332519273e-07, "loss": 0.2939, "step": 35860 }, { "epoch": 2.665254552211074, "grad_norm": 2.1458908871788873, "learning_rate": 6.459256077298825e-07, "loss": 0.2525, "step": 35861 }, { "epoch": 2.6653288740245262, "grad_norm": 2.094776228645399, "learning_rate": 6.45641943161539e-07, "loss": 0.3004, "step": 35862 }, { "epoch": 2.6654031958379782, "grad_norm": 2.7462497368260523, "learning_rate": 6.453583388160678e-07, "loss": 0.248, "step": 35863 }, { "epoch": 2.6654775176514307, "grad_norm": 2.6990625107775057, "learning_rate": 6.45074794695294e-07, "loss": 0.2955, "step": 35864 }, { "epoch": 2.665551839464883, "grad_norm": 2.8818670479867228, "learning_rate": 6.447913108010428e-07, "loss": 0.2614, "step": 35865 }, { "epoch": 2.665626161278335, "grad_norm": 2.1535010942629094, "learning_rate": 6.445078871351384e-07, "loss": 0.284, "step": 35866 }, { "epoch": 2.665700483091787, "grad_norm": 2.8669897950462744, "learning_rate": 6.442245236994083e-07, "loss": 0.2981, "step": 35867 }, { "epoch": 2.6657748049052397, "grad_norm": 3.190944875125149, "learning_rate": 6.43941220495673e-07, "loss": 0.2912, "step": 35868 }, { "epoch": 2.665849126718692, "grad_norm": 2.9835923053984352, "learning_rate": 6.436579775257612e-07, "loss": 0.3439, "step": 35869 }, { "epoch": 2.665923448532144, "grad_norm": 2.174852948324115, "learning_rate": 6.433747947914936e-07, "loss": 0.2433, "step": 35870 }, { "epoch": 2.6659977703455966, "grad_norm": 2.261328731758182, "learning_rate": 6.430916722946923e-07, "loss": 0.2192, "step": 35871 }, { "epoch": 2.6660720921590486, "grad_norm": 2.991074629819455, "learning_rate": 6.428086100371822e-07, "loss": 0.3044, "step": 35872 }, { "epoch": 2.666146413972501, "grad_norm": 2.0064907113682424, "learning_rate": 6.425256080207842e-07, "loss": 0.2351, "step": 35873 }, { "epoch": 2.666220735785953, "grad_norm": 2.9307194602306557, "learning_rate": 6.422426662473214e-07, "loss": 0.3844, "step": 35874 }, { "epoch": 2.6662950575994055, "grad_norm": 2.480533195779653, "learning_rate": 6.419597847186154e-07, "loss": 0.4073, "step": 35875 }, { "epoch": 2.6663693794128576, "grad_norm": 2.5638585407170487, "learning_rate": 6.416769634364861e-07, "loss": 0.207, "step": 35876 }, { "epoch": 2.66644370122631, "grad_norm": 2.033903403400919, "learning_rate": 6.413942024027553e-07, "loss": 0.2548, "step": 35877 }, { "epoch": 2.666518023039762, "grad_norm": 2.5283414930897705, "learning_rate": 6.411115016192415e-07, "loss": 0.2806, "step": 35878 }, { "epoch": 2.6665923448532145, "grad_norm": 2.901368484079997, "learning_rate": 6.408288610877678e-07, "loss": 0.3059, "step": 35879 }, { "epoch": 2.6666666666666665, "grad_norm": 2.422322840680406, "learning_rate": 6.405462808101514e-07, "loss": 0.2503, "step": 35880 }, { "epoch": 2.666740988480119, "grad_norm": 2.79395085906071, "learning_rate": 6.402637607882134e-07, "loss": 0.2839, "step": 35881 }, { "epoch": 2.666815310293571, "grad_norm": 2.469980778747882, "learning_rate": 6.39981301023771e-07, "loss": 0.3222, "step": 35882 }, { "epoch": 2.6668896321070235, "grad_norm": 2.5159360581302153, "learning_rate": 6.396989015186427e-07, "loss": 0.3162, "step": 35883 }, { "epoch": 2.666963953920476, "grad_norm": 2.4040344899972967, "learning_rate": 6.394165622746484e-07, "loss": 0.2925, "step": 35884 }, { "epoch": 2.667038275733928, "grad_norm": 2.2602630627949822, "learning_rate": 6.391342832936043e-07, "loss": 0.2856, "step": 35885 }, { "epoch": 2.66711259754738, "grad_norm": 2.2578220216037908, "learning_rate": 6.388520645773289e-07, "loss": 0.305, "step": 35886 }, { "epoch": 2.6671869193608324, "grad_norm": 1.9915845912923567, "learning_rate": 6.385699061276374e-07, "loss": 0.2562, "step": 35887 }, { "epoch": 2.667261241174285, "grad_norm": 2.105563032453325, "learning_rate": 6.382878079463461e-07, "loss": 0.2284, "step": 35888 }, { "epoch": 2.667335562987737, "grad_norm": 2.5615925712555963, "learning_rate": 6.380057700352726e-07, "loss": 0.2955, "step": 35889 }, { "epoch": 2.667409884801189, "grad_norm": 2.320194856942225, "learning_rate": 6.37723792396232e-07, "loss": 0.2795, "step": 35890 }, { "epoch": 2.6674842066146414, "grad_norm": 1.9952484316818002, "learning_rate": 6.374418750310407e-07, "loss": 0.2066, "step": 35891 }, { "epoch": 2.667558528428094, "grad_norm": 2.3446192561736896, "learning_rate": 6.371600179415127e-07, "loss": 0.2178, "step": 35892 }, { "epoch": 2.667632850241546, "grad_norm": 2.0679225046083505, "learning_rate": 6.36878221129461e-07, "loss": 0.271, "step": 35893 }, { "epoch": 2.6677071720549983, "grad_norm": 1.7113185308000942, "learning_rate": 6.365964845967044e-07, "loss": 0.204, "step": 35894 }, { "epoch": 2.6677814938684503, "grad_norm": 1.8428836276054978, "learning_rate": 6.36314808345051e-07, "loss": 0.2028, "step": 35895 }, { "epoch": 2.6678558156819028, "grad_norm": 2.0978917206754484, "learning_rate": 6.360331923763186e-07, "loss": 0.2459, "step": 35896 }, { "epoch": 2.667930137495355, "grad_norm": 2.0113771983358912, "learning_rate": 6.357516366923189e-07, "loss": 0.2151, "step": 35897 }, { "epoch": 2.6680044593088073, "grad_norm": 2.6969102360171506, "learning_rate": 6.354701412948638e-07, "loss": 0.3557, "step": 35898 }, { "epoch": 2.6680787811222593, "grad_norm": 2.4775356863774562, "learning_rate": 6.351887061857665e-07, "loss": 0.2257, "step": 35899 }, { "epoch": 2.6681531029357117, "grad_norm": 2.0697755801697753, "learning_rate": 6.349073313668374e-07, "loss": 0.256, "step": 35900 }, { "epoch": 2.6682274247491637, "grad_norm": 1.9272579527991864, "learning_rate": 6.346260168398899e-07, "loss": 0.1917, "step": 35901 }, { "epoch": 2.668301746562616, "grad_norm": 1.9925248796604087, "learning_rate": 6.343447626067323e-07, "loss": 0.2332, "step": 35902 }, { "epoch": 2.668376068376068, "grad_norm": 2.440927192019051, "learning_rate": 6.340635686691788e-07, "loss": 0.3439, "step": 35903 }, { "epoch": 2.6684503901895207, "grad_norm": 2.9013929491138493, "learning_rate": 6.337824350290389e-07, "loss": 0.3455, "step": 35904 }, { "epoch": 2.6685247120029727, "grad_norm": 2.3429386055729027, "learning_rate": 6.335013616881192e-07, "loss": 0.2137, "step": 35905 }, { "epoch": 2.668599033816425, "grad_norm": 2.5577796985503256, "learning_rate": 6.332203486482335e-07, "loss": 0.3343, "step": 35906 }, { "epoch": 2.6686733556298776, "grad_norm": 2.162916152482037, "learning_rate": 6.329393959111895e-07, "loss": 0.1917, "step": 35907 }, { "epoch": 2.6687476774433296, "grad_norm": 3.105006675496365, "learning_rate": 6.326585034787946e-07, "loss": 0.3152, "step": 35908 }, { "epoch": 2.6688219992567817, "grad_norm": 2.0605745311551473, "learning_rate": 6.323776713528607e-07, "loss": 0.2543, "step": 35909 }, { "epoch": 2.668896321070234, "grad_norm": 2.4561257822795177, "learning_rate": 6.320968995351906e-07, "loss": 0.3498, "step": 35910 }, { "epoch": 2.6689706428836866, "grad_norm": 2.3012497537147083, "learning_rate": 6.318161880275953e-07, "loss": 0.3177, "step": 35911 }, { "epoch": 2.6690449646971386, "grad_norm": 2.1401780817031177, "learning_rate": 6.315355368318798e-07, "loss": 0.336, "step": 35912 }, { "epoch": 2.6691192865105906, "grad_norm": 2.0399756616330604, "learning_rate": 6.312549459498552e-07, "loss": 0.2206, "step": 35913 }, { "epoch": 2.669193608324043, "grad_norm": 2.418454296594283, "learning_rate": 6.309744153833241e-07, "loss": 0.3301, "step": 35914 }, { "epoch": 2.6692679301374955, "grad_norm": 2.4275231313036523, "learning_rate": 6.306939451340921e-07, "loss": 0.31, "step": 35915 }, { "epoch": 2.6693422519509475, "grad_norm": 2.1411644633444307, "learning_rate": 6.304135352039675e-07, "loss": 0.2195, "step": 35916 }, { "epoch": 2.6694165737644, "grad_norm": 2.196348595362116, "learning_rate": 6.301331855947534e-07, "loss": 0.2717, "step": 35917 }, { "epoch": 2.669490895577852, "grad_norm": 2.3186959872410866, "learning_rate": 6.298528963082573e-07, "loss": 0.254, "step": 35918 }, { "epoch": 2.6695652173913045, "grad_norm": 2.6035393563075635, "learning_rate": 6.295726673462821e-07, "loss": 0.3244, "step": 35919 }, { "epoch": 2.6696395392047565, "grad_norm": 2.3900809115243935, "learning_rate": 6.292924987106319e-07, "loss": 0.3102, "step": 35920 }, { "epoch": 2.669713861018209, "grad_norm": 2.1598097961580818, "learning_rate": 6.290123904031109e-07, "loss": 0.2306, "step": 35921 }, { "epoch": 2.669788182831661, "grad_norm": 2.3085342106571627, "learning_rate": 6.287323424255188e-07, "loss": 0.2494, "step": 35922 }, { "epoch": 2.6698625046451134, "grad_norm": 2.203131296279271, "learning_rate": 6.284523547796651e-07, "loss": 0.2501, "step": 35923 }, { "epoch": 2.6699368264585654, "grad_norm": 1.8981041539587424, "learning_rate": 6.281724274673462e-07, "loss": 0.1816, "step": 35924 }, { "epoch": 2.670011148272018, "grad_norm": 2.833888361917018, "learning_rate": 6.278925604903685e-07, "loss": 0.285, "step": 35925 }, { "epoch": 2.67008547008547, "grad_norm": 2.3483058627141844, "learning_rate": 6.276127538505316e-07, "loss": 0.3236, "step": 35926 }, { "epoch": 2.6701597918989224, "grad_norm": 2.6013210612804922, "learning_rate": 6.273330075496376e-07, "loss": 0.2667, "step": 35927 }, { "epoch": 2.6702341137123744, "grad_norm": 3.51868086496515, "learning_rate": 6.270533215894881e-07, "loss": 0.3042, "step": 35928 }, { "epoch": 2.670308435525827, "grad_norm": 2.32663200531485, "learning_rate": 6.267736959718817e-07, "loss": 0.3216, "step": 35929 }, { "epoch": 2.6703827573392793, "grad_norm": 1.9794656445475922, "learning_rate": 6.264941306986205e-07, "loss": 0.2099, "step": 35930 }, { "epoch": 2.6704570791527313, "grad_norm": 2.762949440812455, "learning_rate": 6.26214625771504e-07, "loss": 0.3623, "step": 35931 }, { "epoch": 2.6705314009661834, "grad_norm": 2.420148376006924, "learning_rate": 6.25935181192332e-07, "loss": 0.2863, "step": 35932 }, { "epoch": 2.670605722779636, "grad_norm": 2.6873693149993305, "learning_rate": 6.256557969629018e-07, "loss": 0.2712, "step": 35933 }, { "epoch": 2.6706800445930883, "grad_norm": 1.9081395279482307, "learning_rate": 6.253764730850132e-07, "loss": 0.2384, "step": 35934 }, { "epoch": 2.6707543664065403, "grad_norm": 2.6203969699891023, "learning_rate": 6.250972095604646e-07, "loss": 0.2805, "step": 35935 }, { "epoch": 2.6708286882199923, "grad_norm": 2.3057591466203182, "learning_rate": 6.248180063910547e-07, "loss": 0.2684, "step": 35936 }, { "epoch": 2.6709030100334448, "grad_norm": 2.683281229260873, "learning_rate": 6.245388635785777e-07, "loss": 0.2987, "step": 35937 }, { "epoch": 2.6709773318468972, "grad_norm": 2.2279959962525218, "learning_rate": 6.242597811248352e-07, "loss": 0.2254, "step": 35938 }, { "epoch": 2.6710516536603492, "grad_norm": 1.9580105082841277, "learning_rate": 6.239807590316204e-07, "loss": 0.1889, "step": 35939 }, { "epoch": 2.6711259754738017, "grad_norm": 2.3505935216085416, "learning_rate": 6.23701797300732e-07, "loss": 0.2826, "step": 35940 }, { "epoch": 2.6712002972872537, "grad_norm": 2.5094517235103053, "learning_rate": 6.23422895933965e-07, "loss": 0.3273, "step": 35941 }, { "epoch": 2.671274619100706, "grad_norm": 2.891437283263107, "learning_rate": 6.231440549331136e-07, "loss": 0.2891, "step": 35942 }, { "epoch": 2.671348940914158, "grad_norm": 2.188219468459233, "learning_rate": 6.228652742999775e-07, "loss": 0.2817, "step": 35943 }, { "epoch": 2.6714232627276107, "grad_norm": 2.4917839128877675, "learning_rate": 6.225865540363441e-07, "loss": 0.3233, "step": 35944 }, { "epoch": 2.6714975845410627, "grad_norm": 2.6477570528474526, "learning_rate": 6.223078941440141e-07, "loss": 0.3069, "step": 35945 }, { "epoch": 2.671571906354515, "grad_norm": 2.4966481430049616, "learning_rate": 6.220292946247786e-07, "loss": 0.2679, "step": 35946 }, { "epoch": 2.671646228167967, "grad_norm": 2.217086635740427, "learning_rate": 6.217507554804325e-07, "loss": 0.2198, "step": 35947 }, { "epoch": 2.6717205499814196, "grad_norm": 2.1615158240417593, "learning_rate": 6.214722767127679e-07, "loss": 0.1832, "step": 35948 }, { "epoch": 2.6717948717948716, "grad_norm": 1.7425631343517813, "learning_rate": 6.211938583235777e-07, "loss": 0.2075, "step": 35949 }, { "epoch": 2.671869193608324, "grad_norm": 2.1488536293680074, "learning_rate": 6.209155003146561e-07, "loss": 0.2506, "step": 35950 }, { "epoch": 2.671943515421776, "grad_norm": 2.533155280422668, "learning_rate": 6.206372026877938e-07, "loss": 0.3277, "step": 35951 }, { "epoch": 2.6720178372352286, "grad_norm": 2.7105584930109368, "learning_rate": 6.203589654447805e-07, "loss": 0.2197, "step": 35952 }, { "epoch": 2.672092159048681, "grad_norm": 2.1967470770795967, "learning_rate": 6.200807885874116e-07, "loss": 0.3044, "step": 35953 }, { "epoch": 2.672166480862133, "grad_norm": 1.9264624770072847, "learning_rate": 6.198026721174755e-07, "loss": 0.2375, "step": 35954 }, { "epoch": 2.672240802675585, "grad_norm": 2.4045216850838247, "learning_rate": 6.195246160367641e-07, "loss": 0.2847, "step": 35955 }, { "epoch": 2.6723151244890375, "grad_norm": 3.066063055619884, "learning_rate": 6.192466203470638e-07, "loss": 0.2967, "step": 35956 }, { "epoch": 2.67238944630249, "grad_norm": 2.487131902010007, "learning_rate": 6.189686850501686e-07, "loss": 0.2956, "step": 35957 }, { "epoch": 2.672463768115942, "grad_norm": 2.3613016666538815, "learning_rate": 6.186908101478673e-07, "loss": 0.2662, "step": 35958 }, { "epoch": 2.672538089929394, "grad_norm": 2.189774444019861, "learning_rate": 6.184129956419449e-07, "loss": 0.2809, "step": 35959 }, { "epoch": 2.6726124117428465, "grad_norm": 2.7581351268437424, "learning_rate": 6.181352415341957e-07, "loss": 0.2899, "step": 35960 }, { "epoch": 2.672686733556299, "grad_norm": 3.5052316087302886, "learning_rate": 6.178575478264026e-07, "loss": 0.3714, "step": 35961 }, { "epoch": 2.672761055369751, "grad_norm": 2.6196653006310404, "learning_rate": 6.175799145203575e-07, "loss": 0.2462, "step": 35962 }, { "epoch": 2.6728353771832034, "grad_norm": 2.7230579203879035, "learning_rate": 6.173023416178459e-07, "loss": 0.276, "step": 35963 }, { "epoch": 2.6729096989966554, "grad_norm": 1.6796312004226241, "learning_rate": 6.170248291206538e-07, "loss": 0.2226, "step": 35964 }, { "epoch": 2.672984020810108, "grad_norm": 2.067636292806806, "learning_rate": 6.16747377030572e-07, "loss": 0.2024, "step": 35965 }, { "epoch": 2.67305834262356, "grad_norm": 2.2025196563543172, "learning_rate": 6.164699853493806e-07, "loss": 0.2522, "step": 35966 }, { "epoch": 2.6731326644370124, "grad_norm": 2.5905585091977463, "learning_rate": 6.16192654078871e-07, "loss": 0.22, "step": 35967 }, { "epoch": 2.6732069862504644, "grad_norm": 1.9959822996218646, "learning_rate": 6.159153832208253e-07, "loss": 0.2183, "step": 35968 }, { "epoch": 2.673281308063917, "grad_norm": 2.0769768096323866, "learning_rate": 6.156381727770278e-07, "loss": 0.2028, "step": 35969 }, { "epoch": 2.673355629877369, "grad_norm": 2.0807888507400127, "learning_rate": 6.153610227492667e-07, "loss": 0.2501, "step": 35970 }, { "epoch": 2.6734299516908213, "grad_norm": 1.7866328990501856, "learning_rate": 6.150839331393221e-07, "loss": 0.1873, "step": 35971 }, { "epoch": 2.6735042735042738, "grad_norm": 2.6013952082064256, "learning_rate": 6.148069039489823e-07, "loss": 0.2948, "step": 35972 }, { "epoch": 2.673578595317726, "grad_norm": 2.1018820948456582, "learning_rate": 6.145299351800293e-07, "loss": 0.2251, "step": 35973 }, { "epoch": 2.673652917131178, "grad_norm": 2.890434942044991, "learning_rate": 6.142530268342428e-07, "loss": 0.2768, "step": 35974 }, { "epoch": 2.6737272389446303, "grad_norm": 5.70547763962479, "learning_rate": 6.139761789134114e-07, "loss": 0.2049, "step": 35975 }, { "epoch": 2.6738015607580827, "grad_norm": 2.156560797887191, "learning_rate": 6.136993914193112e-07, "loss": 0.2048, "step": 35976 }, { "epoch": 2.6738758825715347, "grad_norm": 2.250575828412256, "learning_rate": 6.134226643537311e-07, "loss": 0.2575, "step": 35977 }, { "epoch": 2.6739502043849868, "grad_norm": 2.529141227718592, "learning_rate": 6.131459977184461e-07, "loss": 0.36, "step": 35978 }, { "epoch": 2.6740245261984392, "grad_norm": 2.2713609772260366, "learning_rate": 6.128693915152417e-07, "loss": 0.3299, "step": 35979 }, { "epoch": 2.6740988480118917, "grad_norm": 2.2946953473765745, "learning_rate": 6.125928457458963e-07, "loss": 0.2666, "step": 35980 }, { "epoch": 2.6741731698253437, "grad_norm": 2.450126696699219, "learning_rate": 6.123163604121895e-07, "loss": 0.2648, "step": 35981 }, { "epoch": 2.6742474916387957, "grad_norm": 2.7406324071022037, "learning_rate": 6.120399355159057e-07, "loss": 0.3507, "step": 35982 }, { "epoch": 2.674321813452248, "grad_norm": 2.381708195713017, "learning_rate": 6.117635710588188e-07, "loss": 0.2465, "step": 35983 }, { "epoch": 2.6743961352657006, "grad_norm": 2.6245665213536227, "learning_rate": 6.11487267042713e-07, "loss": 0.295, "step": 35984 }, { "epoch": 2.6744704570791527, "grad_norm": 2.5080451875756844, "learning_rate": 6.112110234693647e-07, "loss": 0.2756, "step": 35985 }, { "epoch": 2.674544778892605, "grad_norm": 2.660132575643668, "learning_rate": 6.109348403405524e-07, "loss": 0.2885, "step": 35986 }, { "epoch": 2.674619100706057, "grad_norm": 2.049609021588228, "learning_rate": 6.106587176580547e-07, "loss": 0.2257, "step": 35987 }, { "epoch": 2.6746934225195096, "grad_norm": 2.9562349390914364, "learning_rate": 6.103826554236491e-07, "loss": 0.3276, "step": 35988 }, { "epoch": 2.6747677443329616, "grad_norm": 1.7927263469766606, "learning_rate": 6.101066536391131e-07, "loss": 0.174, "step": 35989 }, { "epoch": 2.674842066146414, "grad_norm": 2.1172889871184317, "learning_rate": 6.09830712306223e-07, "loss": 0.278, "step": 35990 }, { "epoch": 2.674916387959866, "grad_norm": 1.8843033745023834, "learning_rate": 6.09554831426754e-07, "loss": 0.2296, "step": 35991 }, { "epoch": 2.6749907097733185, "grad_norm": 2.646979092419226, "learning_rate": 6.09279011002486e-07, "loss": 0.315, "step": 35992 }, { "epoch": 2.6750650315867706, "grad_norm": 14.556104150358639, "learning_rate": 6.090032510351907e-07, "loss": 0.3538, "step": 35993 }, { "epoch": 2.675139353400223, "grad_norm": 2.5042335111351344, "learning_rate": 6.087275515266456e-07, "loss": 0.3056, "step": 35994 }, { "epoch": 2.6752136752136755, "grad_norm": 2.376444431862108, "learning_rate": 6.084519124786259e-07, "loss": 0.2499, "step": 35995 }, { "epoch": 2.6752879970271275, "grad_norm": 2.807789352747287, "learning_rate": 6.081763338929047e-07, "loss": 0.2728, "step": 35996 }, { "epoch": 2.6753623188405795, "grad_norm": 2.355866615781901, "learning_rate": 6.079008157712574e-07, "loss": 0.2803, "step": 35997 }, { "epoch": 2.675436640654032, "grad_norm": 2.344102909426188, "learning_rate": 6.076253581154567e-07, "loss": 0.2283, "step": 35998 }, { "epoch": 2.6755109624674844, "grad_norm": 2.8140809846610844, "learning_rate": 6.073499609272792e-07, "loss": 0.2548, "step": 35999 }, { "epoch": 2.6755852842809364, "grad_norm": 2.6353325802479652, "learning_rate": 6.070746242084924e-07, "loss": 0.2986, "step": 36000 }, { "epoch": 2.6756596060943885, "grad_norm": 1.9586802640618235, "learning_rate": 6.067993479608736e-07, "loss": 0.2296, "step": 36001 }, { "epoch": 2.675733927907841, "grad_norm": 2.3896142709753896, "learning_rate": 6.065241321861926e-07, "loss": 0.2404, "step": 36002 }, { "epoch": 2.6758082497212934, "grad_norm": 1.6660083965298265, "learning_rate": 6.062489768862211e-07, "loss": 0.1743, "step": 36003 }, { "epoch": 2.6758825715347454, "grad_norm": 2.731048367010585, "learning_rate": 6.059738820627315e-07, "loss": 0.3291, "step": 36004 }, { "epoch": 2.675956893348198, "grad_norm": 2.699156543480376, "learning_rate": 6.056988477174941e-07, "loss": 0.3438, "step": 36005 }, { "epoch": 2.67603121516165, "grad_norm": 2.156216806971015, "learning_rate": 6.054238738522822e-07, "loss": 0.2665, "step": 36006 }, { "epoch": 2.6761055369751023, "grad_norm": 2.5693992560862697, "learning_rate": 6.051489604688632e-07, "loss": 0.2516, "step": 36007 }, { "epoch": 2.6761798587885544, "grad_norm": 3.384291858966992, "learning_rate": 6.048741075690056e-07, "loss": 0.288, "step": 36008 }, { "epoch": 2.676254180602007, "grad_norm": 2.207103344376458, "learning_rate": 6.045993151544827e-07, "loss": 0.2954, "step": 36009 }, { "epoch": 2.676328502415459, "grad_norm": 2.649504799567157, "learning_rate": 6.043245832270627e-07, "loss": 0.3182, "step": 36010 }, { "epoch": 2.6764028242289113, "grad_norm": 2.157759658389903, "learning_rate": 6.040499117885135e-07, "loss": 0.2363, "step": 36011 }, { "epoch": 2.6764771460423633, "grad_norm": 1.795744208534042, "learning_rate": 6.037753008406022e-07, "loss": 0.1648, "step": 36012 }, { "epoch": 2.6765514678558158, "grad_norm": 2.027913284162464, "learning_rate": 6.035007503850976e-07, "loss": 0.2975, "step": 36013 }, { "epoch": 2.676625789669268, "grad_norm": 2.4683373922286846, "learning_rate": 6.032262604237693e-07, "loss": 0.2426, "step": 36014 }, { "epoch": 2.6767001114827202, "grad_norm": 2.708276318315424, "learning_rate": 6.029518309583804e-07, "loss": 0.2795, "step": 36015 }, { "epoch": 2.6767744332961723, "grad_norm": 2.55623494145245, "learning_rate": 6.026774619907028e-07, "loss": 0.3319, "step": 36016 }, { "epoch": 2.6768487551096247, "grad_norm": 1.8543570646842806, "learning_rate": 6.024031535224984e-07, "loss": 0.2611, "step": 36017 }, { "epoch": 2.676923076923077, "grad_norm": 2.914456462943591, "learning_rate": 6.021289055555346e-07, "loss": 0.2662, "step": 36018 }, { "epoch": 2.676997398736529, "grad_norm": 1.940842621367452, "learning_rate": 6.01854718091579e-07, "loss": 0.2247, "step": 36019 }, { "epoch": 2.677071720549981, "grad_norm": 2.2295479908954015, "learning_rate": 6.015805911323924e-07, "loss": 0.1951, "step": 36020 }, { "epoch": 2.6771460423634337, "grad_norm": 2.3907131380043856, "learning_rate": 6.013065246797445e-07, "loss": 0.2745, "step": 36021 }, { "epoch": 2.677220364176886, "grad_norm": 1.9383285730320332, "learning_rate": 6.010325187353971e-07, "loss": 0.208, "step": 36022 }, { "epoch": 2.677294685990338, "grad_norm": 2.126842101685429, "learning_rate": 6.007585733011157e-07, "loss": 0.3184, "step": 36023 }, { "epoch": 2.67736900780379, "grad_norm": 2.246890084843087, "learning_rate": 6.004846883786619e-07, "loss": 0.2333, "step": 36024 }, { "epoch": 2.6774433296172426, "grad_norm": 2.0922328326514887, "learning_rate": 6.002108639697989e-07, "loss": 0.2509, "step": 36025 }, { "epoch": 2.677517651430695, "grad_norm": 2.0155724099141317, "learning_rate": 5.999371000762921e-07, "loss": 0.2448, "step": 36026 }, { "epoch": 2.677591973244147, "grad_norm": 2.302939888616161, "learning_rate": 5.99663396699901e-07, "loss": 0.2905, "step": 36027 }, { "epoch": 2.6776662950575996, "grad_norm": 3.6115980693736076, "learning_rate": 5.993897538423909e-07, "loss": 0.3043, "step": 36028 }, { "epoch": 2.6777406168710516, "grad_norm": 2.623161894038388, "learning_rate": 5.991161715055216e-07, "loss": 0.3381, "step": 36029 }, { "epoch": 2.677814938684504, "grad_norm": 2.3101095291007328, "learning_rate": 5.988426496910527e-07, "loss": 0.2666, "step": 36030 }, { "epoch": 2.677889260497956, "grad_norm": 1.9742025118621522, "learning_rate": 5.985691884007494e-07, "loss": 0.1919, "step": 36031 }, { "epoch": 2.6779635823114085, "grad_norm": 2.666227039918963, "learning_rate": 5.982957876363693e-07, "loss": 0.2436, "step": 36032 }, { "epoch": 2.6780379041248605, "grad_norm": 2.923826717578636, "learning_rate": 5.980224473996732e-07, "loss": 0.3296, "step": 36033 }, { "epoch": 2.678112225938313, "grad_norm": 2.54457156093202, "learning_rate": 5.977491676924207e-07, "loss": 0.2846, "step": 36034 }, { "epoch": 2.678186547751765, "grad_norm": 2.476361883749784, "learning_rate": 5.974759485163695e-07, "loss": 0.3302, "step": 36035 }, { "epoch": 2.6782608695652175, "grad_norm": 2.667035619729675, "learning_rate": 5.972027898732824e-07, "loss": 0.3849, "step": 36036 }, { "epoch": 2.6783351913786695, "grad_norm": 2.2214624470735247, "learning_rate": 5.969296917649137e-07, "loss": 0.1833, "step": 36037 }, { "epoch": 2.678409513192122, "grad_norm": 2.3409636114642485, "learning_rate": 5.966566541930252e-07, "loss": 0.2043, "step": 36038 }, { "epoch": 2.678483835005574, "grad_norm": 2.5194339832108543, "learning_rate": 5.963836771593734e-07, "loss": 0.3135, "step": 36039 }, { "epoch": 2.6785581568190264, "grad_norm": 2.746551225950403, "learning_rate": 5.961107606657146e-07, "loss": 0.3146, "step": 36040 }, { "epoch": 2.678632478632479, "grad_norm": 2.192548579615343, "learning_rate": 5.958379047138074e-07, "loss": 0.2734, "step": 36041 }, { "epoch": 2.678706800445931, "grad_norm": 1.8998189806225028, "learning_rate": 5.955651093054071e-07, "loss": 0.2056, "step": 36042 }, { "epoch": 2.678781122259383, "grad_norm": 2.179482264527448, "learning_rate": 5.952923744422722e-07, "loss": 0.2578, "step": 36043 }, { "epoch": 2.6788554440728354, "grad_norm": 3.3731048365430154, "learning_rate": 5.950197001261571e-07, "loss": 0.4129, "step": 36044 }, { "epoch": 2.678929765886288, "grad_norm": 2.241863010660611, "learning_rate": 5.947470863588167e-07, "loss": 0.2442, "step": 36045 }, { "epoch": 2.67900408769974, "grad_norm": 2.528867619162062, "learning_rate": 5.944745331420066e-07, "loss": 0.2772, "step": 36046 }, { "epoch": 2.679078409513192, "grad_norm": 2.1695318939266284, "learning_rate": 5.942020404774795e-07, "loss": 0.2592, "step": 36047 }, { "epoch": 2.6791527313266443, "grad_norm": 2.7753399776971035, "learning_rate": 5.939296083669943e-07, "loss": 0.2455, "step": 36048 }, { "epoch": 2.679227053140097, "grad_norm": 2.71720835544019, "learning_rate": 5.936572368122994e-07, "loss": 0.3119, "step": 36049 }, { "epoch": 2.679301374953549, "grad_norm": 2.1267225286333047, "learning_rate": 5.933849258151537e-07, "loss": 0.2408, "step": 36050 }, { "epoch": 2.6793756967670013, "grad_norm": 2.643876551894047, "learning_rate": 5.931126753773076e-07, "loss": 0.3067, "step": 36051 }, { "epoch": 2.6794500185804533, "grad_norm": 2.2320442488859826, "learning_rate": 5.92840485500512e-07, "loss": 0.3624, "step": 36052 }, { "epoch": 2.6795243403939057, "grad_norm": 2.3647771910489523, "learning_rate": 5.925683561865225e-07, "loss": 0.2406, "step": 36053 }, { "epoch": 2.6795986622073578, "grad_norm": 2.102240581601353, "learning_rate": 5.922962874370908e-07, "loss": 0.2817, "step": 36054 }, { "epoch": 2.6796729840208102, "grad_norm": 2.21179976728177, "learning_rate": 5.920242792539655e-07, "loss": 0.2505, "step": 36055 }, { "epoch": 2.6797473058342622, "grad_norm": 2.0977211745914146, "learning_rate": 5.917523316389028e-07, "loss": 0.2175, "step": 36056 }, { "epoch": 2.6798216276477147, "grad_norm": 2.0880311246870065, "learning_rate": 5.914804445936472e-07, "loss": 0.1677, "step": 36057 }, { "epoch": 2.6798959494611667, "grad_norm": 2.1651495758219688, "learning_rate": 5.912086181199539e-07, "loss": 0.2585, "step": 36058 }, { "epoch": 2.679970271274619, "grad_norm": 2.662509351739097, "learning_rate": 5.909368522195702e-07, "loss": 0.3015, "step": 36059 }, { "epoch": 2.680044593088071, "grad_norm": 2.73859974423765, "learning_rate": 5.906651468942482e-07, "loss": 0.3235, "step": 36060 }, { "epoch": 2.6801189149015237, "grad_norm": 2.367638595563492, "learning_rate": 5.903935021457352e-07, "loss": 0.3187, "step": 36061 }, { "epoch": 2.6801932367149757, "grad_norm": 1.7761015538104026, "learning_rate": 5.9012191797578e-07, "loss": 0.1738, "step": 36062 }, { "epoch": 2.680267558528428, "grad_norm": 2.726507368071365, "learning_rate": 5.898503943861323e-07, "loss": 0.2528, "step": 36063 }, { "epoch": 2.6803418803418806, "grad_norm": 2.307750107188036, "learning_rate": 5.895789313785382e-07, "loss": 0.28, "step": 36064 }, { "epoch": 2.6804162021553326, "grad_norm": 2.385628359481167, "learning_rate": 5.89307528954749e-07, "loss": 0.2969, "step": 36065 }, { "epoch": 2.6804905239687846, "grad_norm": 2.9502051134997807, "learning_rate": 5.890361871165084e-07, "loss": 0.2438, "step": 36066 }, { "epoch": 2.680564845782237, "grad_norm": 2.0960625379726627, "learning_rate": 5.887649058655653e-07, "loss": 0.2081, "step": 36067 }, { "epoch": 2.6806391675956895, "grad_norm": 2.3104830181210723, "learning_rate": 5.884936852036649e-07, "loss": 0.2903, "step": 36068 }, { "epoch": 2.6807134894091416, "grad_norm": 2.0343906986212965, "learning_rate": 5.882225251325535e-07, "loss": 0.265, "step": 36069 }, { "epoch": 2.6807878112225936, "grad_norm": 3.78420835528652, "learning_rate": 5.879514256539775e-07, "loss": 0.4146, "step": 36070 }, { "epoch": 2.680862133036046, "grad_norm": 2.480131664731473, "learning_rate": 5.876803867696801e-07, "loss": 0.3067, "step": 36071 }, { "epoch": 2.6809364548494985, "grad_norm": 2.5703187126944385, "learning_rate": 5.874094084814108e-07, "loss": 0.2593, "step": 36072 }, { "epoch": 2.6810107766629505, "grad_norm": 2.016424275782104, "learning_rate": 5.871384907909106e-07, "loss": 0.2879, "step": 36073 }, { "epoch": 2.681085098476403, "grad_norm": 2.051472038897025, "learning_rate": 5.868676336999224e-07, "loss": 0.2412, "step": 36074 }, { "epoch": 2.681159420289855, "grad_norm": 2.295755012638468, "learning_rate": 5.865968372101938e-07, "loss": 0.3804, "step": 36075 }, { "epoch": 2.6812337421033074, "grad_norm": 2.6496532809419486, "learning_rate": 5.863261013234667e-07, "loss": 0.2827, "step": 36076 }, { "epoch": 2.6813080639167595, "grad_norm": 2.169864335684386, "learning_rate": 5.860554260414819e-07, "loss": 0.2491, "step": 36077 }, { "epoch": 2.681382385730212, "grad_norm": 3.407592787976007, "learning_rate": 5.85784811365987e-07, "loss": 0.3429, "step": 36078 }, { "epoch": 2.681456707543664, "grad_norm": 3.062226327947925, "learning_rate": 5.855142572987182e-07, "loss": 0.2836, "step": 36079 }, { "epoch": 2.6815310293571164, "grad_norm": 2.097174512085926, "learning_rate": 5.85243763841421e-07, "loss": 0.1961, "step": 36080 }, { "epoch": 2.6816053511705684, "grad_norm": 3.0373593529799168, "learning_rate": 5.84973330995835e-07, "loss": 0.2837, "step": 36081 }, { "epoch": 2.681679672984021, "grad_norm": 3.1929386100291137, "learning_rate": 5.847029587637043e-07, "loss": 0.3724, "step": 36082 }, { "epoch": 2.681753994797473, "grad_norm": 2.0942400874003164, "learning_rate": 5.844326471467665e-07, "loss": 0.299, "step": 36083 }, { "epoch": 2.6818283166109254, "grad_norm": 2.24088819066043, "learning_rate": 5.841623961467613e-07, "loss": 0.3067, "step": 36084 }, { "epoch": 2.6819026384243774, "grad_norm": 2.3026375884290244, "learning_rate": 5.838922057654317e-07, "loss": 0.2585, "step": 36085 }, { "epoch": 2.68197696023783, "grad_norm": 2.258743099629639, "learning_rate": 5.83622076004513e-07, "loss": 0.2508, "step": 36086 }, { "epoch": 2.6820512820512823, "grad_norm": 1.8688814207899314, "learning_rate": 5.833520068657495e-07, "loss": 0.2492, "step": 36087 }, { "epoch": 2.6821256038647343, "grad_norm": 2.7268230000013296, "learning_rate": 5.830819983508774e-07, "loss": 0.3514, "step": 36088 }, { "epoch": 2.6821999256781863, "grad_norm": 2.488464566739593, "learning_rate": 5.82812050461633e-07, "loss": 0.254, "step": 36089 }, { "epoch": 2.682274247491639, "grad_norm": 3.0978525557823677, "learning_rate": 5.825421631997585e-07, "loss": 0.3091, "step": 36090 }, { "epoch": 2.6823485693050912, "grad_norm": 2.230795317174653, "learning_rate": 5.822723365669858e-07, "loss": 0.2449, "step": 36091 }, { "epoch": 2.6824228911185433, "grad_norm": 2.3434090647849692, "learning_rate": 5.820025705650578e-07, "loss": 0.2325, "step": 36092 }, { "epoch": 2.6824972129319953, "grad_norm": 2.502657442811446, "learning_rate": 5.817328651957077e-07, "loss": 0.2762, "step": 36093 }, { "epoch": 2.6825715347454477, "grad_norm": 2.5638779453714537, "learning_rate": 5.814632204606707e-07, "loss": 0.2828, "step": 36094 }, { "epoch": 2.6826458565589, "grad_norm": 2.037790557030083, "learning_rate": 5.811936363616877e-07, "loss": 0.2152, "step": 36095 }, { "epoch": 2.682720178372352, "grad_norm": 2.4324420054186926, "learning_rate": 5.809241129004894e-07, "loss": 0.3293, "step": 36096 }, { "epoch": 2.6827945001858047, "grad_norm": 2.257916301616354, "learning_rate": 5.806546500788135e-07, "loss": 0.2736, "step": 36097 }, { "epoch": 2.6828688219992567, "grad_norm": 1.887577392142958, "learning_rate": 5.803852478983952e-07, "loss": 0.2199, "step": 36098 }, { "epoch": 2.682943143812709, "grad_norm": 2.348915919196592, "learning_rate": 5.801159063609662e-07, "loss": 0.2872, "step": 36099 }, { "epoch": 2.683017465626161, "grad_norm": 2.9871793236093898, "learning_rate": 5.798466254682633e-07, "loss": 0.3577, "step": 36100 }, { "epoch": 2.6830917874396136, "grad_norm": 2.444179865261171, "learning_rate": 5.795774052220193e-07, "loss": 0.2226, "step": 36101 }, { "epoch": 2.6831661092530656, "grad_norm": 2.0676364035931707, "learning_rate": 5.793082456239674e-07, "loss": 0.2167, "step": 36102 }, { "epoch": 2.683240431066518, "grad_norm": 2.892702887570907, "learning_rate": 5.790391466758383e-07, "loss": 0.357, "step": 36103 }, { "epoch": 2.68331475287997, "grad_norm": 2.463429555641959, "learning_rate": 5.787701083793684e-07, "loss": 0.3475, "step": 36104 }, { "epoch": 2.6833890746934226, "grad_norm": 2.062496851301505, "learning_rate": 5.785011307362875e-07, "loss": 0.2423, "step": 36105 }, { "epoch": 2.683463396506875, "grad_norm": 2.421417927740297, "learning_rate": 5.782322137483265e-07, "loss": 0.2533, "step": 36106 }, { "epoch": 2.683537718320327, "grad_norm": 3.1925339971172932, "learning_rate": 5.779633574172183e-07, "loss": 0.3734, "step": 36107 }, { "epoch": 2.683612040133779, "grad_norm": 2.175520903032821, "learning_rate": 5.776945617446916e-07, "loss": 0.3085, "step": 36108 }, { "epoch": 2.6836863619472315, "grad_norm": 2.0358139413366687, "learning_rate": 5.774258267324806e-07, "loss": 0.281, "step": 36109 }, { "epoch": 2.683760683760684, "grad_norm": 1.6407859523664863, "learning_rate": 5.771571523823127e-07, "loss": 0.1944, "step": 36110 }, { "epoch": 2.683835005574136, "grad_norm": 2.9991799030249093, "learning_rate": 5.768885386959166e-07, "loss": 0.2734, "step": 36111 }, { "epoch": 2.683909327387588, "grad_norm": 2.2492092111039326, "learning_rate": 5.766199856750265e-07, "loss": 0.2488, "step": 36112 }, { "epoch": 2.6839836492010405, "grad_norm": 2.2589504423385285, "learning_rate": 5.763514933213643e-07, "loss": 0.2799, "step": 36113 }, { "epoch": 2.684057971014493, "grad_norm": 2.549878456950872, "learning_rate": 5.760830616366642e-07, "loss": 0.2856, "step": 36114 }, { "epoch": 2.684132292827945, "grad_norm": 2.6230114445319783, "learning_rate": 5.758146906226525e-07, "loss": 0.2842, "step": 36115 }, { "epoch": 2.684206614641397, "grad_norm": 2.211758664555673, "learning_rate": 5.755463802810557e-07, "loss": 0.2576, "step": 36116 }, { "epoch": 2.6842809364548494, "grad_norm": 2.2232612677066594, "learning_rate": 5.752781306136035e-07, "loss": 0.2989, "step": 36117 }, { "epoch": 2.684355258268302, "grad_norm": 2.4769063005343526, "learning_rate": 5.750099416220201e-07, "loss": 0.2807, "step": 36118 }, { "epoch": 2.684429580081754, "grad_norm": 2.594965598944114, "learning_rate": 5.747418133080351e-07, "loss": 0.2691, "step": 36119 }, { "epoch": 2.6845039018952064, "grad_norm": 3.007385899591073, "learning_rate": 5.744737456733729e-07, "loss": 0.4059, "step": 36120 }, { "epoch": 2.6845782237086584, "grad_norm": 2.502355452509214, "learning_rate": 5.742057387197586e-07, "loss": 0.2881, "step": 36121 }, { "epoch": 2.684652545522111, "grad_norm": 2.079102101354395, "learning_rate": 5.739377924489197e-07, "loss": 0.2292, "step": 36122 }, { "epoch": 2.684726867335563, "grad_norm": 2.8345968794926644, "learning_rate": 5.736699068625795e-07, "loss": 0.32, "step": 36123 }, { "epoch": 2.6848011891490153, "grad_norm": 3.125382065279963, "learning_rate": 5.734020819624664e-07, "loss": 0.2698, "step": 36124 }, { "epoch": 2.6848755109624673, "grad_norm": 2.6122574447722324, "learning_rate": 5.731343177502979e-07, "loss": 0.3403, "step": 36125 }, { "epoch": 2.68494983277592, "grad_norm": 1.975810067265455, "learning_rate": 5.728666142278028e-07, "loss": 0.2661, "step": 36126 }, { "epoch": 2.685024154589372, "grad_norm": 1.5027032390823254, "learning_rate": 5.72598971396704e-07, "loss": 0.157, "step": 36127 }, { "epoch": 2.6850984764028243, "grad_norm": 1.9230682662376137, "learning_rate": 5.723313892587223e-07, "loss": 0.2417, "step": 36128 }, { "epoch": 2.6851727982162767, "grad_norm": 2.4506574974187334, "learning_rate": 5.720638678155832e-07, "loss": 0.2949, "step": 36129 }, { "epoch": 2.6852471200297288, "grad_norm": 2.1703301058532327, "learning_rate": 5.717964070690063e-07, "loss": 0.2541, "step": 36130 }, { "epoch": 2.685321441843181, "grad_norm": 1.902447549475185, "learning_rate": 5.715290070207158e-07, "loss": 0.2678, "step": 36131 }, { "epoch": 2.6853957636566332, "grad_norm": 2.3504267129039094, "learning_rate": 5.712616676724336e-07, "loss": 0.2357, "step": 36132 }, { "epoch": 2.6854700854700857, "grad_norm": 2.13041794853133, "learning_rate": 5.709943890258774e-07, "loss": 0.2078, "step": 36133 }, { "epoch": 2.6855444072835377, "grad_norm": 2.2234076584738696, "learning_rate": 5.707271710827711e-07, "loss": 0.2393, "step": 36134 }, { "epoch": 2.6856187290969897, "grad_norm": 1.8816737219819897, "learning_rate": 5.704600138448347e-07, "loss": 0.2242, "step": 36135 }, { "epoch": 2.685693050910442, "grad_norm": 2.3768886321376748, "learning_rate": 5.701929173137877e-07, "loss": 0.2432, "step": 36136 }, { "epoch": 2.6857673727238947, "grad_norm": 2.829378233229415, "learning_rate": 5.699258814913489e-07, "loss": 0.3039, "step": 36137 }, { "epoch": 2.6858416945373467, "grad_norm": 2.069430871884275, "learning_rate": 5.696589063792368e-07, "loss": 0.3129, "step": 36138 }, { "epoch": 2.6859160163507987, "grad_norm": 2.292968832344897, "learning_rate": 5.693919919791724e-07, "loss": 0.2636, "step": 36139 }, { "epoch": 2.685990338164251, "grad_norm": 1.9380875408139173, "learning_rate": 5.691251382928719e-07, "loss": 0.1668, "step": 36140 }, { "epoch": 2.6860646599777036, "grad_norm": 1.9169694727020814, "learning_rate": 5.688583453220564e-07, "loss": 0.2662, "step": 36141 }, { "epoch": 2.6861389817911556, "grad_norm": 2.475114293996157, "learning_rate": 5.68591613068441e-07, "loss": 0.2844, "step": 36142 }, { "epoch": 2.686213303604608, "grad_norm": 2.8606469123357523, "learning_rate": 5.683249415337411e-07, "loss": 0.3108, "step": 36143 }, { "epoch": 2.68628762541806, "grad_norm": 2.635327805488715, "learning_rate": 5.680583307196785e-07, "loss": 0.2189, "step": 36144 }, { "epoch": 2.6863619472315126, "grad_norm": 2.1156228689989827, "learning_rate": 5.677917806279654e-07, "loss": 0.2871, "step": 36145 }, { "epoch": 2.6864362690449646, "grad_norm": 2.082614763638014, "learning_rate": 5.675252912603224e-07, "loss": 0.2224, "step": 36146 }, { "epoch": 2.686510590858417, "grad_norm": 2.085609398732244, "learning_rate": 5.672588626184583e-07, "loss": 0.2221, "step": 36147 }, { "epoch": 2.686584912671869, "grad_norm": 2.294825780863119, "learning_rate": 5.66992494704095e-07, "loss": 0.3033, "step": 36148 }, { "epoch": 2.6866592344853215, "grad_norm": 1.5150529849513878, "learning_rate": 5.667261875189433e-07, "loss": 0.1648, "step": 36149 }, { "epoch": 2.6867335562987735, "grad_norm": 2.1147085100418543, "learning_rate": 5.664599410647176e-07, "loss": 0.2726, "step": 36150 }, { "epoch": 2.686807878112226, "grad_norm": 2.490201125992619, "learning_rate": 5.661937553431362e-07, "loss": 0.3563, "step": 36151 }, { "epoch": 2.6868821999256784, "grad_norm": 2.30148629220734, "learning_rate": 5.65927630355907e-07, "loss": 0.2554, "step": 36152 }, { "epoch": 2.6869565217391305, "grad_norm": 2.0412610135197213, "learning_rate": 5.656615661047482e-07, "loss": 0.2034, "step": 36153 }, { "epoch": 2.6870308435525825, "grad_norm": 2.442301845753906, "learning_rate": 5.65395562591371e-07, "loss": 0.2967, "step": 36154 }, { "epoch": 2.687105165366035, "grad_norm": 2.0660056539053318, "learning_rate": 5.651296198174861e-07, "loss": 0.2021, "step": 36155 }, { "epoch": 2.6871794871794874, "grad_norm": 2.344474143709033, "learning_rate": 5.648637377848076e-07, "loss": 0.293, "step": 36156 }, { "epoch": 2.6872538089929394, "grad_norm": 2.1421095389404146, "learning_rate": 5.645979164950477e-07, "loss": 0.2474, "step": 36157 }, { "epoch": 2.6873281308063914, "grad_norm": 2.8293412148080095, "learning_rate": 5.643321559499171e-07, "loss": 0.3341, "step": 36158 }, { "epoch": 2.687402452619844, "grad_norm": 2.323673223760057, "learning_rate": 5.640664561511255e-07, "loss": 0.2579, "step": 36159 }, { "epoch": 2.6874767744332964, "grad_norm": 2.476447794409157, "learning_rate": 5.638008171003839e-07, "loss": 0.331, "step": 36160 }, { "epoch": 2.6875510962467484, "grad_norm": 2.3793228685250645, "learning_rate": 5.635352387994042e-07, "loss": 0.2759, "step": 36161 }, { "epoch": 2.687625418060201, "grad_norm": 2.446727752769499, "learning_rate": 5.632697212498939e-07, "loss": 0.2996, "step": 36162 }, { "epoch": 2.687699739873653, "grad_norm": 2.4965651027387525, "learning_rate": 5.630042644535649e-07, "loss": 0.2383, "step": 36163 }, { "epoch": 2.6877740616871053, "grad_norm": 2.2899434209537413, "learning_rate": 5.627388684121238e-07, "loss": 0.2733, "step": 36164 }, { "epoch": 2.6878483835005573, "grad_norm": 2.406963205280046, "learning_rate": 5.624735331272802e-07, "loss": 0.332, "step": 36165 }, { "epoch": 2.68792270531401, "grad_norm": 2.1392854232271574, "learning_rate": 5.622082586007426e-07, "loss": 0.2435, "step": 36166 }, { "epoch": 2.687997027127462, "grad_norm": 2.1109956026253958, "learning_rate": 5.619430448342178e-07, "loss": 0.2548, "step": 36167 }, { "epoch": 2.6880713489409143, "grad_norm": 2.1945615250073467, "learning_rate": 5.616778918294152e-07, "loss": 0.231, "step": 36168 }, { "epoch": 2.6881456707543663, "grad_norm": 1.8771063415890565, "learning_rate": 5.614127995880403e-07, "loss": 0.2035, "step": 36169 }, { "epoch": 2.6882199925678187, "grad_norm": 2.084391525891047, "learning_rate": 5.611477681118005e-07, "loss": 0.2011, "step": 36170 }, { "epoch": 2.6882943143812708, "grad_norm": 2.8592324949799806, "learning_rate": 5.608827974024012e-07, "loss": 0.3053, "step": 36171 }, { "epoch": 2.688368636194723, "grad_norm": 2.8489872326144625, "learning_rate": 5.606178874615476e-07, "loss": 0.3227, "step": 36172 }, { "epoch": 2.6884429580081752, "grad_norm": 2.14135913368719, "learning_rate": 5.603530382909472e-07, "loss": 0.2518, "step": 36173 }, { "epoch": 2.6885172798216277, "grad_norm": 2.471692262567389, "learning_rate": 5.600882498923032e-07, "loss": 0.2636, "step": 36174 }, { "epoch": 2.68859160163508, "grad_norm": 2.2361458367016933, "learning_rate": 5.598235222673232e-07, "loss": 0.2517, "step": 36175 }, { "epoch": 2.688665923448532, "grad_norm": 2.342844448402092, "learning_rate": 5.595588554177089e-07, "loss": 0.2683, "step": 36176 }, { "epoch": 2.688740245261984, "grad_norm": 2.3547039781030885, "learning_rate": 5.592942493451625e-07, "loss": 0.2881, "step": 36177 }, { "epoch": 2.6888145670754366, "grad_norm": 2.248273040536786, "learning_rate": 5.590297040513926e-07, "loss": 0.212, "step": 36178 }, { "epoch": 2.688888888888889, "grad_norm": 2.980803532371079, "learning_rate": 5.587652195380999e-07, "loss": 0.3189, "step": 36179 }, { "epoch": 2.688963210702341, "grad_norm": 2.427443529733219, "learning_rate": 5.585007958069855e-07, "loss": 0.2607, "step": 36180 }, { "epoch": 2.689037532515793, "grad_norm": 3.8001106802547127, "learning_rate": 5.582364328597545e-07, "loss": 0.3644, "step": 36181 }, { "epoch": 2.6891118543292456, "grad_norm": 2.3572358265823428, "learning_rate": 5.579721306981046e-07, "loss": 0.2696, "step": 36182 }, { "epoch": 2.689186176142698, "grad_norm": 2.5173408576337044, "learning_rate": 5.577078893237431e-07, "loss": 0.2932, "step": 36183 }, { "epoch": 2.68926049795615, "grad_norm": 2.4343697309607872, "learning_rate": 5.574437087383655e-07, "loss": 0.3511, "step": 36184 }, { "epoch": 2.6893348197696025, "grad_norm": 2.7129050329411597, "learning_rate": 5.571795889436771e-07, "loss": 0.2927, "step": 36185 }, { "epoch": 2.6894091415830546, "grad_norm": 3.181931376318748, "learning_rate": 5.569155299413775e-07, "loss": 0.2849, "step": 36186 }, { "epoch": 2.689483463396507, "grad_norm": 2.3146260894482507, "learning_rate": 5.566515317331633e-07, "loss": 0.2886, "step": 36187 }, { "epoch": 2.689557785209959, "grad_norm": 2.1770937614305, "learning_rate": 5.563875943207386e-07, "loss": 0.2032, "step": 36188 }, { "epoch": 2.6896321070234115, "grad_norm": 2.6124141223638047, "learning_rate": 5.561237177057988e-07, "loss": 0.2159, "step": 36189 }, { "epoch": 2.6897064288368635, "grad_norm": 2.1392875142306496, "learning_rate": 5.558599018900457e-07, "loss": 0.2446, "step": 36190 }, { "epoch": 2.689780750650316, "grad_norm": 2.4405794315968437, "learning_rate": 5.555961468751769e-07, "loss": 0.3316, "step": 36191 }, { "epoch": 2.689855072463768, "grad_norm": 2.3550596458722577, "learning_rate": 5.553324526628901e-07, "loss": 0.2911, "step": 36192 }, { "epoch": 2.6899293942772204, "grad_norm": 2.1895173600260005, "learning_rate": 5.550688192548825e-07, "loss": 0.2553, "step": 36193 }, { "epoch": 2.6900037160906725, "grad_norm": 2.2066055052450766, "learning_rate": 5.548052466528509e-07, "loss": 0.2248, "step": 36194 }, { "epoch": 2.690078037904125, "grad_norm": 2.7908661037381375, "learning_rate": 5.545417348584947e-07, "loss": 0.257, "step": 36195 }, { "epoch": 2.690152359717577, "grad_norm": 2.1649527090781104, "learning_rate": 5.542782838735072e-07, "loss": 0.257, "step": 36196 }, { "epoch": 2.6902266815310294, "grad_norm": 2.673909746698945, "learning_rate": 5.540148936995871e-07, "loss": 0.2755, "step": 36197 }, { "epoch": 2.690301003344482, "grad_norm": 2.4967268248063994, "learning_rate": 5.537515643384284e-07, "loss": 0.2946, "step": 36198 }, { "epoch": 2.690375325157934, "grad_norm": 5.149229222016682, "learning_rate": 5.534882957917264e-07, "loss": 0.2951, "step": 36199 }, { "epoch": 2.690449646971386, "grad_norm": 2.174526771294129, "learning_rate": 5.532250880611778e-07, "loss": 0.2484, "step": 36200 }, { "epoch": 2.6905239687848383, "grad_norm": 2.405934972828532, "learning_rate": 5.529619411484755e-07, "loss": 0.3576, "step": 36201 }, { "epoch": 2.690598290598291, "grad_norm": 2.1206492249928517, "learning_rate": 5.526988550553136e-07, "loss": 0.2061, "step": 36202 }, { "epoch": 2.690672612411743, "grad_norm": 2.7982763524915737, "learning_rate": 5.524358297833887e-07, "loss": 0.3362, "step": 36203 }, { "epoch": 2.690746934225195, "grad_norm": 1.8889319023794175, "learning_rate": 5.521728653343883e-07, "loss": 0.2384, "step": 36204 }, { "epoch": 2.6908212560386473, "grad_norm": 1.6202410541964634, "learning_rate": 5.51909961710011e-07, "loss": 0.1956, "step": 36205 }, { "epoch": 2.6908955778520998, "grad_norm": 2.192603070907693, "learning_rate": 5.516471189119455e-07, "loss": 0.2847, "step": 36206 }, { "epoch": 2.690969899665552, "grad_norm": 1.81552866892702, "learning_rate": 5.513843369418859e-07, "loss": 0.2343, "step": 36207 }, { "epoch": 2.6910442214790042, "grad_norm": 2.4114040767879326, "learning_rate": 5.511216158015253e-07, "loss": 0.2512, "step": 36208 }, { "epoch": 2.6911185432924563, "grad_norm": 3.0813857682074297, "learning_rate": 5.508589554925514e-07, "loss": 0.3061, "step": 36209 }, { "epoch": 2.6911928651059087, "grad_norm": 2.3094871465929576, "learning_rate": 5.505963560166583e-07, "loss": 0.2578, "step": 36210 }, { "epoch": 2.6912671869193607, "grad_norm": 5.737376515126622, "learning_rate": 5.503338173755346e-07, "loss": 0.3531, "step": 36211 }, { "epoch": 2.691341508732813, "grad_norm": 2.445001481679854, "learning_rate": 5.500713395708735e-07, "loss": 0.3188, "step": 36212 }, { "epoch": 2.691415830546265, "grad_norm": 2.172350601963857, "learning_rate": 5.498089226043613e-07, "loss": 0.2764, "step": 36213 }, { "epoch": 2.6914901523597177, "grad_norm": 2.680133669201169, "learning_rate": 5.4954656647769e-07, "loss": 0.29, "step": 36214 }, { "epoch": 2.6915644741731697, "grad_norm": 2.1349551538739484, "learning_rate": 5.492842711925472e-07, "loss": 0.2444, "step": 36215 }, { "epoch": 2.691638795986622, "grad_norm": 2.246367275582458, "learning_rate": 5.490220367506205e-07, "loss": 0.2936, "step": 36216 }, { "epoch": 2.691713117800074, "grad_norm": 2.3975006881618266, "learning_rate": 5.487598631536017e-07, "loss": 0.2416, "step": 36217 }, { "epoch": 2.6917874396135266, "grad_norm": 2.691887739650756, "learning_rate": 5.484977504031752e-07, "loss": 0.3244, "step": 36218 }, { "epoch": 2.6918617614269786, "grad_norm": 2.888063031524035, "learning_rate": 5.482356985010295e-07, "loss": 0.4144, "step": 36219 }, { "epoch": 2.691936083240431, "grad_norm": 2.171089516143099, "learning_rate": 5.479737074488523e-07, "loss": 0.3174, "step": 36220 }, { "epoch": 2.6920104050538836, "grad_norm": 1.9530486057172216, "learning_rate": 5.477117772483299e-07, "loss": 0.2599, "step": 36221 }, { "epoch": 2.6920847268673356, "grad_norm": 2.3045045603842382, "learning_rate": 5.474499079011486e-07, "loss": 0.2187, "step": 36222 }, { "epoch": 2.6921590486807876, "grad_norm": 2.614799114681604, "learning_rate": 5.471880994089951e-07, "loss": 0.2658, "step": 36223 }, { "epoch": 2.69223337049424, "grad_norm": 2.001951965646513, "learning_rate": 5.469263517735524e-07, "loss": 0.2246, "step": 36224 }, { "epoch": 2.6923076923076925, "grad_norm": 2.015818170619863, "learning_rate": 5.466646649965113e-07, "loss": 0.2361, "step": 36225 }, { "epoch": 2.6923820141211445, "grad_norm": 2.5554446331673546, "learning_rate": 5.464030390795482e-07, "loss": 0.273, "step": 36226 }, { "epoch": 2.6924563359345965, "grad_norm": 2.8251312158426765, "learning_rate": 5.461414740243543e-07, "loss": 0.2579, "step": 36227 }, { "epoch": 2.692530657748049, "grad_norm": 2.306294264369073, "learning_rate": 5.458799698326089e-07, "loss": 0.2177, "step": 36228 }, { "epoch": 2.6926049795615015, "grad_norm": 2.443476486465315, "learning_rate": 5.456185265059999e-07, "loss": 0.3395, "step": 36229 }, { "epoch": 2.6926793013749535, "grad_norm": 2.9327456658000095, "learning_rate": 5.45357144046208e-07, "loss": 0.2805, "step": 36230 }, { "epoch": 2.692753623188406, "grad_norm": 2.2528887507195825, "learning_rate": 5.450958224549141e-07, "loss": 0.2584, "step": 36231 }, { "epoch": 2.692827945001858, "grad_norm": 2.646885653559023, "learning_rate": 5.448345617338046e-07, "loss": 0.2688, "step": 36232 }, { "epoch": 2.6929022668153104, "grad_norm": 2.914672062938696, "learning_rate": 5.445733618845584e-07, "loss": 0.3007, "step": 36233 }, { "epoch": 2.6929765886287624, "grad_norm": 2.108673581410568, "learning_rate": 5.443122229088594e-07, "loss": 0.232, "step": 36234 }, { "epoch": 2.693050910442215, "grad_norm": 2.5048537079445627, "learning_rate": 5.440511448083885e-07, "loss": 0.2836, "step": 36235 }, { "epoch": 2.693125232255667, "grad_norm": 2.2594756544366428, "learning_rate": 5.437901275848234e-07, "loss": 0.2439, "step": 36236 }, { "epoch": 2.6931995540691194, "grad_norm": 2.202073199384679, "learning_rate": 5.435291712398494e-07, "loss": 0.2795, "step": 36237 }, { "epoch": 2.6932738758825714, "grad_norm": 2.1417606184250717, "learning_rate": 5.432682757751417e-07, "loss": 0.2689, "step": 36238 }, { "epoch": 2.693348197696024, "grad_norm": 1.9851193094743405, "learning_rate": 5.430074411923835e-07, "loss": 0.2631, "step": 36239 }, { "epoch": 2.693422519509476, "grad_norm": 2.6309398916492066, "learning_rate": 5.427466674932525e-07, "loss": 0.3186, "step": 36240 }, { "epoch": 2.6934968413229283, "grad_norm": 2.3490414609199504, "learning_rate": 5.42485954679427e-07, "loss": 0.2534, "step": 36241 }, { "epoch": 2.6935711631363803, "grad_norm": 2.4919708735789246, "learning_rate": 5.42225302752587e-07, "loss": 0.3501, "step": 36242 }, { "epoch": 2.693645484949833, "grad_norm": 2.565447116583333, "learning_rate": 5.419647117144089e-07, "loss": 0.2301, "step": 36243 }, { "epoch": 2.6937198067632853, "grad_norm": 2.271408960902349, "learning_rate": 5.417041815665725e-07, "loss": 0.2552, "step": 36244 }, { "epoch": 2.6937941285767373, "grad_norm": 2.5700106948612658, "learning_rate": 5.41443712310753e-07, "loss": 0.3145, "step": 36245 }, { "epoch": 2.6938684503901893, "grad_norm": 2.6502475623695707, "learning_rate": 5.411833039486269e-07, "loss": 0.2996, "step": 36246 }, { "epoch": 2.6939427722036418, "grad_norm": 2.1617008734075895, "learning_rate": 5.409229564818741e-07, "loss": 0.2603, "step": 36247 }, { "epoch": 2.694017094017094, "grad_norm": 2.23145902565032, "learning_rate": 5.406626699121675e-07, "loss": 0.2597, "step": 36248 }, { "epoch": 2.6940914158305462, "grad_norm": 1.9940178143344185, "learning_rate": 5.404024442411848e-07, "loss": 0.2306, "step": 36249 }, { "epoch": 2.6941657376439982, "grad_norm": 2.3363152527087654, "learning_rate": 5.401422794705991e-07, "loss": 0.2856, "step": 36250 }, { "epoch": 2.6942400594574507, "grad_norm": 2.261853165970473, "learning_rate": 5.398821756020866e-07, "loss": 0.3143, "step": 36251 }, { "epoch": 2.694314381270903, "grad_norm": 2.4275418505279696, "learning_rate": 5.39622132637323e-07, "loss": 0.2368, "step": 36252 }, { "epoch": 2.694388703084355, "grad_norm": 2.5663140536612787, "learning_rate": 5.3936215057798e-07, "loss": 0.3106, "step": 36253 }, { "epoch": 2.6944630248978076, "grad_norm": 2.7451573669185882, "learning_rate": 5.39102229425733e-07, "loss": 0.3374, "step": 36254 }, { "epoch": 2.6945373467112597, "grad_norm": 2.699386111160796, "learning_rate": 5.388423691822542e-07, "loss": 0.3836, "step": 36255 }, { "epoch": 2.694611668524712, "grad_norm": 2.4261934776794174, "learning_rate": 5.385825698492186e-07, "loss": 0.3027, "step": 36256 }, { "epoch": 2.694685990338164, "grad_norm": 2.974461549501314, "learning_rate": 5.383228314282974e-07, "loss": 0.3143, "step": 36257 }, { "epoch": 2.6947603121516166, "grad_norm": 2.4267506539855113, "learning_rate": 5.380631539211612e-07, "loss": 0.3154, "step": 36258 }, { "epoch": 2.6948346339650686, "grad_norm": 2.706360524833599, "learning_rate": 5.378035373294866e-07, "loss": 0.3035, "step": 36259 }, { "epoch": 2.694908955778521, "grad_norm": 2.8350920949356464, "learning_rate": 5.375439816549388e-07, "loss": 0.3701, "step": 36260 }, { "epoch": 2.694983277591973, "grad_norm": 2.1763421693798906, "learning_rate": 5.372844868991933e-07, "loss": 0.2079, "step": 36261 }, { "epoch": 2.6950575994054256, "grad_norm": 2.1255595679080708, "learning_rate": 5.370250530639198e-07, "loss": 0.2411, "step": 36262 }, { "epoch": 2.695131921218878, "grad_norm": 2.407420164776435, "learning_rate": 5.367656801507858e-07, "loss": 0.3204, "step": 36263 }, { "epoch": 2.69520624303233, "grad_norm": 2.3466123129220637, "learning_rate": 5.365063681614657e-07, "loss": 0.2198, "step": 36264 }, { "epoch": 2.695280564845782, "grad_norm": 2.181460981035122, "learning_rate": 5.362471170976246e-07, "loss": 0.1743, "step": 36265 }, { "epoch": 2.6953548866592345, "grad_norm": 3.002855437948692, "learning_rate": 5.359879269609359e-07, "loss": 0.3521, "step": 36266 }, { "epoch": 2.695429208472687, "grad_norm": 2.14308643190524, "learning_rate": 5.357287977530646e-07, "loss": 0.2668, "step": 36267 }, { "epoch": 2.695503530286139, "grad_norm": 2.174429212575482, "learning_rate": 5.354697294756794e-07, "loss": 0.2769, "step": 36268 }, { "epoch": 2.695577852099591, "grad_norm": 1.601051182189583, "learning_rate": 5.352107221304503e-07, "loss": 0.156, "step": 36269 }, { "epoch": 2.6956521739130435, "grad_norm": 3.361799127997494, "learning_rate": 5.349517757190426e-07, "loss": 0.3317, "step": 36270 }, { "epoch": 2.695726495726496, "grad_norm": 2.9621592115930606, "learning_rate": 5.346928902431259e-07, "loss": 0.2548, "step": 36271 }, { "epoch": 2.695800817539948, "grad_norm": 2.7248244342511003, "learning_rate": 5.344340657043622e-07, "loss": 0.3167, "step": 36272 }, { "epoch": 2.6958751393534, "grad_norm": 2.5799555356116968, "learning_rate": 5.341753021044238e-07, "loss": 0.2721, "step": 36273 }, { "epoch": 2.6959494611668524, "grad_norm": 2.328903469240826, "learning_rate": 5.339165994449724e-07, "loss": 0.3363, "step": 36274 }, { "epoch": 2.696023782980305, "grad_norm": 2.3316226654085974, "learning_rate": 5.336579577276746e-07, "loss": 0.2932, "step": 36275 }, { "epoch": 2.696098104793757, "grad_norm": 3.690665447320773, "learning_rate": 5.333993769541956e-07, "loss": 0.3249, "step": 36276 }, { "epoch": 2.6961724266072093, "grad_norm": 2.634075929850216, "learning_rate": 5.331408571261998e-07, "loss": 0.3533, "step": 36277 }, { "epoch": 2.6962467484206614, "grad_norm": 2.8082533907343805, "learning_rate": 5.328823982453535e-07, "loss": 0.3105, "step": 36278 }, { "epoch": 2.696321070234114, "grad_norm": 3.0790612983527383, "learning_rate": 5.326240003133188e-07, "loss": 0.3676, "step": 36279 }, { "epoch": 2.696395392047566, "grad_norm": 1.9961482505111807, "learning_rate": 5.323656633317575e-07, "loss": 0.238, "step": 36280 }, { "epoch": 2.6964697138610183, "grad_norm": 2.3471020233023885, "learning_rate": 5.321073873023375e-07, "loss": 0.2649, "step": 36281 }, { "epoch": 2.6965440356744703, "grad_norm": 2.069130126866841, "learning_rate": 5.318491722267183e-07, "loss": 0.2146, "step": 36282 }, { "epoch": 2.696618357487923, "grad_norm": 2.707935031145768, "learning_rate": 5.315910181065631e-07, "loss": 0.3236, "step": 36283 }, { "epoch": 2.696692679301375, "grad_norm": 2.8230668387415903, "learning_rate": 5.313329249435329e-07, "loss": 0.2825, "step": 36284 }, { "epoch": 2.6967670011148273, "grad_norm": 1.9851747526445995, "learning_rate": 5.310748927392895e-07, "loss": 0.1695, "step": 36285 }, { "epoch": 2.6968413229282797, "grad_norm": 2.943354379350883, "learning_rate": 5.30816921495495e-07, "loss": 0.2309, "step": 36286 }, { "epoch": 2.6969156447417317, "grad_norm": 2.2974827254040364, "learning_rate": 5.305590112138092e-07, "loss": 0.2832, "step": 36287 }, { "epoch": 2.6969899665551837, "grad_norm": 3.496715016124006, "learning_rate": 5.303011618958942e-07, "loss": 0.3201, "step": 36288 }, { "epoch": 2.697064288368636, "grad_norm": 5.072681265096174, "learning_rate": 5.300433735434097e-07, "loss": 0.3419, "step": 36289 }, { "epoch": 2.6971386101820887, "grad_norm": 2.549639306768532, "learning_rate": 5.29785646158012e-07, "loss": 0.2703, "step": 36290 }, { "epoch": 2.6972129319955407, "grad_norm": 2.1232630200287104, "learning_rate": 5.295279797413655e-07, "loss": 0.237, "step": 36291 }, { "epoch": 2.6972872538089927, "grad_norm": 2.22740192352214, "learning_rate": 5.292703742951244e-07, "loss": 0.2194, "step": 36292 }, { "epoch": 2.697361575622445, "grad_norm": 2.1037244550153744, "learning_rate": 5.290128298209518e-07, "loss": 0.2446, "step": 36293 }, { "epoch": 2.6974358974358976, "grad_norm": 3.020123680751984, "learning_rate": 5.287553463205009e-07, "loss": 0.3785, "step": 36294 }, { "epoch": 2.6975102192493496, "grad_norm": 2.794052683407926, "learning_rate": 5.284979237954335e-07, "loss": 0.289, "step": 36295 }, { "epoch": 2.6975845410628017, "grad_norm": 2.9899332968115178, "learning_rate": 5.282405622474041e-07, "loss": 0.3082, "step": 36296 }, { "epoch": 2.697658862876254, "grad_norm": 1.8795995010673137, "learning_rate": 5.2798326167807e-07, "loss": 0.2078, "step": 36297 }, { "epoch": 2.6977331846897066, "grad_norm": 2.890689226829958, "learning_rate": 5.277260220890889e-07, "loss": 0.2816, "step": 36298 }, { "epoch": 2.6978075065031586, "grad_norm": 2.516252401233326, "learning_rate": 5.274688434821151e-07, "loss": 0.2707, "step": 36299 }, { "epoch": 2.697881828316611, "grad_norm": 2.5988909699039704, "learning_rate": 5.27211725858806e-07, "loss": 0.2158, "step": 36300 }, { "epoch": 2.697956150130063, "grad_norm": 3.3142007791450494, "learning_rate": 5.269546692208171e-07, "loss": 0.3748, "step": 36301 }, { "epoch": 2.6980304719435155, "grad_norm": 2.421950122431588, "learning_rate": 5.266976735698015e-07, "loss": 0.36, "step": 36302 }, { "epoch": 2.6981047937569675, "grad_norm": 1.9398949622485888, "learning_rate": 5.264407389074155e-07, "loss": 0.2466, "step": 36303 }, { "epoch": 2.69817911557042, "grad_norm": 2.4943904958945837, "learning_rate": 5.261838652353124e-07, "loss": 0.2836, "step": 36304 }, { "epoch": 2.698253437383872, "grad_norm": 2.3194862437572157, "learning_rate": 5.259270525551463e-07, "loss": 0.2353, "step": 36305 }, { "epoch": 2.6983277591973245, "grad_norm": 2.785260723719819, "learning_rate": 5.256703008685704e-07, "loss": 0.3072, "step": 36306 }, { "epoch": 2.6984020810107765, "grad_norm": 2.1698849452819147, "learning_rate": 5.254136101772356e-07, "loss": 0.1937, "step": 36307 }, { "epoch": 2.698476402824229, "grad_norm": 2.5080016516667194, "learning_rate": 5.251569804827983e-07, "loss": 0.2417, "step": 36308 }, { "epoch": 2.6985507246376814, "grad_norm": 2.3648593578548813, "learning_rate": 5.24900411786906e-07, "loss": 0.3028, "step": 36309 }, { "epoch": 2.6986250464511334, "grad_norm": 2.1215792122235877, "learning_rate": 5.246439040912166e-07, "loss": 0.2006, "step": 36310 }, { "epoch": 2.6986993682645855, "grad_norm": 1.9157237579909232, "learning_rate": 5.243874573973773e-07, "loss": 0.163, "step": 36311 }, { "epoch": 2.698773690078038, "grad_norm": 2.3109886328828826, "learning_rate": 5.24131071707038e-07, "loss": 0.2136, "step": 36312 }, { "epoch": 2.6988480118914904, "grad_norm": 2.0393418591106807, "learning_rate": 5.238747470218531e-07, "loss": 0.2714, "step": 36313 }, { "epoch": 2.6989223337049424, "grad_norm": 2.121509057724709, "learning_rate": 5.236184833434688e-07, "loss": 0.2853, "step": 36314 }, { "epoch": 2.6989966555183944, "grad_norm": 2.9664301617552034, "learning_rate": 5.233622806735394e-07, "loss": 0.2168, "step": 36315 }, { "epoch": 2.699070977331847, "grad_norm": 1.9113835691043617, "learning_rate": 5.231061390137115e-07, "loss": 0.2687, "step": 36316 }, { "epoch": 2.6991452991452993, "grad_norm": 2.2864982660252244, "learning_rate": 5.228500583656349e-07, "loss": 0.2982, "step": 36317 }, { "epoch": 2.6992196209587513, "grad_norm": 2.3286517539812404, "learning_rate": 5.225940387309569e-07, "loss": 0.267, "step": 36318 }, { "epoch": 2.699293942772204, "grad_norm": 2.962056451472413, "learning_rate": 5.223380801113265e-07, "loss": 0.3133, "step": 36319 }, { "epoch": 2.699368264585656, "grad_norm": 2.511200439708633, "learning_rate": 5.220821825083921e-07, "loss": 0.301, "step": 36320 }, { "epoch": 2.6994425863991083, "grad_norm": 2.7718407230394386, "learning_rate": 5.218263459238005e-07, "loss": 0.3083, "step": 36321 }, { "epoch": 2.6995169082125603, "grad_norm": 2.37247209054422, "learning_rate": 5.215705703592e-07, "loss": 0.2815, "step": 36322 }, { "epoch": 2.6995912300260128, "grad_norm": 2.659130914942005, "learning_rate": 5.213148558162362e-07, "loss": 0.2706, "step": 36323 }, { "epoch": 2.6996655518394648, "grad_norm": 2.2923974416914352, "learning_rate": 5.210592022965544e-07, "loss": 0.2276, "step": 36324 }, { "epoch": 2.6997398736529172, "grad_norm": 2.527333341706832, "learning_rate": 5.208036098018032e-07, "loss": 0.3248, "step": 36325 }, { "epoch": 2.6998141954663692, "grad_norm": 2.4193317323442143, "learning_rate": 5.205480783336259e-07, "loss": 0.2378, "step": 36326 }, { "epoch": 2.6998885172798217, "grad_norm": 3.158436964030495, "learning_rate": 5.202926078936688e-07, "loss": 0.2699, "step": 36327 }, { "epoch": 2.6999628390932737, "grad_norm": 2.3682625933946593, "learning_rate": 5.200371984835761e-07, "loss": 0.2772, "step": 36328 }, { "epoch": 2.700037160906726, "grad_norm": 2.0399873734908205, "learning_rate": 5.1978185010499e-07, "loss": 0.1779, "step": 36329 }, { "epoch": 2.700111482720178, "grad_norm": 2.299165623506673, "learning_rate": 5.19526562759558e-07, "loss": 0.2228, "step": 36330 }, { "epoch": 2.7001858045336307, "grad_norm": 3.281494981939707, "learning_rate": 5.19271336448921e-07, "loss": 0.3236, "step": 36331 }, { "epoch": 2.700260126347083, "grad_norm": 2.575966286287104, "learning_rate": 5.190161711747244e-07, "loss": 0.3052, "step": 36332 }, { "epoch": 2.700334448160535, "grad_norm": 2.1051057823246033, "learning_rate": 5.1876106693861e-07, "loss": 0.2778, "step": 36333 }, { "epoch": 2.700408769973987, "grad_norm": 2.333868318401574, "learning_rate": 5.185060237422179e-07, "loss": 0.2606, "step": 36334 }, { "epoch": 2.7004830917874396, "grad_norm": 2.2969037965558723, "learning_rate": 5.182510415871944e-07, "loss": 0.2675, "step": 36335 }, { "epoch": 2.700557413600892, "grad_norm": 2.2632229572525007, "learning_rate": 5.17996120475176e-07, "loss": 0.2354, "step": 36336 }, { "epoch": 2.700631735414344, "grad_norm": 2.5454486146429502, "learning_rate": 5.177412604078092e-07, "loss": 0.217, "step": 36337 }, { "epoch": 2.700706057227796, "grad_norm": 2.7308410269198053, "learning_rate": 5.174864613867315e-07, "loss": 0.3329, "step": 36338 }, { "epoch": 2.7007803790412486, "grad_norm": 2.808391348097321, "learning_rate": 5.172317234135849e-07, "loss": 0.3303, "step": 36339 }, { "epoch": 2.700854700854701, "grad_norm": 2.1279893092062867, "learning_rate": 5.16977046490007e-07, "loss": 0.2426, "step": 36340 }, { "epoch": 2.700929022668153, "grad_norm": 2.02983080434593, "learning_rate": 5.167224306176389e-07, "loss": 0.2265, "step": 36341 }, { "epoch": 2.7010033444816055, "grad_norm": 2.0285886002123483, "learning_rate": 5.164678757981201e-07, "loss": 0.2432, "step": 36342 }, { "epoch": 2.7010776662950575, "grad_norm": 2.043812419252392, "learning_rate": 5.162133820330884e-07, "loss": 0.2356, "step": 36343 }, { "epoch": 2.70115198810851, "grad_norm": 2.2606978305809178, "learning_rate": 5.159589493241834e-07, "loss": 0.2572, "step": 36344 }, { "epoch": 2.701226309921962, "grad_norm": 2.6154127081443983, "learning_rate": 5.157045776730429e-07, "loss": 0.3066, "step": 36345 }, { "epoch": 2.7013006317354145, "grad_norm": 2.4886918823755124, "learning_rate": 5.154502670813033e-07, "loss": 0.3142, "step": 36346 }, { "epoch": 2.7013749535488665, "grad_norm": 1.8436710006957053, "learning_rate": 5.151960175506032e-07, "loss": 0.2459, "step": 36347 }, { "epoch": 2.701449275362319, "grad_norm": 3.3063253277768245, "learning_rate": 5.149418290825803e-07, "loss": 0.3376, "step": 36348 }, { "epoch": 2.701523597175771, "grad_norm": 2.2378418583000768, "learning_rate": 5.146877016788676e-07, "loss": 0.2621, "step": 36349 }, { "epoch": 2.7015979189892234, "grad_norm": 2.4038475548463403, "learning_rate": 5.144336353411061e-07, "loss": 0.3003, "step": 36350 }, { "epoch": 2.7016722408026754, "grad_norm": 2.697449468065894, "learning_rate": 5.141796300709257e-07, "loss": 0.2602, "step": 36351 }, { "epoch": 2.701746562616128, "grad_norm": 2.545786566152927, "learning_rate": 5.139256858699659e-07, "loss": 0.3023, "step": 36352 }, { "epoch": 2.70182088442958, "grad_norm": 2.8511752822152734, "learning_rate": 5.136718027398601e-07, "loss": 0.2372, "step": 36353 }, { "epoch": 2.7018952062430324, "grad_norm": 2.2559190455457756, "learning_rate": 5.134179806822437e-07, "loss": 0.244, "step": 36354 }, { "epoch": 2.701969528056485, "grad_norm": 2.18003076909244, "learning_rate": 5.131642196987496e-07, "loss": 0.262, "step": 36355 }, { "epoch": 2.702043849869937, "grad_norm": 2.7923905317927873, "learning_rate": 5.129105197910112e-07, "loss": 0.3015, "step": 36356 }, { "epoch": 2.702118171683389, "grad_norm": 2.515359345484676, "learning_rate": 5.126568809606635e-07, "loss": 0.3106, "step": 36357 }, { "epoch": 2.7021924934968413, "grad_norm": 2.2671847769706703, "learning_rate": 5.124033032093378e-07, "loss": 0.2609, "step": 36358 }, { "epoch": 2.702266815310294, "grad_norm": 2.3291999615110175, "learning_rate": 5.121497865386693e-07, "loss": 0.2626, "step": 36359 }, { "epoch": 2.702341137123746, "grad_norm": 2.2397383196972904, "learning_rate": 5.118963309502866e-07, "loss": 0.2001, "step": 36360 }, { "epoch": 2.702415458937198, "grad_norm": 2.185565449141003, "learning_rate": 5.116429364458241e-07, "loss": 0.2693, "step": 36361 }, { "epoch": 2.7024897807506503, "grad_norm": 2.151568475819317, "learning_rate": 5.113896030269117e-07, "loss": 0.1635, "step": 36362 }, { "epoch": 2.7025641025641027, "grad_norm": 2.8686800999194455, "learning_rate": 5.1113633069518e-07, "loss": 0.3262, "step": 36363 }, { "epoch": 2.7026384243775547, "grad_norm": 2.7861648527780867, "learning_rate": 5.108831194522601e-07, "loss": 0.338, "step": 36364 }, { "epoch": 2.702712746191007, "grad_norm": 2.5916444109906234, "learning_rate": 5.10629969299784e-07, "loss": 0.2783, "step": 36365 }, { "epoch": 2.7027870680044592, "grad_norm": 2.343152514001858, "learning_rate": 5.103768802393771e-07, "loss": 0.2114, "step": 36366 }, { "epoch": 2.7028613898179117, "grad_norm": 2.1554998175911373, "learning_rate": 5.101238522726737e-07, "loss": 0.2402, "step": 36367 }, { "epoch": 2.7029357116313637, "grad_norm": 2.3823491902321807, "learning_rate": 5.09870885401299e-07, "loss": 0.27, "step": 36368 }, { "epoch": 2.703010033444816, "grad_norm": 2.4376306159057672, "learning_rate": 5.096179796268841e-07, "loss": 0.2588, "step": 36369 }, { "epoch": 2.703084355258268, "grad_norm": 2.020576429763584, "learning_rate": 5.093651349510564e-07, "loss": 0.2013, "step": 36370 }, { "epoch": 2.7031586770717206, "grad_norm": 2.1774566010683385, "learning_rate": 5.091123513754425e-07, "loss": 0.2494, "step": 36371 }, { "epoch": 2.7032329988851727, "grad_norm": 2.1779991992114303, "learning_rate": 5.088596289016734e-07, "loss": 0.2524, "step": 36372 }, { "epoch": 2.703307320698625, "grad_norm": 3.4966772484670865, "learning_rate": 5.086069675313709e-07, "loss": 0.3028, "step": 36373 }, { "epoch": 2.703381642512077, "grad_norm": 2.870521408851248, "learning_rate": 5.08354367266165e-07, "loss": 0.3638, "step": 36374 }, { "epoch": 2.7034559643255296, "grad_norm": 1.8627030171416767, "learning_rate": 5.081018281076811e-07, "loss": 0.2113, "step": 36375 }, { "epoch": 2.7035302861389816, "grad_norm": 2.09122942570171, "learning_rate": 5.078493500575454e-07, "loss": 0.336, "step": 36376 }, { "epoch": 2.703604607952434, "grad_norm": 2.9183961247283956, "learning_rate": 5.075969331173835e-07, "loss": 0.365, "step": 36377 }, { "epoch": 2.7036789297658865, "grad_norm": 2.7076323013408037, "learning_rate": 5.073445772888186e-07, "loss": 0.3075, "step": 36378 }, { "epoch": 2.7037532515793385, "grad_norm": 2.160695681779728, "learning_rate": 5.070922825734781e-07, "loss": 0.2595, "step": 36379 }, { "epoch": 2.7038275733927906, "grad_norm": 2.2222361864556612, "learning_rate": 5.06840048972983e-07, "loss": 0.232, "step": 36380 }, { "epoch": 2.703901895206243, "grad_norm": 2.471827237879429, "learning_rate": 5.065878764889609e-07, "loss": 0.3248, "step": 36381 }, { "epoch": 2.7039762170196955, "grad_norm": 2.288368755018455, "learning_rate": 5.063357651230338e-07, "loss": 0.2617, "step": 36382 }, { "epoch": 2.7040505388331475, "grad_norm": 2.4431735968508845, "learning_rate": 5.060837148768239e-07, "loss": 0.2446, "step": 36383 }, { "epoch": 2.7041248606465995, "grad_norm": 2.106138433389351, "learning_rate": 5.058317257519552e-07, "loss": 0.2955, "step": 36384 }, { "epoch": 2.704199182460052, "grad_norm": 2.3783111995708532, "learning_rate": 5.055797977500476e-07, "loss": 0.2182, "step": 36385 }, { "epoch": 2.7042735042735044, "grad_norm": 2.3872557596231436, "learning_rate": 5.053279308727255e-07, "loss": 0.2697, "step": 36386 }, { "epoch": 2.7043478260869565, "grad_norm": 2.019760951832014, "learning_rate": 5.050761251216096e-07, "loss": 0.2413, "step": 36387 }, { "epoch": 2.704422147900409, "grad_norm": 2.2836345647621874, "learning_rate": 5.048243804983199e-07, "loss": 0.2662, "step": 36388 }, { "epoch": 2.704496469713861, "grad_norm": 2.5651060705659448, "learning_rate": 5.045726970044795e-07, "loss": 0.3108, "step": 36389 }, { "epoch": 2.7045707915273134, "grad_norm": 2.441464956261111, "learning_rate": 5.04321074641706e-07, "loss": 0.2972, "step": 36390 }, { "epoch": 2.7046451133407654, "grad_norm": 2.1287317233269833, "learning_rate": 5.040695134116225e-07, "loss": 0.2166, "step": 36391 }, { "epoch": 2.704719435154218, "grad_norm": 2.253212505458116, "learning_rate": 5.038180133158466e-07, "loss": 0.3312, "step": 36392 }, { "epoch": 2.70479375696767, "grad_norm": 2.3955964306096256, "learning_rate": 5.035665743559959e-07, "loss": 0.2488, "step": 36393 }, { "epoch": 2.7048680787811223, "grad_norm": 2.6404096685463347, "learning_rate": 5.033151965336913e-07, "loss": 0.2631, "step": 36394 }, { "epoch": 2.7049424005945744, "grad_norm": 2.2862847080973623, "learning_rate": 5.030638798505527e-07, "loss": 0.3181, "step": 36395 }, { "epoch": 2.705016722408027, "grad_norm": 3.0908495262708096, "learning_rate": 5.028126243081943e-07, "loss": 0.2674, "step": 36396 }, { "epoch": 2.7050910442214793, "grad_norm": 2.615184526677141, "learning_rate": 5.025614299082349e-07, "loss": 0.329, "step": 36397 }, { "epoch": 2.7051653660349313, "grad_norm": 2.051920045937639, "learning_rate": 5.023102966522942e-07, "loss": 0.2782, "step": 36398 }, { "epoch": 2.7052396878483833, "grad_norm": 2.0515980293575624, "learning_rate": 5.020592245419864e-07, "loss": 0.2345, "step": 36399 }, { "epoch": 2.7053140096618358, "grad_norm": 2.1887335648326984, "learning_rate": 5.018082135789271e-07, "loss": 0.2693, "step": 36400 }, { "epoch": 2.7053883314752882, "grad_norm": 3.4798542837102837, "learning_rate": 5.015572637647348e-07, "loss": 0.2201, "step": 36401 }, { "epoch": 2.7054626532887402, "grad_norm": 2.4850411059491964, "learning_rate": 5.013063751010228e-07, "loss": 0.2253, "step": 36402 }, { "epoch": 2.7055369751021923, "grad_norm": 2.7741239708973535, "learning_rate": 5.010555475894096e-07, "loss": 0.3393, "step": 36403 }, { "epoch": 2.7056112969156447, "grad_norm": 2.687985693554551, "learning_rate": 5.008047812315064e-07, "loss": 0.4051, "step": 36404 }, { "epoch": 2.705685618729097, "grad_norm": 3.326091983027462, "learning_rate": 5.005540760289295e-07, "loss": 0.3404, "step": 36405 }, { "epoch": 2.705759940542549, "grad_norm": 3.9425506620952913, "learning_rate": 5.003034319832945e-07, "loss": 0.2225, "step": 36406 }, { "epoch": 2.705834262356001, "grad_norm": 2.130892440361857, "learning_rate": 5.000528490962098e-07, "loss": 0.2296, "step": 36407 }, { "epoch": 2.7059085841694537, "grad_norm": 2.338428948942776, "learning_rate": 4.998023273692942e-07, "loss": 0.2821, "step": 36408 }, { "epoch": 2.705982905982906, "grad_norm": 2.561042024596908, "learning_rate": 4.995518668041577e-07, "loss": 0.2894, "step": 36409 }, { "epoch": 2.706057227796358, "grad_norm": 2.6343692529434435, "learning_rate": 4.993014674024122e-07, "loss": 0.3173, "step": 36410 }, { "epoch": 2.7061315496098106, "grad_norm": 1.9770505485962917, "learning_rate": 4.990511291656719e-07, "loss": 0.2265, "step": 36411 }, { "epoch": 2.7062058714232626, "grad_norm": 2.1498616851070436, "learning_rate": 4.988008520955467e-07, "loss": 0.2671, "step": 36412 }, { "epoch": 2.706280193236715, "grad_norm": 2.316993858517856, "learning_rate": 4.985506361936499e-07, "loss": 0.2234, "step": 36413 }, { "epoch": 2.706354515050167, "grad_norm": 2.498199803706793, "learning_rate": 4.98300481461591e-07, "loss": 0.2233, "step": 36414 }, { "epoch": 2.7064288368636196, "grad_norm": 2.5788170143500637, "learning_rate": 4.980503879009791e-07, "loss": 0.2422, "step": 36415 }, { "epoch": 2.7065031586770716, "grad_norm": 4.626322036280564, "learning_rate": 4.978003555134269e-07, "loss": 0.2825, "step": 36416 }, { "epoch": 2.706577480490524, "grad_norm": 2.5208699866089974, "learning_rate": 4.975503843005436e-07, "loss": 0.2181, "step": 36417 }, { "epoch": 2.706651802303976, "grad_norm": 2.984347168544831, "learning_rate": 4.973004742639376e-07, "loss": 0.2707, "step": 36418 }, { "epoch": 2.7067261241174285, "grad_norm": 2.7307913613024493, "learning_rate": 4.970506254052165e-07, "loss": 0.2299, "step": 36419 }, { "epoch": 2.706800445930881, "grad_norm": 2.3464358100783467, "learning_rate": 4.968008377259925e-07, "loss": 0.3214, "step": 36420 }, { "epoch": 2.706874767744333, "grad_norm": 2.527998170141323, "learning_rate": 4.96551111227871e-07, "loss": 0.2612, "step": 36421 }, { "epoch": 2.706949089557785, "grad_norm": 2.6234770848531475, "learning_rate": 4.963014459124582e-07, "loss": 0.3296, "step": 36422 }, { "epoch": 2.7070234113712375, "grad_norm": 2.125478766237263, "learning_rate": 4.960518417813664e-07, "loss": 0.2621, "step": 36423 }, { "epoch": 2.70709773318469, "grad_norm": 2.331214551887656, "learning_rate": 4.958022988361977e-07, "loss": 0.2735, "step": 36424 }, { "epoch": 2.707172054998142, "grad_norm": 2.12025496617662, "learning_rate": 4.955528170785617e-07, "loss": 0.2336, "step": 36425 }, { "epoch": 2.707246376811594, "grad_norm": 1.9198421361905704, "learning_rate": 4.95303396510064e-07, "loss": 0.2316, "step": 36426 }, { "epoch": 2.7073206986250464, "grad_norm": 2.607845395035548, "learning_rate": 4.950540371323087e-07, "loss": 0.2763, "step": 36427 }, { "epoch": 2.707395020438499, "grad_norm": 2.3766765506997607, "learning_rate": 4.948047389469035e-07, "loss": 0.3069, "step": 36428 }, { "epoch": 2.707469342251951, "grad_norm": 2.4935634735055374, "learning_rate": 4.945555019554515e-07, "loss": 0.3092, "step": 36429 }, { "epoch": 2.707543664065403, "grad_norm": 2.7351707183879985, "learning_rate": 4.943063261595594e-07, "loss": 0.3445, "step": 36430 }, { "epoch": 2.7076179858788554, "grad_norm": 2.0904954653473924, "learning_rate": 4.940572115608288e-07, "loss": 0.2171, "step": 36431 }, { "epoch": 2.707692307692308, "grad_norm": 2.4072141555855944, "learning_rate": 4.938081581608634e-07, "loss": 0.2809, "step": 36432 }, { "epoch": 2.70776662950576, "grad_norm": 2.1467821240753744, "learning_rate": 4.935591659612693e-07, "loss": 0.2156, "step": 36433 }, { "epoch": 2.7078409513192123, "grad_norm": 2.2770413848006714, "learning_rate": 4.933102349636465e-07, "loss": 0.2415, "step": 36434 }, { "epoch": 2.7079152731326643, "grad_norm": 2.2739286467781787, "learning_rate": 4.930613651696014e-07, "loss": 0.2657, "step": 36435 }, { "epoch": 2.707989594946117, "grad_norm": 2.303559947508533, "learning_rate": 4.928125565807329e-07, "loss": 0.2593, "step": 36436 }, { "epoch": 2.708063916759569, "grad_norm": 2.666670411501019, "learning_rate": 4.925638091986428e-07, "loss": 0.275, "step": 36437 }, { "epoch": 2.7081382385730213, "grad_norm": 2.687872698562091, "learning_rate": 4.923151230249356e-07, "loss": 0.2797, "step": 36438 }, { "epoch": 2.7082125603864733, "grad_norm": 2.0868804262929523, "learning_rate": 4.920664980612078e-07, "loss": 0.2245, "step": 36439 }, { "epoch": 2.7082868821999257, "grad_norm": 2.0346334636891727, "learning_rate": 4.918179343090668e-07, "loss": 0.2323, "step": 36440 }, { "epoch": 2.7083612040133778, "grad_norm": 2.08957974663537, "learning_rate": 4.915694317701047e-07, "loss": 0.2305, "step": 36441 }, { "epoch": 2.7084355258268302, "grad_norm": 2.463133662445519, "learning_rate": 4.91320990445927e-07, "loss": 0.3225, "step": 36442 }, { "epoch": 2.7085098476402827, "grad_norm": 2.253247737434914, "learning_rate": 4.910726103381313e-07, "loss": 0.2853, "step": 36443 }, { "epoch": 2.7085841694537347, "grad_norm": 2.0774728188466978, "learning_rate": 4.908242914483152e-07, "loss": 0.2344, "step": 36444 }, { "epoch": 2.7086584912671867, "grad_norm": 2.341986527993314, "learning_rate": 4.905760337780808e-07, "loss": 0.2792, "step": 36445 }, { "epoch": 2.708732813080639, "grad_norm": 2.469525461558062, "learning_rate": 4.903278373290221e-07, "loss": 0.2519, "step": 36446 }, { "epoch": 2.7088071348940916, "grad_norm": 2.8325339397509834, "learning_rate": 4.900797021027414e-07, "loss": 0.3021, "step": 36447 }, { "epoch": 2.7088814567075437, "grad_norm": 2.1230853429134475, "learning_rate": 4.898316281008342e-07, "loss": 0.2554, "step": 36448 }, { "epoch": 2.7089557785209957, "grad_norm": 2.492118193487482, "learning_rate": 4.895836153248956e-07, "loss": 0.302, "step": 36449 }, { "epoch": 2.709030100334448, "grad_norm": 2.0165790470605613, "learning_rate": 4.893356637765268e-07, "loss": 0.2547, "step": 36450 }, { "epoch": 2.7091044221479006, "grad_norm": 2.777304638417746, "learning_rate": 4.890877734573207e-07, "loss": 0.2905, "step": 36451 }, { "epoch": 2.7091787439613526, "grad_norm": 2.340346442702342, "learning_rate": 4.888399443688741e-07, "loss": 0.2628, "step": 36452 }, { "epoch": 2.709253065774805, "grad_norm": 2.225744667753062, "learning_rate": 4.885921765127822e-07, "loss": 0.2856, "step": 36453 }, { "epoch": 2.709327387588257, "grad_norm": 2.193622645297688, "learning_rate": 4.883444698906393e-07, "loss": 0.2642, "step": 36454 }, { "epoch": 2.7094017094017095, "grad_norm": 2.427958402225628, "learning_rate": 4.880968245040429e-07, "loss": 0.2647, "step": 36455 }, { "epoch": 2.7094760312151616, "grad_norm": 2.3369911488323742, "learning_rate": 4.878492403545831e-07, "loss": 0.2338, "step": 36456 }, { "epoch": 2.709550353028614, "grad_norm": 2.695120655455665, "learning_rate": 4.876017174438586e-07, "loss": 0.3356, "step": 36457 }, { "epoch": 2.709624674842066, "grad_norm": 2.392138009080303, "learning_rate": 4.873542557734612e-07, "loss": 0.239, "step": 36458 }, { "epoch": 2.7096989966555185, "grad_norm": 1.800899192283478, "learning_rate": 4.871068553449809e-07, "loss": 0.1487, "step": 36459 }, { "epoch": 2.7097733184689705, "grad_norm": 3.023193896759251, "learning_rate": 4.868595161600154e-07, "loss": 0.3429, "step": 36460 }, { "epoch": 2.709847640282423, "grad_norm": 2.4417119776053284, "learning_rate": 4.866122382201522e-07, "loss": 0.2338, "step": 36461 }, { "epoch": 2.709921962095875, "grad_norm": 2.271366767240066, "learning_rate": 4.863650215269888e-07, "loss": 0.2624, "step": 36462 }, { "epoch": 2.7099962839093275, "grad_norm": 2.3978723043763392, "learning_rate": 4.86117866082112e-07, "loss": 0.2472, "step": 36463 }, { "epoch": 2.7100706057227795, "grad_norm": 2.6276375032262527, "learning_rate": 4.858707718871159e-07, "loss": 0.2806, "step": 36464 }, { "epoch": 2.710144927536232, "grad_norm": 2.1240447634452573, "learning_rate": 4.856237389435902e-07, "loss": 0.3008, "step": 36465 }, { "epoch": 2.7102192493496844, "grad_norm": 2.2791560924967253, "learning_rate": 4.853767672531229e-07, "loss": 0.267, "step": 36466 }, { "epoch": 2.7102935711631364, "grad_norm": 3.222016562744105, "learning_rate": 4.851298568173091e-07, "loss": 0.3752, "step": 36467 }, { "epoch": 2.7103678929765884, "grad_norm": 2.691950155595317, "learning_rate": 4.848830076377331e-07, "loss": 0.3036, "step": 36468 }, { "epoch": 2.710442214790041, "grad_norm": 2.8468751043181983, "learning_rate": 4.846362197159881e-07, "loss": 0.3129, "step": 36469 }, { "epoch": 2.7105165366034933, "grad_norm": 2.1806293950899174, "learning_rate": 4.843894930536619e-07, "loss": 0.2154, "step": 36470 }, { "epoch": 2.7105908584169454, "grad_norm": 2.239582308519408, "learning_rate": 4.841428276523408e-07, "loss": 0.2483, "step": 36471 }, { "epoch": 2.7106651802303974, "grad_norm": 2.703217254647464, "learning_rate": 4.838962235136146e-07, "loss": 0.3051, "step": 36472 }, { "epoch": 2.71073950204385, "grad_norm": 2.560449867703932, "learning_rate": 4.836496806390723e-07, "loss": 0.2903, "step": 36473 }, { "epoch": 2.7108138238573023, "grad_norm": 2.0801008284359384, "learning_rate": 4.83403199030299e-07, "loss": 0.3487, "step": 36474 }, { "epoch": 2.7108881456707543, "grad_norm": 2.3290151972090865, "learning_rate": 4.831567786888825e-07, "loss": 0.2661, "step": 36475 }, { "epoch": 2.7109624674842068, "grad_norm": 1.9667374250077434, "learning_rate": 4.829104196164069e-07, "loss": 0.2499, "step": 36476 }, { "epoch": 2.711036789297659, "grad_norm": 2.640001841950334, "learning_rate": 4.826641218144623e-07, "loss": 0.2692, "step": 36477 }, { "epoch": 2.7111111111111112, "grad_norm": 2.249216163535283, "learning_rate": 4.824178852846306e-07, "loss": 0.3187, "step": 36478 }, { "epoch": 2.7111854329245633, "grad_norm": 2.441568492839816, "learning_rate": 4.821717100284995e-07, "loss": 0.322, "step": 36479 }, { "epoch": 2.7112597547380157, "grad_norm": 2.501589605087642, "learning_rate": 4.819255960476532e-07, "loss": 0.2968, "step": 36480 }, { "epoch": 2.7113340765514677, "grad_norm": 2.3684432523027015, "learning_rate": 4.816795433436761e-07, "loss": 0.3115, "step": 36481 }, { "epoch": 2.71140839836492, "grad_norm": 2.3890582831160367, "learning_rate": 4.814335519181524e-07, "loss": 0.3533, "step": 36482 }, { "epoch": 2.711482720178372, "grad_norm": 2.683076114789911, "learning_rate": 4.811876217726641e-07, "loss": 0.2977, "step": 36483 }, { "epoch": 2.7115570419918247, "grad_norm": 2.1848344971731746, "learning_rate": 4.809417529087978e-07, "loss": 0.169, "step": 36484 }, { "epoch": 2.7116313638052767, "grad_norm": 2.75205895341345, "learning_rate": 4.806959453281345e-07, "loss": 0.2405, "step": 36485 }, { "epoch": 2.711705685618729, "grad_norm": 2.618860052850881, "learning_rate": 4.804501990322564e-07, "loss": 0.2562, "step": 36486 }, { "epoch": 2.711780007432181, "grad_norm": 2.329580357081647, "learning_rate": 4.802045140227463e-07, "loss": 0.2521, "step": 36487 }, { "epoch": 2.7118543292456336, "grad_norm": 2.298408358888438, "learning_rate": 4.799588903011843e-07, "loss": 0.1726, "step": 36488 }, { "epoch": 2.711928651059086, "grad_norm": 2.7638844882355333, "learning_rate": 4.797133278691546e-07, "loss": 0.3016, "step": 36489 }, { "epoch": 2.712002972872538, "grad_norm": 2.481868528642685, "learning_rate": 4.79467826728236e-07, "loss": 0.2779, "step": 36490 }, { "epoch": 2.71207729468599, "grad_norm": 2.3962787319896024, "learning_rate": 4.792223868800084e-07, "loss": 0.2946, "step": 36491 }, { "epoch": 2.7121516164994426, "grad_norm": 2.1849538186033683, "learning_rate": 4.789770083260548e-07, "loss": 0.2731, "step": 36492 }, { "epoch": 2.712225938312895, "grad_norm": 2.294498506155787, "learning_rate": 4.787316910679507e-07, "loss": 0.3079, "step": 36493 }, { "epoch": 2.712300260126347, "grad_norm": 2.602592402341261, "learning_rate": 4.784864351072804e-07, "loss": 0.2462, "step": 36494 }, { "epoch": 2.712374581939799, "grad_norm": 2.2875219307829417, "learning_rate": 4.782412404456205e-07, "loss": 0.2359, "step": 36495 }, { "epoch": 2.7124489037532515, "grad_norm": 2.3595287512472813, "learning_rate": 4.779961070845485e-07, "loss": 0.2927, "step": 36496 }, { "epoch": 2.712523225566704, "grad_norm": 3.02857488946072, "learning_rate": 4.777510350256443e-07, "loss": 0.342, "step": 36497 }, { "epoch": 2.712597547380156, "grad_norm": 2.099509992965584, "learning_rate": 4.775060242704832e-07, "loss": 0.2299, "step": 36498 }, { "epoch": 2.7126718691936085, "grad_norm": 2.8080604555889535, "learning_rate": 4.772610748206452e-07, "loss": 0.325, "step": 36499 }, { "epoch": 2.7127461910070605, "grad_norm": 1.863581113424044, "learning_rate": 4.770161866777057e-07, "loss": 0.2176, "step": 36500 }, { "epoch": 2.712820512820513, "grad_norm": 2.854511966735378, "learning_rate": 4.7677135984324216e-07, "loss": 0.3329, "step": 36501 }, { "epoch": 2.712894834633965, "grad_norm": 2.1984315342901994, "learning_rate": 4.7652659431883133e-07, "loss": 0.2787, "step": 36502 }, { "epoch": 2.7129691564474174, "grad_norm": 2.8462739544699196, "learning_rate": 4.762818901060473e-07, "loss": 0.2621, "step": 36503 }, { "epoch": 2.7130434782608694, "grad_norm": 2.7998679662461936, "learning_rate": 4.760372472064667e-07, "loss": 0.3005, "step": 36504 }, { "epoch": 2.713117800074322, "grad_norm": 1.948546512117628, "learning_rate": 4.757926656216627e-07, "loss": 0.2569, "step": 36505 }, { "epoch": 2.713192121887774, "grad_norm": 1.9818357692082695, "learning_rate": 4.7554814535321405e-07, "loss": 0.2699, "step": 36506 }, { "epoch": 2.7132664437012264, "grad_norm": 2.8899993746247894, "learning_rate": 4.7530368640269053e-07, "loss": 0.3037, "step": 36507 }, { "epoch": 2.7133407655146784, "grad_norm": 2.4291239780562517, "learning_rate": 4.7505928877166875e-07, "loss": 0.2599, "step": 36508 }, { "epoch": 2.713415087328131, "grad_norm": 2.2599544734236128, "learning_rate": 4.748149524617207e-07, "loss": 0.2988, "step": 36509 }, { "epoch": 2.713489409141583, "grad_norm": 3.0713035935087936, "learning_rate": 4.7457067747441856e-07, "loss": 0.3369, "step": 36510 }, { "epoch": 2.7135637309550353, "grad_norm": 2.043182510320985, "learning_rate": 4.743264638113376e-07, "loss": 0.2496, "step": 36511 }, { "epoch": 2.713638052768488, "grad_norm": 2.5762642892696914, "learning_rate": 4.740823114740478e-07, "loss": 0.3218, "step": 36512 }, { "epoch": 2.71371237458194, "grad_norm": 2.040710577486674, "learning_rate": 4.738382204641212e-07, "loss": 0.2547, "step": 36513 }, { "epoch": 2.713786696395392, "grad_norm": 2.643633602657311, "learning_rate": 4.73594190783131e-07, "loss": 0.2865, "step": 36514 }, { "epoch": 2.7138610182088443, "grad_norm": 2.2350066706197267, "learning_rate": 4.7335022243264585e-07, "loss": 0.296, "step": 36515 }, { "epoch": 2.7139353400222967, "grad_norm": 2.898277011186378, "learning_rate": 4.7310631541423903e-07, "loss": 0.3113, "step": 36516 }, { "epoch": 2.7140096618357488, "grad_norm": 2.3363730770889646, "learning_rate": 4.7286246972948036e-07, "loss": 0.3164, "step": 36517 }, { "epoch": 2.714083983649201, "grad_norm": 1.7569757959519618, "learning_rate": 4.7261868537993636e-07, "loss": 0.1809, "step": 36518 }, { "epoch": 2.7141583054626532, "grad_norm": 2.3843352935887148, "learning_rate": 4.723749623671814e-07, "loss": 0.2847, "step": 36519 }, { "epoch": 2.7142326272761057, "grad_norm": 2.6618465012386, "learning_rate": 4.7213130069277966e-07, "loss": 0.2949, "step": 36520 }, { "epoch": 2.7143069490895577, "grad_norm": 1.2862799922829682, "learning_rate": 4.7188770035830335e-07, "loss": 0.1259, "step": 36521 }, { "epoch": 2.71438127090301, "grad_norm": 1.9985321776679834, "learning_rate": 4.7164416136531886e-07, "loss": 0.2603, "step": 36522 }, { "epoch": 2.714455592716462, "grad_norm": 2.7204095889062416, "learning_rate": 4.714006837153962e-07, "loss": 0.3038, "step": 36523 }, { "epoch": 2.7145299145299147, "grad_norm": 3.1154190312378507, "learning_rate": 4.711572674101006e-07, "loss": 0.2768, "step": 36524 }, { "epoch": 2.7146042363433667, "grad_norm": 2.840936628969501, "learning_rate": 4.709139124509998e-07, "loss": 0.3164, "step": 36525 }, { "epoch": 2.714678558156819, "grad_norm": 2.23702659846942, "learning_rate": 4.7067061883966147e-07, "loss": 0.2968, "step": 36526 }, { "epoch": 2.714752879970271, "grad_norm": 2.064474519251074, "learning_rate": 4.7042738657765096e-07, "loss": 0.2326, "step": 36527 }, { "epoch": 2.7148272017837236, "grad_norm": 2.51703618230754, "learning_rate": 4.701842156665348e-07, "loss": 0.2962, "step": 36528 }, { "epoch": 2.7149015235971756, "grad_norm": 2.7594808596004743, "learning_rate": 4.6994110610787846e-07, "loss": 0.2728, "step": 36529 }, { "epoch": 2.714975845410628, "grad_norm": 1.9288715536973497, "learning_rate": 4.696980579032473e-07, "loss": 0.1725, "step": 36530 }, { "epoch": 2.71505016722408, "grad_norm": 3.1093290482501033, "learning_rate": 4.6945507105420453e-07, "loss": 0.3187, "step": 36531 }, { "epoch": 2.7151244890375326, "grad_norm": 3.787045157176386, "learning_rate": 4.692121455623144e-07, "loss": 0.2405, "step": 36532 }, { "epoch": 2.7151988108509846, "grad_norm": 2.0640293251377715, "learning_rate": 4.6896928142914356e-07, "loss": 0.2784, "step": 36533 }, { "epoch": 2.715273132664437, "grad_norm": 2.0678536964501535, "learning_rate": 4.68726478656254e-07, "loss": 0.2623, "step": 36534 }, { "epoch": 2.7153474544778895, "grad_norm": 2.083520879356702, "learning_rate": 4.684837372452078e-07, "loss": 0.2255, "step": 36535 }, { "epoch": 2.7154217762913415, "grad_norm": 2.181249135583296, "learning_rate": 4.682410571975693e-07, "loss": 0.2398, "step": 36536 }, { "epoch": 2.7154960981047935, "grad_norm": 2.9335669063503698, "learning_rate": 4.679984385148995e-07, "loss": 0.2854, "step": 36537 }, { "epoch": 2.715570419918246, "grad_norm": 2.236058102483539, "learning_rate": 4.6775588119876257e-07, "loss": 0.2511, "step": 36538 }, { "epoch": 2.7156447417316985, "grad_norm": 2.7996223987706657, "learning_rate": 4.6751338525071856e-07, "loss": 0.2916, "step": 36539 }, { "epoch": 2.7157190635451505, "grad_norm": 2.562491195243585, "learning_rate": 4.672709506723272e-07, "loss": 0.2798, "step": 36540 }, { "epoch": 2.7157933853586025, "grad_norm": 2.703109933942599, "learning_rate": 4.6702857746515285e-07, "loss": 0.2995, "step": 36541 }, { "epoch": 2.715867707172055, "grad_norm": 1.9121832647788755, "learning_rate": 4.667862656307531e-07, "loss": 0.2122, "step": 36542 }, { "epoch": 2.7159420289855074, "grad_norm": 2.36166952572938, "learning_rate": 4.6654401517068905e-07, "loss": 0.2356, "step": 36543 }, { "epoch": 2.7160163507989594, "grad_norm": 2.0738630269984646, "learning_rate": 4.663018260865193e-07, "loss": 0.2106, "step": 36544 }, { "epoch": 2.716090672612412, "grad_norm": 2.339188057272107, "learning_rate": 4.660596983798038e-07, "loss": 0.2519, "step": 36545 }, { "epoch": 2.716164994425864, "grad_norm": 2.582700079375098, "learning_rate": 4.658176320521024e-07, "loss": 0.3074, "step": 36546 }, { "epoch": 2.7162393162393164, "grad_norm": 2.2704748302670072, "learning_rate": 4.655756271049705e-07, "loss": 0.2279, "step": 36547 }, { "epoch": 2.7163136380527684, "grad_norm": 2.621603923919205, "learning_rate": 4.653336835399691e-07, "loss": 0.3554, "step": 36548 }, { "epoch": 2.716387959866221, "grad_norm": 2.5064762387822466, "learning_rate": 4.650918013586536e-07, "loss": 0.3149, "step": 36549 }, { "epoch": 2.716462281679673, "grad_norm": 2.6772025802742623, "learning_rate": 4.648499805625839e-07, "loss": 0.2802, "step": 36550 }, { "epoch": 2.7165366034931253, "grad_norm": 2.7988931228670664, "learning_rate": 4.646082211533154e-07, "loss": 0.2671, "step": 36551 }, { "epoch": 2.7166109253065773, "grad_norm": 2.1229667340987595, "learning_rate": 4.643665231324024e-07, "loss": 0.3027, "step": 36552 }, { "epoch": 2.71668524712003, "grad_norm": 4.882895816371012, "learning_rate": 4.6412488650140586e-07, "loss": 0.2178, "step": 36553 }, { "epoch": 2.7167595689334822, "grad_norm": 2.670803655646854, "learning_rate": 4.638833112618768e-07, "loss": 0.3365, "step": 36554 }, { "epoch": 2.7168338907469343, "grad_norm": 1.779422511096208, "learning_rate": 4.6364179741537283e-07, "loss": 0.1728, "step": 36555 }, { "epoch": 2.7169082125603863, "grad_norm": 2.3124960582479597, "learning_rate": 4.634003449634472e-07, "loss": 0.3127, "step": 36556 }, { "epoch": 2.7169825343738387, "grad_norm": 2.3721188860035913, "learning_rate": 4.631589539076553e-07, "loss": 0.2035, "step": 36557 }, { "epoch": 2.717056856187291, "grad_norm": 2.5316982571767572, "learning_rate": 4.6291762424955256e-07, "loss": 0.277, "step": 36558 }, { "epoch": 2.717131178000743, "grad_norm": 2.22012108308011, "learning_rate": 4.626763559906888e-07, "loss": 0.31, "step": 36559 }, { "epoch": 2.7172054998141952, "grad_norm": 3.2016549902927847, "learning_rate": 4.624351491326218e-07, "loss": 0.2501, "step": 36560 }, { "epoch": 2.7172798216276477, "grad_norm": 2.691677735838044, "learning_rate": 4.6219400367690236e-07, "loss": 0.317, "step": 36561 }, { "epoch": 2.7173541434411, "grad_norm": 2.7768603946130823, "learning_rate": 4.619529196250827e-07, "loss": 0.289, "step": 36562 }, { "epoch": 2.717428465254552, "grad_norm": 2.6311632653447385, "learning_rate": 4.617118969787149e-07, "loss": 0.3295, "step": 36563 }, { "epoch": 2.717502787068004, "grad_norm": 1.5962535719256292, "learning_rate": 4.614709357393521e-07, "loss": 0.1739, "step": 36564 }, { "epoch": 2.7175771088814566, "grad_norm": 2.499305360749833, "learning_rate": 4.612300359085442e-07, "loss": 0.2962, "step": 36565 }, { "epoch": 2.717651430694909, "grad_norm": 2.3395548156740245, "learning_rate": 4.609891974878411e-07, "loss": 0.2005, "step": 36566 }, { "epoch": 2.717725752508361, "grad_norm": 2.252182569670212, "learning_rate": 4.60748420478796e-07, "loss": 0.2302, "step": 36567 }, { "epoch": 2.7178000743218136, "grad_norm": 2.4828135547092978, "learning_rate": 4.605077048829576e-07, "loss": 0.3225, "step": 36568 }, { "epoch": 2.7178743961352656, "grad_norm": 1.8980755700953347, "learning_rate": 4.602670507018747e-07, "loss": 0.218, "step": 36569 }, { "epoch": 2.717948717948718, "grad_norm": 2.3393630926839846, "learning_rate": 4.600264579370983e-07, "loss": 0.3046, "step": 36570 }, { "epoch": 2.71802303976217, "grad_norm": 2.2587791345223183, "learning_rate": 4.59785926590175e-07, "loss": 0.2332, "step": 36571 }, { "epoch": 2.7180973615756225, "grad_norm": 2.133707797282113, "learning_rate": 4.5954545666265673e-07, "loss": 0.2727, "step": 36572 }, { "epoch": 2.7181716833890746, "grad_norm": 2.3443836057033125, "learning_rate": 4.593050481560901e-07, "loss": 0.2798, "step": 36573 }, { "epoch": 2.718246005202527, "grad_norm": 2.623718274231688, "learning_rate": 4.590647010720206e-07, "loss": 0.3123, "step": 36574 }, { "epoch": 2.718320327015979, "grad_norm": 2.6018489441960315, "learning_rate": 4.5882441541200027e-07, "loss": 0.3206, "step": 36575 }, { "epoch": 2.7183946488294315, "grad_norm": 2.390883449935693, "learning_rate": 4.585841911775701e-07, "loss": 0.2827, "step": 36576 }, { "epoch": 2.718468970642884, "grad_norm": 2.361474567022087, "learning_rate": 4.5834402837028227e-07, "loss": 0.2198, "step": 36577 }, { "epoch": 2.718543292456336, "grad_norm": 6.38718615417487, "learning_rate": 4.581039269916798e-07, "loss": 0.2771, "step": 36578 }, { "epoch": 2.718617614269788, "grad_norm": 2.1397703109182387, "learning_rate": 4.5786388704330833e-07, "loss": 0.262, "step": 36579 }, { "epoch": 2.7186919360832404, "grad_norm": 2.3503559721235114, "learning_rate": 4.5762390852671424e-07, "loss": 0.2352, "step": 36580 }, { "epoch": 2.718766257896693, "grad_norm": 2.6964510893366307, "learning_rate": 4.573839914434419e-07, "loss": 0.2738, "step": 36581 }, { "epoch": 2.718840579710145, "grad_norm": 2.1639593907118924, "learning_rate": 4.5714413579503793e-07, "loss": 0.2701, "step": 36582 }, { "epoch": 2.718914901523597, "grad_norm": 2.063801617701634, "learning_rate": 4.569043415830443e-07, "loss": 0.1934, "step": 36583 }, { "epoch": 2.7189892233370494, "grad_norm": 3.0610683345447804, "learning_rate": 4.566646088090043e-07, "loss": 0.3105, "step": 36584 }, { "epoch": 2.719063545150502, "grad_norm": 2.3964635542561687, "learning_rate": 4.5642493747446336e-07, "loss": 0.285, "step": 36585 }, { "epoch": 2.719137866963954, "grad_norm": 2.4491447233220853, "learning_rate": 4.561853275809625e-07, "loss": 0.2779, "step": 36586 }, { "epoch": 2.719212188777406, "grad_norm": 2.2843155877443198, "learning_rate": 4.5594577913004814e-07, "loss": 0.2468, "step": 36587 }, { "epoch": 2.7192865105908584, "grad_norm": 2.1077594550297474, "learning_rate": 4.557062921232569e-07, "loss": 0.2441, "step": 36588 }, { "epoch": 2.719360832404311, "grad_norm": 2.6695657527422565, "learning_rate": 4.554668665621342e-07, "loss": 0.2642, "step": 36589 }, { "epoch": 2.719435154217763, "grad_norm": 2.552132020784333, "learning_rate": 4.552275024482211e-07, "loss": 0.3044, "step": 36590 }, { "epoch": 2.7195094760312153, "grad_norm": 2.425901532064037, "learning_rate": 4.549881997830563e-07, "loss": 0.2112, "step": 36591 }, { "epoch": 2.7195837978446673, "grad_norm": 2.5514467056471926, "learning_rate": 4.5474895856818303e-07, "loss": 0.2542, "step": 36592 }, { "epoch": 2.7196581196581198, "grad_norm": 1.9543947314377605, "learning_rate": 4.5450977880514e-07, "loss": 0.2857, "step": 36593 }, { "epoch": 2.719732441471572, "grad_norm": 2.0730942083523494, "learning_rate": 4.542706604954694e-07, "loss": 0.2818, "step": 36594 }, { "epoch": 2.7198067632850242, "grad_norm": 2.349009209509272, "learning_rate": 4.5403160364070884e-07, "loss": 0.2399, "step": 36595 }, { "epoch": 2.7198810850984763, "grad_norm": 2.0716312501873864, "learning_rate": 4.5379260824239603e-07, "loss": 0.2879, "step": 36596 }, { "epoch": 2.7199554069119287, "grad_norm": 2.1489009746181216, "learning_rate": 4.53553674302073e-07, "loss": 0.2364, "step": 36597 }, { "epoch": 2.7200297287253807, "grad_norm": 2.7448024530001014, "learning_rate": 4.533148018212752e-07, "loss": 0.3365, "step": 36598 }, { "epoch": 2.720104050538833, "grad_norm": 2.3294418987398817, "learning_rate": 4.530759908015414e-07, "loss": 0.3701, "step": 36599 }, { "epoch": 2.7201783723522857, "grad_norm": 2.505684120100567, "learning_rate": 4.528372412444093e-07, "loss": 0.3854, "step": 36600 }, { "epoch": 2.7202526941657377, "grad_norm": 2.533969543045163, "learning_rate": 4.5259855315141435e-07, "loss": 0.283, "step": 36601 }, { "epoch": 2.7203270159791897, "grad_norm": 2.2476071618171796, "learning_rate": 4.5235992652409635e-07, "loss": 0.2806, "step": 36602 }, { "epoch": 2.720401337792642, "grad_norm": 2.3103079595098053, "learning_rate": 4.5212136136398744e-07, "loss": 0.2831, "step": 36603 }, { "epoch": 2.7204756596060946, "grad_norm": 2.143142979573929, "learning_rate": 4.5188285767262863e-07, "loss": 0.2621, "step": 36604 }, { "epoch": 2.7205499814195466, "grad_norm": 2.314403719048761, "learning_rate": 4.516444154515509e-07, "loss": 0.3004, "step": 36605 }, { "epoch": 2.7206243032329986, "grad_norm": 2.972968721649866, "learning_rate": 4.5140603470229085e-07, "loss": 0.3399, "step": 36606 }, { "epoch": 2.720698625046451, "grad_norm": 2.7051086530513495, "learning_rate": 4.5116771542638384e-07, "loss": 0.2945, "step": 36607 }, { "epoch": 2.7207729468599036, "grad_norm": 2.3213294195971947, "learning_rate": 4.509294576253631e-07, "loss": 0.3187, "step": 36608 }, { "epoch": 2.7208472686733556, "grad_norm": 2.4303527821525095, "learning_rate": 4.5069126130076413e-07, "loss": 0.2584, "step": 36609 }, { "epoch": 2.720921590486808, "grad_norm": 2.3083800532400094, "learning_rate": 4.504531264541179e-07, "loss": 0.2693, "step": 36610 }, { "epoch": 2.72099591230026, "grad_norm": 2.285964039136351, "learning_rate": 4.502150530869609e-07, "loss": 0.2956, "step": 36611 }, { "epoch": 2.7210702341137125, "grad_norm": 2.6461648348877804, "learning_rate": 4.4997704120082306e-07, "loss": 0.2553, "step": 36612 }, { "epoch": 2.7211445559271645, "grad_norm": 5.429438097144996, "learning_rate": 4.4973909079723544e-07, "loss": 0.2552, "step": 36613 }, { "epoch": 2.721218877740617, "grad_norm": 1.962561143603055, "learning_rate": 4.495012018777345e-07, "loss": 0.2152, "step": 36614 }, { "epoch": 2.721293199554069, "grad_norm": 2.227790997565569, "learning_rate": 4.49263374443849e-07, "loss": 0.2594, "step": 36615 }, { "epoch": 2.7213675213675215, "grad_norm": 2.909972057033397, "learning_rate": 4.4902560849710897e-07, "loss": 0.287, "step": 36616 }, { "epoch": 2.7214418431809735, "grad_norm": 2.26011832762432, "learning_rate": 4.487879040390475e-07, "loss": 0.2726, "step": 36617 }, { "epoch": 2.721516164994426, "grad_norm": 2.861009196050017, "learning_rate": 4.4855026107119335e-07, "loss": 0.2907, "step": 36618 }, { "epoch": 2.721590486807878, "grad_norm": 2.238079005842037, "learning_rate": 4.4831267959507765e-07, "loss": 0.1791, "step": 36619 }, { "epoch": 2.7216648086213304, "grad_norm": 3.03813670400146, "learning_rate": 4.4807515961222905e-07, "loss": 0.2905, "step": 36620 }, { "epoch": 2.7217391304347824, "grad_norm": 2.691150726105754, "learning_rate": 4.4783770112417746e-07, "loss": 0.2922, "step": 36621 }, { "epoch": 2.721813452248235, "grad_norm": 2.5739111845785514, "learning_rate": 4.476003041324517e-07, "loss": 0.318, "step": 36622 }, { "epoch": 2.7218877740616874, "grad_norm": 2.7005926719921107, "learning_rate": 4.473629686385772e-07, "loss": 0.2784, "step": 36623 }, { "epoch": 2.7219620958751394, "grad_norm": 2.769804860496414, "learning_rate": 4.471256946440861e-07, "loss": 0.3754, "step": 36624 }, { "epoch": 2.7220364176885914, "grad_norm": 2.421339583775046, "learning_rate": 4.468884821505026e-07, "loss": 0.3017, "step": 36625 }, { "epoch": 2.722110739502044, "grad_norm": 2.502373799655366, "learning_rate": 4.4665133115935676e-07, "loss": 0.3202, "step": 36626 }, { "epoch": 2.7221850613154963, "grad_norm": 2.1006571894709705, "learning_rate": 4.4641424167217397e-07, "loss": 0.2335, "step": 36627 }, { "epoch": 2.7222593831289483, "grad_norm": 2.1530771419813926, "learning_rate": 4.461772136904796e-07, "loss": 0.2413, "step": 36628 }, { "epoch": 2.7223337049424003, "grad_norm": 2.530232727861974, "learning_rate": 4.459402472158014e-07, "loss": 0.266, "step": 36629 }, { "epoch": 2.722408026755853, "grad_norm": 2.7066625843832295, "learning_rate": 4.4570334224966373e-07, "loss": 0.258, "step": 36630 }, { "epoch": 2.7224823485693053, "grad_norm": 2.9873210141448743, "learning_rate": 4.4546649879359304e-07, "loss": 0.293, "step": 36631 }, { "epoch": 2.7225566703827573, "grad_norm": 2.5000072624478515, "learning_rate": 4.452297168491126e-07, "loss": 0.3021, "step": 36632 }, { "epoch": 2.7226309921962097, "grad_norm": 3.0439323686412267, "learning_rate": 4.4499299641774795e-07, "loss": 0.2955, "step": 36633 }, { "epoch": 2.7227053140096618, "grad_norm": 2.3573351686601094, "learning_rate": 4.447563375010233e-07, "loss": 0.2874, "step": 36634 }, { "epoch": 2.722779635823114, "grad_norm": 2.2910608242484525, "learning_rate": 4.445197401004586e-07, "loss": 0.2708, "step": 36635 }, { "epoch": 2.7228539576365662, "grad_norm": 1.8227780787793555, "learning_rate": 4.4428320421758263e-07, "loss": 0.1782, "step": 36636 }, { "epoch": 2.7229282794500187, "grad_norm": 2.4077913859892432, "learning_rate": 4.440467298539153e-07, "loss": 0.3221, "step": 36637 }, { "epoch": 2.7230026012634707, "grad_norm": 2.408511842637375, "learning_rate": 4.4381031701097865e-07, "loss": 0.2552, "step": 36638 }, { "epoch": 2.723076923076923, "grad_norm": 2.3571428112846453, "learning_rate": 4.43573965690296e-07, "loss": 0.3092, "step": 36639 }, { "epoch": 2.723151244890375, "grad_norm": 3.0805490208051314, "learning_rate": 4.4333767589338607e-07, "loss": 0.2987, "step": 36640 }, { "epoch": 2.7232255667038276, "grad_norm": 2.1660928797672123, "learning_rate": 4.4310144762177544e-07, "loss": 0.2402, "step": 36641 }, { "epoch": 2.7232998885172797, "grad_norm": 2.3798176927442762, "learning_rate": 4.4286528087698067e-07, "loss": 0.2658, "step": 36642 }, { "epoch": 2.723374210330732, "grad_norm": 1.875810093558417, "learning_rate": 4.426291756605239e-07, "loss": 0.2012, "step": 36643 }, { "epoch": 2.723448532144184, "grad_norm": 2.424953135336114, "learning_rate": 4.4239313197392384e-07, "loss": 0.3235, "step": 36644 }, { "epoch": 2.7235228539576366, "grad_norm": 2.50941577132954, "learning_rate": 4.421571498186994e-07, "loss": 0.2479, "step": 36645 }, { "epoch": 2.723597175771089, "grad_norm": 2.187941079339977, "learning_rate": 4.419212291963737e-07, "loss": 0.3113, "step": 36646 }, { "epoch": 2.723671497584541, "grad_norm": 2.0693596893896795, "learning_rate": 4.416853701084611e-07, "loss": 0.2486, "step": 36647 }, { "epoch": 2.723745819397993, "grad_norm": 2.8312070213377263, "learning_rate": 4.414495725564838e-07, "loss": 0.3752, "step": 36648 }, { "epoch": 2.7238201412114456, "grad_norm": 2.3195777757871783, "learning_rate": 4.4121383654195827e-07, "loss": 0.306, "step": 36649 }, { "epoch": 2.723894463024898, "grad_norm": 1.8391399901125023, "learning_rate": 4.409781620664011e-07, "loss": 0.2127, "step": 36650 }, { "epoch": 2.72396878483835, "grad_norm": 2.284592210804938, "learning_rate": 4.407425491313311e-07, "loss": 0.3026, "step": 36651 }, { "epoch": 2.724043106651802, "grad_norm": 1.9263453876009051, "learning_rate": 4.405069977382637e-07, "loss": 0.1976, "step": 36652 }, { "epoch": 2.7241174284652545, "grad_norm": 1.948905167299051, "learning_rate": 4.4027150788871763e-07, "loss": 0.2285, "step": 36653 }, { "epoch": 2.724191750278707, "grad_norm": 2.3767262919557375, "learning_rate": 4.4003607958420734e-07, "loss": 0.2129, "step": 36654 }, { "epoch": 2.724266072092159, "grad_norm": 2.1776371464161706, "learning_rate": 4.3980071282624824e-07, "loss": 0.2537, "step": 36655 }, { "epoch": 2.7243403939056114, "grad_norm": 2.9579244190701566, "learning_rate": 4.395654076163569e-07, "loss": 0.2426, "step": 36656 }, { "epoch": 2.7244147157190635, "grad_norm": 2.6022501263469193, "learning_rate": 4.3933016395604547e-07, "loss": 0.2517, "step": 36657 }, { "epoch": 2.724489037532516, "grad_norm": 2.211571797381387, "learning_rate": 4.3909498184683264e-07, "loss": 0.2303, "step": 36658 }, { "epoch": 2.724563359345968, "grad_norm": 2.197387331706737, "learning_rate": 4.388598612902295e-07, "loss": 0.308, "step": 36659 }, { "epoch": 2.7246376811594204, "grad_norm": 2.01977480456481, "learning_rate": 4.3862480228774926e-07, "loss": 0.2691, "step": 36660 }, { "epoch": 2.7247120029728724, "grad_norm": 2.4189083017489024, "learning_rate": 4.3838980484090744e-07, "loss": 0.2736, "step": 36661 }, { "epoch": 2.724786324786325, "grad_norm": 2.84134089707706, "learning_rate": 4.3815486895121495e-07, "loss": 0.3488, "step": 36662 }, { "epoch": 2.724860646599777, "grad_norm": 2.8728661231721264, "learning_rate": 4.3791999462018733e-07, "loss": 0.327, "step": 36663 }, { "epoch": 2.7249349684132294, "grad_norm": 2.439788768913156, "learning_rate": 4.3768518184933326e-07, "loss": 0.32, "step": 36664 }, { "epoch": 2.7250092902266814, "grad_norm": 2.444241839955587, "learning_rate": 4.37450430640165e-07, "loss": 0.2832, "step": 36665 }, { "epoch": 2.725083612040134, "grad_norm": 2.769413821002409, "learning_rate": 4.3721574099419793e-07, "loss": 0.2827, "step": 36666 }, { "epoch": 2.725157933853586, "grad_norm": 2.0658019689120506, "learning_rate": 4.3698111291293645e-07, "loss": 0.2431, "step": 36667 }, { "epoch": 2.7252322556670383, "grad_norm": 2.1244986611854357, "learning_rate": 4.3674654639789596e-07, "loss": 0.2676, "step": 36668 }, { "epoch": 2.7253065774804908, "grad_norm": 2.450406676939524, "learning_rate": 4.3651204145058413e-07, "loss": 0.3062, "step": 36669 }, { "epoch": 2.725380899293943, "grad_norm": 2.796811725537045, "learning_rate": 4.36277598072512e-07, "loss": 0.2449, "step": 36670 }, { "epoch": 2.725455221107395, "grad_norm": 2.7668909264397192, "learning_rate": 4.3604321626518955e-07, "loss": 0.3545, "step": 36671 }, { "epoch": 2.7255295429208473, "grad_norm": 2.1885591141163885, "learning_rate": 4.3580889603012213e-07, "loss": 0.2632, "step": 36672 }, { "epoch": 2.7256038647342997, "grad_norm": 2.4860427369826104, "learning_rate": 4.35574637368823e-07, "loss": 0.3416, "step": 36673 }, { "epoch": 2.7256781865477517, "grad_norm": 2.5296563399950727, "learning_rate": 4.3534044028279655e-07, "loss": 0.3407, "step": 36674 }, { "epoch": 2.7257525083612038, "grad_norm": 1.9411810386789383, "learning_rate": 4.3510630477355265e-07, "loss": 0.2255, "step": 36675 }, { "epoch": 2.725826830174656, "grad_norm": 2.212517553506925, "learning_rate": 4.34872230842599e-07, "loss": 0.2122, "step": 36676 }, { "epoch": 2.7259011519881087, "grad_norm": 2.006922623496547, "learning_rate": 4.346382184914411e-07, "loss": 0.284, "step": 36677 }, { "epoch": 2.7259754738015607, "grad_norm": 2.6948618147711496, "learning_rate": 4.344042677215865e-07, "loss": 0.376, "step": 36678 }, { "epoch": 2.726049795615013, "grad_norm": 2.284062387431148, "learning_rate": 4.341703785345386e-07, "loss": 0.2435, "step": 36679 }, { "epoch": 2.726124117428465, "grad_norm": 2.4205468714880203, "learning_rate": 4.339365509318083e-07, "loss": 0.2394, "step": 36680 }, { "epoch": 2.7261984392419176, "grad_norm": 2.6828649417596293, "learning_rate": 4.337027849148967e-07, "loss": 0.2924, "step": 36681 }, { "epoch": 2.7262727610553696, "grad_norm": 2.2738340716859504, "learning_rate": 4.3346908048531036e-07, "loss": 0.2676, "step": 36682 }, { "epoch": 2.726347082868822, "grad_norm": 2.0826527165929547, "learning_rate": 4.332354376445536e-07, "loss": 0.2086, "step": 36683 }, { "epoch": 2.726421404682274, "grad_norm": 2.1325426681330866, "learning_rate": 4.330018563941296e-07, "loss": 0.2697, "step": 36684 }, { "epoch": 2.7264957264957266, "grad_norm": 2.4587910188976836, "learning_rate": 4.3276833673554507e-07, "loss": 0.3188, "step": 36685 }, { "epoch": 2.7265700483091786, "grad_norm": 2.893998650360511, "learning_rate": 4.325348786703021e-07, "loss": 0.2757, "step": 36686 }, { "epoch": 2.726644370122631, "grad_norm": 2.5194652618296223, "learning_rate": 4.323014821999005e-07, "loss": 0.274, "step": 36687 }, { "epoch": 2.7267186919360835, "grad_norm": 2.5654168004356275, "learning_rate": 4.320681473258492e-07, "loss": 0.3281, "step": 36688 }, { "epoch": 2.7267930137495355, "grad_norm": 2.3782794465797816, "learning_rate": 4.318348740496436e-07, "loss": 0.2559, "step": 36689 }, { "epoch": 2.7268673355629875, "grad_norm": 2.4195448890952367, "learning_rate": 4.3160166237279034e-07, "loss": 0.3417, "step": 36690 }, { "epoch": 2.72694165737644, "grad_norm": 2.2689174452024825, "learning_rate": 4.31368512296787e-07, "loss": 0.2949, "step": 36691 }, { "epoch": 2.7270159791898925, "grad_norm": 2.2509848864493205, "learning_rate": 4.311354238231391e-07, "loss": 0.2604, "step": 36692 }, { "epoch": 2.7270903010033445, "grad_norm": 2.0542565348957433, "learning_rate": 4.3090239695334435e-07, "loss": 0.2296, "step": 36693 }, { "epoch": 2.7271646228167965, "grad_norm": 2.8393085608723765, "learning_rate": 4.306694316889015e-07, "loss": 0.3201, "step": 36694 }, { "epoch": 2.727238944630249, "grad_norm": 2.9187269308539974, "learning_rate": 4.3043652803131384e-07, "loss": 0.2631, "step": 36695 }, { "epoch": 2.7273132664437014, "grad_norm": 1.8183312743264808, "learning_rate": 4.3020368598207906e-07, "loss": 0.2167, "step": 36696 }, { "epoch": 2.7273875882571534, "grad_norm": 2.537260007675517, "learning_rate": 4.2997090554269706e-07, "loss": 0.2605, "step": 36697 }, { "epoch": 2.7274619100706055, "grad_norm": 2.2767608803409307, "learning_rate": 4.297381867146655e-07, "loss": 0.2743, "step": 36698 }, { "epoch": 2.727536231884058, "grad_norm": 2.2060640046863162, "learning_rate": 4.295055294994821e-07, "loss": 0.1823, "step": 36699 }, { "epoch": 2.7276105536975104, "grad_norm": 4.499568293909337, "learning_rate": 4.292729338986479e-07, "loss": 0.3413, "step": 36700 }, { "epoch": 2.7276848755109624, "grad_norm": 2.202851314188186, "learning_rate": 4.29040399913655e-07, "loss": 0.2173, "step": 36701 }, { "epoch": 2.727759197324415, "grad_norm": 2.359200721244033, "learning_rate": 4.288079275460055e-07, "loss": 0.2585, "step": 36702 }, { "epoch": 2.727833519137867, "grad_norm": 3.297111509476883, "learning_rate": 4.2857551679719276e-07, "loss": 0.4234, "step": 36703 }, { "epoch": 2.7279078409513193, "grad_norm": 2.681984849658129, "learning_rate": 4.2834316766871444e-07, "loss": 0.3624, "step": 36704 }, { "epoch": 2.7279821627647713, "grad_norm": 2.728188363003231, "learning_rate": 4.2811088016206594e-07, "loss": 0.3331, "step": 36705 }, { "epoch": 2.728056484578224, "grad_norm": 2.3480433350316043, "learning_rate": 4.2787865427874275e-07, "loss": 0.2398, "step": 36706 }, { "epoch": 2.728130806391676, "grad_norm": 2.0402238307872818, "learning_rate": 4.276464900202415e-07, "loss": 0.2676, "step": 36707 }, { "epoch": 2.7282051282051283, "grad_norm": 2.059110961897714, "learning_rate": 4.274143873880543e-07, "loss": 0.2446, "step": 36708 }, { "epoch": 2.7282794500185803, "grad_norm": 2.429900827866581, "learning_rate": 4.2718234638367663e-07, "loss": 0.2324, "step": 36709 }, { "epoch": 2.7283537718320328, "grad_norm": 2.7013306501787064, "learning_rate": 4.2695036700860284e-07, "loss": 0.2361, "step": 36710 }, { "epoch": 2.728428093645485, "grad_norm": 3.5061440039596445, "learning_rate": 4.2671844926432614e-07, "loss": 0.3072, "step": 36711 }, { "epoch": 2.7285024154589372, "grad_norm": 2.5603236939961778, "learning_rate": 4.2648659315233875e-07, "loss": 0.295, "step": 36712 }, { "epoch": 2.7285767372723893, "grad_norm": 2.0241537393026325, "learning_rate": 4.2625479867413387e-07, "loss": 0.2464, "step": 36713 }, { "epoch": 2.7286510590858417, "grad_norm": 2.0017099037870434, "learning_rate": 4.2602306583120365e-07, "loss": 0.2329, "step": 36714 }, { "epoch": 2.728725380899294, "grad_norm": 1.7958648655331588, "learning_rate": 4.2579139462504137e-07, "loss": 0.2126, "step": 36715 }, { "epoch": 2.728799702712746, "grad_norm": 1.95078740904791, "learning_rate": 4.255597850571358e-07, "loss": 0.2622, "step": 36716 }, { "epoch": 2.728874024526198, "grad_norm": 1.9961236294634592, "learning_rate": 4.253282371289802e-07, "loss": 0.2332, "step": 36717 }, { "epoch": 2.7289483463396507, "grad_norm": 1.5789901743525285, "learning_rate": 4.250967508420645e-07, "loss": 0.1861, "step": 36718 }, { "epoch": 2.729022668153103, "grad_norm": 2.931533347578835, "learning_rate": 4.2486532619787966e-07, "loss": 0.3373, "step": 36719 }, { "epoch": 2.729096989966555, "grad_norm": 2.836556565335016, "learning_rate": 4.2463396319791464e-07, "loss": 0.3365, "step": 36720 }, { "epoch": 2.729171311780007, "grad_norm": 2.3136926329769145, "learning_rate": 4.244026618436581e-07, "loss": 0.2266, "step": 36721 }, { "epoch": 2.7292456335934596, "grad_norm": 2.473943759580739, "learning_rate": 4.241714221366033e-07, "loss": 0.314, "step": 36722 }, { "epoch": 2.729319955406912, "grad_norm": 3.8837182712463427, "learning_rate": 4.2394024407823367e-07, "loss": 0.315, "step": 36723 }, { "epoch": 2.729394277220364, "grad_norm": 2.181748190102947, "learning_rate": 4.237091276700411e-07, "loss": 0.2265, "step": 36724 }, { "epoch": 2.7294685990338166, "grad_norm": 2.6185955252188586, "learning_rate": 4.234780729135124e-07, "loss": 0.2576, "step": 36725 }, { "epoch": 2.7295429208472686, "grad_norm": 2.22408640161899, "learning_rate": 4.2324707981013294e-07, "loss": 0.314, "step": 36726 }, { "epoch": 2.729617242660721, "grad_norm": 2.2741622275327296, "learning_rate": 4.230161483613937e-07, "loss": 0.2237, "step": 36727 }, { "epoch": 2.729691564474173, "grad_norm": 2.0328227724495247, "learning_rate": 4.2278527856877803e-07, "loss": 0.229, "step": 36728 }, { "epoch": 2.7297658862876255, "grad_norm": 2.3637135040791835, "learning_rate": 4.2255447043377584e-07, "loss": 0.2871, "step": 36729 }, { "epoch": 2.7298402081010775, "grad_norm": 2.2736213286465925, "learning_rate": 4.223237239578704e-07, "loss": 0.2675, "step": 36730 }, { "epoch": 2.72991452991453, "grad_norm": 3.07936358513879, "learning_rate": 4.2209303914254596e-07, "loss": 0.3412, "step": 36731 }, { "epoch": 2.729988851727982, "grad_norm": 2.2606387765995377, "learning_rate": 4.2186241598929146e-07, "loss": 0.1958, "step": 36732 }, { "epoch": 2.7300631735414345, "grad_norm": 2.9228345497749024, "learning_rate": 4.21631854499589e-07, "loss": 0.2707, "step": 36733 }, { "epoch": 2.730137495354887, "grad_norm": 2.4561364859660513, "learning_rate": 4.2140135467492627e-07, "loss": 0.2431, "step": 36734 }, { "epoch": 2.730211817168339, "grad_norm": 2.391664139292026, "learning_rate": 4.21170916516781e-07, "loss": 0.2788, "step": 36735 }, { "epoch": 2.730286138981791, "grad_norm": 2.9057167598918214, "learning_rate": 4.20940540026642e-07, "loss": 0.3162, "step": 36736 }, { "epoch": 2.7303604607952434, "grad_norm": 2.3180003888464857, "learning_rate": 4.2071022520599135e-07, "loss": 0.2513, "step": 36737 }, { "epoch": 2.730434782608696, "grad_norm": 2.723283780938328, "learning_rate": 4.204799720563102e-07, "loss": 0.2818, "step": 36738 }, { "epoch": 2.730509104422148, "grad_norm": 3.0189157025385587, "learning_rate": 4.202497805790817e-07, "loss": 0.3386, "step": 36739 }, { "epoch": 2.7305834262356, "grad_norm": 1.8398162972550833, "learning_rate": 4.200196507757892e-07, "loss": 0.219, "step": 36740 }, { "epoch": 2.7306577480490524, "grad_norm": 3.1480653286550595, "learning_rate": 4.197895826479115e-07, "loss": 0.2587, "step": 36741 }, { "epoch": 2.730732069862505, "grad_norm": 2.8418859565657506, "learning_rate": 4.1955957619693286e-07, "loss": 0.2897, "step": 36742 }, { "epoch": 2.730806391675957, "grad_norm": 2.0850724359574127, "learning_rate": 4.1932963142433113e-07, "loss": 0.2594, "step": 36743 }, { "epoch": 2.7308807134894093, "grad_norm": 2.4427410492621653, "learning_rate": 4.190997483315895e-07, "loss": 0.3264, "step": 36744 }, { "epoch": 2.7309550353028613, "grad_norm": 2.4433807712513986, "learning_rate": 4.188699269201868e-07, "loss": 0.2502, "step": 36745 }, { "epoch": 2.731029357116314, "grad_norm": 2.3324064009362213, "learning_rate": 4.1864016719160184e-07, "loss": 0.37, "step": 36746 }, { "epoch": 2.731103678929766, "grad_norm": 2.8444883736350426, "learning_rate": 4.1841046914731566e-07, "loss": 0.2851, "step": 36747 }, { "epoch": 2.7311780007432183, "grad_norm": 2.0219616737315937, "learning_rate": 4.181808327888026e-07, "loss": 0.1928, "step": 36748 }, { "epoch": 2.7312523225566703, "grad_norm": 2.6663549574146983, "learning_rate": 4.179512581175471e-07, "loss": 0.2353, "step": 36749 }, { "epoch": 2.7313266443701227, "grad_norm": 3.238711270321945, "learning_rate": 4.177217451350213e-07, "loss": 0.2989, "step": 36750 }, { "epoch": 2.7314009661835748, "grad_norm": 1.997497426159076, "learning_rate": 4.174922938427084e-07, "loss": 0.2289, "step": 36751 }, { "epoch": 2.731475287997027, "grad_norm": 2.547332067468399, "learning_rate": 4.172629042420817e-07, "loss": 0.2687, "step": 36752 }, { "epoch": 2.7315496098104792, "grad_norm": 2.3333037723863357, "learning_rate": 4.1703357633461896e-07, "loss": 0.2827, "step": 36753 }, { "epoch": 2.7316239316239317, "grad_norm": 2.7000609547720393, "learning_rate": 4.1680431012179777e-07, "loss": 0.2525, "step": 36754 }, { "epoch": 2.7316982534373837, "grad_norm": 2.5353684255070323, "learning_rate": 4.165751056050915e-07, "loss": 0.239, "step": 36755 }, { "epoch": 2.731772575250836, "grad_norm": 2.2519906177274005, "learning_rate": 4.1634596278598115e-07, "loss": 0.2407, "step": 36756 }, { "epoch": 2.7318468970642886, "grad_norm": 2.0979084590163217, "learning_rate": 4.1611688166593446e-07, "loss": 0.2698, "step": 36757 }, { "epoch": 2.7319212188777406, "grad_norm": 2.678313347285346, "learning_rate": 4.158878622464313e-07, "loss": 0.3063, "step": 36758 }, { "epoch": 2.7319955406911927, "grad_norm": 2.0130572811224434, "learning_rate": 4.156589045289461e-07, "loss": 0.2305, "step": 36759 }, { "epoch": 2.732069862504645, "grad_norm": 2.050523671136065, "learning_rate": 4.1543000851494874e-07, "loss": 0.2308, "step": 36760 }, { "epoch": 2.7321441843180976, "grad_norm": 2.6643665469177416, "learning_rate": 4.152011742059181e-07, "loss": 0.321, "step": 36761 }, { "epoch": 2.7322185061315496, "grad_norm": 2.266182623894071, "learning_rate": 4.149724016033241e-07, "loss": 0.2537, "step": 36762 }, { "epoch": 2.7322928279450016, "grad_norm": 2.607219315495044, "learning_rate": 4.1474369070863995e-07, "loss": 0.3004, "step": 36763 }, { "epoch": 2.732367149758454, "grad_norm": 2.1226977892052012, "learning_rate": 4.14515041523339e-07, "loss": 0.2127, "step": 36764 }, { "epoch": 2.7324414715719065, "grad_norm": 2.0827943875920036, "learning_rate": 4.142864540488922e-07, "loss": 0.2579, "step": 36765 }, { "epoch": 2.7325157933853585, "grad_norm": 2.7040635492135614, "learning_rate": 4.1405792828677295e-07, "loss": 0.3184, "step": 36766 }, { "epoch": 2.732590115198811, "grad_norm": 2.7401823450458127, "learning_rate": 4.1382946423845106e-07, "loss": 0.2678, "step": 36767 }, { "epoch": 2.732664437012263, "grad_norm": 1.993929057080695, "learning_rate": 4.1360106190539874e-07, "loss": 0.195, "step": 36768 }, { "epoch": 2.7327387588257155, "grad_norm": 2.0635688376355628, "learning_rate": 4.133727212890848e-07, "loss": 0.2867, "step": 36769 }, { "epoch": 2.7328130806391675, "grad_norm": 2.371759681226286, "learning_rate": 4.1314444239097805e-07, "loss": 0.3216, "step": 36770 }, { "epoch": 2.73288740245262, "grad_norm": 2.2997271594092155, "learning_rate": 4.1291622521255184e-07, "loss": 0.2691, "step": 36771 }, { "epoch": 2.732961724266072, "grad_norm": 2.903883549129353, "learning_rate": 4.1268806975527155e-07, "loss": 0.3112, "step": 36772 }, { "epoch": 2.7330360460795244, "grad_norm": 3.0529390263260545, "learning_rate": 4.1245997602060936e-07, "loss": 0.3876, "step": 36773 }, { "epoch": 2.7331103678929765, "grad_norm": 2.320011852244439, "learning_rate": 4.122319440100331e-07, "loss": 0.3266, "step": 36774 }, { "epoch": 2.733184689706429, "grad_norm": 2.2377691541901763, "learning_rate": 4.120039737250092e-07, "loss": 0.2064, "step": 36775 }, { "epoch": 2.733259011519881, "grad_norm": 2.4593945996448268, "learning_rate": 4.1177606516700665e-07, "loss": 0.2374, "step": 36776 }, { "epoch": 2.7333333333333334, "grad_norm": 1.851600167619302, "learning_rate": 4.115482183374908e-07, "loss": 0.2357, "step": 36777 }, { "epoch": 2.7334076551467854, "grad_norm": 2.541359026485365, "learning_rate": 4.113204332379317e-07, "loss": 0.3448, "step": 36778 }, { "epoch": 2.733481976960238, "grad_norm": 2.2745382310763764, "learning_rate": 4.110927098697948e-07, "loss": 0.2716, "step": 36779 }, { "epoch": 2.7335562987736903, "grad_norm": 2.038534621156898, "learning_rate": 4.1086504823454444e-07, "loss": 0.2713, "step": 36780 }, { "epoch": 2.7336306205871423, "grad_norm": 1.7561816016988439, "learning_rate": 4.1063744833364727e-07, "loss": 0.2199, "step": 36781 }, { "epoch": 2.7337049424005944, "grad_norm": 4.80947166152798, "learning_rate": 4.104099101685677e-07, "loss": 0.2961, "step": 36782 }, { "epoch": 2.733779264214047, "grad_norm": 4.142003247261243, "learning_rate": 4.101824337407734e-07, "loss": 0.3179, "step": 36783 }, { "epoch": 2.7338535860274993, "grad_norm": 2.584365347217541, "learning_rate": 4.0995501905172654e-07, "loss": 0.2605, "step": 36784 }, { "epoch": 2.7339279078409513, "grad_norm": 2.858683193141598, "learning_rate": 4.0972766610289037e-07, "loss": 0.2724, "step": 36785 }, { "epoch": 2.7340022296544033, "grad_norm": 3.0139703343135804, "learning_rate": 4.0950037489573156e-07, "loss": 0.3075, "step": 36786 }, { "epoch": 2.7340765514678558, "grad_norm": 2.838323693677938, "learning_rate": 4.0927314543171e-07, "loss": 0.2277, "step": 36787 }, { "epoch": 2.7341508732813082, "grad_norm": 1.8949257223832072, "learning_rate": 4.090459777122913e-07, "loss": 0.1914, "step": 36788 }, { "epoch": 2.7342251950947603, "grad_norm": 2.551215724150406, "learning_rate": 4.088188717389374e-07, "loss": 0.345, "step": 36789 }, { "epoch": 2.7342995169082127, "grad_norm": 2.7327316423629, "learning_rate": 4.085918275131107e-07, "loss": 0.2589, "step": 36790 }, { "epoch": 2.7343738387216647, "grad_norm": 2.4140527365212363, "learning_rate": 4.0836484503627094e-07, "loss": 0.2655, "step": 36791 }, { "epoch": 2.734448160535117, "grad_norm": 2.742565454829895, "learning_rate": 4.081379243098793e-07, "loss": 0.305, "step": 36792 }, { "epoch": 2.734522482348569, "grad_norm": 2.1960674668166447, "learning_rate": 4.0791106533540017e-07, "loss": 0.276, "step": 36793 }, { "epoch": 2.7345968041620217, "grad_norm": 2.9847490159260164, "learning_rate": 4.076842681142901e-07, "loss": 0.3225, "step": 36794 }, { "epoch": 2.7346711259754737, "grad_norm": 2.6202109582036344, "learning_rate": 4.0745753264801236e-07, "loss": 0.2459, "step": 36795 }, { "epoch": 2.734745447788926, "grad_norm": 2.0952786474326475, "learning_rate": 4.0723085893802585e-07, "loss": 0.2515, "step": 36796 }, { "epoch": 2.734819769602378, "grad_norm": 1.9028494132595013, "learning_rate": 4.0700424698578714e-07, "loss": 0.1759, "step": 36797 }, { "epoch": 2.7348940914158306, "grad_norm": 2.772448500239897, "learning_rate": 4.0677769679275947e-07, "loss": 0.2671, "step": 36798 }, { "epoch": 2.7349684132292826, "grad_norm": 2.4965684137540025, "learning_rate": 4.065512083603973e-07, "loss": 0.3512, "step": 36799 }, { "epoch": 2.735042735042735, "grad_norm": 3.752286137276599, "learning_rate": 4.0632478169016163e-07, "loss": 0.3566, "step": 36800 }, { "epoch": 2.735117056856187, "grad_norm": 2.581936410544831, "learning_rate": 4.0609841678351023e-07, "loss": 0.2822, "step": 36801 }, { "epoch": 2.7351913786696396, "grad_norm": 2.076643744478309, "learning_rate": 4.058721136418986e-07, "loss": 0.217, "step": 36802 }, { "epoch": 2.735265700483092, "grad_norm": 2.3555886647043787, "learning_rate": 4.056458722667844e-07, "loss": 0.2679, "step": 36803 }, { "epoch": 2.735340022296544, "grad_norm": 2.3032968214219256, "learning_rate": 4.0541969265962435e-07, "loss": 0.2463, "step": 36804 }, { "epoch": 2.735414344109996, "grad_norm": 2.0642228423252766, "learning_rate": 4.0519357482187494e-07, "loss": 0.2434, "step": 36805 }, { "epoch": 2.7354886659234485, "grad_norm": 2.434542146132371, "learning_rate": 4.0496751875499064e-07, "loss": 0.2864, "step": 36806 }, { "epoch": 2.735562987736901, "grad_norm": 2.102056906064422, "learning_rate": 4.047415244604269e-07, "loss": 0.2682, "step": 36807 }, { "epoch": 2.735637309550353, "grad_norm": 2.1314090308607656, "learning_rate": 4.045155919396415e-07, "loss": 0.2299, "step": 36808 }, { "epoch": 2.735711631363805, "grad_norm": 2.8710835663128424, "learning_rate": 4.042897211940844e-07, "loss": 0.3269, "step": 36809 }, { "epoch": 2.7357859531772575, "grad_norm": 2.1339541407962996, "learning_rate": 4.0406391222521434e-07, "loss": 0.2382, "step": 36810 }, { "epoch": 2.73586027499071, "grad_norm": 2.106714315700932, "learning_rate": 4.0383816503448246e-07, "loss": 0.2564, "step": 36811 }, { "epoch": 2.735934596804162, "grad_norm": 2.749646855634958, "learning_rate": 4.0361247962334096e-07, "loss": 0.2574, "step": 36812 }, { "epoch": 2.7360089186176144, "grad_norm": 2.5329374757300895, "learning_rate": 4.0338685599324744e-07, "loss": 0.3161, "step": 36813 }, { "epoch": 2.7360832404310664, "grad_norm": 3.003629111716945, "learning_rate": 4.0316129414564864e-07, "loss": 0.3283, "step": 36814 }, { "epoch": 2.736157562244519, "grad_norm": 1.7785769264063407, "learning_rate": 4.029357940820011e-07, "loss": 0.2083, "step": 36815 }, { "epoch": 2.736231884057971, "grad_norm": 2.183203720447909, "learning_rate": 4.0271035580375264e-07, "loss": 0.2154, "step": 36816 }, { "epoch": 2.7363062058714234, "grad_norm": 2.312006929821091, "learning_rate": 4.0248497931235975e-07, "loss": 0.2235, "step": 36817 }, { "epoch": 2.7363805276848754, "grad_norm": 2.4056358863715945, "learning_rate": 4.022596646092691e-07, "loss": 0.2612, "step": 36818 }, { "epoch": 2.736454849498328, "grad_norm": 2.348714819822552, "learning_rate": 4.0203441169593293e-07, "loss": 0.1851, "step": 36819 }, { "epoch": 2.73652917131178, "grad_norm": 2.233987465878344, "learning_rate": 4.0180922057380113e-07, "loss": 0.3063, "step": 36820 }, { "epoch": 2.7366034931252323, "grad_norm": 2.1590700941888303, "learning_rate": 4.0158409124432363e-07, "loss": 0.2806, "step": 36821 }, { "epoch": 2.7366778149386843, "grad_norm": 1.7583989580332227, "learning_rate": 4.013590237089504e-07, "loss": 0.1741, "step": 36822 }, { "epoch": 2.736752136752137, "grad_norm": 1.9947031387741587, "learning_rate": 4.011340179691292e-07, "loss": 0.2386, "step": 36823 }, { "epoch": 2.736826458565589, "grad_norm": 1.6016044527679933, "learning_rate": 4.0090907402630994e-07, "loss": 0.1711, "step": 36824 }, { "epoch": 2.7369007803790413, "grad_norm": 2.26503946203204, "learning_rate": 4.006841918819393e-07, "loss": 0.2674, "step": 36825 }, { "epoch": 2.7369751021924937, "grad_norm": 2.6101377061718405, "learning_rate": 4.004593715374649e-07, "loss": 0.3336, "step": 36826 }, { "epoch": 2.7370494240059458, "grad_norm": 1.8605540307464439, "learning_rate": 4.0023461299433577e-07, "loss": 0.2381, "step": 36827 }, { "epoch": 2.7371237458193978, "grad_norm": 2.4351231891868945, "learning_rate": 4.0000991625399835e-07, "loss": 0.2448, "step": 36828 }, { "epoch": 2.7371980676328502, "grad_norm": 3.0893041452747307, "learning_rate": 3.997852813178982e-07, "loss": 0.3031, "step": 36829 }, { "epoch": 2.7372723894463027, "grad_norm": 2.587347837575032, "learning_rate": 3.9956070818748305e-07, "loss": 0.2813, "step": 36830 }, { "epoch": 2.7373467112597547, "grad_norm": 2.320491549303263, "learning_rate": 3.993361968641962e-07, "loss": 0.251, "step": 36831 }, { "epoch": 2.7374210330732067, "grad_norm": 2.066303645133362, "learning_rate": 3.9911174734948766e-07, "loss": 0.2147, "step": 36832 }, { "epoch": 2.737495354886659, "grad_norm": 1.9108466862675917, "learning_rate": 3.9888735964479843e-07, "loss": 0.2043, "step": 36833 }, { "epoch": 2.7375696767001116, "grad_norm": 1.9712735101193863, "learning_rate": 3.986630337515729e-07, "loss": 0.2196, "step": 36834 }, { "epoch": 2.7376439985135637, "grad_norm": 2.49782567542648, "learning_rate": 3.9843876967125883e-07, "loss": 0.2497, "step": 36835 }, { "epoch": 2.737718320327016, "grad_norm": 2.4648350676134485, "learning_rate": 3.9821456740529617e-07, "loss": 0.2639, "step": 36836 }, { "epoch": 2.737792642140468, "grad_norm": 2.4840075710654728, "learning_rate": 3.9799042695513043e-07, "loss": 0.3409, "step": 36837 }, { "epoch": 2.7378669639539206, "grad_norm": 2.586651276715125, "learning_rate": 3.9776634832220274e-07, "loss": 0.2914, "step": 36838 }, { "epoch": 2.7379412857673726, "grad_norm": 1.9239792411826904, "learning_rate": 3.975423315079585e-07, "loss": 0.2512, "step": 36839 }, { "epoch": 2.738015607580825, "grad_norm": 2.2500774218483253, "learning_rate": 3.973183765138389e-07, "loss": 0.2594, "step": 36840 }, { "epoch": 2.738089929394277, "grad_norm": 2.3126291405466834, "learning_rate": 3.9709448334128374e-07, "loss": 0.288, "step": 36841 }, { "epoch": 2.7381642512077295, "grad_norm": 2.1455302183922336, "learning_rate": 3.968706519917376e-07, "loss": 0.2529, "step": 36842 }, { "epoch": 2.7382385730211816, "grad_norm": 2.9889821347431913, "learning_rate": 3.966468824666381e-07, "loss": 0.2587, "step": 36843 }, { "epoch": 2.738312894834634, "grad_norm": 3.410409039139025, "learning_rate": 3.9642317476742966e-07, "loss": 0.2319, "step": 36844 }, { "epoch": 2.7383872166480865, "grad_norm": 1.848232721149048, "learning_rate": 3.9619952889555004e-07, "loss": 0.202, "step": 36845 }, { "epoch": 2.7384615384615385, "grad_norm": 2.7159518116483623, "learning_rate": 3.9597594485243806e-07, "loss": 0.2989, "step": 36846 }, { "epoch": 2.7385358602749905, "grad_norm": 2.1326937268543937, "learning_rate": 3.957524226395382e-07, "loss": 0.2243, "step": 36847 }, { "epoch": 2.738610182088443, "grad_norm": 2.52234802188241, "learning_rate": 3.955289622582825e-07, "loss": 0.3022, "step": 36848 }, { "epoch": 2.7386845039018954, "grad_norm": 2.5570791453681783, "learning_rate": 3.953055637101144e-07, "loss": 0.3295, "step": 36849 }, { "epoch": 2.7387588257153475, "grad_norm": 1.9239072167243823, "learning_rate": 3.9508222699647046e-07, "loss": 0.179, "step": 36850 }, { "epoch": 2.7388331475287995, "grad_norm": 2.5920980612936195, "learning_rate": 3.948589521187884e-07, "loss": 0.3118, "step": 36851 }, { "epoch": 2.738907469342252, "grad_norm": 2.856487252312526, "learning_rate": 3.946357390785061e-07, "loss": 0.3314, "step": 36852 }, { "epoch": 2.7389817911557044, "grad_norm": 2.911540422172915, "learning_rate": 3.9441258787705996e-07, "loss": 0.3471, "step": 36853 }, { "epoch": 2.7390561129691564, "grad_norm": 1.9745952867733765, "learning_rate": 3.9418949851588895e-07, "loss": 0.2416, "step": 36854 }, { "epoch": 2.7391304347826084, "grad_norm": 2.0750911581983655, "learning_rate": 3.9396647099642635e-07, "loss": 0.237, "step": 36855 }, { "epoch": 2.739204756596061, "grad_norm": 3.4054920261836816, "learning_rate": 3.937435053201089e-07, "loss": 0.2961, "step": 36856 }, { "epoch": 2.7392790784095133, "grad_norm": 3.069312046727799, "learning_rate": 3.93520601488373e-07, "loss": 0.2974, "step": 36857 }, { "epoch": 2.7393534002229654, "grad_norm": 1.9408012108709807, "learning_rate": 3.9329775950265326e-07, "loss": 0.2261, "step": 36858 }, { "epoch": 2.739427722036418, "grad_norm": 2.096132695974373, "learning_rate": 3.93074979364384e-07, "loss": 0.2314, "step": 36859 }, { "epoch": 2.73950204384987, "grad_norm": 2.708464582919399, "learning_rate": 3.9285226107499854e-07, "loss": 0.3571, "step": 36860 }, { "epoch": 2.7395763656633223, "grad_norm": 1.941112231573817, "learning_rate": 3.9262960463593236e-07, "loss": 0.23, "step": 36861 }, { "epoch": 2.7396506874767743, "grad_norm": 2.181969631895033, "learning_rate": 3.924070100486188e-07, "loss": 0.2765, "step": 36862 }, { "epoch": 2.7397250092902268, "grad_norm": 2.5873062807118385, "learning_rate": 3.9218447731449004e-07, "loss": 0.3411, "step": 36863 }, { "epoch": 2.739799331103679, "grad_norm": 2.904606306048412, "learning_rate": 3.919620064349794e-07, "loss": 0.2669, "step": 36864 }, { "epoch": 2.7398736529171313, "grad_norm": 2.196206116787128, "learning_rate": 3.917395974115168e-07, "loss": 0.2322, "step": 36865 }, { "epoch": 2.7399479747305833, "grad_norm": 2.147096988524787, "learning_rate": 3.915172502455389e-07, "loss": 0.2152, "step": 36866 }, { "epoch": 2.7400222965440357, "grad_norm": 2.9251006136182354, "learning_rate": 3.912949649384734e-07, "loss": 0.2654, "step": 36867 }, { "epoch": 2.740096618357488, "grad_norm": 3.1291135313022247, "learning_rate": 3.9107274149175145e-07, "loss": 0.3008, "step": 36868 }, { "epoch": 2.74017094017094, "grad_norm": 2.002628843833115, "learning_rate": 3.908505799068074e-07, "loss": 0.2041, "step": 36869 }, { "epoch": 2.7402452619843922, "grad_norm": 3.077620259054563, "learning_rate": 3.9062848018506573e-07, "loss": 0.3494, "step": 36870 }, { "epoch": 2.7403195837978447, "grad_norm": 2.2935188784224003, "learning_rate": 3.9040644232796075e-07, "loss": 0.3054, "step": 36871 }, { "epoch": 2.740393905611297, "grad_norm": 2.730173484295821, "learning_rate": 3.901844663369203e-07, "loss": 0.2332, "step": 36872 }, { "epoch": 2.740468227424749, "grad_norm": 2.8629804518436415, "learning_rate": 3.8996255221337207e-07, "loss": 0.2149, "step": 36873 }, { "epoch": 2.740542549238201, "grad_norm": 2.3111588112238595, "learning_rate": 3.897406999587483e-07, "loss": 0.2827, "step": 36874 }, { "epoch": 2.7406168710516536, "grad_norm": 1.95457493596017, "learning_rate": 3.895189095744734e-07, "loss": 0.2451, "step": 36875 }, { "epoch": 2.740691192865106, "grad_norm": 2.2043706680526594, "learning_rate": 3.8929718106197943e-07, "loss": 0.2977, "step": 36876 }, { "epoch": 2.740765514678558, "grad_norm": 2.355877439825623, "learning_rate": 3.89075514422691e-07, "loss": 0.2652, "step": 36877 }, { "epoch": 2.74083983649201, "grad_norm": 2.3673099372290642, "learning_rate": 3.888539096580335e-07, "loss": 0.3193, "step": 36878 }, { "epoch": 2.7409141583054626, "grad_norm": 3.411660220323108, "learning_rate": 3.8863236676943807e-07, "loss": 0.3727, "step": 36879 }, { "epoch": 2.740988480118915, "grad_norm": 2.2244786703302433, "learning_rate": 3.8841088575832684e-07, "loss": 0.2922, "step": 36880 }, { "epoch": 2.741062801932367, "grad_norm": 2.358278639908362, "learning_rate": 3.88189466626131e-07, "loss": 0.2469, "step": 36881 }, { "epoch": 2.7411371237458195, "grad_norm": 1.9763681048107482, "learning_rate": 3.879681093742693e-07, "loss": 0.2424, "step": 36882 }, { "epoch": 2.7412114455592715, "grad_norm": 2.2367837271574005, "learning_rate": 3.877468140041729e-07, "loss": 0.2708, "step": 36883 }, { "epoch": 2.741285767372724, "grad_norm": 2.945303571333264, "learning_rate": 3.875255805172617e-07, "loss": 0.3346, "step": 36884 }, { "epoch": 2.741360089186176, "grad_norm": 1.7364144790700717, "learning_rate": 3.873044089149625e-07, "loss": 0.1924, "step": 36885 }, { "epoch": 2.7414344109996285, "grad_norm": 2.220939816994157, "learning_rate": 3.870832991986995e-07, "loss": 0.2079, "step": 36886 }, { "epoch": 2.7415087328130805, "grad_norm": 1.7914597985440848, "learning_rate": 3.8686225136989497e-07, "loss": 0.2041, "step": 36887 }, { "epoch": 2.741583054626533, "grad_norm": 2.2689772377986897, "learning_rate": 3.8664126542997114e-07, "loss": 0.1968, "step": 36888 }, { "epoch": 2.741657376439985, "grad_norm": 2.3320551542550056, "learning_rate": 3.8642034138035354e-07, "loss": 0.2744, "step": 36889 }, { "epoch": 2.7417316982534374, "grad_norm": 2.214515772698948, "learning_rate": 3.8619947922246216e-07, "loss": 0.2518, "step": 36890 }, { "epoch": 2.74180602006689, "grad_norm": 3.3101963250022206, "learning_rate": 3.8597867895772135e-07, "loss": 0.2623, "step": 36891 }, { "epoch": 2.741880341880342, "grad_norm": 2.935196964375014, "learning_rate": 3.8575794058755e-07, "loss": 0.307, "step": 36892 }, { "epoch": 2.741954663693794, "grad_norm": 2.3567723114857317, "learning_rate": 3.8553726411337036e-07, "loss": 0.303, "step": 36893 }, { "epoch": 2.7420289855072464, "grad_norm": 2.8672613456349167, "learning_rate": 3.853166495366034e-07, "loss": 0.2039, "step": 36894 }, { "epoch": 2.742103307320699, "grad_norm": 2.155937086975535, "learning_rate": 3.85096096858667e-07, "loss": 0.2389, "step": 36895 }, { "epoch": 2.742177629134151, "grad_norm": 2.676170675609009, "learning_rate": 3.848756060809844e-07, "loss": 0.3017, "step": 36896 }, { "epoch": 2.742251950947603, "grad_norm": 2.8312895242013405, "learning_rate": 3.846551772049734e-07, "loss": 0.3796, "step": 36897 }, { "epoch": 2.7423262727610553, "grad_norm": 2.2023891592950826, "learning_rate": 3.844348102320539e-07, "loss": 0.1963, "step": 36898 }, { "epoch": 2.742400594574508, "grad_norm": 1.7459501152521653, "learning_rate": 3.8421450516364365e-07, "loss": 0.2006, "step": 36899 }, { "epoch": 2.74247491638796, "grad_norm": 2.25285803946353, "learning_rate": 3.8399426200116164e-07, "loss": 0.2599, "step": 36900 }, { "epoch": 2.7425492382014123, "grad_norm": 2.5598184321753545, "learning_rate": 3.837740807460255e-07, "loss": 0.2953, "step": 36901 }, { "epoch": 2.7426235600148643, "grad_norm": 2.3645448957270188, "learning_rate": 3.8355396139965305e-07, "loss": 0.2843, "step": 36902 }, { "epoch": 2.7426978818283168, "grad_norm": 2.618390622732969, "learning_rate": 3.8333390396346313e-07, "loss": 0.2619, "step": 36903 }, { "epoch": 2.7427722036417688, "grad_norm": 1.8280383556623732, "learning_rate": 3.83113908438868e-07, "loss": 0.2009, "step": 36904 }, { "epoch": 2.7428465254552212, "grad_norm": 2.4118538223815555, "learning_rate": 3.828939748272875e-07, "loss": 0.2957, "step": 36905 }, { "epoch": 2.7429208472686732, "grad_norm": 2.1519917951287106, "learning_rate": 3.826741031301362e-07, "loss": 0.2214, "step": 36906 }, { "epoch": 2.7429951690821257, "grad_norm": 2.372393553572251, "learning_rate": 3.8245429334882955e-07, "loss": 0.2129, "step": 36907 }, { "epoch": 2.7430694908955777, "grad_norm": 2.4547145392774654, "learning_rate": 3.8223454548478424e-07, "loss": 0.3444, "step": 36908 }, { "epoch": 2.74314381270903, "grad_norm": 2.4085214476306605, "learning_rate": 3.820148595394135e-07, "loss": 0.2095, "step": 36909 }, { "epoch": 2.743218134522482, "grad_norm": 2.2007957263301328, "learning_rate": 3.8179523551413186e-07, "loss": 0.2484, "step": 36910 }, { "epoch": 2.7432924563359347, "grad_norm": 2.687149735116216, "learning_rate": 3.815756734103537e-07, "loss": 0.3123, "step": 36911 }, { "epoch": 2.7433667781493867, "grad_norm": 2.0915180195390244, "learning_rate": 3.8135617322949125e-07, "loss": 0.2691, "step": 36912 }, { "epoch": 2.743441099962839, "grad_norm": 2.7386166091549944, "learning_rate": 3.8113673497296e-07, "loss": 0.2611, "step": 36913 }, { "epoch": 2.7435154217762916, "grad_norm": 2.3454083543049857, "learning_rate": 3.8091735864217104e-07, "loss": 0.2872, "step": 36914 }, { "epoch": 2.7435897435897436, "grad_norm": 1.9185979158856115, "learning_rate": 3.806980442385377e-07, "loss": 0.1834, "step": 36915 }, { "epoch": 2.7436640654031956, "grad_norm": 2.4903713632256257, "learning_rate": 3.804787917634711e-07, "loss": 0.2772, "step": 36916 }, { "epoch": 2.743738387216648, "grad_norm": 3.3738088878667014, "learning_rate": 3.802596012183812e-07, "loss": 0.3479, "step": 36917 }, { "epoch": 2.7438127090301005, "grad_norm": 2.10211325635626, "learning_rate": 3.8004047260468245e-07, "loss": 0.2677, "step": 36918 }, { "epoch": 2.7438870308435526, "grad_norm": 3.2718249863762043, "learning_rate": 3.798214059237826e-07, "loss": 0.223, "step": 36919 }, { "epoch": 2.7439613526570046, "grad_norm": 3.9527181152882647, "learning_rate": 3.796024011770949e-07, "loss": 0.3196, "step": 36920 }, { "epoch": 2.744035674470457, "grad_norm": 2.0445138051581133, "learning_rate": 3.793834583660272e-07, "loss": 0.2474, "step": 36921 }, { "epoch": 2.7441099962839095, "grad_norm": 3.50555882088603, "learning_rate": 3.7916457749198945e-07, "loss": 0.2731, "step": 36922 }, { "epoch": 2.7441843180973615, "grad_norm": 2.4420136767884855, "learning_rate": 3.789457585563916e-07, "loss": 0.2682, "step": 36923 }, { "epoch": 2.744258639910814, "grad_norm": 3.003182718227932, "learning_rate": 3.7872700156064034e-07, "loss": 0.3821, "step": 36924 }, { "epoch": 2.744332961724266, "grad_norm": 2.541595968183898, "learning_rate": 3.785083065061468e-07, "loss": 0.2649, "step": 36925 }, { "epoch": 2.7444072835377185, "grad_norm": 2.0717340142536176, "learning_rate": 3.7828967339431645e-07, "loss": 0.1971, "step": 36926 }, { "epoch": 2.7444816053511705, "grad_norm": 1.9721522657020507, "learning_rate": 3.780711022265593e-07, "loss": 0.2294, "step": 36927 }, { "epoch": 2.744555927164623, "grad_norm": 2.479921443116665, "learning_rate": 3.778525930042809e-07, "loss": 0.2329, "step": 36928 }, { "epoch": 2.744630248978075, "grad_norm": 2.3767514230634648, "learning_rate": 3.776341457288868e-07, "loss": 0.2482, "step": 36929 }, { "epoch": 2.7447045707915274, "grad_norm": 2.170141882313161, "learning_rate": 3.77415760401787e-07, "loss": 0.2755, "step": 36930 }, { "epoch": 2.7447788926049794, "grad_norm": 2.280592375950699, "learning_rate": 3.771974370243836e-07, "loss": 0.2529, "step": 36931 }, { "epoch": 2.744853214418432, "grad_norm": 2.3617990698398943, "learning_rate": 3.769791755980834e-07, "loss": 0.2527, "step": 36932 }, { "epoch": 2.744927536231884, "grad_norm": 2.4051179833028535, "learning_rate": 3.7676097612429297e-07, "loss": 0.3023, "step": 36933 }, { "epoch": 2.7450018580453364, "grad_norm": 2.137944828198493, "learning_rate": 3.7654283860441564e-07, "loss": 0.2343, "step": 36934 }, { "epoch": 2.7450761798587884, "grad_norm": 2.2364925899791275, "learning_rate": 3.7632476303985697e-07, "loss": 0.2686, "step": 36935 }, { "epoch": 2.745150501672241, "grad_norm": 2.509887030807428, "learning_rate": 3.761067494320192e-07, "loss": 0.3008, "step": 36936 }, { "epoch": 2.7452248234856933, "grad_norm": 2.7949766046444404, "learning_rate": 3.758887977823078e-07, "loss": 0.3406, "step": 36937 }, { "epoch": 2.7452991452991453, "grad_norm": 1.9606342040278075, "learning_rate": 3.7567090809212504e-07, "loss": 0.2159, "step": 36938 }, { "epoch": 2.7453734671125973, "grad_norm": 2.8847520452492628, "learning_rate": 3.75453080362872e-07, "loss": 0.296, "step": 36939 }, { "epoch": 2.74544778892605, "grad_norm": 1.9913115003155624, "learning_rate": 3.752353145959542e-07, "loss": 0.2787, "step": 36940 }, { "epoch": 2.7455221107395023, "grad_norm": 1.8344514227515274, "learning_rate": 3.750176107927705e-07, "loss": 0.2071, "step": 36941 }, { "epoch": 2.7455964325529543, "grad_norm": 1.9875749282818715, "learning_rate": 3.747999689547255e-07, "loss": 0.2182, "step": 36942 }, { "epoch": 2.7456707543664063, "grad_norm": 2.3496353384575968, "learning_rate": 3.7458238908321897e-07, "loss": 0.2275, "step": 36943 }, { "epoch": 2.7457450761798587, "grad_norm": 2.572485409071816, "learning_rate": 3.7436487117964993e-07, "loss": 0.2837, "step": 36944 }, { "epoch": 2.745819397993311, "grad_norm": 2.6174364639345873, "learning_rate": 3.7414741524542165e-07, "loss": 0.1991, "step": 36945 }, { "epoch": 2.7458937198067632, "grad_norm": 2.0646192939574624, "learning_rate": 3.739300212819319e-07, "loss": 0.286, "step": 36946 }, { "epoch": 2.7459680416202157, "grad_norm": 2.226010879365782, "learning_rate": 3.737126892905829e-07, "loss": 0.2641, "step": 36947 }, { "epoch": 2.7460423634336677, "grad_norm": 2.0739717341023236, "learning_rate": 3.7349541927277135e-07, "loss": 0.2167, "step": 36948 }, { "epoch": 2.74611668524712, "grad_norm": 2.052749798039355, "learning_rate": 3.7327821122989717e-07, "loss": 0.2713, "step": 36949 }, { "epoch": 2.746191007060572, "grad_norm": 1.9349176814151587, "learning_rate": 3.7306106516335817e-07, "loss": 0.1933, "step": 36950 }, { "epoch": 2.7462653288740246, "grad_norm": 2.803363323991763, "learning_rate": 3.72843981074551e-07, "loss": 0.2338, "step": 36951 }, { "epoch": 2.7463396506874767, "grad_norm": 2.4897100743270193, "learning_rate": 3.726269589648768e-07, "loss": 0.2914, "step": 36952 }, { "epoch": 2.746413972500929, "grad_norm": 2.3051801800580787, "learning_rate": 3.724099988357299e-07, "loss": 0.2244, "step": 36953 }, { "epoch": 2.746488294314381, "grad_norm": 2.455187077564022, "learning_rate": 3.721931006885071e-07, "loss": 0.2469, "step": 36954 }, { "epoch": 2.7465626161278336, "grad_norm": 2.7055197249952974, "learning_rate": 3.719762645246061e-07, "loss": 0.2347, "step": 36955 }, { "epoch": 2.7466369379412856, "grad_norm": 2.5843933489531317, "learning_rate": 3.7175949034542246e-07, "loss": 0.2857, "step": 36956 }, { "epoch": 2.746711259754738, "grad_norm": 2.708581744662949, "learning_rate": 3.715427781523517e-07, "loss": 0.3461, "step": 36957 }, { "epoch": 2.74678558156819, "grad_norm": 1.9213787268249596, "learning_rate": 3.7132612794678946e-07, "loss": 0.2144, "step": 36958 }, { "epoch": 2.7468599033816425, "grad_norm": 2.787779573727096, "learning_rate": 3.7110953973012897e-07, "loss": 0.357, "step": 36959 }, { "epoch": 2.746934225195095, "grad_norm": 2.487608973742809, "learning_rate": 3.7089301350376806e-07, "loss": 0.2982, "step": 36960 }, { "epoch": 2.747008547008547, "grad_norm": 2.234998321332977, "learning_rate": 3.706765492690956e-07, "loss": 0.2888, "step": 36961 }, { "epoch": 2.747082868821999, "grad_norm": 2.622220827277904, "learning_rate": 3.704601470275093e-07, "loss": 0.3177, "step": 36962 }, { "epoch": 2.7471571906354515, "grad_norm": 1.9834437871541846, "learning_rate": 3.702438067803993e-07, "loss": 0.2151, "step": 36963 }, { "epoch": 2.747231512448904, "grad_norm": 2.448633820424686, "learning_rate": 3.7002752852916103e-07, "loss": 0.2567, "step": 36964 }, { "epoch": 2.747305834262356, "grad_norm": 2.595976555565451, "learning_rate": 3.6981131227518676e-07, "loss": 0.3046, "step": 36965 }, { "epoch": 2.747380156075808, "grad_norm": 2.2576904306553427, "learning_rate": 3.6959515801986534e-07, "loss": 0.2586, "step": 36966 }, { "epoch": 2.7474544778892604, "grad_norm": 2.0508630760390325, "learning_rate": 3.693790657645935e-07, "loss": 0.2189, "step": 36967 }, { "epoch": 2.747528799702713, "grad_norm": 2.0197213320196292, "learning_rate": 3.6916303551075674e-07, "loss": 0.2488, "step": 36968 }, { "epoch": 2.747603121516165, "grad_norm": 2.6060910385674694, "learning_rate": 3.6894706725975063e-07, "loss": 0.2421, "step": 36969 }, { "epoch": 2.7476774433296174, "grad_norm": 2.290312336192813, "learning_rate": 3.687311610129629e-07, "loss": 0.2668, "step": 36970 }, { "epoch": 2.7477517651430694, "grad_norm": 2.6823831716503603, "learning_rate": 3.6851531677178473e-07, "loss": 0.3105, "step": 36971 }, { "epoch": 2.747826086956522, "grad_norm": 2.267097474038884, "learning_rate": 3.68299534537605e-07, "loss": 0.2646, "step": 36972 }, { "epoch": 2.747900408769974, "grad_norm": 2.5044783367276953, "learning_rate": 3.680838143118126e-07, "loss": 0.2991, "step": 36973 }, { "epoch": 2.7479747305834263, "grad_norm": 2.05234493447781, "learning_rate": 3.678681560957975e-07, "loss": 0.2337, "step": 36974 }, { "epoch": 2.7480490523968784, "grad_norm": 2.2811592400358225, "learning_rate": 3.676525598909475e-07, "loss": 0.277, "step": 36975 }, { "epoch": 2.748123374210331, "grad_norm": 2.181805734085974, "learning_rate": 3.6743702569864926e-07, "loss": 0.2721, "step": 36976 }, { "epoch": 2.748197696023783, "grad_norm": 2.8642929644291955, "learning_rate": 3.6722155352029274e-07, "loss": 0.3477, "step": 36977 }, { "epoch": 2.7482720178372353, "grad_norm": 1.859472814119933, "learning_rate": 3.670061433572625e-07, "loss": 0.2188, "step": 36978 }, { "epoch": 2.7483463396506878, "grad_norm": 1.5987618652129276, "learning_rate": 3.667907952109484e-07, "loss": 0.1548, "step": 36979 }, { "epoch": 2.7484206614641398, "grad_norm": 2.079390732277943, "learning_rate": 3.66575509082735e-07, "loss": 0.2435, "step": 36980 }, { "epoch": 2.748494983277592, "grad_norm": 2.7221007561647474, "learning_rate": 3.663602849740089e-07, "loss": 0.3045, "step": 36981 }, { "epoch": 2.7485693050910442, "grad_norm": 2.3742580880400177, "learning_rate": 3.661451228861568e-07, "loss": 0.2485, "step": 36982 }, { "epoch": 2.7486436269044967, "grad_norm": 2.4771527026700437, "learning_rate": 3.659300228205598e-07, "loss": 0.2886, "step": 36983 }, { "epoch": 2.7487179487179487, "grad_norm": 2.124264707731262, "learning_rate": 3.657149847786068e-07, "loss": 0.2016, "step": 36984 }, { "epoch": 2.7487922705314007, "grad_norm": 2.51093408486382, "learning_rate": 3.6550000876168e-07, "loss": 0.219, "step": 36985 }, { "epoch": 2.748866592344853, "grad_norm": 2.7869437556466927, "learning_rate": 3.65285094771165e-07, "loss": 0.3089, "step": 36986 }, { "epoch": 2.7489409141583057, "grad_norm": 2.4737495920318864, "learning_rate": 3.6507024280844515e-07, "loss": 0.2997, "step": 36987 }, { "epoch": 2.7490152359717577, "grad_norm": 2.646599741693532, "learning_rate": 3.648554528749015e-07, "loss": 0.2111, "step": 36988 }, { "epoch": 2.7490895577852097, "grad_norm": 2.573396520827517, "learning_rate": 3.646407249719197e-07, "loss": 0.3557, "step": 36989 }, { "epoch": 2.749163879598662, "grad_norm": 2.1342921804243207, "learning_rate": 3.644260591008797e-07, "loss": 0.2693, "step": 36990 }, { "epoch": 2.7492382014121146, "grad_norm": 2.891874219204324, "learning_rate": 3.6421145526316704e-07, "loss": 0.312, "step": 36991 }, { "epoch": 2.7493125232255666, "grad_norm": 2.2006651672145003, "learning_rate": 3.6399691346015954e-07, "loss": 0.2278, "step": 36992 }, { "epoch": 2.749386845039019, "grad_norm": 2.5813124556681766, "learning_rate": 3.637824336932416e-07, "loss": 0.268, "step": 36993 }, { "epoch": 2.749461166852471, "grad_norm": 2.7657652826988595, "learning_rate": 3.635680159637911e-07, "loss": 0.2466, "step": 36994 }, { "epoch": 2.7495354886659236, "grad_norm": 1.9342877605022362, "learning_rate": 3.6335366027318906e-07, "loss": 0.187, "step": 36995 }, { "epoch": 2.7496098104793756, "grad_norm": 2.693940297560887, "learning_rate": 3.631393666228167e-07, "loss": 0.2713, "step": 36996 }, { "epoch": 2.749684132292828, "grad_norm": 1.819821438995306, "learning_rate": 3.6292513501405393e-07, "loss": 0.2326, "step": 36997 }, { "epoch": 2.74975845410628, "grad_norm": 2.8952456662477206, "learning_rate": 3.627109654482774e-07, "loss": 0.3855, "step": 36998 }, { "epoch": 2.7498327759197325, "grad_norm": 2.4379027666023867, "learning_rate": 3.624968579268684e-07, "loss": 0.2676, "step": 36999 }, { "epoch": 2.7499070977331845, "grad_norm": 2.4819265611647245, "learning_rate": 3.6228281245120457e-07, "loss": 0.2799, "step": 37000 }, { "epoch": 2.749981419546637, "grad_norm": 3.3082636793319304, "learning_rate": 3.620688290226637e-07, "loss": 0.3403, "step": 37001 }, { "epoch": 2.7500557413600895, "grad_norm": 2.8824257072207757, "learning_rate": 3.6185490764262477e-07, "loss": 0.2676, "step": 37002 }, { "epoch": 2.7501300631735415, "grad_norm": 2.2844819120428275, "learning_rate": 3.616410483124622e-07, "loss": 0.2603, "step": 37003 }, { "epoch": 2.7502043849869935, "grad_norm": 2.7828400882393627, "learning_rate": 3.614272510335559e-07, "loss": 0.3108, "step": 37004 }, { "epoch": 2.750278706800446, "grad_norm": 2.7641029141782987, "learning_rate": 3.6121351580728046e-07, "loss": 0.2919, "step": 37005 }, { "epoch": 2.7503530286138984, "grad_norm": 2.4559435955401274, "learning_rate": 3.6099984263501253e-07, "loss": 0.2211, "step": 37006 }, { "epoch": 2.7504273504273504, "grad_norm": 1.6876624495032755, "learning_rate": 3.6078623151812654e-07, "loss": 0.1808, "step": 37007 }, { "epoch": 2.7505016722408024, "grad_norm": 2.3668755834507222, "learning_rate": 3.605726824579991e-07, "loss": 0.294, "step": 37008 }, { "epoch": 2.750575994054255, "grad_norm": 2.4250275235112118, "learning_rate": 3.603591954560048e-07, "loss": 0.3515, "step": 37009 }, { "epoch": 2.7506503158677074, "grad_norm": 2.7398458629791183, "learning_rate": 3.6014577051351697e-07, "loss": 0.3092, "step": 37010 }, { "epoch": 2.7507246376811594, "grad_norm": 3.209756459503441, "learning_rate": 3.5993240763191217e-07, "loss": 0.3619, "step": 37011 }, { "epoch": 2.7507989594946114, "grad_norm": 2.1150600199412706, "learning_rate": 3.597191068125616e-07, "loss": 0.253, "step": 37012 }, { "epoch": 2.750873281308064, "grad_norm": 3.0342355376204786, "learning_rate": 3.595058680568375e-07, "loss": 0.3472, "step": 37013 }, { "epoch": 2.7509476031215163, "grad_norm": 2.199498390398566, "learning_rate": 3.5929269136611655e-07, "loss": 0.2979, "step": 37014 }, { "epoch": 2.7510219249349683, "grad_norm": 2.1091153587619336, "learning_rate": 3.590795767417676e-07, "loss": 0.2269, "step": 37015 }, { "epoch": 2.751096246748421, "grad_norm": 3.802580783248076, "learning_rate": 3.588665241851663e-07, "loss": 0.3873, "step": 37016 }, { "epoch": 2.751170568561873, "grad_norm": 1.9141628331158054, "learning_rate": 3.586535336976804e-07, "loss": 0.1899, "step": 37017 }, { "epoch": 2.7512448903753253, "grad_norm": 3.005029949923514, "learning_rate": 3.5844060528068326e-07, "loss": 0.2666, "step": 37018 }, { "epoch": 2.7513192121887773, "grad_norm": 2.602938007162511, "learning_rate": 3.5822773893554487e-07, "loss": 0.2319, "step": 37019 }, { "epoch": 2.7513935340022297, "grad_norm": 3.0521588471626657, "learning_rate": 3.5801493466363525e-07, "loss": 0.2833, "step": 37020 }, { "epoch": 2.7514678558156818, "grad_norm": 2.01829368235284, "learning_rate": 3.5780219246632663e-07, "loss": 0.2807, "step": 37021 }, { "epoch": 2.7515421776291342, "grad_norm": 3.381560048159798, "learning_rate": 3.575895123449857e-07, "loss": 0.2858, "step": 37022 }, { "epoch": 2.7516164994425862, "grad_norm": 2.422893343606071, "learning_rate": 3.573768943009848e-07, "loss": 0.2583, "step": 37023 }, { "epoch": 2.7516908212560387, "grad_norm": 2.634463881081039, "learning_rate": 3.571643383356915e-07, "loss": 0.2468, "step": 37024 }, { "epoch": 2.751765143069491, "grad_norm": 2.6843393606094295, "learning_rate": 3.569518444504716e-07, "loss": 0.2857, "step": 37025 }, { "epoch": 2.751839464882943, "grad_norm": 2.092738239683016, "learning_rate": 3.567394126466972e-07, "loss": 0.2077, "step": 37026 }, { "epoch": 2.751913786696395, "grad_norm": 2.2426510424049595, "learning_rate": 3.5652704292573395e-07, "loss": 0.2521, "step": 37027 }, { "epoch": 2.7519881085098477, "grad_norm": 2.670774217433497, "learning_rate": 3.5631473528894953e-07, "loss": 0.3413, "step": 37028 }, { "epoch": 2.7520624303233, "grad_norm": 2.6724121380873984, "learning_rate": 3.5610248973770857e-07, "loss": 0.2529, "step": 37029 }, { "epoch": 2.752136752136752, "grad_norm": 2.2558242752195468, "learning_rate": 3.5589030627338093e-07, "loss": 0.2575, "step": 37030 }, { "epoch": 2.752211073950204, "grad_norm": 1.988643225677948, "learning_rate": 3.556781848973312e-07, "loss": 0.234, "step": 37031 }, { "epoch": 2.7522853957636566, "grad_norm": 2.697274887716789, "learning_rate": 3.5546612561092375e-07, "loss": 0.3298, "step": 37032 }, { "epoch": 2.752359717577109, "grad_norm": 2.610260317141753, "learning_rate": 3.552541284155253e-07, "loss": 0.2891, "step": 37033 }, { "epoch": 2.752434039390561, "grad_norm": 1.9365439937632891, "learning_rate": 3.5504219331250147e-07, "loss": 0.2437, "step": 37034 }, { "epoch": 2.7525083612040135, "grad_norm": 2.5580214603157767, "learning_rate": 3.548303203032144e-07, "loss": 0.2543, "step": 37035 }, { "epoch": 2.7525826830174656, "grad_norm": 2.4893822043748264, "learning_rate": 3.5461850938902976e-07, "loss": 0.297, "step": 37036 }, { "epoch": 2.752657004830918, "grad_norm": 1.8039367217577587, "learning_rate": 3.5440676057130975e-07, "loss": 0.2212, "step": 37037 }, { "epoch": 2.75273132664437, "grad_norm": 2.2622974614861295, "learning_rate": 3.5419507385141995e-07, "loss": 0.2547, "step": 37038 }, { "epoch": 2.7528056484578225, "grad_norm": 1.95228218420818, "learning_rate": 3.5398344923072147e-07, "loss": 0.1743, "step": 37039 }, { "epoch": 2.7528799702712745, "grad_norm": 2.5528619013808878, "learning_rate": 3.537718867105766e-07, "loss": 0.2601, "step": 37040 }, { "epoch": 2.752954292084727, "grad_norm": 1.8279874381184735, "learning_rate": 3.535603862923487e-07, "loss": 0.2155, "step": 37041 }, { "epoch": 2.753028613898179, "grad_norm": 2.261399418087597, "learning_rate": 3.5334894797739663e-07, "loss": 0.2029, "step": 37042 }, { "epoch": 2.7531029357116314, "grad_norm": 2.07759944417423, "learning_rate": 3.53137571767086e-07, "loss": 0.237, "step": 37043 }, { "epoch": 2.7531772575250835, "grad_norm": 2.028466079241062, "learning_rate": 3.529262576627723e-07, "loss": 0.1921, "step": 37044 }, { "epoch": 2.753251579338536, "grad_norm": 1.9348244994676485, "learning_rate": 3.527150056658213e-07, "loss": 0.241, "step": 37045 }, { "epoch": 2.753325901151988, "grad_norm": 2.387019747293973, "learning_rate": 3.525038157775895e-07, "loss": 0.2653, "step": 37046 }, { "epoch": 2.7534002229654404, "grad_norm": 2.2334115497767675, "learning_rate": 3.5229268799943704e-07, "loss": 0.2625, "step": 37047 }, { "epoch": 2.753474544778893, "grad_norm": 2.206674365556295, "learning_rate": 3.5208162233272504e-07, "loss": 0.2576, "step": 37048 }, { "epoch": 2.753548866592345, "grad_norm": 3.4880099760849297, "learning_rate": 3.51870618778809e-07, "loss": 0.2699, "step": 37049 }, { "epoch": 2.753623188405797, "grad_norm": 1.8636052450896812, "learning_rate": 3.5165967733905237e-07, "loss": 0.2757, "step": 37050 }, { "epoch": 2.7536975102192494, "grad_norm": 2.6849030717212, "learning_rate": 3.514487980148074e-07, "loss": 0.2981, "step": 37051 }, { "epoch": 2.753771832032702, "grad_norm": 1.884969481468739, "learning_rate": 3.512379808074351e-07, "loss": 0.2136, "step": 37052 }, { "epoch": 2.753846153846154, "grad_norm": 2.0971373455600375, "learning_rate": 3.510272257182923e-07, "loss": 0.27, "step": 37053 }, { "epoch": 2.753920475659606, "grad_norm": 2.4478415246081857, "learning_rate": 3.508165327487345e-07, "loss": 0.3514, "step": 37054 }, { "epoch": 2.7539947974730583, "grad_norm": 1.7511922556672448, "learning_rate": 3.506059019001207e-07, "loss": 0.1953, "step": 37055 }, { "epoch": 2.7540691192865108, "grad_norm": 2.8928631792213384, "learning_rate": 3.5039533317380526e-07, "loss": 0.3301, "step": 37056 }, { "epoch": 2.754143441099963, "grad_norm": 2.922244856551181, "learning_rate": 3.5018482657114274e-07, "loss": 0.2712, "step": 37057 }, { "epoch": 2.7542177629134152, "grad_norm": 1.981963692144148, "learning_rate": 3.499743820934909e-07, "loss": 0.1663, "step": 37058 }, { "epoch": 2.7542920847268673, "grad_norm": 2.9198756026933474, "learning_rate": 3.49763999742202e-07, "loss": 0.294, "step": 37059 }, { "epoch": 2.7543664065403197, "grad_norm": 2.195341986255008, "learning_rate": 3.4955367951863383e-07, "loss": 0.2287, "step": 37060 }, { "epoch": 2.7544407283537717, "grad_norm": 2.292042811700966, "learning_rate": 3.4934342142413756e-07, "loss": 0.2964, "step": 37061 }, { "epoch": 2.754515050167224, "grad_norm": 2.7412720316636228, "learning_rate": 3.491332254600677e-07, "loss": 0.3686, "step": 37062 }, { "epoch": 2.754589371980676, "grad_norm": 2.2801642382833385, "learning_rate": 3.4892309162777637e-07, "loss": 0.2033, "step": 37063 }, { "epoch": 2.7546636937941287, "grad_norm": 3.185038374519159, "learning_rate": 3.4871301992861704e-07, "loss": 0.3419, "step": 37064 }, { "epoch": 2.7547380156075807, "grad_norm": 1.966859181483422, "learning_rate": 3.4850301036394416e-07, "loss": 0.1695, "step": 37065 }, { "epoch": 2.754812337421033, "grad_norm": 2.413112508531283, "learning_rate": 3.482930629351067e-07, "loss": 0.2809, "step": 37066 }, { "epoch": 2.754886659234485, "grad_norm": 2.663863572209964, "learning_rate": 3.48083177643459e-07, "loss": 0.2865, "step": 37067 }, { "epoch": 2.7549609810479376, "grad_norm": 3.254524910369561, "learning_rate": 3.4787335449035007e-07, "loss": 0.276, "step": 37068 }, { "epoch": 2.7550353028613896, "grad_norm": 2.7966180945793013, "learning_rate": 3.4766359347713105e-07, "loss": 0.299, "step": 37069 }, { "epoch": 2.755109624674842, "grad_norm": 2.409520885645136, "learning_rate": 3.474538946051542e-07, "loss": 0.3036, "step": 37070 }, { "epoch": 2.7551839464882946, "grad_norm": 2.14180798052707, "learning_rate": 3.4724425787576734e-07, "loss": 0.2693, "step": 37071 }, { "epoch": 2.7552582683017466, "grad_norm": 2.1727811204306304, "learning_rate": 3.4703468329032266e-07, "loss": 0.282, "step": 37072 }, { "epoch": 2.7553325901151986, "grad_norm": 3.3215839565749827, "learning_rate": 3.4682517085016686e-07, "loss": 0.3598, "step": 37073 }, { "epoch": 2.755406911928651, "grad_norm": 2.2454298580125074, "learning_rate": 3.4661572055665116e-07, "loss": 0.286, "step": 37074 }, { "epoch": 2.7554812337421035, "grad_norm": 1.7573257902775796, "learning_rate": 3.4640633241112107e-07, "loss": 0.1899, "step": 37075 }, { "epoch": 2.7555555555555555, "grad_norm": 2.4715353937786375, "learning_rate": 3.461970064149256e-07, "loss": 0.306, "step": 37076 }, { "epoch": 2.7556298773690076, "grad_norm": 2.8080892290304855, "learning_rate": 3.4598774256941356e-07, "loss": 0.339, "step": 37077 }, { "epoch": 2.75570419918246, "grad_norm": 2.299989284593416, "learning_rate": 3.457785408759329e-07, "loss": 0.2417, "step": 37078 }, { "epoch": 2.7557785209959125, "grad_norm": 2.345980613592804, "learning_rate": 3.4556940133582685e-07, "loss": 0.2254, "step": 37079 }, { "epoch": 2.7558528428093645, "grad_norm": 1.9713583466083533, "learning_rate": 3.4536032395044436e-07, "loss": 0.2357, "step": 37080 }, { "epoch": 2.755927164622817, "grad_norm": 2.5195556092398768, "learning_rate": 3.451513087211311e-07, "loss": 0.2293, "step": 37081 }, { "epoch": 2.756001486436269, "grad_norm": 2.4387291893900964, "learning_rate": 3.4494235564923374e-07, "loss": 0.2723, "step": 37082 }, { "epoch": 2.7560758082497214, "grad_norm": 2.763481877468647, "learning_rate": 3.447334647360967e-07, "loss": 0.3619, "step": 37083 }, { "epoch": 2.7561501300631734, "grad_norm": 2.6989734754758197, "learning_rate": 3.4452463598306453e-07, "loss": 0.3558, "step": 37084 }, { "epoch": 2.756224451876626, "grad_norm": 1.9941034011730936, "learning_rate": 3.4431586939148163e-07, "loss": 0.2394, "step": 37085 }, { "epoch": 2.756298773690078, "grad_norm": 2.474259916817994, "learning_rate": 3.441071649626915e-07, "loss": 0.2132, "step": 37086 }, { "epoch": 2.7563730955035304, "grad_norm": 2.2986741807564837, "learning_rate": 3.438985226980396e-07, "loss": 0.2281, "step": 37087 }, { "epoch": 2.7564474173169824, "grad_norm": 3.0543121522347594, "learning_rate": 3.4368994259886604e-07, "loss": 0.3484, "step": 37088 }, { "epoch": 2.756521739130435, "grad_norm": 3.197634663830414, "learning_rate": 3.4348142466651745e-07, "loss": 0.2934, "step": 37089 }, { "epoch": 2.756596060943887, "grad_norm": 2.8813851404250106, "learning_rate": 3.43272968902334e-07, "loss": 0.2701, "step": 37090 }, { "epoch": 2.7566703827573393, "grad_norm": 2.1562643693202106, "learning_rate": 3.430645753076567e-07, "loss": 0.2887, "step": 37091 }, { "epoch": 2.7567447045707913, "grad_norm": 2.756005380476508, "learning_rate": 3.428562438838301e-07, "loss": 0.274, "step": 37092 }, { "epoch": 2.756819026384244, "grad_norm": 1.775993983384524, "learning_rate": 3.426479746321931e-07, "loss": 0.1905, "step": 37093 }, { "epoch": 2.7568933481976963, "grad_norm": 2.3162668732475615, "learning_rate": 3.42439767554088e-07, "loss": 0.2624, "step": 37094 }, { "epoch": 2.7569676700111483, "grad_norm": 2.1967282725261312, "learning_rate": 3.4223162265085484e-07, "loss": 0.2242, "step": 37095 }, { "epoch": 2.7570419918246003, "grad_norm": 2.0109396382411857, "learning_rate": 3.4202353992383365e-07, "loss": 0.2061, "step": 37096 }, { "epoch": 2.7571163136380528, "grad_norm": 2.3478646762464295, "learning_rate": 3.418155193743633e-07, "loss": 0.2489, "step": 37097 }, { "epoch": 2.7571906354515052, "grad_norm": 3.0880663524168668, "learning_rate": 3.416075610037828e-07, "loss": 0.2411, "step": 37098 }, { "epoch": 2.7572649572649572, "grad_norm": 2.5599875779433416, "learning_rate": 3.4139966481343213e-07, "loss": 0.2513, "step": 37099 }, { "epoch": 2.7573392790784093, "grad_norm": 2.180643575230968, "learning_rate": 3.4119183080465025e-07, "loss": 0.2794, "step": 37100 }, { "epoch": 2.7574136008918617, "grad_norm": 2.7378363894193, "learning_rate": 3.4098405897877164e-07, "loss": 0.268, "step": 37101 }, { "epoch": 2.757487922705314, "grad_norm": 2.565014763830413, "learning_rate": 3.4077634933713854e-07, "loss": 0.2921, "step": 37102 }, { "epoch": 2.757562244518766, "grad_norm": 2.6463079327697017, "learning_rate": 3.405687018810855e-07, "loss": 0.347, "step": 37103 }, { "epoch": 2.7576365663322187, "grad_norm": 1.9719806734540295, "learning_rate": 3.4036111661195027e-07, "loss": 0.2123, "step": 37104 }, { "epoch": 2.7577108881456707, "grad_norm": 2.3442565057719453, "learning_rate": 3.401535935310696e-07, "loss": 0.2452, "step": 37105 }, { "epoch": 2.757785209959123, "grad_norm": 2.1320245797263397, "learning_rate": 3.3994613263977793e-07, "loss": 0.189, "step": 37106 }, { "epoch": 2.757859531772575, "grad_norm": 2.1321431580648826, "learning_rate": 3.397387339394131e-07, "loss": 0.2355, "step": 37107 }, { "epoch": 2.7579338535860276, "grad_norm": 2.2468145325523934, "learning_rate": 3.395313974313064e-07, "loss": 0.2641, "step": 37108 }, { "epoch": 2.7580081753994796, "grad_norm": 2.463348203531289, "learning_rate": 3.393241231167976e-07, "loss": 0.3213, "step": 37109 }, { "epoch": 2.758082497212932, "grad_norm": 2.499551134911418, "learning_rate": 3.3911691099721813e-07, "loss": 0.2661, "step": 37110 }, { "epoch": 2.758156819026384, "grad_norm": 2.507877884767946, "learning_rate": 3.389097610739023e-07, "loss": 0.3375, "step": 37111 }, { "epoch": 2.7582311408398366, "grad_norm": 2.125536168186014, "learning_rate": 3.3870267334818575e-07, "loss": 0.29, "step": 37112 }, { "epoch": 2.7583054626532886, "grad_norm": 2.9379599786662536, "learning_rate": 3.3849564782139745e-07, "loss": 0.2841, "step": 37113 }, { "epoch": 2.758379784466741, "grad_norm": 1.9484793462444725, "learning_rate": 3.3828868449487517e-07, "loss": 0.1902, "step": 37114 }, { "epoch": 2.758454106280193, "grad_norm": 2.424594831748478, "learning_rate": 3.380817833699468e-07, "loss": 0.2959, "step": 37115 }, { "epoch": 2.7585284280936455, "grad_norm": 2.5726886515786465, "learning_rate": 3.37874944447949e-07, "loss": 0.2553, "step": 37116 }, { "epoch": 2.758602749907098, "grad_norm": 2.7127796256909353, "learning_rate": 3.376681677302107e-07, "loss": 0.2203, "step": 37117 }, { "epoch": 2.75867707172055, "grad_norm": 2.471810040238652, "learning_rate": 3.37461453218062e-07, "loss": 0.1891, "step": 37118 }, { "epoch": 2.758751393534002, "grad_norm": 2.677215134473528, "learning_rate": 3.3725480091283626e-07, "loss": 0.3069, "step": 37119 }, { "epoch": 2.7588257153474545, "grad_norm": 2.7556346962330363, "learning_rate": 3.370482108158624e-07, "loss": 0.306, "step": 37120 }, { "epoch": 2.758900037160907, "grad_norm": 1.9158020794447208, "learning_rate": 3.368416829284704e-07, "loss": 0.2132, "step": 37121 }, { "epoch": 2.758974358974359, "grad_norm": 3.382473778738215, "learning_rate": 3.3663521725199155e-07, "loss": 0.3446, "step": 37122 }, { "epoch": 2.759048680787811, "grad_norm": 2.7957931405904213, "learning_rate": 3.3642881378775137e-07, "loss": 0.2995, "step": 37123 }, { "epoch": 2.7591230026012634, "grad_norm": 2.2066313956645227, "learning_rate": 3.362224725370833e-07, "loss": 0.2658, "step": 37124 }, { "epoch": 2.759197324414716, "grad_norm": 2.3806654989614358, "learning_rate": 3.3601619350131176e-07, "loss": 0.3127, "step": 37125 }, { "epoch": 2.759271646228168, "grad_norm": 2.5301745970412783, "learning_rate": 3.3580997668176793e-07, "loss": 0.3357, "step": 37126 }, { "epoch": 2.7593459680416204, "grad_norm": 2.136631664806061, "learning_rate": 3.356038220797775e-07, "loss": 0.211, "step": 37127 }, { "epoch": 2.7594202898550724, "grad_norm": 2.4056337767253035, "learning_rate": 3.3539772969666704e-07, "loss": 0.2961, "step": 37128 }, { "epoch": 2.759494611668525, "grad_norm": 2.030375872769457, "learning_rate": 3.3519169953376786e-07, "loss": 0.1906, "step": 37129 }, { "epoch": 2.759568933481977, "grad_norm": 2.3716544790415908, "learning_rate": 3.349857315923999e-07, "loss": 0.2803, "step": 37130 }, { "epoch": 2.7596432552954293, "grad_norm": 2.316856159156143, "learning_rate": 3.3477982587389436e-07, "loss": 0.2495, "step": 37131 }, { "epoch": 2.7597175771088813, "grad_norm": 2.5577883511032713, "learning_rate": 3.345739823795724e-07, "loss": 0.2157, "step": 37132 }, { "epoch": 2.759791898922334, "grad_norm": 2.564301475537229, "learning_rate": 3.3436820111076404e-07, "loss": 0.2795, "step": 37133 }, { "epoch": 2.759866220735786, "grad_norm": 1.7542437196762268, "learning_rate": 3.3416248206879054e-07, "loss": 0.2132, "step": 37134 }, { "epoch": 2.7599405425492383, "grad_norm": 2.229102831047133, "learning_rate": 3.3395682525497744e-07, "loss": 0.2538, "step": 37135 }, { "epoch": 2.7600148643626907, "grad_norm": 2.102001361079934, "learning_rate": 3.337512306706492e-07, "loss": 0.2736, "step": 37136 }, { "epoch": 2.7600891861761427, "grad_norm": 2.2095006253241527, "learning_rate": 3.335456983171281e-07, "loss": 0.2942, "step": 37137 }, { "epoch": 2.7601635079895948, "grad_norm": 2.3812687861076176, "learning_rate": 3.3334022819573875e-07, "loss": 0.2716, "step": 37138 }, { "epoch": 2.760237829803047, "grad_norm": 1.9327375667510462, "learning_rate": 3.331348203078033e-07, "loss": 0.215, "step": 37139 }, { "epoch": 2.7603121516164997, "grad_norm": 3.0026332739680384, "learning_rate": 3.329294746546452e-07, "loss": 0.3027, "step": 37140 }, { "epoch": 2.7603864734299517, "grad_norm": 3.4643654056512774, "learning_rate": 3.327241912375856e-07, "loss": 0.379, "step": 37141 }, { "epoch": 2.7604607952434037, "grad_norm": 2.826611950623507, "learning_rate": 3.3251897005794456e-07, "loss": 0.2719, "step": 37142 }, { "epoch": 2.760535117056856, "grad_norm": 3.466684059755765, "learning_rate": 3.323138111170454e-07, "loss": 0.3016, "step": 37143 }, { "epoch": 2.7606094388703086, "grad_norm": 2.0164952358471293, "learning_rate": 3.321087144162094e-07, "loss": 0.2066, "step": 37144 }, { "epoch": 2.7606837606837606, "grad_norm": 2.3124914247243384, "learning_rate": 3.319036799567543e-07, "loss": 0.2359, "step": 37145 }, { "epoch": 2.7607580824972127, "grad_norm": 2.607519928155032, "learning_rate": 3.3169870774000245e-07, "loss": 0.2504, "step": 37146 }, { "epoch": 2.760832404310665, "grad_norm": 3.0274906026045425, "learning_rate": 3.3149379776727163e-07, "loss": 0.32, "step": 37147 }, { "epoch": 2.7609067261241176, "grad_norm": 2.269951927062758, "learning_rate": 3.31288950039883e-07, "loss": 0.2812, "step": 37148 }, { "epoch": 2.7609810479375696, "grad_norm": 2.7596538277034752, "learning_rate": 3.310841645591556e-07, "loss": 0.3022, "step": 37149 }, { "epoch": 2.761055369751022, "grad_norm": 2.525422720345487, "learning_rate": 3.308794413264038e-07, "loss": 0.3003, "step": 37150 }, { "epoch": 2.761129691564474, "grad_norm": 3.458711791637731, "learning_rate": 3.306747803429511e-07, "loss": 0.3713, "step": 37151 }, { "epoch": 2.7612040133779265, "grad_norm": 2.260473227948303, "learning_rate": 3.304701816101119e-07, "loss": 0.2328, "step": 37152 }, { "epoch": 2.7612783351913786, "grad_norm": 2.6918385751622487, "learning_rate": 3.302656451292041e-07, "loss": 0.2969, "step": 37153 }, { "epoch": 2.761352657004831, "grad_norm": 2.774765457495509, "learning_rate": 3.300611709015422e-07, "loss": 0.3342, "step": 37154 }, { "epoch": 2.761426978818283, "grad_norm": 1.8832944755411163, "learning_rate": 3.298567589284474e-07, "loss": 0.2136, "step": 37155 }, { "epoch": 2.7615013006317355, "grad_norm": 2.4519400072455815, "learning_rate": 3.2965240921123185e-07, "loss": 0.2392, "step": 37156 }, { "epoch": 2.7615756224451875, "grad_norm": 2.872169298682129, "learning_rate": 3.2944812175121133e-07, "loss": 0.2915, "step": 37157 }, { "epoch": 2.76164994425864, "grad_norm": 1.7881147974615221, "learning_rate": 3.292438965497036e-07, "loss": 0.1956, "step": 37158 }, { "epoch": 2.7617242660720924, "grad_norm": 2.3353481650388854, "learning_rate": 3.29039733608022e-07, "loss": 0.315, "step": 37159 }, { "epoch": 2.7617985878855444, "grad_norm": 2.326880983839279, "learning_rate": 3.2883563292748e-07, "loss": 0.2677, "step": 37160 }, { "epoch": 2.7618729096989965, "grad_norm": 2.198881437572794, "learning_rate": 3.2863159450939207e-07, "loss": 0.2768, "step": 37161 }, { "epoch": 2.761947231512449, "grad_norm": 3.0736959609345123, "learning_rate": 3.2842761835507274e-07, "loss": 0.2683, "step": 37162 }, { "epoch": 2.7620215533259014, "grad_norm": 2.2618927779866165, "learning_rate": 3.282237044658354e-07, "loss": 0.2408, "step": 37163 }, { "epoch": 2.7620958751393534, "grad_norm": 1.8812096858756184, "learning_rate": 3.280198528429912e-07, "loss": 0.2726, "step": 37164 }, { "epoch": 2.7621701969528054, "grad_norm": 2.5676557517792413, "learning_rate": 3.2781606348785354e-07, "loss": 0.27, "step": 37165 }, { "epoch": 2.762244518766258, "grad_norm": 2.290555203014091, "learning_rate": 3.276123364017336e-07, "loss": 0.1957, "step": 37166 }, { "epoch": 2.7623188405797103, "grad_norm": 2.592074078757463, "learning_rate": 3.274086715859437e-07, "loss": 0.2979, "step": 37167 }, { "epoch": 2.7623931623931623, "grad_norm": 2.7269777607256662, "learning_rate": 3.272050690417949e-07, "loss": 0.2964, "step": 37168 }, { "epoch": 2.7624674842066144, "grad_norm": 2.289185154442771, "learning_rate": 3.270015287705974e-07, "loss": 0.2661, "step": 37169 }, { "epoch": 2.762541806020067, "grad_norm": 1.989110949423726, "learning_rate": 3.2679805077366344e-07, "loss": 0.2513, "step": 37170 }, { "epoch": 2.7626161278335193, "grad_norm": 2.4177062871291723, "learning_rate": 3.2659463505230195e-07, "loss": 0.2118, "step": 37171 }, { "epoch": 2.7626904496469713, "grad_norm": 2.2030490170560975, "learning_rate": 3.263912816078207e-07, "loss": 0.274, "step": 37172 }, { "epoch": 2.7627647714604238, "grad_norm": 2.036382942486383, "learning_rate": 3.2618799044153106e-07, "loss": 0.2379, "step": 37173 }, { "epoch": 2.762839093273876, "grad_norm": 1.8945336170041562, "learning_rate": 3.259847615547418e-07, "loss": 0.2125, "step": 37174 }, { "epoch": 2.7629134150873282, "grad_norm": 2.7397532407523504, "learning_rate": 3.257815949487608e-07, "loss": 0.2586, "step": 37175 }, { "epoch": 2.7629877369007803, "grad_norm": 2.3586256021669985, "learning_rate": 3.2557849062489486e-07, "loss": 0.2818, "step": 37176 }, { "epoch": 2.7630620587142327, "grad_norm": 2.5089158419131574, "learning_rate": 3.2537544858445403e-07, "loss": 0.2807, "step": 37177 }, { "epoch": 2.7631363805276847, "grad_norm": 2.8244366309228646, "learning_rate": 3.2517246882874386e-07, "loss": 0.2883, "step": 37178 }, { "epoch": 2.763210702341137, "grad_norm": 2.4305311530594778, "learning_rate": 3.2496955135907007e-07, "loss": 0.2895, "step": 37179 }, { "epoch": 2.763285024154589, "grad_norm": 2.1791614648964144, "learning_rate": 3.247666961767404e-07, "loss": 0.1893, "step": 37180 }, { "epoch": 2.7633593459680417, "grad_norm": 2.4283273764673066, "learning_rate": 3.2456390328306164e-07, "loss": 0.2434, "step": 37181 }, { "epoch": 2.763433667781494, "grad_norm": 2.801202667231991, "learning_rate": 3.243611726793383e-07, "loss": 0.2838, "step": 37182 }, { "epoch": 2.763507989594946, "grad_norm": 2.433923126522452, "learning_rate": 3.241585043668749e-07, "loss": 0.275, "step": 37183 }, { "epoch": 2.763582311408398, "grad_norm": 3.8120022187669482, "learning_rate": 3.2395589834697703e-07, "loss": 0.4851, "step": 37184 }, { "epoch": 2.7636566332218506, "grad_norm": 2.8082017255367164, "learning_rate": 3.237533546209504e-07, "loss": 0.269, "step": 37185 }, { "epoch": 2.763730955035303, "grad_norm": 1.996371246917097, "learning_rate": 3.2355087319009717e-07, "loss": 0.2251, "step": 37186 }, { "epoch": 2.763805276848755, "grad_norm": 2.3759525488547077, "learning_rate": 3.233484540557219e-07, "loss": 0.2524, "step": 37187 }, { "epoch": 2.763879598662207, "grad_norm": 3.5425460165852565, "learning_rate": 3.2314609721912584e-07, "loss": 0.4483, "step": 37188 }, { "epoch": 2.7639539204756596, "grad_norm": 2.261363744338031, "learning_rate": 3.2294380268161343e-07, "loss": 0.2589, "step": 37189 }, { "epoch": 2.764028242289112, "grad_norm": 2.267409520828988, "learning_rate": 3.2274157044448697e-07, "loss": 0.2954, "step": 37190 }, { "epoch": 2.764102564102564, "grad_norm": 2.213417644043438, "learning_rate": 3.2253940050904766e-07, "loss": 0.2287, "step": 37191 }, { "epoch": 2.7641768859160165, "grad_norm": 1.987861198077022, "learning_rate": 3.2233729287659777e-07, "loss": 0.2388, "step": 37192 }, { "epoch": 2.7642512077294685, "grad_norm": 1.6886381961991133, "learning_rate": 3.221352475484385e-07, "loss": 0.192, "step": 37193 }, { "epoch": 2.764325529542921, "grad_norm": 2.133640226295837, "learning_rate": 3.2193326452586993e-07, "loss": 0.2272, "step": 37194 }, { "epoch": 2.764399851356373, "grad_norm": 2.5573173878325046, "learning_rate": 3.217313438101932e-07, "loss": 0.3171, "step": 37195 }, { "epoch": 2.7644741731698255, "grad_norm": 2.1794310937054497, "learning_rate": 3.2152948540270734e-07, "loss": 0.2368, "step": 37196 }, { "epoch": 2.7645484949832775, "grad_norm": 2.3766415198764665, "learning_rate": 3.213276893047146e-07, "loss": 0.2823, "step": 37197 }, { "epoch": 2.76462281679673, "grad_norm": 2.12698253566091, "learning_rate": 3.2112595551750945e-07, "loss": 0.2645, "step": 37198 }, { "epoch": 2.764697138610182, "grad_norm": 2.1559539066813223, "learning_rate": 3.209242840423943e-07, "loss": 0.2376, "step": 37199 }, { "epoch": 2.7647714604236344, "grad_norm": 2.1316679433009367, "learning_rate": 3.2072267488066687e-07, "loss": 0.2747, "step": 37200 }, { "epoch": 2.7648457822370864, "grad_norm": 2.5807291313992593, "learning_rate": 3.20521128033624e-07, "loss": 0.2781, "step": 37201 }, { "epoch": 2.764920104050539, "grad_norm": 2.2689167918925763, "learning_rate": 3.203196435025635e-07, "loss": 0.2436, "step": 37202 }, { "epoch": 2.764994425863991, "grad_norm": 2.4862950138061835, "learning_rate": 3.2011822128878433e-07, "loss": 0.3079, "step": 37203 }, { "epoch": 2.7650687476774434, "grad_norm": 2.003193114479883, "learning_rate": 3.199168613935799e-07, "loss": 0.222, "step": 37204 }, { "epoch": 2.765143069490896, "grad_norm": 2.1649423809194444, "learning_rate": 3.197155638182492e-07, "loss": 0.254, "step": 37205 }, { "epoch": 2.765217391304348, "grad_norm": 2.172423715302654, "learning_rate": 3.195143285640867e-07, "loss": 0.3053, "step": 37206 }, { "epoch": 2.7652917131178, "grad_norm": 2.5078968646546125, "learning_rate": 3.193131556323903e-07, "loss": 0.3289, "step": 37207 }, { "epoch": 2.7653660349312523, "grad_norm": 1.9767522327946274, "learning_rate": 3.191120450244534e-07, "loss": 0.2467, "step": 37208 }, { "epoch": 2.765440356744705, "grad_norm": 2.5016087392472026, "learning_rate": 3.1891099674157046e-07, "loss": 0.3482, "step": 37209 }, { "epoch": 2.765514678558157, "grad_norm": 2.8738412114002654, "learning_rate": 3.187100107850361e-07, "loss": 0.3468, "step": 37210 }, { "epoch": 2.765589000371609, "grad_norm": 2.30554720830651, "learning_rate": 3.1850908715614357e-07, "loss": 0.2643, "step": 37211 }, { "epoch": 2.7656633221850613, "grad_norm": 2.7826737444409537, "learning_rate": 3.183082258561876e-07, "loss": 0.2552, "step": 37212 }, { "epoch": 2.7657376439985137, "grad_norm": 2.5901827470612533, "learning_rate": 3.1810742688646033e-07, "loss": 0.295, "step": 37213 }, { "epoch": 2.7658119658119658, "grad_norm": 1.962582052379889, "learning_rate": 3.179066902482564e-07, "loss": 0.2243, "step": 37214 }, { "epoch": 2.765886287625418, "grad_norm": 1.9767885361363107, "learning_rate": 3.177060159428669e-07, "loss": 0.2116, "step": 37215 }, { "epoch": 2.7659606094388702, "grad_norm": 2.105306666091851, "learning_rate": 3.1750540397158193e-07, "loss": 0.2497, "step": 37216 }, { "epoch": 2.7660349312523227, "grad_norm": 2.506739985484049, "learning_rate": 3.1730485433569715e-07, "loss": 0.293, "step": 37217 }, { "epoch": 2.7661092530657747, "grad_norm": 3.067178466784655, "learning_rate": 3.171043670364993e-07, "loss": 0.3303, "step": 37218 }, { "epoch": 2.766183574879227, "grad_norm": 2.8832174243585107, "learning_rate": 3.1690394207528285e-07, "loss": 0.2945, "step": 37219 }, { "epoch": 2.766257896692679, "grad_norm": 1.5366089348613534, "learning_rate": 3.167035794533346e-07, "loss": 0.1603, "step": 37220 }, { "epoch": 2.7663322185061316, "grad_norm": 2.5938838941089677, "learning_rate": 3.16503279171948e-07, "loss": 0.2901, "step": 37221 }, { "epoch": 2.7664065403195837, "grad_norm": 2.6983416084668863, "learning_rate": 3.1630304123240976e-07, "loss": 0.2397, "step": 37222 }, { "epoch": 2.766480862133036, "grad_norm": 2.672028747760372, "learning_rate": 3.161028656360099e-07, "loss": 0.3327, "step": 37223 }, { "epoch": 2.766555183946488, "grad_norm": 2.6066263816988577, "learning_rate": 3.1590275238403854e-07, "loss": 0.3087, "step": 37224 }, { "epoch": 2.7666295057599406, "grad_norm": 2.8680472138937767, "learning_rate": 3.157027014777825e-07, "loss": 0.2719, "step": 37225 }, { "epoch": 2.7667038275733926, "grad_norm": 2.493687340210162, "learning_rate": 3.1550271291852843e-07, "loss": 0.261, "step": 37226 }, { "epoch": 2.766778149386845, "grad_norm": 2.324359975187391, "learning_rate": 3.153027867075664e-07, "loss": 0.3436, "step": 37227 }, { "epoch": 2.7668524712002975, "grad_norm": 2.2407903215448304, "learning_rate": 3.151029228461822e-07, "loss": 0.2457, "step": 37228 }, { "epoch": 2.7669267930137496, "grad_norm": 2.118831369492159, "learning_rate": 3.1490312133566347e-07, "loss": 0.2863, "step": 37229 }, { "epoch": 2.7670011148272016, "grad_norm": 2.2136928526575126, "learning_rate": 3.1470338217729603e-07, "loss": 0.2559, "step": 37230 }, { "epoch": 2.767075436640654, "grad_norm": 2.8000795155046725, "learning_rate": 3.1450370537236543e-07, "loss": 0.3461, "step": 37231 }, { "epoch": 2.7671497584541065, "grad_norm": 2.581065822290477, "learning_rate": 3.1430409092215843e-07, "loss": 0.2952, "step": 37232 }, { "epoch": 2.7672240802675585, "grad_norm": 2.204660460113306, "learning_rate": 3.141045388279573e-07, "loss": 0.2375, "step": 37233 }, { "epoch": 2.7672984020810105, "grad_norm": 3.0236930889020726, "learning_rate": 3.139050490910489e-07, "loss": 0.2584, "step": 37234 }, { "epoch": 2.767372723894463, "grad_norm": 3.0104062299324448, "learning_rate": 3.137056217127177e-07, "loss": 0.2852, "step": 37235 }, { "epoch": 2.7674470457079154, "grad_norm": 2.4300480876801847, "learning_rate": 3.135062566942471e-07, "loss": 0.2633, "step": 37236 }, { "epoch": 2.7675213675213675, "grad_norm": 2.8535962576123715, "learning_rate": 3.133069540369205e-07, "loss": 0.3631, "step": 37237 }, { "epoch": 2.76759568933482, "grad_norm": 1.8269694357222128, "learning_rate": 3.131077137420213e-07, "loss": 0.193, "step": 37238 }, { "epoch": 2.767670011148272, "grad_norm": 2.65988485342411, "learning_rate": 3.129085358108319e-07, "loss": 0.339, "step": 37239 }, { "epoch": 2.7677443329617244, "grad_norm": 8.195940756412675, "learning_rate": 3.1270942024463345e-07, "loss": 0.2593, "step": 37240 }, { "epoch": 2.7678186547751764, "grad_norm": 2.088348593133464, "learning_rate": 3.125103670447116e-07, "loss": 0.2534, "step": 37241 }, { "epoch": 2.767892976588629, "grad_norm": 2.3568007347394886, "learning_rate": 3.123113762123442e-07, "loss": 0.1928, "step": 37242 }, { "epoch": 2.767967298402081, "grad_norm": 2.0881514335643248, "learning_rate": 3.1211244774881354e-07, "loss": 0.2589, "step": 37243 }, { "epoch": 2.7680416202155333, "grad_norm": 2.0826030927561163, "learning_rate": 3.1191358165540086e-07, "loss": 0.2012, "step": 37244 }, { "epoch": 2.7681159420289854, "grad_norm": 1.6800016942624032, "learning_rate": 3.11714777933384e-07, "loss": 0.1289, "step": 37245 }, { "epoch": 2.768190263842438, "grad_norm": 2.4077024471965003, "learning_rate": 3.1151603658404636e-07, "loss": 0.254, "step": 37246 }, { "epoch": 2.76826458565589, "grad_norm": 2.6225028476513925, "learning_rate": 3.1131735760866586e-07, "loss": 0.2268, "step": 37247 }, { "epoch": 2.7683389074693423, "grad_norm": 2.3068705256501025, "learning_rate": 3.1111874100852034e-07, "loss": 0.2603, "step": 37248 }, { "epoch": 2.7684132292827943, "grad_norm": 2.4984286836373766, "learning_rate": 3.10920186784891e-07, "loss": 0.2751, "step": 37249 }, { "epoch": 2.768487551096247, "grad_norm": 3.0313314042397286, "learning_rate": 3.1072169493905346e-07, "loss": 0.3085, "step": 37250 }, { "epoch": 2.7685618729096992, "grad_norm": 2.50088394620695, "learning_rate": 3.105232654722878e-07, "loss": 0.3218, "step": 37251 }, { "epoch": 2.7686361947231513, "grad_norm": 2.5424214432108876, "learning_rate": 3.1032489838586975e-07, "loss": 0.2664, "step": 37252 }, { "epoch": 2.7687105165366033, "grad_norm": 2.487871836857989, "learning_rate": 3.1012659368107824e-07, "loss": 0.2538, "step": 37253 }, { "epoch": 2.7687848383500557, "grad_norm": 2.5319458909793418, "learning_rate": 3.0992835135918884e-07, "loss": 0.2967, "step": 37254 }, { "epoch": 2.768859160163508, "grad_norm": 2.029300899752171, "learning_rate": 3.0973017142147734e-07, "loss": 0.284, "step": 37255 }, { "epoch": 2.76893348197696, "grad_norm": 2.2535318314761055, "learning_rate": 3.0953205386922037e-07, "loss": 0.2272, "step": 37256 }, { "epoch": 2.7690078037904122, "grad_norm": 1.6915064552746677, "learning_rate": 3.093339987036925e-07, "loss": 0.1736, "step": 37257 }, { "epoch": 2.7690821256038647, "grad_norm": 2.5593073143231657, "learning_rate": 3.0913600592617165e-07, "loss": 0.2563, "step": 37258 }, { "epoch": 2.769156447417317, "grad_norm": 2.3438006099431057, "learning_rate": 3.08938075537929e-07, "loss": 0.349, "step": 37259 }, { "epoch": 2.769230769230769, "grad_norm": 2.4384182554582625, "learning_rate": 3.0874020754023905e-07, "loss": 0.2516, "step": 37260 }, { "epoch": 2.7693050910442216, "grad_norm": 2.7371164740118394, "learning_rate": 3.0854240193437857e-07, "loss": 0.3034, "step": 37261 }, { "epoch": 2.7693794128576736, "grad_norm": 2.011709193773537, "learning_rate": 3.083446587216188e-07, "loss": 0.246, "step": 37262 }, { "epoch": 2.769453734671126, "grad_norm": 2.367186793144524, "learning_rate": 3.081469779032331e-07, "loss": 0.2782, "step": 37263 }, { "epoch": 2.769528056484578, "grad_norm": 2.6287269611034922, "learning_rate": 3.0794935948049384e-07, "loss": 0.2939, "step": 37264 }, { "epoch": 2.7696023782980306, "grad_norm": 2.453618020673904, "learning_rate": 3.077518034546745e-07, "loss": 0.2662, "step": 37265 }, { "epoch": 2.7696767001114826, "grad_norm": 2.17572519550822, "learning_rate": 3.0755430982704613e-07, "loss": 0.2501, "step": 37266 }, { "epoch": 2.769751021924935, "grad_norm": 2.3938682177884463, "learning_rate": 3.0735687859888006e-07, "loss": 0.3173, "step": 37267 }, { "epoch": 2.769825343738387, "grad_norm": 2.0279002419337937, "learning_rate": 3.071595097714475e-07, "loss": 0.2071, "step": 37268 }, { "epoch": 2.7698996655518395, "grad_norm": 2.508272488055646, "learning_rate": 3.0696220334601956e-07, "loss": 0.2541, "step": 37269 }, { "epoch": 2.769973987365292, "grad_norm": 1.7524489868010815, "learning_rate": 3.067649593238642e-07, "loss": 0.2122, "step": 37270 }, { "epoch": 2.770048309178744, "grad_norm": 2.0301647731774604, "learning_rate": 3.065677777062548e-07, "loss": 0.2669, "step": 37271 }, { "epoch": 2.770122630992196, "grad_norm": 2.242553800464685, "learning_rate": 3.0637065849445814e-07, "loss": 0.2086, "step": 37272 }, { "epoch": 2.7701969528056485, "grad_norm": 2.988716776564304, "learning_rate": 3.061736016897443e-07, "loss": 0.338, "step": 37273 }, { "epoch": 2.770271274619101, "grad_norm": 2.0715227454122997, "learning_rate": 3.059766072933823e-07, "loss": 0.2758, "step": 37274 }, { "epoch": 2.770345596432553, "grad_norm": 2.2387480972978167, "learning_rate": 3.0577967530663997e-07, "loss": 0.2811, "step": 37275 }, { "epoch": 2.770419918246005, "grad_norm": 2.4474841670508236, "learning_rate": 3.055828057307864e-07, "loss": 0.2728, "step": 37276 }, { "epoch": 2.7704942400594574, "grad_norm": 2.5824653091495717, "learning_rate": 3.0538599856708485e-07, "loss": 0.2615, "step": 37277 }, { "epoch": 2.77056856187291, "grad_norm": 2.2210504404699076, "learning_rate": 3.051892538168066e-07, "loss": 0.2926, "step": 37278 }, { "epoch": 2.770642883686362, "grad_norm": 2.1856725852434344, "learning_rate": 3.049925714812163e-07, "loss": 0.2591, "step": 37279 }, { "epoch": 2.770717205499814, "grad_norm": 3.2183798400388297, "learning_rate": 3.047959515615817e-07, "loss": 0.3002, "step": 37280 }, { "epoch": 2.7707915273132664, "grad_norm": 2.0629397237707083, "learning_rate": 3.045993940591685e-07, "loss": 0.2034, "step": 37281 }, { "epoch": 2.770865849126719, "grad_norm": 1.7212136150715647, "learning_rate": 3.04402898975239e-07, "loss": 0.248, "step": 37282 }, { "epoch": 2.770940170940171, "grad_norm": 2.356773816424105, "learning_rate": 3.042064663110622e-07, "loss": 0.2762, "step": 37283 }, { "epoch": 2.7710144927536233, "grad_norm": 2.224588398740366, "learning_rate": 3.0401009606790156e-07, "loss": 0.2545, "step": 37284 }, { "epoch": 2.7710888145670753, "grad_norm": 2.46024382465269, "learning_rate": 3.0381378824701825e-07, "loss": 0.2959, "step": 37285 }, { "epoch": 2.771163136380528, "grad_norm": 3.229898972774377, "learning_rate": 3.0361754284968124e-07, "loss": 0.2924, "step": 37286 }, { "epoch": 2.77123745819398, "grad_norm": 2.4389463920465575, "learning_rate": 3.034213598771507e-07, "loss": 0.2473, "step": 37287 }, { "epoch": 2.7713117800074323, "grad_norm": 2.2705183685768597, "learning_rate": 3.032252393306911e-07, "loss": 0.2749, "step": 37288 }, { "epoch": 2.7713861018208843, "grad_norm": 2.5700117557547917, "learning_rate": 3.0302918121156154e-07, "loss": 0.2273, "step": 37289 }, { "epoch": 2.7714604236343368, "grad_norm": 2.1180606938228284, "learning_rate": 3.028331855210298e-07, "loss": 0.2828, "step": 37290 }, { "epoch": 2.7715347454477888, "grad_norm": 2.12819080688613, "learning_rate": 3.026372522603538e-07, "loss": 0.2131, "step": 37291 }, { "epoch": 2.7716090672612412, "grad_norm": 2.1828810949131086, "learning_rate": 3.024413814307958e-07, "loss": 0.2895, "step": 37292 }, { "epoch": 2.7716833890746937, "grad_norm": 2.3097586194525053, "learning_rate": 3.0224557303361713e-07, "loss": 0.3244, "step": 37293 }, { "epoch": 2.7717577108881457, "grad_norm": 2.996457518268472, "learning_rate": 3.020498270700778e-07, "loss": 0.3718, "step": 37294 }, { "epoch": 2.7718320327015977, "grad_norm": 1.9537298404513874, "learning_rate": 3.0185414354143904e-07, "loss": 0.206, "step": 37295 }, { "epoch": 2.77190635451505, "grad_norm": 1.8649300518036227, "learning_rate": 3.01658522448961e-07, "loss": 0.2114, "step": 37296 }, { "epoch": 2.7719806763285026, "grad_norm": 2.111685469400609, "learning_rate": 3.0146296379390037e-07, "loss": 0.1956, "step": 37297 }, { "epoch": 2.7720549981419547, "grad_norm": 3.687028249623642, "learning_rate": 3.012674675775207e-07, "loss": 0.2629, "step": 37298 }, { "epoch": 2.7721293199554067, "grad_norm": 2.098421693061964, "learning_rate": 3.010720338010753e-07, "loss": 0.1941, "step": 37299 }, { "epoch": 2.772203641768859, "grad_norm": 2.4775200848673866, "learning_rate": 3.008766624658255e-07, "loss": 0.256, "step": 37300 }, { "epoch": 2.7722779635823116, "grad_norm": 2.1652045973885232, "learning_rate": 3.0068135357302795e-07, "loss": 0.2086, "step": 37301 }, { "epoch": 2.7723522853957636, "grad_norm": 2.346004128021227, "learning_rate": 3.0048610712394175e-07, "loss": 0.2546, "step": 37302 }, { "epoch": 2.7724266072092156, "grad_norm": 2.548522887084375, "learning_rate": 3.0029092311982256e-07, "loss": 0.2237, "step": 37303 }, { "epoch": 2.772500929022668, "grad_norm": 2.4330952126140284, "learning_rate": 3.0009580156192595e-07, "loss": 0.3229, "step": 37304 }, { "epoch": 2.7725752508361206, "grad_norm": 2.249288093554669, "learning_rate": 2.9990074245151104e-07, "loss": 0.2604, "step": 37305 }, { "epoch": 2.7726495726495726, "grad_norm": 2.5516787742709317, "learning_rate": 2.997057457898311e-07, "loss": 0.3251, "step": 37306 }, { "epoch": 2.772723894463025, "grad_norm": 2.909912330795376, "learning_rate": 2.995108115781409e-07, "loss": 0.3277, "step": 37307 }, { "epoch": 2.772798216276477, "grad_norm": 2.455750264299018, "learning_rate": 2.9931593981769813e-07, "loss": 0.2453, "step": 37308 }, { "epoch": 2.7728725380899295, "grad_norm": 2.2596388122116635, "learning_rate": 2.9912113050975524e-07, "loss": 0.3428, "step": 37309 }, { "epoch": 2.7729468599033815, "grad_norm": 2.908722452734914, "learning_rate": 2.9892638365556893e-07, "loss": 0.3526, "step": 37310 }, { "epoch": 2.773021181716834, "grad_norm": 2.9781689953007104, "learning_rate": 2.9873169925638935e-07, "loss": 0.3374, "step": 37311 }, { "epoch": 2.773095503530286, "grad_norm": 2.7598323748790485, "learning_rate": 2.9853707731347325e-07, "loss": 0.2917, "step": 37312 }, { "epoch": 2.7731698253437385, "grad_norm": 2.0866149147860353, "learning_rate": 2.983425178280719e-07, "loss": 0.2482, "step": 37313 }, { "epoch": 2.7732441471571905, "grad_norm": 2.1884080312295966, "learning_rate": 2.9814802080143645e-07, "loss": 0.2602, "step": 37314 }, { "epoch": 2.773318468970643, "grad_norm": 2.656517257493008, "learning_rate": 2.979535862348215e-07, "loss": 0.3361, "step": 37315 }, { "epoch": 2.7733927907840954, "grad_norm": 2.14075309505218, "learning_rate": 2.977592141294772e-07, "loss": 0.2614, "step": 37316 }, { "epoch": 2.7734671125975474, "grad_norm": 3.3414203803485503, "learning_rate": 2.975649044866569e-07, "loss": 0.3543, "step": 37317 }, { "epoch": 2.7735414344109994, "grad_norm": 3.4065105256421178, "learning_rate": 2.9737065730761073e-07, "loss": 0.4073, "step": 37318 }, { "epoch": 2.773615756224452, "grad_norm": 2.274415454469546, "learning_rate": 2.9717647259358664e-07, "loss": 0.2215, "step": 37319 }, { "epoch": 2.7736900780379043, "grad_norm": 3.169816289554813, "learning_rate": 2.969823503458391e-07, "loss": 0.4169, "step": 37320 }, { "epoch": 2.7737643998513564, "grad_norm": 1.784570094085441, "learning_rate": 2.96788290565615e-07, "loss": 0.215, "step": 37321 }, { "epoch": 2.7738387216648084, "grad_norm": 3.4172503231672335, "learning_rate": 2.9659429325416546e-07, "loss": 0.4046, "step": 37322 }, { "epoch": 2.773913043478261, "grad_norm": 2.80690650175098, "learning_rate": 2.964003584127362e-07, "loss": 0.3058, "step": 37323 }, { "epoch": 2.7739873652917133, "grad_norm": 2.337792735172359, "learning_rate": 2.962064860425795e-07, "loss": 0.3041, "step": 37324 }, { "epoch": 2.7740616871051653, "grad_norm": 2.08069387662169, "learning_rate": 2.9601267614494225e-07, "loss": 0.2182, "step": 37325 }, { "epoch": 2.774136008918618, "grad_norm": 2.359653342824476, "learning_rate": 2.9581892872107e-07, "loss": 0.231, "step": 37326 }, { "epoch": 2.77421033073207, "grad_norm": 2.4818427011887576, "learning_rate": 2.95625243772214e-07, "loss": 0.2754, "step": 37327 }, { "epoch": 2.7742846525455223, "grad_norm": 2.87573195406642, "learning_rate": 2.954316212996178e-07, "loss": 0.3093, "step": 37328 }, { "epoch": 2.7743589743589743, "grad_norm": 1.9126198489169277, "learning_rate": 2.9523806130452915e-07, "loss": 0.1952, "step": 37329 }, { "epoch": 2.7744332961724267, "grad_norm": 2.3988485069988092, "learning_rate": 2.9504456378819604e-07, "loss": 0.3104, "step": 37330 }, { "epoch": 2.7745076179858787, "grad_norm": 2.4018325460285985, "learning_rate": 2.948511287518596e-07, "loss": 0.2657, "step": 37331 }, { "epoch": 2.774581939799331, "grad_norm": 1.755642727918535, "learning_rate": 2.946577561967723e-07, "loss": 0.1815, "step": 37332 }, { "epoch": 2.7746562616127832, "grad_norm": 1.9193937068624123, "learning_rate": 2.944644461241708e-07, "loss": 0.2303, "step": 37333 }, { "epoch": 2.7747305834262357, "grad_norm": 2.194959168041224, "learning_rate": 2.942711985353053e-07, "loss": 0.2293, "step": 37334 }, { "epoch": 2.7748049052396877, "grad_norm": 2.875860260590131, "learning_rate": 2.940780134314192e-07, "loss": 0.3521, "step": 37335 }, { "epoch": 2.77487922705314, "grad_norm": 2.5642056550804164, "learning_rate": 2.938848908137526e-07, "loss": 0.4046, "step": 37336 }, { "epoch": 2.774953548866592, "grad_norm": 2.5885371942288526, "learning_rate": 2.9369183068355347e-07, "loss": 0.2647, "step": 37337 }, { "epoch": 2.7750278706800446, "grad_norm": 2.1822874657067475, "learning_rate": 2.9349883304206183e-07, "loss": 0.2482, "step": 37338 }, { "epoch": 2.775102192493497, "grad_norm": 2.302009407733441, "learning_rate": 2.933058978905212e-07, "loss": 0.2375, "step": 37339 }, { "epoch": 2.775176514306949, "grad_norm": 2.2773173676047955, "learning_rate": 2.931130252301739e-07, "loss": 0.2769, "step": 37340 }, { "epoch": 2.775250836120401, "grad_norm": 3.2069661188963314, "learning_rate": 2.929202150622612e-07, "loss": 0.312, "step": 37341 }, { "epoch": 2.7753251579338536, "grad_norm": 2.597324185086662, "learning_rate": 2.927274673880243e-07, "loss": 0.2743, "step": 37342 }, { "epoch": 2.775399479747306, "grad_norm": 1.9996786117389564, "learning_rate": 2.925347822087032e-07, "loss": 0.237, "step": 37343 }, { "epoch": 2.775473801560758, "grad_norm": 2.4282643271267492, "learning_rate": 2.923421595255427e-07, "loss": 0.2296, "step": 37344 }, { "epoch": 2.77554812337421, "grad_norm": 2.8659148192608885, "learning_rate": 2.9214959933977716e-07, "loss": 0.2948, "step": 37345 }, { "epoch": 2.7756224451876625, "grad_norm": 2.244638470746355, "learning_rate": 2.91957101652649e-07, "loss": 0.2475, "step": 37346 }, { "epoch": 2.775696767001115, "grad_norm": 2.5347589661943752, "learning_rate": 2.917646664653984e-07, "loss": 0.2588, "step": 37347 }, { "epoch": 2.775771088814567, "grad_norm": 3.383665198543652, "learning_rate": 2.91572293779262e-07, "loss": 0.3287, "step": 37348 }, { "epoch": 2.7758454106280195, "grad_norm": 2.5028684409270414, "learning_rate": 2.9137998359548004e-07, "loss": 0.3533, "step": 37349 }, { "epoch": 2.7759197324414715, "grad_norm": 1.9825877104370782, "learning_rate": 2.9118773591529035e-07, "loss": 0.2644, "step": 37350 }, { "epoch": 2.775994054254924, "grad_norm": 1.9438874200792884, "learning_rate": 2.909955507399287e-07, "loss": 0.1798, "step": 37351 }, { "epoch": 2.776068376068376, "grad_norm": 2.331681499134569, "learning_rate": 2.908034280706351e-07, "loss": 0.2273, "step": 37352 }, { "epoch": 2.7761426978818284, "grad_norm": 2.018863426712298, "learning_rate": 2.906113679086442e-07, "loss": 0.2631, "step": 37353 }, { "epoch": 2.7762170196952805, "grad_norm": 2.3699279472022616, "learning_rate": 2.9041937025519495e-07, "loss": 0.2423, "step": 37354 }, { "epoch": 2.776291341508733, "grad_norm": 2.646314979848584, "learning_rate": 2.90227435111522e-07, "loss": 0.3092, "step": 37355 }, { "epoch": 2.776365663322185, "grad_norm": 2.1781227641277057, "learning_rate": 2.900355624788609e-07, "loss": 0.2733, "step": 37356 }, { "epoch": 2.7764399851356374, "grad_norm": 2.531397212073035, "learning_rate": 2.898437523584463e-07, "loss": 0.2917, "step": 37357 }, { "epoch": 2.7765143069490894, "grad_norm": 2.6590656968231725, "learning_rate": 2.896520047515139e-07, "loss": 0.2933, "step": 37358 }, { "epoch": 2.776588628762542, "grad_norm": 2.386531472894066, "learning_rate": 2.894603196592982e-07, "loss": 0.2669, "step": 37359 }, { "epoch": 2.776662950575994, "grad_norm": 2.949543047659177, "learning_rate": 2.892686970830327e-07, "loss": 0.2803, "step": 37360 }, { "epoch": 2.7767372723894463, "grad_norm": 2.3429733402756794, "learning_rate": 2.89077137023952e-07, "loss": 0.2449, "step": 37361 }, { "epoch": 2.776811594202899, "grad_norm": 1.8661912885336902, "learning_rate": 2.888856394832895e-07, "loss": 0.248, "step": 37362 }, { "epoch": 2.776885916016351, "grad_norm": 2.568152038140702, "learning_rate": 2.8869420446227535e-07, "loss": 0.2282, "step": 37363 }, { "epoch": 2.776960237829803, "grad_norm": 3.1463300207305562, "learning_rate": 2.885028319621452e-07, "loss": 0.3246, "step": 37364 }, { "epoch": 2.7770345596432553, "grad_norm": 2.486854692738222, "learning_rate": 2.883115219841304e-07, "loss": 0.408, "step": 37365 }, { "epoch": 2.7771088814567078, "grad_norm": 2.8613226267765515, "learning_rate": 2.881202745294631e-07, "loss": 0.3514, "step": 37366 }, { "epoch": 2.7771832032701598, "grad_norm": 2.157086601620242, "learning_rate": 2.8792908959937136e-07, "loss": 0.1992, "step": 37367 }, { "epoch": 2.777257525083612, "grad_norm": 2.1172833467460954, "learning_rate": 2.877379671950886e-07, "loss": 0.3027, "step": 37368 }, { "epoch": 2.7773318468970642, "grad_norm": 2.5322620863549887, "learning_rate": 2.8754690731784605e-07, "loss": 0.291, "step": 37369 }, { "epoch": 2.7774061687105167, "grad_norm": 5.561639537484318, "learning_rate": 2.8735590996887163e-07, "loss": 0.1171, "step": 37370 }, { "epoch": 2.7774804905239687, "grad_norm": 3.205564949499749, "learning_rate": 2.8716497514939547e-07, "loss": 0.3924, "step": 37371 }, { "epoch": 2.777554812337421, "grad_norm": 1.807219054046613, "learning_rate": 2.869741028606488e-07, "loss": 0.1938, "step": 37372 }, { "epoch": 2.777629134150873, "grad_norm": 2.3975209667190525, "learning_rate": 2.867832931038572e-07, "loss": 0.2403, "step": 37373 }, { "epoch": 2.7777034559643257, "grad_norm": 2.5900359331823744, "learning_rate": 2.865925458802521e-07, "loss": 0.361, "step": 37374 }, { "epoch": 2.7777777777777777, "grad_norm": 2.5692966447193553, "learning_rate": 2.864018611910591e-07, "loss": 0.2905, "step": 37375 }, { "epoch": 2.77785209959123, "grad_norm": 2.606446267040564, "learning_rate": 2.8621123903750716e-07, "loss": 0.1757, "step": 37376 }, { "epoch": 2.777926421404682, "grad_norm": 1.4009345136058067, "learning_rate": 2.860206794208242e-07, "loss": 0.1547, "step": 37377 }, { "epoch": 2.7780007432181346, "grad_norm": 2.3516416407723684, "learning_rate": 2.85830182342236e-07, "loss": 0.2984, "step": 37378 }, { "epoch": 2.7780750650315866, "grad_norm": 2.035350096926735, "learning_rate": 2.856397478029682e-07, "loss": 0.2336, "step": 37379 }, { "epoch": 2.778149386845039, "grad_norm": 2.7367241081980724, "learning_rate": 2.854493758042476e-07, "loss": 0.3807, "step": 37380 }, { "epoch": 2.778223708658491, "grad_norm": 2.1606533835861925, "learning_rate": 2.8525906634730093e-07, "loss": 0.2277, "step": 37381 }, { "epoch": 2.7782980304719436, "grad_norm": 2.897453113577938, "learning_rate": 2.8506881943335063e-07, "loss": 0.311, "step": 37382 }, { "epoch": 2.7783723522853956, "grad_norm": 2.1528138953176197, "learning_rate": 2.848786350636246e-07, "loss": 0.2753, "step": 37383 }, { "epoch": 2.778446674098848, "grad_norm": 2.534397910564695, "learning_rate": 2.846885132393451e-07, "loss": 0.2756, "step": 37384 }, { "epoch": 2.7785209959123005, "grad_norm": 2.082198413599884, "learning_rate": 2.844984539617368e-07, "loss": 0.2313, "step": 37385 }, { "epoch": 2.7785953177257525, "grad_norm": 2.6257747045688675, "learning_rate": 2.843084572320243e-07, "loss": 0.2835, "step": 37386 }, { "epoch": 2.7786696395392045, "grad_norm": 2.1088946349391935, "learning_rate": 2.8411852305142874e-07, "loss": 0.2339, "step": 37387 }, { "epoch": 2.778743961352657, "grad_norm": 2.1577153831943163, "learning_rate": 2.8392865142117475e-07, "loss": 0.2008, "step": 37388 }, { "epoch": 2.7788182831661095, "grad_norm": 2.449216037227345, "learning_rate": 2.837388423424847e-07, "loss": 0.311, "step": 37389 }, { "epoch": 2.7788926049795615, "grad_norm": 2.7266121647526345, "learning_rate": 2.8354909581657875e-07, "loss": 0.2961, "step": 37390 }, { "epoch": 2.7789669267930135, "grad_norm": 2.2238812047169, "learning_rate": 2.833594118446792e-07, "loss": 0.2682, "step": 37391 }, { "epoch": 2.779041248606466, "grad_norm": 2.688118933368833, "learning_rate": 2.831697904280073e-07, "loss": 0.269, "step": 37392 }, { "epoch": 2.7791155704199184, "grad_norm": 2.0771580991464305, "learning_rate": 2.829802315677854e-07, "loss": 0.2784, "step": 37393 }, { "epoch": 2.7791898922333704, "grad_norm": 2.097552425081528, "learning_rate": 2.827907352652326e-07, "loss": 0.2691, "step": 37394 }, { "epoch": 2.779264214046823, "grad_norm": 2.207243657401348, "learning_rate": 2.8260130152156785e-07, "loss": 0.2192, "step": 37395 }, { "epoch": 2.779338535860275, "grad_norm": 2.4267282610699814, "learning_rate": 2.824119303380124e-07, "loss": 0.2495, "step": 37396 }, { "epoch": 2.7794128576737274, "grad_norm": 3.0885701118804016, "learning_rate": 2.8222262171578416e-07, "loss": 0.2238, "step": 37397 }, { "epoch": 2.7794871794871794, "grad_norm": 2.5155621771143966, "learning_rate": 2.8203337565610336e-07, "loss": 0.2468, "step": 37398 }, { "epoch": 2.779561501300632, "grad_norm": 2.6492186069470463, "learning_rate": 2.8184419216018664e-07, "loss": 0.2424, "step": 37399 }, { "epoch": 2.779635823114084, "grad_norm": 2.3532195083454437, "learning_rate": 2.816550712292532e-07, "loss": 0.2633, "step": 37400 }, { "epoch": 2.7797101449275363, "grad_norm": 2.7428210896002194, "learning_rate": 2.814660128645208e-07, "loss": 0.2589, "step": 37401 }, { "epoch": 2.7797844667409883, "grad_norm": 4.51279347794771, "learning_rate": 2.812770170672041e-07, "loss": 0.2681, "step": 37402 }, { "epoch": 2.779858788554441, "grad_norm": 2.1189878155455064, "learning_rate": 2.8108808383852325e-07, "loss": 0.2696, "step": 37403 }, { "epoch": 2.779933110367893, "grad_norm": 2.0947869802326107, "learning_rate": 2.808992131796917e-07, "loss": 0.2597, "step": 37404 }, { "epoch": 2.7800074321813453, "grad_norm": 2.0071368258225872, "learning_rate": 2.807104050919285e-07, "loss": 0.2672, "step": 37405 }, { "epoch": 2.7800817539947973, "grad_norm": 2.0432517764937113, "learning_rate": 2.8052165957644706e-07, "loss": 0.2129, "step": 37406 }, { "epoch": 2.7801560758082497, "grad_norm": 2.7383343792168318, "learning_rate": 2.803329766344609e-07, "loss": 0.2803, "step": 37407 }, { "epoch": 2.780230397621702, "grad_norm": 1.9650339712757456, "learning_rate": 2.8014435626718907e-07, "loss": 0.2265, "step": 37408 }, { "epoch": 2.7803047194351542, "grad_norm": 2.609129083555484, "learning_rate": 2.7995579847584276e-07, "loss": 0.2638, "step": 37409 }, { "epoch": 2.7803790412486062, "grad_norm": 3.0314663129443162, "learning_rate": 2.797673032616355e-07, "loss": 0.2725, "step": 37410 }, { "epoch": 2.7804533630620587, "grad_norm": 2.3382960624215663, "learning_rate": 2.79578870625784e-07, "loss": 0.3176, "step": 37411 }, { "epoch": 2.780527684875511, "grad_norm": 2.5512769325700653, "learning_rate": 2.7939050056949856e-07, "loss": 0.2573, "step": 37412 }, { "epoch": 2.780602006688963, "grad_norm": 2.054765985349548, "learning_rate": 2.7920219309399257e-07, "loss": 0.2777, "step": 37413 }, { "epoch": 2.780676328502415, "grad_norm": 2.4278898749519633, "learning_rate": 2.790139482004772e-07, "loss": 0.2485, "step": 37414 }, { "epoch": 2.7807506503158677, "grad_norm": 2.676877534398879, "learning_rate": 2.788257658901672e-07, "loss": 0.3099, "step": 37415 }, { "epoch": 2.78082497212932, "grad_norm": 2.605646472078633, "learning_rate": 2.7863764616427256e-07, "loss": 0.2467, "step": 37416 }, { "epoch": 2.780899293942772, "grad_norm": 2.69979239886869, "learning_rate": 2.7844958902400354e-07, "loss": 0.2751, "step": 37417 }, { "epoch": 2.7809736157562246, "grad_norm": 2.5479010950953067, "learning_rate": 2.7826159447057245e-07, "loss": 0.3154, "step": 37418 }, { "epoch": 2.7810479375696766, "grad_norm": 2.226206227120994, "learning_rate": 2.780736625051883e-07, "loss": 0.2938, "step": 37419 }, { "epoch": 2.781122259383129, "grad_norm": 4.659652649217935, "learning_rate": 2.778857931290613e-07, "loss": 0.2671, "step": 37420 }, { "epoch": 2.781196581196581, "grad_norm": 3.3736290490973113, "learning_rate": 2.776979863434015e-07, "loss": 0.3263, "step": 37421 }, { "epoch": 2.7812709030100335, "grad_norm": 2.1274682907277302, "learning_rate": 2.775102421494169e-07, "loss": 0.2402, "step": 37422 }, { "epoch": 2.7813452248234856, "grad_norm": 2.9645828806918404, "learning_rate": 2.7732256054831986e-07, "loss": 0.2964, "step": 37423 }, { "epoch": 2.781419546636938, "grad_norm": 2.608716091405746, "learning_rate": 2.771349415413127e-07, "loss": 0.2928, "step": 37424 }, { "epoch": 2.78149386845039, "grad_norm": 2.7105354708821747, "learning_rate": 2.7694738512960673e-07, "loss": 0.3109, "step": 37425 }, { "epoch": 2.7815681902638425, "grad_norm": 1.866146106773, "learning_rate": 2.767598913144087e-07, "loss": 0.1554, "step": 37426 }, { "epoch": 2.781642512077295, "grad_norm": 2.57607087998012, "learning_rate": 2.7657246009692664e-07, "loss": 0.3212, "step": 37427 }, { "epoch": 2.781716833890747, "grad_norm": 2.6429141005674297, "learning_rate": 2.7638509147836724e-07, "loss": 0.2793, "step": 37428 }, { "epoch": 2.781791155704199, "grad_norm": 2.9240755302274932, "learning_rate": 2.7619778545993403e-07, "loss": 0.3396, "step": 37429 }, { "epoch": 2.7818654775176515, "grad_norm": 2.7299441947051917, "learning_rate": 2.7601054204283716e-07, "loss": 0.3136, "step": 37430 }, { "epoch": 2.781939799331104, "grad_norm": 1.9228777548950082, "learning_rate": 2.7582336122827904e-07, "loss": 0.2488, "step": 37431 }, { "epoch": 2.782014121144556, "grad_norm": 2.408012732756234, "learning_rate": 2.7563624301746527e-07, "loss": 0.3107, "step": 37432 }, { "epoch": 2.782088442958008, "grad_norm": 1.9765409568586905, "learning_rate": 2.7544918741160053e-07, "loss": 0.1985, "step": 37433 }, { "epoch": 2.7821627647714604, "grad_norm": 2.135192717269509, "learning_rate": 2.752621944118905e-07, "loss": 0.2412, "step": 37434 }, { "epoch": 2.782237086584913, "grad_norm": 1.8317861817367744, "learning_rate": 2.7507526401953753e-07, "loss": 0.1508, "step": 37435 }, { "epoch": 2.782311408398365, "grad_norm": 3.220188454832756, "learning_rate": 2.7488839623574404e-07, "loss": 0.3002, "step": 37436 }, { "epoch": 2.782385730211817, "grad_norm": 2.4745860033860674, "learning_rate": 2.7470159106171566e-07, "loss": 0.3001, "step": 37437 }, { "epoch": 2.7824600520252694, "grad_norm": 1.9979859904073036, "learning_rate": 2.7451484849865486e-07, "loss": 0.2704, "step": 37438 }, { "epoch": 2.782534373838722, "grad_norm": 2.2768764923430385, "learning_rate": 2.7432816854776056e-07, "loss": 0.2762, "step": 37439 }, { "epoch": 2.782608695652174, "grad_norm": 1.9556143144570555, "learning_rate": 2.7414155121023856e-07, "loss": 0.1852, "step": 37440 }, { "epoch": 2.7826830174656263, "grad_norm": 2.173609969562485, "learning_rate": 2.7395499648728673e-07, "loss": 0.2417, "step": 37441 }, { "epoch": 2.7827573392790783, "grad_norm": 2.8366588156626427, "learning_rate": 2.737685043801097e-07, "loss": 0.2873, "step": 37442 }, { "epoch": 2.7828316610925308, "grad_norm": 2.9194892273187514, "learning_rate": 2.735820748899065e-07, "loss": 0.2292, "step": 37443 }, { "epoch": 2.782905982905983, "grad_norm": 2.969309314884549, "learning_rate": 2.733957080178762e-07, "loss": 0.3155, "step": 37444 }, { "epoch": 2.7829803047194352, "grad_norm": 3.36419720670399, "learning_rate": 2.732094037652211e-07, "loss": 0.2226, "step": 37445 }, { "epoch": 2.7830546265328873, "grad_norm": 2.1392008804116105, "learning_rate": 2.7302316213313804e-07, "loss": 0.2591, "step": 37446 }, { "epoch": 2.7831289483463397, "grad_norm": 2.463579559979292, "learning_rate": 2.7283698312282726e-07, "loss": 0.2625, "step": 37447 }, { "epoch": 2.7832032701597917, "grad_norm": 2.8981738334799294, "learning_rate": 2.7265086673548656e-07, "loss": 0.285, "step": 37448 }, { "epoch": 2.783277591973244, "grad_norm": 2.0848064605115315, "learning_rate": 2.724648129723162e-07, "loss": 0.2473, "step": 37449 }, { "epoch": 2.7833519137866967, "grad_norm": 2.3700057694177774, "learning_rate": 2.722788218345129e-07, "loss": 0.2779, "step": 37450 }, { "epoch": 2.7834262356001487, "grad_norm": 2.8932387184634605, "learning_rate": 2.7209289332327246e-07, "loss": 0.3666, "step": 37451 }, { "epoch": 2.7835005574136007, "grad_norm": 2.9220542345862084, "learning_rate": 2.719070274397939e-07, "loss": 0.2748, "step": 37452 }, { "epoch": 2.783574879227053, "grad_norm": 2.3662153826233774, "learning_rate": 2.7172122418527294e-07, "loss": 0.2531, "step": 37453 }, { "epoch": 2.7836492010405056, "grad_norm": 2.244608408521439, "learning_rate": 2.715354835609052e-07, "loss": 0.2825, "step": 37454 }, { "epoch": 2.7837235228539576, "grad_norm": 2.105313136102041, "learning_rate": 2.713498055678887e-07, "loss": 0.2087, "step": 37455 }, { "epoch": 2.7837978446674096, "grad_norm": 2.0943985577722994, "learning_rate": 2.711641902074158e-07, "loss": 0.2624, "step": 37456 }, { "epoch": 2.783872166480862, "grad_norm": 1.8774138385651458, "learning_rate": 2.709786374806855e-07, "loss": 0.211, "step": 37457 }, { "epoch": 2.7839464882943146, "grad_norm": 2.4322823009513495, "learning_rate": 2.7079314738888806e-07, "loss": 0.2755, "step": 37458 }, { "epoch": 2.7840208101077666, "grad_norm": 2.173033277913916, "learning_rate": 2.7060771993322024e-07, "loss": 0.2149, "step": 37459 }, { "epoch": 2.7840951319212186, "grad_norm": 2.3862325154946684, "learning_rate": 2.704223551148755e-07, "loss": 0.2722, "step": 37460 }, { "epoch": 2.784169453734671, "grad_norm": 2.4921145097269126, "learning_rate": 2.702370529350451e-07, "loss": 0.2914, "step": 37461 }, { "epoch": 2.7842437755481235, "grad_norm": 2.2679801481037027, "learning_rate": 2.700518133949248e-07, "loss": 0.2453, "step": 37462 }, { "epoch": 2.7843180973615755, "grad_norm": 2.0396445348122416, "learning_rate": 2.6986663649570474e-07, "loss": 0.2329, "step": 37463 }, { "epoch": 2.784392419175028, "grad_norm": 2.2703393095525155, "learning_rate": 2.696815222385807e-07, "loss": 0.2537, "step": 37464 }, { "epoch": 2.78446674098848, "grad_norm": 2.134883697331266, "learning_rate": 2.694964706247416e-07, "loss": 0.2842, "step": 37465 }, { "epoch": 2.7845410628019325, "grad_norm": 2.2146696485728627, "learning_rate": 2.693114816553777e-07, "loss": 0.2705, "step": 37466 }, { "epoch": 2.7846153846153845, "grad_norm": 3.2012856245546293, "learning_rate": 2.691265553316824e-07, "loss": 0.3187, "step": 37467 }, { "epoch": 2.784689706428837, "grad_norm": 2.9374588000447144, "learning_rate": 2.6894169165484707e-07, "loss": 0.3194, "step": 37468 }, { "epoch": 2.784764028242289, "grad_norm": 2.3453469179625452, "learning_rate": 2.687568906260585e-07, "loss": 0.2603, "step": 37469 }, { "epoch": 2.7848383500557414, "grad_norm": 2.339408125274861, "learning_rate": 2.685721522465079e-07, "loss": 0.1964, "step": 37470 }, { "epoch": 2.7849126718691934, "grad_norm": 2.6603464195922295, "learning_rate": 2.683874765173855e-07, "loss": 0.2972, "step": 37471 }, { "epoch": 2.784986993682646, "grad_norm": 2.1184154554970807, "learning_rate": 2.6820286343988035e-07, "loss": 0.2328, "step": 37472 }, { "epoch": 2.7850613154960984, "grad_norm": 2.273406756470428, "learning_rate": 2.680183130151792e-07, "loss": 0.2668, "step": 37473 }, { "epoch": 2.7851356373095504, "grad_norm": 2.278810913918287, "learning_rate": 2.678338252444712e-07, "loss": 0.2654, "step": 37474 }, { "epoch": 2.7852099591230024, "grad_norm": 2.6471250617684494, "learning_rate": 2.6764940012894423e-07, "loss": 0.2896, "step": 37475 }, { "epoch": 2.785284280936455, "grad_norm": 2.406866083649458, "learning_rate": 2.6746503766978516e-07, "loss": 0.2526, "step": 37476 }, { "epoch": 2.7853586027499073, "grad_norm": 2.6831829834551217, "learning_rate": 2.6728073786818185e-07, "loss": 0.2748, "step": 37477 }, { "epoch": 2.7854329245633593, "grad_norm": 2.8128408706851546, "learning_rate": 2.67096500725319e-07, "loss": 0.3446, "step": 37478 }, { "epoch": 2.7855072463768114, "grad_norm": 2.1490997773106493, "learning_rate": 2.669123262423867e-07, "loss": 0.2705, "step": 37479 }, { "epoch": 2.785581568190264, "grad_norm": 3.4451142227349503, "learning_rate": 2.667282144205652e-07, "loss": 0.3804, "step": 37480 }, { "epoch": 2.7856558900037163, "grad_norm": 2.290379897952171, "learning_rate": 2.6654416526104346e-07, "loss": 0.2837, "step": 37481 }, { "epoch": 2.7857302118171683, "grad_norm": 2.1469539004693874, "learning_rate": 2.66360178765005e-07, "loss": 0.2077, "step": 37482 }, { "epoch": 2.7858045336306207, "grad_norm": 2.1450276576390337, "learning_rate": 2.661762549336344e-07, "loss": 0.2733, "step": 37483 }, { "epoch": 2.7858788554440728, "grad_norm": 2.4508830558820267, "learning_rate": 2.659923937681164e-07, "loss": 0.3256, "step": 37484 }, { "epoch": 2.7859531772575252, "grad_norm": 2.2559486161177995, "learning_rate": 2.658085952696332e-07, "loss": 0.266, "step": 37485 }, { "epoch": 2.7860274990709772, "grad_norm": 2.5411444350077383, "learning_rate": 2.6562485943937067e-07, "loss": 0.2622, "step": 37486 }, { "epoch": 2.7861018208844297, "grad_norm": 2.3631786860011497, "learning_rate": 2.6544118627851003e-07, "loss": 0.28, "step": 37487 }, { "epoch": 2.7861761426978817, "grad_norm": 2.2088673665866576, "learning_rate": 2.6525757578823253e-07, "loss": 0.2051, "step": 37488 }, { "epoch": 2.786250464511334, "grad_norm": 1.872695249256051, "learning_rate": 2.6507402796972284e-07, "loss": 0.1882, "step": 37489 }, { "epoch": 2.786324786324786, "grad_norm": 2.2394797149488963, "learning_rate": 2.6489054282416104e-07, "loss": 0.22, "step": 37490 }, { "epoch": 2.7863991081382387, "grad_norm": 2.7417310466618012, "learning_rate": 2.647071203527307e-07, "loss": 0.2673, "step": 37491 }, { "epoch": 2.7864734299516907, "grad_norm": 2.2556065683268907, "learning_rate": 2.6452376055660867e-07, "loss": 0.2444, "step": 37492 }, { "epoch": 2.786547751765143, "grad_norm": 1.7410367172031296, "learning_rate": 2.643404634369773e-07, "loss": 0.2228, "step": 37493 }, { "epoch": 2.786622073578595, "grad_norm": 2.168150607553246, "learning_rate": 2.6415722899501893e-07, "loss": 0.2202, "step": 37494 }, { "epoch": 2.7866963953920476, "grad_norm": 1.9588402128924272, "learning_rate": 2.6397405723190825e-07, "loss": 0.2237, "step": 37495 }, { "epoch": 2.7867707172055, "grad_norm": 2.2265894359896636, "learning_rate": 2.637909481488299e-07, "loss": 0.25, "step": 37496 }, { "epoch": 2.786845039018952, "grad_norm": 2.322807778691428, "learning_rate": 2.6360790174695947e-07, "loss": 0.1739, "step": 37497 }, { "epoch": 2.786919360832404, "grad_norm": 2.219412605586027, "learning_rate": 2.6342491802747506e-07, "loss": 0.3037, "step": 37498 }, { "epoch": 2.7869936826458566, "grad_norm": 2.09310179582371, "learning_rate": 2.6324199699155785e-07, "loss": 0.2127, "step": 37499 }, { "epoch": 2.787068004459309, "grad_norm": 2.186575830760565, "learning_rate": 2.6305913864038134e-07, "loss": 0.2008, "step": 37500 }, { "epoch": 2.787142326272761, "grad_norm": 2.305057032390989, "learning_rate": 2.628763429751269e-07, "loss": 0.2762, "step": 37501 }, { "epoch": 2.787216648086213, "grad_norm": 2.8765323961598037, "learning_rate": 2.6269360999696794e-07, "loss": 0.2635, "step": 37502 }, { "epoch": 2.7872909698996655, "grad_norm": 1.776671823906098, "learning_rate": 2.625109397070835e-07, "loss": 0.1803, "step": 37503 }, { "epoch": 2.787365291713118, "grad_norm": 2.8500606708942486, "learning_rate": 2.623283321066483e-07, "loss": 0.3254, "step": 37504 }, { "epoch": 2.78743961352657, "grad_norm": 2.829692821527405, "learning_rate": 2.621457871968369e-07, "loss": 0.2813, "step": 37505 }, { "epoch": 2.7875139353400225, "grad_norm": 2.8353147050579475, "learning_rate": 2.6196330497882616e-07, "loss": 0.3057, "step": 37506 }, { "epoch": 2.7875882571534745, "grad_norm": 2.2753447645240215, "learning_rate": 2.617808854537895e-07, "loss": 0.2814, "step": 37507 }, { "epoch": 2.787662578966927, "grad_norm": 2.6173877470693285, "learning_rate": 2.615985286229039e-07, "loss": 0.2639, "step": 37508 }, { "epoch": 2.787736900780379, "grad_norm": 2.2788622790818915, "learning_rate": 2.6141623448734053e-07, "loss": 0.2521, "step": 37509 }, { "epoch": 2.7878112225938314, "grad_norm": 2.33426878835263, "learning_rate": 2.612340030482741e-07, "loss": 0.2878, "step": 37510 }, { "epoch": 2.7878855444072834, "grad_norm": 2.9191189384377236, "learning_rate": 2.610518343068791e-07, "loss": 0.3243, "step": 37511 }, { "epoch": 2.787959866220736, "grad_norm": 2.1308984195591107, "learning_rate": 2.608697282643247e-07, "loss": 0.2626, "step": 37512 }, { "epoch": 2.788034188034188, "grad_norm": 2.636322829065619, "learning_rate": 2.6068768492178874e-07, "loss": 0.2674, "step": 37513 }, { "epoch": 2.7881085098476404, "grad_norm": 2.269072161514919, "learning_rate": 2.605057042804393e-07, "loss": 0.2098, "step": 37514 }, { "epoch": 2.7881828316610924, "grad_norm": 1.948492020978913, "learning_rate": 2.603237863414476e-07, "loss": 0.223, "step": 37515 }, { "epoch": 2.788257153474545, "grad_norm": 2.4164689162650443, "learning_rate": 2.6014193110598607e-07, "loss": 0.3075, "step": 37516 }, { "epoch": 2.788331475287997, "grad_norm": 2.6885209160696597, "learning_rate": 2.5996013857522593e-07, "loss": 0.3141, "step": 37517 }, { "epoch": 2.7884057971014493, "grad_norm": 2.1889173951683594, "learning_rate": 2.597784087503363e-07, "loss": 0.2297, "step": 37518 }, { "epoch": 2.7884801189149018, "grad_norm": 2.1335742516714653, "learning_rate": 2.595967416324896e-07, "loss": 0.2902, "step": 37519 }, { "epoch": 2.788554440728354, "grad_norm": 2.909623889679269, "learning_rate": 2.594151372228515e-07, "loss": 0.2229, "step": 37520 }, { "epoch": 2.788628762541806, "grad_norm": 3.188359645855013, "learning_rate": 2.5923359552259555e-07, "loss": 0.2518, "step": 37521 }, { "epoch": 2.7887030843552583, "grad_norm": 2.141318470941716, "learning_rate": 2.590521165328863e-07, "loss": 0.1997, "step": 37522 }, { "epoch": 2.7887774061687107, "grad_norm": 2.3956761159802755, "learning_rate": 2.588707002548951e-07, "loss": 0.2966, "step": 37523 }, { "epoch": 2.7888517279821627, "grad_norm": 1.833778852263699, "learning_rate": 2.5868934668978886e-07, "loss": 0.1911, "step": 37524 }, { "epoch": 2.7889260497956148, "grad_norm": 2.7583098228898075, "learning_rate": 2.585080558387354e-07, "loss": 0.2428, "step": 37525 }, { "epoch": 2.789000371609067, "grad_norm": 3.964439636362827, "learning_rate": 2.5832682770290165e-07, "loss": 0.2322, "step": 37526 }, { "epoch": 2.7890746934225197, "grad_norm": 2.435199548739853, "learning_rate": 2.581456622834533e-07, "loss": 0.2846, "step": 37527 }, { "epoch": 2.7891490152359717, "grad_norm": 4.82303353822035, "learning_rate": 2.579645595815594e-07, "loss": 0.3325, "step": 37528 }, { "epoch": 2.789223337049424, "grad_norm": 1.8317212255422783, "learning_rate": 2.5778351959838243e-07, "loss": 0.1791, "step": 37529 }, { "epoch": 2.789297658862876, "grad_norm": 2.0845702097541445, "learning_rate": 2.576025423350903e-07, "loss": 0.2145, "step": 37530 }, { "epoch": 2.7893719806763286, "grad_norm": 2.332337102403587, "learning_rate": 2.574216277928476e-07, "loss": 0.2532, "step": 37531 }, { "epoch": 2.7894463024897806, "grad_norm": 2.8455422498357215, "learning_rate": 2.5724077597281903e-07, "loss": 0.3715, "step": 37532 }, { "epoch": 2.789520624303233, "grad_norm": 2.548304512473344, "learning_rate": 2.570599868761692e-07, "loss": 0.3449, "step": 37533 }, { "epoch": 2.789594946116685, "grad_norm": 2.045918934406838, "learning_rate": 2.5687926050406155e-07, "loss": 0.2612, "step": 37534 }, { "epoch": 2.7896692679301376, "grad_norm": 1.85092040130637, "learning_rate": 2.566985968576585e-07, "loss": 0.2218, "step": 37535 }, { "epoch": 2.7897435897435896, "grad_norm": 4.097379776655828, "learning_rate": 2.565179959381259e-07, "loss": 0.3106, "step": 37536 }, { "epoch": 2.789817911557042, "grad_norm": 2.4038678285277317, "learning_rate": 2.5633745774662377e-07, "loss": 0.2551, "step": 37537 }, { "epoch": 2.789892233370494, "grad_norm": 3.9075275013072304, "learning_rate": 2.561569822843168e-07, "loss": 0.3056, "step": 37538 }, { "epoch": 2.7899665551839465, "grad_norm": 2.159365581282422, "learning_rate": 2.5597656955236415e-07, "loss": 0.2154, "step": 37539 }, { "epoch": 2.7900408769973986, "grad_norm": 2.4735629090082036, "learning_rate": 2.5579621955192926e-07, "loss": 0.2726, "step": 37540 }, { "epoch": 2.790115198810851, "grad_norm": 2.2695266592877705, "learning_rate": 2.556159322841734e-07, "loss": 0.2246, "step": 37541 }, { "epoch": 2.7901895206243035, "grad_norm": 2.109383054714678, "learning_rate": 2.554357077502545e-07, "loss": 0.2894, "step": 37542 }, { "epoch": 2.7902638424377555, "grad_norm": 2.723353809712375, "learning_rate": 2.552555459513373e-07, "loss": 0.2606, "step": 37543 }, { "epoch": 2.7903381642512075, "grad_norm": 2.04078381005471, "learning_rate": 2.5507544688857745e-07, "loss": 0.2407, "step": 37544 }, { "epoch": 2.79041248606466, "grad_norm": 2.013457174924022, "learning_rate": 2.5489541056313737e-07, "loss": 0.2331, "step": 37545 }, { "epoch": 2.7904868078781124, "grad_norm": 2.9610720979121066, "learning_rate": 2.547154369761751e-07, "loss": 0.3155, "step": 37546 }, { "epoch": 2.7905611296915644, "grad_norm": 2.3731496622945563, "learning_rate": 2.5453552612884956e-07, "loss": 0.3376, "step": 37547 }, { "epoch": 2.7906354515050165, "grad_norm": 1.8834156388317453, "learning_rate": 2.5435567802231773e-07, "loss": 0.1818, "step": 37548 }, { "epoch": 2.790709773318469, "grad_norm": 2.018293543357082, "learning_rate": 2.5417589265773865e-07, "loss": 0.222, "step": 37549 }, { "epoch": 2.7907840951319214, "grad_norm": 2.774622034005028, "learning_rate": 2.539961700362692e-07, "loss": 0.2789, "step": 37550 }, { "epoch": 2.7908584169453734, "grad_norm": 2.15118491229559, "learning_rate": 2.5381651015906614e-07, "loss": 0.2646, "step": 37551 }, { "epoch": 2.790932738758826, "grad_norm": 2.3505010277162, "learning_rate": 2.5363691302728866e-07, "loss": 0.1756, "step": 37552 }, { "epoch": 2.791007060572278, "grad_norm": 2.5009855598423703, "learning_rate": 2.534573786420902e-07, "loss": 0.3272, "step": 37553 }, { "epoch": 2.7910813823857303, "grad_norm": 2.047353619150796, "learning_rate": 2.5327790700462653e-07, "loss": 0.2078, "step": 37554 }, { "epoch": 2.7911557041991824, "grad_norm": 2.2906970205619577, "learning_rate": 2.530984981160545e-07, "loss": 0.2697, "step": 37555 }, { "epoch": 2.791230026012635, "grad_norm": 2.291601141985783, "learning_rate": 2.529191519775287e-07, "loss": 0.284, "step": 37556 }, { "epoch": 2.791304347826087, "grad_norm": 2.367840124180115, "learning_rate": 2.5273986859020385e-07, "loss": 0.2149, "step": 37557 }, { "epoch": 2.7913786696395393, "grad_norm": 2.7073196213468336, "learning_rate": 2.5256064795523447e-07, "loss": 0.356, "step": 37558 }, { "epoch": 2.7914529914529913, "grad_norm": 2.3237407981304963, "learning_rate": 2.52381490073772e-07, "loss": 0.2849, "step": 37559 }, { "epoch": 2.7915273132664438, "grad_norm": 2.1242502220880475, "learning_rate": 2.522023949469743e-07, "loss": 0.2428, "step": 37560 }, { "epoch": 2.7916016350798962, "grad_norm": 2.251988671300376, "learning_rate": 2.5202336257598936e-07, "loss": 0.2354, "step": 37561 }, { "epoch": 2.7916759568933482, "grad_norm": 2.504208089767734, "learning_rate": 2.5184439296197294e-07, "loss": 0.2866, "step": 37562 }, { "epoch": 2.7917502787068003, "grad_norm": 2.6970248977047278, "learning_rate": 2.5166548610607744e-07, "loss": 0.32, "step": 37563 }, { "epoch": 2.7918246005202527, "grad_norm": 2.552875517618677, "learning_rate": 2.514866420094531e-07, "loss": 0.36, "step": 37564 }, { "epoch": 2.791898922333705, "grad_norm": 2.8727854510396678, "learning_rate": 2.513078606732522e-07, "loss": 0.3173, "step": 37565 }, { "epoch": 2.791973244147157, "grad_norm": 2.544692443917445, "learning_rate": 2.511291420986239e-07, "loss": 0.268, "step": 37566 }, { "epoch": 2.792047565960609, "grad_norm": 2.2341274062885716, "learning_rate": 2.509504862867218e-07, "loss": 0.2474, "step": 37567 }, { "epoch": 2.7921218877740617, "grad_norm": 1.8576777282852077, "learning_rate": 2.507718932386949e-07, "loss": 0.2511, "step": 37568 }, { "epoch": 2.792196209587514, "grad_norm": 2.7907270931043255, "learning_rate": 2.5059336295569223e-07, "loss": 0.293, "step": 37569 }, { "epoch": 2.792270531400966, "grad_norm": 2.068859647783462, "learning_rate": 2.5041489543886524e-07, "loss": 0.1964, "step": 37570 }, { "epoch": 2.792344853214418, "grad_norm": 2.95761901405689, "learning_rate": 2.502364906893584e-07, "loss": 0.2877, "step": 37571 }, { "epoch": 2.7924191750278706, "grad_norm": 2.1287608970558103, "learning_rate": 2.5005814870832533e-07, "loss": 0.2816, "step": 37572 }, { "epoch": 2.792493496841323, "grad_norm": 2.315335714680883, "learning_rate": 2.498798694969107e-07, "loss": 0.2774, "step": 37573 }, { "epoch": 2.792567818654775, "grad_norm": 3.076759020618644, "learning_rate": 2.497016530562635e-07, "loss": 0.2461, "step": 37574 }, { "epoch": 2.7926421404682276, "grad_norm": 3.166187641605508, "learning_rate": 2.4952349938753283e-07, "loss": 0.4094, "step": 37575 }, { "epoch": 2.7927164622816796, "grad_norm": 2.5341792036637467, "learning_rate": 2.493454084918623e-07, "loss": 0.2998, "step": 37576 }, { "epoch": 2.792790784095132, "grad_norm": 1.941889133036313, "learning_rate": 2.491673803704009e-07, "loss": 0.2069, "step": 37577 }, { "epoch": 2.792865105908584, "grad_norm": 1.7235394009472913, "learning_rate": 2.489894150242933e-07, "loss": 0.2025, "step": 37578 }, { "epoch": 2.7929394277220365, "grad_norm": 2.3787055581558834, "learning_rate": 2.488115124546864e-07, "loss": 0.2366, "step": 37579 }, { "epoch": 2.7930137495354885, "grad_norm": 2.73843871343424, "learning_rate": 2.486336726627248e-07, "loss": 0.2456, "step": 37580 }, { "epoch": 2.793088071348941, "grad_norm": 2.6506162807038, "learning_rate": 2.484558956495542e-07, "loss": 0.3052, "step": 37581 }, { "epoch": 2.793162393162393, "grad_norm": 2.452442714016075, "learning_rate": 2.482781814163182e-07, "loss": 0.2563, "step": 37582 }, { "epoch": 2.7932367149758455, "grad_norm": 2.10165237984071, "learning_rate": 2.4810052996416036e-07, "loss": 0.2143, "step": 37583 }, { "epoch": 2.793311036789298, "grad_norm": 3.170081287810192, "learning_rate": 2.4792294129422636e-07, "loss": 0.3157, "step": 37584 }, { "epoch": 2.79338535860275, "grad_norm": 2.483871367556505, "learning_rate": 2.477454154076586e-07, "loss": 0.3635, "step": 37585 }, { "epoch": 2.793459680416202, "grad_norm": 2.5590312963547555, "learning_rate": 2.475679523055996e-07, "loss": 0.3124, "step": 37586 }, { "epoch": 2.7935340022296544, "grad_norm": 2.624472623100883, "learning_rate": 2.473905519891917e-07, "loss": 0.3633, "step": 37587 }, { "epoch": 2.793608324043107, "grad_norm": 6.995061096110879, "learning_rate": 2.472132144595774e-07, "loss": 0.3005, "step": 37588 }, { "epoch": 2.793682645856559, "grad_norm": 2.3625649652296508, "learning_rate": 2.4703593971789897e-07, "loss": 0.251, "step": 37589 }, { "epoch": 2.793756967670011, "grad_norm": 1.9853941262117936, "learning_rate": 2.4685872776529783e-07, "loss": 0.2314, "step": 37590 }, { "epoch": 2.7938312894834634, "grad_norm": 2.316768957843728, "learning_rate": 2.4668157860291195e-07, "loss": 0.2964, "step": 37591 }, { "epoch": 2.793905611296916, "grad_norm": 2.3441151877988555, "learning_rate": 2.465044922318871e-07, "loss": 0.2603, "step": 37592 }, { "epoch": 2.793979933110368, "grad_norm": 2.5627250072377437, "learning_rate": 2.463274686533579e-07, "loss": 0.2337, "step": 37593 }, { "epoch": 2.79405425492382, "grad_norm": 2.1479725042623823, "learning_rate": 2.4615050786846675e-07, "loss": 0.2497, "step": 37594 }, { "epoch": 2.7941285767372723, "grad_norm": 3.021804986553306, "learning_rate": 2.459736098783516e-07, "loss": 0.321, "step": 37595 }, { "epoch": 2.794202898550725, "grad_norm": 2.394714816110502, "learning_rate": 2.4579677468415385e-07, "loss": 0.2572, "step": 37596 }, { "epoch": 2.794277220364177, "grad_norm": 2.071085420865065, "learning_rate": 2.4562000228701034e-07, "loss": 0.2988, "step": 37597 }, { "epoch": 2.7943515421776293, "grad_norm": 2.4417875935445226, "learning_rate": 2.4544329268805787e-07, "loss": 0.2852, "step": 37598 }, { "epoch": 2.7944258639910813, "grad_norm": 2.3172978621614875, "learning_rate": 2.4526664588843674e-07, "loss": 0.2765, "step": 37599 }, { "epoch": 2.7945001858045337, "grad_norm": 3.1859994509476963, "learning_rate": 2.450900618892815e-07, "loss": 0.3171, "step": 37600 }, { "epoch": 2.7945745076179858, "grad_norm": 1.8377920495929037, "learning_rate": 2.449135406917302e-07, "loss": 0.2334, "step": 37601 }, { "epoch": 2.794648829431438, "grad_norm": 2.6106159905612056, "learning_rate": 2.4473708229692084e-07, "loss": 0.2244, "step": 37602 }, { "epoch": 2.7947231512448902, "grad_norm": 1.853572362099881, "learning_rate": 2.44560686705988e-07, "loss": 0.2472, "step": 37603 }, { "epoch": 2.7947974730583427, "grad_norm": 2.2514264654285716, "learning_rate": 2.4438435392006633e-07, "loss": 0.249, "step": 37604 }, { "epoch": 2.7948717948717947, "grad_norm": 2.46502331433975, "learning_rate": 2.442080839402916e-07, "loss": 0.2185, "step": 37605 }, { "epoch": 2.794946116685247, "grad_norm": 2.0576646707346993, "learning_rate": 2.440318767677996e-07, "loss": 0.2026, "step": 37606 }, { "epoch": 2.7950204384986996, "grad_norm": 2.9597085404602472, "learning_rate": 2.4385573240372496e-07, "loss": 0.262, "step": 37607 }, { "epoch": 2.7950947603121516, "grad_norm": 3.045573156187583, "learning_rate": 2.436796508492001e-07, "loss": 0.3395, "step": 37608 }, { "epoch": 2.7951690821256037, "grad_norm": 2.4669548459870776, "learning_rate": 2.4350363210535965e-07, "loss": 0.263, "step": 37609 }, { "epoch": 2.795243403939056, "grad_norm": 2.4551714519326535, "learning_rate": 2.4332767617333495e-07, "loss": 0.2163, "step": 37610 }, { "epoch": 2.7953177257525086, "grad_norm": 2.711857654918111, "learning_rate": 2.431517830542629e-07, "loss": 0.2994, "step": 37611 }, { "epoch": 2.7953920475659606, "grad_norm": 2.3464587188254185, "learning_rate": 2.4297595274927254e-07, "loss": 0.2717, "step": 37612 }, { "epoch": 2.7954663693794126, "grad_norm": 2.289006431027109, "learning_rate": 2.4280018525949634e-07, "loss": 0.2623, "step": 37613 }, { "epoch": 2.795540691192865, "grad_norm": 2.1566667198606626, "learning_rate": 2.426244805860667e-07, "loss": 0.2419, "step": 37614 }, { "epoch": 2.7956150130063175, "grad_norm": 2.0344022905510992, "learning_rate": 2.4244883873011384e-07, "loss": 0.2141, "step": 37615 }, { "epoch": 2.7956893348197696, "grad_norm": 2.069762864482731, "learning_rate": 2.4227325969277017e-07, "loss": 0.2548, "step": 37616 }, { "epoch": 2.795763656633222, "grad_norm": 2.5781162606249137, "learning_rate": 2.4209774347516367e-07, "loss": 0.2513, "step": 37617 }, { "epoch": 2.795837978446674, "grad_norm": 2.7532067806330995, "learning_rate": 2.419222900784268e-07, "loss": 0.3407, "step": 37618 }, { "epoch": 2.7959123002601265, "grad_norm": 2.872269278958615, "learning_rate": 2.417468995036876e-07, "loss": 0.3554, "step": 37619 }, { "epoch": 2.7959866220735785, "grad_norm": 2.209424878250032, "learning_rate": 2.4157157175207393e-07, "loss": 0.212, "step": 37620 }, { "epoch": 2.796060943887031, "grad_norm": 1.6264415578904177, "learning_rate": 2.4139630682471826e-07, "loss": 0.1579, "step": 37621 }, { "epoch": 2.796135265700483, "grad_norm": 2.045835043365562, "learning_rate": 2.412211047227464e-07, "loss": 0.2185, "step": 37622 }, { "epoch": 2.7962095875139354, "grad_norm": 2.4676979286411562, "learning_rate": 2.410459654472852e-07, "loss": 0.2596, "step": 37623 }, { "epoch": 2.7962839093273875, "grad_norm": 2.7045314278701302, "learning_rate": 2.4087088899946595e-07, "loss": 0.3288, "step": 37624 }, { "epoch": 2.79635823114084, "grad_norm": 1.9272960966786286, "learning_rate": 2.406958753804112e-07, "loss": 0.2303, "step": 37625 }, { "epoch": 2.796432552954292, "grad_norm": 2.793498277710697, "learning_rate": 2.405209245912532e-07, "loss": 0.28, "step": 37626 }, { "epoch": 2.7965068747677444, "grad_norm": 2.4018409152515106, "learning_rate": 2.403460366331123e-07, "loss": 0.244, "step": 37627 }, { "epoch": 2.7965811965811964, "grad_norm": 2.181325753364845, "learning_rate": 2.4017121150711864e-07, "loss": 0.2418, "step": 37628 }, { "epoch": 2.796655518394649, "grad_norm": 1.9573017612982737, "learning_rate": 2.399964492143958e-07, "loss": 0.2176, "step": 37629 }, { "epoch": 2.7967298402081013, "grad_norm": 3.0929927206594052, "learning_rate": 2.398217497560684e-07, "loss": 0.3106, "step": 37630 }, { "epoch": 2.7968041620215534, "grad_norm": 2.9801566543584017, "learning_rate": 2.396471131332634e-07, "loss": 0.2876, "step": 37631 }, { "epoch": 2.7968784838350054, "grad_norm": 2.461854899674464, "learning_rate": 2.394725393471031e-07, "loss": 0.242, "step": 37632 }, { "epoch": 2.796952805648458, "grad_norm": 2.777753648591156, "learning_rate": 2.392980283987134e-07, "loss": 0.2814, "step": 37633 }, { "epoch": 2.7970271274619103, "grad_norm": 2.6647629542015614, "learning_rate": 2.3912358028921554e-07, "loss": 0.3379, "step": 37634 }, { "epoch": 2.7971014492753623, "grad_norm": 2.606696249094758, "learning_rate": 2.389491950197331e-07, "loss": 0.3552, "step": 37635 }, { "epoch": 2.7971757710888143, "grad_norm": 2.5477500911282944, "learning_rate": 2.387748725913908e-07, "loss": 0.3056, "step": 37636 }, { "epoch": 2.797250092902267, "grad_norm": 2.4381289994630793, "learning_rate": 2.3860061300530865e-07, "loss": 0.2028, "step": 37637 }, { "epoch": 2.7973244147157192, "grad_norm": 2.319905070941262, "learning_rate": 2.384264162626104e-07, "loss": 0.254, "step": 37638 }, { "epoch": 2.7973987365291713, "grad_norm": 2.935849937591559, "learning_rate": 2.3825228236441512e-07, "loss": 0.2854, "step": 37639 }, { "epoch": 2.7974730583426237, "grad_norm": 1.9352748268010336, "learning_rate": 2.3807821131184407e-07, "loss": 0.2501, "step": 37640 }, { "epoch": 2.7975473801560757, "grad_norm": 2.0752633034332995, "learning_rate": 2.379042031060208e-07, "loss": 0.2506, "step": 37641 }, { "epoch": 2.797621701969528, "grad_norm": 2.1998908161223123, "learning_rate": 2.3773025774806223e-07, "loss": 0.2904, "step": 37642 }, { "epoch": 2.79769602378298, "grad_norm": 2.2530116619357354, "learning_rate": 2.375563752390919e-07, "loss": 0.2334, "step": 37643 }, { "epoch": 2.7977703455964327, "grad_norm": 2.2409068830466152, "learning_rate": 2.373825555802256e-07, "loss": 0.2475, "step": 37644 }, { "epoch": 2.7978446674098847, "grad_norm": 2.2326379391730944, "learning_rate": 2.3720879877258351e-07, "loss": 0.2302, "step": 37645 }, { "epoch": 2.797918989223337, "grad_norm": 2.799982793359256, "learning_rate": 2.3703510481728588e-07, "loss": 0.3587, "step": 37646 }, { "epoch": 2.797993311036789, "grad_norm": 2.475350162980363, "learning_rate": 2.3686147371544843e-07, "loss": 0.271, "step": 37647 }, { "epoch": 2.7980676328502416, "grad_norm": 2.1926750320060373, "learning_rate": 2.3668790546819032e-07, "loss": 0.306, "step": 37648 }, { "epoch": 2.7981419546636936, "grad_norm": 2.3794477790546784, "learning_rate": 2.365144000766295e-07, "loss": 0.2584, "step": 37649 }, { "epoch": 2.798216276477146, "grad_norm": 2.402743480862194, "learning_rate": 2.363409575418829e-07, "loss": 0.3013, "step": 37650 }, { "epoch": 2.798290598290598, "grad_norm": 3.541246437108189, "learning_rate": 2.3616757786506628e-07, "loss": 0.2684, "step": 37651 }, { "epoch": 2.7983649201040506, "grad_norm": 5.022401297788075, "learning_rate": 2.359942610472954e-07, "loss": 0.2827, "step": 37652 }, { "epoch": 2.798439241917503, "grad_norm": 2.4575736386192975, "learning_rate": 2.3582100708968713e-07, "loss": 0.293, "step": 37653 }, { "epoch": 2.798513563730955, "grad_norm": 2.5613732105596734, "learning_rate": 2.3564781599335618e-07, "loss": 0.2933, "step": 37654 }, { "epoch": 2.798587885544407, "grad_norm": 2.0590654761667317, "learning_rate": 2.354746877594194e-07, "loss": 0.2423, "step": 37655 }, { "epoch": 2.7986622073578595, "grad_norm": 2.2028390584755315, "learning_rate": 2.3530162238898924e-07, "loss": 0.2887, "step": 37656 }, { "epoch": 2.798736529171312, "grad_norm": 2.4932142156295893, "learning_rate": 2.3512861988318038e-07, "loss": 0.2815, "step": 37657 }, { "epoch": 2.798810850984764, "grad_norm": 1.9782483418306127, "learning_rate": 2.3495568024310632e-07, "loss": 0.2877, "step": 37658 }, { "epoch": 2.798885172798216, "grad_norm": 2.2515286949615376, "learning_rate": 2.347828034698818e-07, "loss": 0.2114, "step": 37659 }, { "epoch": 2.7989594946116685, "grad_norm": 2.8757216936799153, "learning_rate": 2.346099895646192e-07, "loss": 0.292, "step": 37660 }, { "epoch": 2.799033816425121, "grad_norm": 2.5719002617410576, "learning_rate": 2.3443723852842993e-07, "loss": 0.2478, "step": 37661 }, { "epoch": 2.799108138238573, "grad_norm": 2.3875528789266363, "learning_rate": 2.3426455036242745e-07, "loss": 0.2887, "step": 37662 }, { "epoch": 2.7991824600520254, "grad_norm": 2.5546064023671424, "learning_rate": 2.3409192506772316e-07, "loss": 0.3052, "step": 37663 }, { "epoch": 2.7992567818654774, "grad_norm": 2.2071322856144344, "learning_rate": 2.3391936264542726e-07, "loss": 0.221, "step": 37664 }, { "epoch": 2.79933110367893, "grad_norm": 2.9562779846842457, "learning_rate": 2.3374686309665216e-07, "loss": 0.3265, "step": 37665 }, { "epoch": 2.799405425492382, "grad_norm": 1.8920132529283693, "learning_rate": 2.3357442642250928e-07, "loss": 0.2181, "step": 37666 }, { "epoch": 2.7994797473058344, "grad_norm": 2.303918870400466, "learning_rate": 2.3340205262410543e-07, "loss": 0.2961, "step": 37667 }, { "epoch": 2.7995540691192864, "grad_norm": 2.2433585958655984, "learning_rate": 2.3322974170255418e-07, "loss": 0.2758, "step": 37668 }, { "epoch": 2.799628390932739, "grad_norm": 2.2006645832782366, "learning_rate": 2.3305749365896247e-07, "loss": 0.223, "step": 37669 }, { "epoch": 2.799702712746191, "grad_norm": 2.455420101315577, "learning_rate": 2.3288530849443935e-07, "loss": 0.2664, "step": 37670 }, { "epoch": 2.7997770345596433, "grad_norm": 2.387253670616399, "learning_rate": 2.3271318621009507e-07, "loss": 0.2656, "step": 37671 }, { "epoch": 2.7998513563730953, "grad_norm": 2.30671814145007, "learning_rate": 2.3254112680703655e-07, "loss": 0.2615, "step": 37672 }, { "epoch": 2.799925678186548, "grad_norm": 2.22349026880765, "learning_rate": 2.3236913028637065e-07, "loss": 0.2649, "step": 37673 }, { "epoch": 2.8, "grad_norm": 2.2455613182270553, "learning_rate": 2.3219719664920538e-07, "loss": 0.2669, "step": 37674 }, { "epoch": 2.8000743218134523, "grad_norm": 1.9861676828862722, "learning_rate": 2.3202532589664761e-07, "loss": 0.1791, "step": 37675 }, { "epoch": 2.8001486436269047, "grad_norm": 2.086737954966177, "learning_rate": 2.3185351802980426e-07, "loss": 0.1967, "step": 37676 }, { "epoch": 2.8002229654403568, "grad_norm": 2.4212914308778477, "learning_rate": 2.3168177304978113e-07, "loss": 0.2797, "step": 37677 }, { "epoch": 2.8002972872538088, "grad_norm": 2.0827776816301453, "learning_rate": 2.3151009095768395e-07, "loss": 0.2394, "step": 37678 }, { "epoch": 2.8003716090672612, "grad_norm": 2.0105764362292784, "learning_rate": 2.3133847175461744e-07, "loss": 0.2474, "step": 37679 }, { "epoch": 2.8004459308807137, "grad_norm": 3.1882543382253004, "learning_rate": 2.3116691544168735e-07, "loss": 0.3149, "step": 37680 }, { "epoch": 2.8005202526941657, "grad_norm": 2.4607163154939395, "learning_rate": 2.3099542201999835e-07, "loss": 0.2594, "step": 37681 }, { "epoch": 2.8005945745076177, "grad_norm": 2.593733781402754, "learning_rate": 2.3082399149065294e-07, "loss": 0.2469, "step": 37682 }, { "epoch": 2.80066889632107, "grad_norm": 3.1813597981745763, "learning_rate": 2.3065262385475794e-07, "loss": 0.3152, "step": 37683 }, { "epoch": 2.8007432181345226, "grad_norm": 2.02818220281113, "learning_rate": 2.3048131911341253e-07, "loss": 0.1893, "step": 37684 }, { "epoch": 2.8008175399479747, "grad_norm": 3.005295444976517, "learning_rate": 2.3031007726772138e-07, "loss": 0.2309, "step": 37685 }, { "epoch": 2.800891861761427, "grad_norm": 1.8730022836843117, "learning_rate": 2.3013889831878688e-07, "loss": 0.2849, "step": 37686 }, { "epoch": 2.800966183574879, "grad_norm": 2.3369965426087314, "learning_rate": 2.2996778226771267e-07, "loss": 0.2322, "step": 37687 }, { "epoch": 2.8010405053883316, "grad_norm": 2.4007014886136044, "learning_rate": 2.2979672911559781e-07, "loss": 0.2984, "step": 37688 }, { "epoch": 2.8011148272017836, "grad_norm": 2.118643049111798, "learning_rate": 2.2962573886354477e-07, "loss": 0.2578, "step": 37689 }, { "epoch": 2.801189149015236, "grad_norm": 5.937053268320603, "learning_rate": 2.2945481151265493e-07, "loss": 0.2615, "step": 37690 }, { "epoch": 2.801263470828688, "grad_norm": 2.060174027844142, "learning_rate": 2.2928394706402624e-07, "loss": 0.1945, "step": 37691 }, { "epoch": 2.8013377926421406, "grad_norm": 1.6540161919700487, "learning_rate": 2.291131455187623e-07, "loss": 0.1912, "step": 37692 }, { "epoch": 2.8014121144555926, "grad_norm": 2.819064941371533, "learning_rate": 2.289424068779611e-07, "loss": 0.2686, "step": 37693 }, { "epoch": 2.801486436269045, "grad_norm": 1.9577902060750545, "learning_rate": 2.2877173114272178e-07, "loss": 0.2658, "step": 37694 }, { "epoch": 2.801560758082497, "grad_norm": 2.4145063506190123, "learning_rate": 2.2860111831414234e-07, "loss": 0.2987, "step": 37695 }, { "epoch": 2.8016350798959495, "grad_norm": 2.5020646952559837, "learning_rate": 2.284305683933208e-07, "loss": 0.3114, "step": 37696 }, { "epoch": 2.8017094017094015, "grad_norm": 3.8145806851631896, "learning_rate": 2.2826008138135846e-07, "loss": 0.3845, "step": 37697 }, { "epoch": 2.801783723522854, "grad_norm": 2.2687331088912153, "learning_rate": 2.2808965727934895e-07, "loss": 0.2326, "step": 37698 }, { "epoch": 2.8018580453363064, "grad_norm": 2.0132588087266643, "learning_rate": 2.2791929608839136e-07, "loss": 0.2472, "step": 37699 }, { "epoch": 2.8019323671497585, "grad_norm": 3.9902094837493585, "learning_rate": 2.2774899780958371e-07, "loss": 0.3517, "step": 37700 }, { "epoch": 2.8020066889632105, "grad_norm": 2.291754075772416, "learning_rate": 2.2757876244401955e-07, "loss": 0.2871, "step": 37701 }, { "epoch": 2.802081010776663, "grad_norm": 3.167890289759119, "learning_rate": 2.274085899927969e-07, "loss": 0.3354, "step": 37702 }, { "epoch": 2.8021553325901154, "grad_norm": 2.3126863326613996, "learning_rate": 2.2723848045701047e-07, "loss": 0.2974, "step": 37703 }, { "epoch": 2.8022296544035674, "grad_norm": 3.094500797739213, "learning_rate": 2.2706843383775602e-07, "loss": 0.4076, "step": 37704 }, { "epoch": 2.8023039762170194, "grad_norm": 2.590381804482202, "learning_rate": 2.2689845013612822e-07, "loss": 0.3842, "step": 37705 }, { "epoch": 2.802378298030472, "grad_norm": 2.0279321484962245, "learning_rate": 2.2672852935321955e-07, "loss": 0.2511, "step": 37706 }, { "epoch": 2.8024526198439244, "grad_norm": 2.8741421862713836, "learning_rate": 2.265586714901269e-07, "loss": 0.2643, "step": 37707 }, { "epoch": 2.8025269416573764, "grad_norm": 2.915645220500972, "learning_rate": 2.2638887654794162e-07, "loss": 0.2497, "step": 37708 }, { "epoch": 2.802601263470829, "grad_norm": 2.5124315842589673, "learning_rate": 2.262191445277584e-07, "loss": 0.258, "step": 37709 }, { "epoch": 2.802675585284281, "grad_norm": 2.0451316983309638, "learning_rate": 2.2604947543066857e-07, "loss": 0.2444, "step": 37710 }, { "epoch": 2.8027499070977333, "grad_norm": 2.5463752062376686, "learning_rate": 2.2587986925776463e-07, "loss": 0.2964, "step": 37711 }, { "epoch": 2.8028242289111853, "grad_norm": 1.8543596837932166, "learning_rate": 2.257103260101401e-07, "loss": 0.2324, "step": 37712 }, { "epoch": 2.802898550724638, "grad_norm": 2.769134388474185, "learning_rate": 2.2554084568888413e-07, "loss": 0.2254, "step": 37713 }, { "epoch": 2.80297287253809, "grad_norm": 2.4913058008692572, "learning_rate": 2.253714282950903e-07, "loss": 0.2606, "step": 37714 }, { "epoch": 2.8030471943515423, "grad_norm": 2.8133289231234135, "learning_rate": 2.2520207382984771e-07, "loss": 0.3039, "step": 37715 }, { "epoch": 2.8031215161649943, "grad_norm": 3.067972150114486, "learning_rate": 2.2503278229424774e-07, "loss": 0.3486, "step": 37716 }, { "epoch": 2.8031958379784467, "grad_norm": 2.5151596088726036, "learning_rate": 2.248635536893784e-07, "loss": 0.2889, "step": 37717 }, { "epoch": 2.803270159791899, "grad_norm": 2.0850850171951505, "learning_rate": 2.2469438801632991e-07, "loss": 0.318, "step": 37718 }, { "epoch": 2.803344481605351, "grad_norm": 2.246070866008679, "learning_rate": 2.2452528527619254e-07, "loss": 0.272, "step": 37719 }, { "epoch": 2.8034188034188032, "grad_norm": 2.464045645016932, "learning_rate": 2.243562454700532e-07, "loss": 0.241, "step": 37720 }, { "epoch": 2.8034931252322557, "grad_norm": 2.4303843926045228, "learning_rate": 2.2418726859900207e-07, "loss": 0.2568, "step": 37721 }, { "epoch": 2.803567447045708, "grad_norm": 2.580755007370116, "learning_rate": 2.2401835466412614e-07, "loss": 0.3117, "step": 37722 }, { "epoch": 2.80364176885916, "grad_norm": 2.176778277736311, "learning_rate": 2.2384950366651226e-07, "loss": 0.2316, "step": 37723 }, { "epoch": 2.803716090672612, "grad_norm": 2.121493291107313, "learning_rate": 2.2368071560724846e-07, "loss": 0.2583, "step": 37724 }, { "epoch": 2.8037904124860646, "grad_norm": 2.2750102660182585, "learning_rate": 2.2351199048742168e-07, "loss": 0.2229, "step": 37725 }, { "epoch": 2.803864734299517, "grad_norm": 2.2831700942713904, "learning_rate": 2.2334332830811545e-07, "loss": 0.2681, "step": 37726 }, { "epoch": 2.803939056112969, "grad_norm": 2.0194546805821645, "learning_rate": 2.2317472907041894e-07, "loss": 0.2141, "step": 37727 }, { "epoch": 2.804013377926421, "grad_norm": 2.092679027522971, "learning_rate": 2.230061927754168e-07, "loss": 0.2231, "step": 37728 }, { "epoch": 2.8040876997398736, "grad_norm": 2.568722233003236, "learning_rate": 2.2283771942419263e-07, "loss": 0.3251, "step": 37729 }, { "epoch": 2.804162021553326, "grad_norm": 3.1325401816285647, "learning_rate": 2.2266930901783113e-07, "loss": 0.3041, "step": 37730 }, { "epoch": 2.804236343366778, "grad_norm": 2.0860517236114133, "learning_rate": 2.2250096155741807e-07, "loss": 0.2422, "step": 37731 }, { "epoch": 2.8043106651802305, "grad_norm": 2.552841719247252, "learning_rate": 2.2233267704403706e-07, "loss": 0.2646, "step": 37732 }, { "epoch": 2.8043849869936825, "grad_norm": 2.106057599726922, "learning_rate": 2.2216445547877054e-07, "loss": 0.2577, "step": 37733 }, { "epoch": 2.804459308807135, "grad_norm": 1.928301698044769, "learning_rate": 2.2199629686270206e-07, "loss": 0.2362, "step": 37734 }, { "epoch": 2.804533630620587, "grad_norm": 2.1029794800863817, "learning_rate": 2.2182820119691307e-07, "loss": 0.2235, "step": 37735 }, { "epoch": 2.8046079524340395, "grad_norm": 1.8942498769994878, "learning_rate": 2.2166016848248817e-07, "loss": 0.2463, "step": 37736 }, { "epoch": 2.8046822742474915, "grad_norm": 1.734823658842213, "learning_rate": 2.2149219872050654e-07, "loss": 0.1697, "step": 37737 }, { "epoch": 2.804756596060944, "grad_norm": 2.1292517212556317, "learning_rate": 2.2132429191205173e-07, "loss": 0.179, "step": 37738 }, { "epoch": 2.804830917874396, "grad_norm": 1.9966923988933516, "learning_rate": 2.21156448058204e-07, "loss": 0.2319, "step": 37739 }, { "epoch": 2.8049052396878484, "grad_norm": 2.490670623043453, "learning_rate": 2.209886671600425e-07, "loss": 0.3309, "step": 37740 }, { "epoch": 2.804979561501301, "grad_norm": 2.380606365445476, "learning_rate": 2.208209492186497e-07, "loss": 0.351, "step": 37741 }, { "epoch": 2.805053883314753, "grad_norm": 2.1924726986644725, "learning_rate": 2.206532942351025e-07, "loss": 0.2999, "step": 37742 }, { "epoch": 2.805128205128205, "grad_norm": 2.304147341756088, "learning_rate": 2.2048570221048338e-07, "loss": 0.2293, "step": 37743 }, { "epoch": 2.8052025269416574, "grad_norm": 1.8720330983941522, "learning_rate": 2.2031817314587034e-07, "loss": 0.1815, "step": 37744 }, { "epoch": 2.80527684875511, "grad_norm": 2.2040241755389505, "learning_rate": 2.201507070423403e-07, "loss": 0.2208, "step": 37745 }, { "epoch": 2.805351170568562, "grad_norm": 2.847850847573094, "learning_rate": 2.1998330390097243e-07, "loss": 0.3483, "step": 37746 }, { "epoch": 2.805425492382014, "grad_norm": 2.1704439464033447, "learning_rate": 2.1981596372284586e-07, "loss": 0.2974, "step": 37747 }, { "epoch": 2.8054998141954663, "grad_norm": 2.415573650856353, "learning_rate": 2.1964868650903526e-07, "loss": 0.268, "step": 37748 }, { "epoch": 2.805574136008919, "grad_norm": 2.283645866909212, "learning_rate": 2.1948147226061978e-07, "loss": 0.2179, "step": 37749 }, { "epoch": 2.805648457822371, "grad_norm": 2.4909218398675463, "learning_rate": 2.1931432097867523e-07, "loss": 0.2612, "step": 37750 }, { "epoch": 2.805722779635823, "grad_norm": 1.947563103706218, "learning_rate": 2.191472326642785e-07, "loss": 0.1962, "step": 37751 }, { "epoch": 2.8057971014492753, "grad_norm": 2.2422970563922267, "learning_rate": 2.1898020731850213e-07, "loss": 0.2338, "step": 37752 }, { "epoch": 2.8058714232627278, "grad_norm": 2.1381725335169803, "learning_rate": 2.1881324494242516e-07, "loss": 0.3114, "step": 37753 }, { "epoch": 2.8059457450761798, "grad_norm": 2.0150744733777577, "learning_rate": 2.1864634553712127e-07, "loss": 0.1712, "step": 37754 }, { "epoch": 2.8060200668896322, "grad_norm": 2.177960275499433, "learning_rate": 2.1847950910366288e-07, "loss": 0.2423, "step": 37755 }, { "epoch": 2.8060943887030843, "grad_norm": 2.141284364385951, "learning_rate": 2.1831273564312805e-07, "loss": 0.2296, "step": 37756 }, { "epoch": 2.8061687105165367, "grad_norm": 2.3170409051922856, "learning_rate": 2.1814602515658588e-07, "loss": 0.2839, "step": 37757 }, { "epoch": 2.8062430323299887, "grad_norm": 2.179080489800774, "learning_rate": 2.1797937764511444e-07, "loss": 0.2798, "step": 37758 }, { "epoch": 2.806317354143441, "grad_norm": 1.8034049964394907, "learning_rate": 2.1781279310978286e-07, "loss": 0.1767, "step": 37759 }, { "epoch": 2.806391675956893, "grad_norm": 2.674121158075708, "learning_rate": 2.1764627155166474e-07, "loss": 0.2717, "step": 37760 }, { "epoch": 2.8064659977703457, "grad_norm": 1.9673188330565041, "learning_rate": 2.1747981297183364e-07, "loss": 0.2153, "step": 37761 }, { "epoch": 2.8065403195837977, "grad_norm": 3.5309159282511255, "learning_rate": 2.173134173713598e-07, "loss": 0.246, "step": 37762 }, { "epoch": 2.80661464139725, "grad_norm": 2.69094891072368, "learning_rate": 2.1714708475131353e-07, "loss": 0.3235, "step": 37763 }, { "epoch": 2.8066889632107026, "grad_norm": 2.1617875224062333, "learning_rate": 2.1698081511276726e-07, "loss": 0.2579, "step": 37764 }, { "epoch": 2.8067632850241546, "grad_norm": 2.179970514782727, "learning_rate": 2.1681460845679124e-07, "loss": 0.1849, "step": 37765 }, { "epoch": 2.8068376068376066, "grad_norm": 2.7650253207727578, "learning_rate": 2.1664846478445467e-07, "loss": 0.3322, "step": 37766 }, { "epoch": 2.806911928651059, "grad_norm": 2.5448977439433023, "learning_rate": 2.1648238409682776e-07, "loss": 0.3293, "step": 37767 }, { "epoch": 2.8069862504645116, "grad_norm": 2.74392811486353, "learning_rate": 2.1631636639497966e-07, "loss": 0.304, "step": 37768 }, { "epoch": 2.8070605722779636, "grad_norm": 3.383192366637996, "learning_rate": 2.1615041167997952e-07, "loss": 0.3798, "step": 37769 }, { "epoch": 2.8071348940914156, "grad_norm": 2.772066333693339, "learning_rate": 2.1598451995289427e-07, "loss": 0.3166, "step": 37770 }, { "epoch": 2.807209215904868, "grad_norm": 2.403573906050233, "learning_rate": 2.1581869121479416e-07, "loss": 0.3056, "step": 37771 }, { "epoch": 2.8072835377183205, "grad_norm": 3.344635970326471, "learning_rate": 2.1565292546674387e-07, "loss": 0.3006, "step": 37772 }, { "epoch": 2.8073578595317725, "grad_norm": 2.2921327696540326, "learning_rate": 2.1548722270981481e-07, "loss": 0.2652, "step": 37773 }, { "epoch": 2.807432181345225, "grad_norm": 3.017289730599513, "learning_rate": 2.1532158294507055e-07, "loss": 0.3071, "step": 37774 }, { "epoch": 2.807506503158677, "grad_norm": 2.3113504640385587, "learning_rate": 2.15156006173578e-07, "loss": 0.2649, "step": 37775 }, { "epoch": 2.8075808249721295, "grad_norm": 3.436622394707253, "learning_rate": 2.14990492396403e-07, "loss": 0.338, "step": 37776 }, { "epoch": 2.8076551467855815, "grad_norm": 2.3731060060227063, "learning_rate": 2.1482504161461138e-07, "loss": 0.3295, "step": 37777 }, { "epoch": 2.807729468599034, "grad_norm": 1.862150802691386, "learning_rate": 2.1465965382927e-07, "loss": 0.2224, "step": 37778 }, { "epoch": 2.807803790412486, "grad_norm": 2.0889348834709627, "learning_rate": 2.144943290414403e-07, "loss": 0.2538, "step": 37779 }, { "epoch": 2.8078781122259384, "grad_norm": 2.7852370123162684, "learning_rate": 2.1432906725218915e-07, "loss": 0.2793, "step": 37780 }, { "epoch": 2.8079524340393904, "grad_norm": 2.044753906516024, "learning_rate": 2.1416386846257907e-07, "loss": 0.1882, "step": 37781 }, { "epoch": 2.808026755852843, "grad_norm": 2.371094815475763, "learning_rate": 2.1399873267367478e-07, "loss": 0.2311, "step": 37782 }, { "epoch": 2.808101077666295, "grad_norm": 2.9127926694987796, "learning_rate": 2.138336598865387e-07, "loss": 0.261, "step": 37783 }, { "epoch": 2.8081753994797474, "grad_norm": 2.069351268176384, "learning_rate": 2.1366865010223447e-07, "loss": 0.2865, "step": 37784 }, { "epoch": 2.8082497212931994, "grad_norm": 2.3979773988754762, "learning_rate": 2.1350370332182346e-07, "loss": 0.267, "step": 37785 }, { "epoch": 2.808324043106652, "grad_norm": 2.753886909926337, "learning_rate": 2.1333881954636815e-07, "loss": 0.337, "step": 37786 }, { "epoch": 2.8083983649201043, "grad_norm": 2.4444115896578498, "learning_rate": 2.1317399877692768e-07, "loss": 0.2527, "step": 37787 }, { "epoch": 2.8084726867335563, "grad_norm": 2.279481404006376, "learning_rate": 2.1300924101456677e-07, "loss": 0.2814, "step": 37788 }, { "epoch": 2.8085470085470083, "grad_norm": 2.1671069921338555, "learning_rate": 2.1284454626034457e-07, "loss": 0.2893, "step": 37789 }, { "epoch": 2.808621330360461, "grad_norm": 2.428359626109258, "learning_rate": 2.1267991451532132e-07, "loss": 0.3449, "step": 37790 }, { "epoch": 2.8086956521739133, "grad_norm": 2.7170202695483945, "learning_rate": 2.1251534578055621e-07, "loss": 0.3055, "step": 37791 }, { "epoch": 2.8087699739873653, "grad_norm": 1.7292480675174344, "learning_rate": 2.123508400571095e-07, "loss": 0.1935, "step": 37792 }, { "epoch": 2.8088442958008173, "grad_norm": 2.5140627158485547, "learning_rate": 2.1218639734604142e-07, "loss": 0.2631, "step": 37793 }, { "epoch": 2.8089186176142698, "grad_norm": 2.888729446037368, "learning_rate": 2.120220176484078e-07, "loss": 0.3307, "step": 37794 }, { "epoch": 2.808992939427722, "grad_norm": 2.345689280930029, "learning_rate": 2.1185770096527003e-07, "loss": 0.3077, "step": 37795 }, { "epoch": 2.8090672612411742, "grad_norm": 1.9426475210300473, "learning_rate": 2.1169344729768504e-07, "loss": 0.1932, "step": 37796 }, { "epoch": 2.8091415830546267, "grad_norm": 2.8491758977637014, "learning_rate": 2.1152925664670864e-07, "loss": 0.3384, "step": 37797 }, { "epoch": 2.8092159048680787, "grad_norm": 2.7579149610614255, "learning_rate": 2.1136512901340001e-07, "loss": 0.3731, "step": 37798 }, { "epoch": 2.809290226681531, "grad_norm": 2.18197249551324, "learning_rate": 2.112010643988127e-07, "loss": 0.2575, "step": 37799 }, { "epoch": 2.809364548494983, "grad_norm": 2.2203692002196838, "learning_rate": 2.1103706280400703e-07, "loss": 0.2523, "step": 37800 }, { "epoch": 2.8094388703084356, "grad_norm": 2.5285518658144626, "learning_rate": 2.1087312423003547e-07, "loss": 0.2286, "step": 37801 }, { "epoch": 2.8095131921218877, "grad_norm": 2.5319386132451336, "learning_rate": 2.1070924867795494e-07, "loss": 0.2526, "step": 37802 }, { "epoch": 2.80958751393534, "grad_norm": 2.392609792356479, "learning_rate": 2.1054543614882128e-07, "loss": 0.2451, "step": 37803 }, { "epoch": 2.809661835748792, "grad_norm": 2.070928635545473, "learning_rate": 2.1038168664368698e-07, "loss": 0.2868, "step": 37804 }, { "epoch": 2.8097361575622446, "grad_norm": 2.3723411799314906, "learning_rate": 2.1021800016360895e-07, "loss": 0.2117, "step": 37805 }, { "epoch": 2.8098104793756966, "grad_norm": 2.1616148646184072, "learning_rate": 2.1005437670963856e-07, "loss": 0.2373, "step": 37806 }, { "epoch": 2.809884801189149, "grad_norm": 1.9825227753725605, "learning_rate": 2.0989081628283058e-07, "loss": 0.1442, "step": 37807 }, { "epoch": 2.809959123002601, "grad_norm": 3.5042795319122417, "learning_rate": 2.0972731888423746e-07, "loss": 0.3906, "step": 37808 }, { "epoch": 2.8100334448160535, "grad_norm": 2.14980498714881, "learning_rate": 2.095638845149106e-07, "loss": 0.2252, "step": 37809 }, { "epoch": 2.810107766629506, "grad_norm": 3.615805493080005, "learning_rate": 2.094005131759047e-07, "loss": 0.3015, "step": 37810 }, { "epoch": 2.810182088442958, "grad_norm": 2.605040324765596, "learning_rate": 2.0923720486827003e-07, "loss": 0.2991, "step": 37811 }, { "epoch": 2.81025641025641, "grad_norm": 2.8357035148437753, "learning_rate": 2.0907395959305797e-07, "loss": 0.3091, "step": 37812 }, { "epoch": 2.8103307320698625, "grad_norm": 3.6406096478926604, "learning_rate": 2.0891077735132105e-07, "loss": 0.346, "step": 37813 }, { "epoch": 2.810405053883315, "grad_norm": 2.4861107081465956, "learning_rate": 2.0874765814410613e-07, "loss": 0.2972, "step": 37814 }, { "epoch": 2.810479375696767, "grad_norm": 2.4067040695082054, "learning_rate": 2.0858460197246799e-07, "loss": 0.3263, "step": 37815 }, { "epoch": 2.810553697510219, "grad_norm": 2.321427247276486, "learning_rate": 2.0842160883745244e-07, "loss": 0.267, "step": 37816 }, { "epoch": 2.8106280193236715, "grad_norm": 2.032972153990923, "learning_rate": 2.0825867874011196e-07, "loss": 0.259, "step": 37817 }, { "epoch": 2.810702341137124, "grad_norm": 2.950945037328721, "learning_rate": 2.080958116814935e-07, "loss": 0.2152, "step": 37818 }, { "epoch": 2.810776662950576, "grad_norm": 1.931553766353098, "learning_rate": 2.0793300766264625e-07, "loss": 0.2532, "step": 37819 }, { "epoch": 2.8108509847640284, "grad_norm": 2.5220928785579906, "learning_rate": 2.077702666846182e-07, "loss": 0.3229, "step": 37820 }, { "epoch": 2.8109253065774804, "grad_norm": 2.1267123749946952, "learning_rate": 2.0760758874845631e-07, "loss": 0.2136, "step": 37821 }, { "epoch": 2.810999628390933, "grad_norm": 2.282243194285593, "learning_rate": 2.0744497385520978e-07, "loss": 0.3254, "step": 37822 }, { "epoch": 2.811073950204385, "grad_norm": 2.7397735983050326, "learning_rate": 2.072824220059233e-07, "loss": 0.2866, "step": 37823 }, { "epoch": 2.8111482720178373, "grad_norm": 3.2806160666397615, "learning_rate": 2.0711993320164491e-07, "loss": 0.3557, "step": 37824 }, { "epoch": 2.8112225938312894, "grad_norm": 2.74774520757854, "learning_rate": 2.0695750744342157e-07, "loss": 0.3344, "step": 37825 }, { "epoch": 2.811296915644742, "grad_norm": 2.440519925349162, "learning_rate": 2.0679514473229578e-07, "loss": 0.2899, "step": 37826 }, { "epoch": 2.811371237458194, "grad_norm": 3.8424382765617944, "learning_rate": 2.0663284506931558e-07, "loss": 0.354, "step": 37827 }, { "epoch": 2.8114455592716463, "grad_norm": 2.9626598943178877, "learning_rate": 2.0647060845552458e-07, "loss": 0.3218, "step": 37828 }, { "epoch": 2.8115198810850983, "grad_norm": 2.4440546535067647, "learning_rate": 2.063084348919686e-07, "loss": 0.2797, "step": 37829 }, { "epoch": 2.8115942028985508, "grad_norm": 2.2407539018210763, "learning_rate": 2.0614632437969018e-07, "loss": 0.2573, "step": 37830 }, { "epoch": 2.811668524712003, "grad_norm": 2.672473297161182, "learning_rate": 2.0598427691973288e-07, "loss": 0.2696, "step": 37831 }, { "epoch": 2.8117428465254553, "grad_norm": 3.092190353457792, "learning_rate": 2.0582229251314146e-07, "loss": 0.3477, "step": 37832 }, { "epoch": 2.8118171683389077, "grad_norm": 2.3280114086507866, "learning_rate": 2.0566037116095728e-07, "loss": 0.2843, "step": 37833 }, { "epoch": 2.8118914901523597, "grad_norm": 4.150965622169878, "learning_rate": 2.0549851286422397e-07, "loss": 0.3738, "step": 37834 }, { "epoch": 2.8119658119658117, "grad_norm": 3.0285239991237933, "learning_rate": 2.0533671762398399e-07, "loss": 0.318, "step": 37835 }, { "epoch": 2.812040133779264, "grad_norm": 2.827178267403215, "learning_rate": 2.0517498544127545e-07, "loss": 0.2251, "step": 37836 }, { "epoch": 2.8121144555927167, "grad_norm": 2.3961587409718423, "learning_rate": 2.0501331631714416e-07, "loss": 0.2766, "step": 37837 }, { "epoch": 2.8121887774061687, "grad_norm": 3.1443208177288002, "learning_rate": 2.0485171025262818e-07, "loss": 0.3462, "step": 37838 }, { "epoch": 2.8122630992196207, "grad_norm": 2.9213262183751523, "learning_rate": 2.0469016724876888e-07, "loss": 0.3532, "step": 37839 }, { "epoch": 2.812337421033073, "grad_norm": 2.8957713911533456, "learning_rate": 2.0452868730660657e-07, "loss": 0.2159, "step": 37840 }, { "epoch": 2.8124117428465256, "grad_norm": 3.1792648020068413, "learning_rate": 2.0436727042718041e-07, "loss": 0.2593, "step": 37841 }, { "epoch": 2.8124860646599776, "grad_norm": 2.278745971519143, "learning_rate": 2.0420591661152844e-07, "loss": 0.2785, "step": 37842 }, { "epoch": 2.81256038647343, "grad_norm": 1.6292364529557997, "learning_rate": 2.0404462586069097e-07, "loss": 0.1793, "step": 37843 }, { "epoch": 2.812634708286882, "grad_norm": 2.2780570853657536, "learning_rate": 2.0388339817570601e-07, "loss": 0.2648, "step": 37844 }, { "epoch": 2.8127090301003346, "grad_norm": 1.9284426583895964, "learning_rate": 2.0372223355761055e-07, "loss": 0.1517, "step": 37845 }, { "epoch": 2.8127833519137866, "grad_norm": 2.126476681223389, "learning_rate": 2.0356113200744376e-07, "loss": 0.2726, "step": 37846 }, { "epoch": 2.812857673727239, "grad_norm": 2.3745985059111363, "learning_rate": 2.0340009352624257e-07, "loss": 0.3272, "step": 37847 }, { "epoch": 2.812931995540691, "grad_norm": 2.4530298219948117, "learning_rate": 2.032391181150428e-07, "loss": 0.2943, "step": 37848 }, { "epoch": 2.8130063173541435, "grad_norm": 2.253937756886015, "learning_rate": 2.0307820577488145e-07, "loss": 0.2574, "step": 37849 }, { "epoch": 2.8130806391675955, "grad_norm": 3.0292887350244295, "learning_rate": 2.0291735650679433e-07, "loss": 0.2884, "step": 37850 }, { "epoch": 2.813154960981048, "grad_norm": 1.8521167839497297, "learning_rate": 2.0275657031181617e-07, "loss": 0.2344, "step": 37851 }, { "epoch": 2.8132292827945005, "grad_norm": 2.1880322151227767, "learning_rate": 2.0259584719098502e-07, "loss": 0.2708, "step": 37852 }, { "epoch": 2.8133036046079525, "grad_norm": 2.3959341627570985, "learning_rate": 2.0243518714533118e-07, "loss": 0.2966, "step": 37853 }, { "epoch": 2.8133779264214045, "grad_norm": 2.762817517433439, "learning_rate": 2.0227459017589158e-07, "loss": 0.3243, "step": 37854 }, { "epoch": 2.813452248234857, "grad_norm": 2.143300784155314, "learning_rate": 2.0211405628369984e-07, "loss": 0.2423, "step": 37855 }, { "epoch": 2.8135265700483094, "grad_norm": 1.85562982068082, "learning_rate": 2.0195358546979072e-07, "loss": 0.1738, "step": 37856 }, { "epoch": 2.8136008918617614, "grad_norm": 3.0546843489246243, "learning_rate": 2.0179317773519447e-07, "loss": 0.3389, "step": 37857 }, { "epoch": 2.8136752136752134, "grad_norm": 2.291301461278485, "learning_rate": 2.0163283308094582e-07, "loss": 0.3153, "step": 37858 }, { "epoch": 2.813749535488666, "grad_norm": 3.567267060612391, "learning_rate": 2.0147255150807731e-07, "loss": 0.3636, "step": 37859 }, { "epoch": 2.8138238573021184, "grad_norm": 2.574092876976789, "learning_rate": 2.013123330176181e-07, "loss": 0.287, "step": 37860 }, { "epoch": 2.8138981791155704, "grad_norm": 2.6679287384943953, "learning_rate": 2.01152177610604e-07, "loss": 0.3107, "step": 37861 }, { "epoch": 2.8139725009290224, "grad_norm": 2.230362518006691, "learning_rate": 2.0099208528806314e-07, "loss": 0.2531, "step": 37862 }, { "epoch": 2.814046822742475, "grad_norm": 3.022621097838342, "learning_rate": 2.0083205605102685e-07, "loss": 0.3056, "step": 37863 }, { "epoch": 2.8141211445559273, "grad_norm": 2.3024263844692947, "learning_rate": 2.0067208990052545e-07, "loss": 0.2753, "step": 37864 }, { "epoch": 2.8141954663693793, "grad_norm": 2.068712285675418, "learning_rate": 2.00512186837587e-07, "loss": 0.2357, "step": 37865 }, { "epoch": 2.814269788182832, "grad_norm": 1.784298797028796, "learning_rate": 2.00352346863244e-07, "loss": 0.1898, "step": 37866 }, { "epoch": 2.814344109996284, "grad_norm": 2.8171184131326585, "learning_rate": 2.0019256997852343e-07, "loss": 0.2656, "step": 37867 }, { "epoch": 2.8144184318097363, "grad_norm": 2.155096217940166, "learning_rate": 2.0003285618445557e-07, "loss": 0.2823, "step": 37868 }, { "epoch": 2.8144927536231883, "grad_norm": 2.443996332227078, "learning_rate": 1.9987320548206734e-07, "loss": 0.3216, "step": 37869 }, { "epoch": 2.8145670754366408, "grad_norm": 2.440250602280555, "learning_rate": 1.9971361787238686e-07, "loss": 0.264, "step": 37870 }, { "epoch": 2.8146413972500928, "grad_norm": 2.962836689109454, "learning_rate": 1.9955409335644217e-07, "loss": 0.384, "step": 37871 }, { "epoch": 2.8147157190635452, "grad_norm": 2.813503877294375, "learning_rate": 1.993946319352591e-07, "loss": 0.2829, "step": 37872 }, { "epoch": 2.8147900408769972, "grad_norm": 2.7543831640761036, "learning_rate": 1.9923523360986464e-07, "loss": 0.2846, "step": 37873 }, { "epoch": 2.8148643626904497, "grad_norm": 2.1417876785727894, "learning_rate": 1.9907589838128572e-07, "loss": 0.2317, "step": 37874 }, { "epoch": 2.814938684503902, "grad_norm": 2.889228187424875, "learning_rate": 1.9891662625054708e-07, "loss": 0.2956, "step": 37875 }, { "epoch": 2.815013006317354, "grad_norm": 2.387764395408241, "learning_rate": 1.987574172186757e-07, "loss": 0.2867, "step": 37876 }, { "epoch": 2.815087328130806, "grad_norm": 2.8417163582667686, "learning_rate": 1.9859827128669407e-07, "loss": 0.2284, "step": 37877 }, { "epoch": 2.8151616499442587, "grad_norm": 2.477119301353913, "learning_rate": 1.9843918845562915e-07, "loss": 0.2721, "step": 37878 }, { "epoch": 2.815235971757711, "grad_norm": 2.4738584397635552, "learning_rate": 1.9828016872650458e-07, "loss": 0.2756, "step": 37879 }, { "epoch": 2.815310293571163, "grad_norm": 2.3040890016208113, "learning_rate": 1.9812121210034286e-07, "loss": 0.2771, "step": 37880 }, { "epoch": 2.815384615384615, "grad_norm": 2.5395984130080045, "learning_rate": 1.9796231857816873e-07, "loss": 0.316, "step": 37881 }, { "epoch": 2.8154589371980676, "grad_norm": 2.36586715260863, "learning_rate": 1.9780348816100358e-07, "loss": 0.2829, "step": 37882 }, { "epoch": 2.81553325901152, "grad_norm": 9.494254223780043, "learning_rate": 1.9764472084987107e-07, "loss": 0.2674, "step": 37883 }, { "epoch": 2.815607580824972, "grad_norm": 2.247151356563982, "learning_rate": 1.974860166457948e-07, "loss": 0.2852, "step": 37884 }, { "epoch": 2.815681902638424, "grad_norm": 2.7499516339884287, "learning_rate": 1.9732737554979287e-07, "loss": 0.2745, "step": 37885 }, { "epoch": 2.8157562244518766, "grad_norm": 2.1894279013218187, "learning_rate": 1.971687975628911e-07, "loss": 0.2229, "step": 37886 }, { "epoch": 2.815830546265329, "grad_norm": 3.1031626558694714, "learning_rate": 1.9701028268610643e-07, "loss": 0.379, "step": 37887 }, { "epoch": 2.815904868078781, "grad_norm": 2.1430398687986076, "learning_rate": 1.9685183092046145e-07, "loss": 0.2447, "step": 37888 }, { "epoch": 2.8159791898922335, "grad_norm": 5.032155680786112, "learning_rate": 1.9669344226697419e-07, "loss": 0.2962, "step": 37889 }, { "epoch": 2.8160535117056855, "grad_norm": 2.4641598675441427, "learning_rate": 1.9653511672666825e-07, "loss": 0.2927, "step": 37890 }, { "epoch": 2.816127833519138, "grad_norm": 3.005254856559911, "learning_rate": 1.9637685430055952e-07, "loss": 0.3259, "step": 37891 }, { "epoch": 2.81620215533259, "grad_norm": 1.9748641093770127, "learning_rate": 1.962186549896672e-07, "loss": 0.2279, "step": 37892 }, { "epoch": 2.8162764771460425, "grad_norm": 2.676455653310237, "learning_rate": 1.9606051879501154e-07, "loss": 0.2928, "step": 37893 }, { "epoch": 2.8163507989594945, "grad_norm": 2.463496917619501, "learning_rate": 1.9590244571760952e-07, "loss": 0.2603, "step": 37894 }, { "epoch": 2.816425120772947, "grad_norm": 2.1557142747932896, "learning_rate": 1.957444357584781e-07, "loss": 0.2397, "step": 37895 }, { "epoch": 2.816499442586399, "grad_norm": 1.8780682685339989, "learning_rate": 1.955864889186354e-07, "loss": 0.1677, "step": 37896 }, { "epoch": 2.8165737643998514, "grad_norm": 1.7973149318639425, "learning_rate": 1.9542860519909945e-07, "loss": 0.1742, "step": 37897 }, { "epoch": 2.816648086213304, "grad_norm": 3.0712970455811974, "learning_rate": 1.952707846008839e-07, "loss": 0.3347, "step": 37898 }, { "epoch": 2.816722408026756, "grad_norm": 2.0912513680463825, "learning_rate": 1.951130271250068e-07, "loss": 0.2529, "step": 37899 }, { "epoch": 2.816796729840208, "grad_norm": 2.31495828878493, "learning_rate": 1.949553327724829e-07, "loss": 0.2773, "step": 37900 }, { "epoch": 2.8168710516536604, "grad_norm": 2.6163102491122046, "learning_rate": 1.947977015443292e-07, "loss": 0.3565, "step": 37901 }, { "epoch": 2.816945373467113, "grad_norm": 1.963870267789416, "learning_rate": 1.9464013344155706e-07, "loss": 0.244, "step": 37902 }, { "epoch": 2.817019695280565, "grad_norm": 1.9712998829521438, "learning_rate": 1.9448262846518463e-07, "loss": 0.2733, "step": 37903 }, { "epoch": 2.817094017094017, "grad_norm": 2.8269634140199584, "learning_rate": 1.9432518661622325e-07, "loss": 0.3491, "step": 37904 }, { "epoch": 2.8171683389074693, "grad_norm": 2.3525304378212097, "learning_rate": 1.9416780789568878e-07, "loss": 0.2904, "step": 37905 }, { "epoch": 2.8172426607209218, "grad_norm": 1.8209031889962761, "learning_rate": 1.940104923045927e-07, "loss": 0.1596, "step": 37906 }, { "epoch": 2.817316982534374, "grad_norm": 1.8712844448935315, "learning_rate": 1.9385323984394745e-07, "loss": 0.1989, "step": 37907 }, { "epoch": 2.8173913043478263, "grad_norm": 2.7268182034242248, "learning_rate": 1.936960505147678e-07, "loss": 0.2217, "step": 37908 }, { "epoch": 2.8174656261612783, "grad_norm": 2.5094038570712405, "learning_rate": 1.9353892431806186e-07, "loss": 0.2967, "step": 37909 }, { "epoch": 2.8175399479747307, "grad_norm": 2.1450712243468963, "learning_rate": 1.9338186125484548e-07, "loss": 0.2541, "step": 37910 }, { "epoch": 2.8176142697881827, "grad_norm": 2.2734666483725867, "learning_rate": 1.9322486132612784e-07, "loss": 0.3298, "step": 37911 }, { "epoch": 2.817688591601635, "grad_norm": 2.573085777862397, "learning_rate": 1.9306792453291812e-07, "loss": 0.2304, "step": 37912 }, { "epoch": 2.8177629134150872, "grad_norm": 3.1012459097757654, "learning_rate": 1.9291105087622996e-07, "loss": 0.2902, "step": 37913 }, { "epoch": 2.8178372352285397, "grad_norm": 3.0169594677457083, "learning_rate": 1.9275424035706926e-07, "loss": 0.3991, "step": 37914 }, { "epoch": 2.8179115570419917, "grad_norm": 2.634187610161452, "learning_rate": 1.9259749297644957e-07, "loss": 0.2596, "step": 37915 }, { "epoch": 2.817985878855444, "grad_norm": 2.40151607056577, "learning_rate": 1.9244080873537906e-07, "loss": 0.2165, "step": 37916 }, { "epoch": 2.818060200668896, "grad_norm": 2.8385089941837713, "learning_rate": 1.9228418763486466e-07, "loss": 0.2815, "step": 37917 }, { "epoch": 2.8181345224823486, "grad_norm": 2.0292155654132444, "learning_rate": 1.9212762967591558e-07, "loss": 0.2265, "step": 37918 }, { "epoch": 2.8182088442958007, "grad_norm": 2.520096047447044, "learning_rate": 1.919711348595399e-07, "loss": 0.2589, "step": 37919 }, { "epoch": 2.818283166109253, "grad_norm": 2.57714089376689, "learning_rate": 1.9181470318674677e-07, "loss": 0.2515, "step": 37920 }, { "epoch": 2.8183574879227056, "grad_norm": 2.222719770718434, "learning_rate": 1.916583346585399e-07, "loss": 0.296, "step": 37921 }, { "epoch": 2.8184318097361576, "grad_norm": 2.0417742483215244, "learning_rate": 1.9150202927592844e-07, "loss": 0.2331, "step": 37922 }, { "epoch": 2.8185061315496096, "grad_norm": 2.157071169854753, "learning_rate": 1.9134578703991825e-07, "loss": 0.2624, "step": 37923 }, { "epoch": 2.818580453363062, "grad_norm": 2.396621167377477, "learning_rate": 1.911896079515141e-07, "loss": 0.2744, "step": 37924 }, { "epoch": 2.8186547751765145, "grad_norm": 2.1844274713960363, "learning_rate": 1.9103349201172293e-07, "loss": 0.285, "step": 37925 }, { "epoch": 2.8187290969899665, "grad_norm": 2.255791787143866, "learning_rate": 1.9087743922154846e-07, "loss": 0.2973, "step": 37926 }, { "epoch": 2.8188034188034186, "grad_norm": 2.3847767937010684, "learning_rate": 1.9072144958199757e-07, "loss": 0.2386, "step": 37927 }, { "epoch": 2.818877740616871, "grad_norm": 2.3692918137842445, "learning_rate": 1.9056552309407172e-07, "loss": 0.2474, "step": 37928 }, { "epoch": 2.8189520624303235, "grad_norm": 2.162040667671584, "learning_rate": 1.9040965975877678e-07, "loss": 0.2193, "step": 37929 }, { "epoch": 2.8190263842437755, "grad_norm": 3.446962797348067, "learning_rate": 1.9025385957711528e-07, "loss": 0.3133, "step": 37930 }, { "epoch": 2.819100706057228, "grad_norm": 2.53704966864044, "learning_rate": 1.9009812255008975e-07, "loss": 0.2688, "step": 37931 }, { "epoch": 2.81917502787068, "grad_norm": 2.4032927408609313, "learning_rate": 1.8994244867870493e-07, "loss": 0.3459, "step": 37932 }, { "epoch": 2.8192493496841324, "grad_norm": 5.170675422502264, "learning_rate": 1.8978683796396113e-07, "loss": 0.3649, "step": 37933 }, { "epoch": 2.8193236714975844, "grad_norm": 2.967842640939364, "learning_rate": 1.896312904068598e-07, "loss": 0.3579, "step": 37934 }, { "epoch": 2.819397993311037, "grad_norm": 3.1894597579415227, "learning_rate": 1.8947580600840342e-07, "loss": 0.2789, "step": 37935 }, { "epoch": 2.819472315124489, "grad_norm": 2.364551030678417, "learning_rate": 1.8932038476959235e-07, "loss": 0.2784, "step": 37936 }, { "epoch": 2.8195466369379414, "grad_norm": 1.9941991143438476, "learning_rate": 1.891650266914291e-07, "loss": 0.2113, "step": 37937 }, { "epoch": 2.8196209587513934, "grad_norm": 2.6921114027375186, "learning_rate": 1.8900973177491177e-07, "loss": 0.3047, "step": 37938 }, { "epoch": 2.819695280564846, "grad_norm": 2.585067608252478, "learning_rate": 1.8885450002103955e-07, "loss": 0.2279, "step": 37939 }, { "epoch": 2.819769602378298, "grad_norm": 2.3973472922811507, "learning_rate": 1.8869933143081387e-07, "loss": 0.2889, "step": 37940 }, { "epoch": 2.8198439241917503, "grad_norm": 2.455988575413064, "learning_rate": 1.885442260052328e-07, "loss": 0.3075, "step": 37941 }, { "epoch": 2.8199182460052024, "grad_norm": 2.1671473725725634, "learning_rate": 1.883891837452956e-07, "loss": 0.1989, "step": 37942 }, { "epoch": 2.819992567818655, "grad_norm": 2.293018682042774, "learning_rate": 1.8823420465199915e-07, "loss": 0.2826, "step": 37943 }, { "epoch": 2.8200668896321073, "grad_norm": 2.4569788879380283, "learning_rate": 1.8807928872634162e-07, "loss": 0.2564, "step": 37944 }, { "epoch": 2.8201412114455593, "grad_norm": 3.390427207084533, "learning_rate": 1.8792443596932108e-07, "loss": 0.3365, "step": 37945 }, { "epoch": 2.8202155332590113, "grad_norm": 2.7065384668500525, "learning_rate": 1.877696463819334e-07, "loss": 0.2343, "step": 37946 }, { "epoch": 2.8202898550724638, "grad_norm": 2.5705749230134725, "learning_rate": 1.8761491996517557e-07, "loss": 0.2877, "step": 37947 }, { "epoch": 2.8203641768859162, "grad_norm": 1.9749545803717268, "learning_rate": 1.8746025672004343e-07, "loss": 0.2307, "step": 37948 }, { "epoch": 2.8204384986993682, "grad_norm": 1.9546574941782728, "learning_rate": 1.8730565664753397e-07, "loss": 0.1996, "step": 37949 }, { "epoch": 2.8205128205128203, "grad_norm": 2.671633641146292, "learning_rate": 1.8715111974864087e-07, "loss": 0.2221, "step": 37950 }, { "epoch": 2.8205871423262727, "grad_norm": 2.3764917328717763, "learning_rate": 1.8699664602435996e-07, "loss": 0.2532, "step": 37951 }, { "epoch": 2.820661464139725, "grad_norm": 2.352602765459729, "learning_rate": 1.8684223547568492e-07, "loss": 0.2276, "step": 37952 }, { "epoch": 2.820735785953177, "grad_norm": 2.275988091609017, "learning_rate": 1.866878881036116e-07, "loss": 0.2688, "step": 37953 }, { "epoch": 2.8208101077666297, "grad_norm": 1.902014091815625, "learning_rate": 1.8653360390913254e-07, "loss": 0.1864, "step": 37954 }, { "epoch": 2.8208844295800817, "grad_norm": 2.677240430326577, "learning_rate": 1.863793828932403e-07, "loss": 0.2569, "step": 37955 }, { "epoch": 2.820958751393534, "grad_norm": 2.09416045986945, "learning_rate": 1.8622522505692743e-07, "loss": 0.2267, "step": 37956 }, { "epoch": 2.821033073206986, "grad_norm": 2.2283984690705307, "learning_rate": 1.8607113040118863e-07, "loss": 0.2143, "step": 37957 }, { "epoch": 2.8211073950204386, "grad_norm": 2.644161577875589, "learning_rate": 1.8591709892701427e-07, "loss": 0.2875, "step": 37958 }, { "epoch": 2.8211817168338906, "grad_norm": 3.2169205553662894, "learning_rate": 1.8576313063539575e-07, "loss": 0.3109, "step": 37959 }, { "epoch": 2.821256038647343, "grad_norm": 3.103955048872206, "learning_rate": 1.8560922552732674e-07, "loss": 0.2718, "step": 37960 }, { "epoch": 2.821330360460795, "grad_norm": 2.376046684492993, "learning_rate": 1.8545538360379423e-07, "loss": 0.3316, "step": 37961 }, { "epoch": 2.8214046822742476, "grad_norm": 2.6467008667207366, "learning_rate": 1.8530160486579186e-07, "loss": 0.2926, "step": 37962 }, { "epoch": 2.8214790040876996, "grad_norm": 2.292675090169414, "learning_rate": 1.8514788931430773e-07, "loss": 0.2986, "step": 37963 }, { "epoch": 2.821553325901152, "grad_norm": 2.283835851442471, "learning_rate": 1.8499423695033325e-07, "loss": 0.246, "step": 37964 }, { "epoch": 2.821627647714604, "grad_norm": 3.057778659036742, "learning_rate": 1.8484064777485655e-07, "loss": 0.3036, "step": 37965 }, { "epoch": 2.8217019695280565, "grad_norm": 2.0462553159974277, "learning_rate": 1.8468712178886683e-07, "loss": 0.1972, "step": 37966 }, { "epoch": 2.821776291341509, "grad_norm": 2.634607912830456, "learning_rate": 1.8453365899335107e-07, "loss": 0.3012, "step": 37967 }, { "epoch": 2.821850613154961, "grad_norm": 2.4744735157697266, "learning_rate": 1.843802593892985e-07, "loss": 0.2083, "step": 37968 }, { "epoch": 2.821924934968413, "grad_norm": 3.029354781587507, "learning_rate": 1.8422692297769718e-07, "loss": 0.3825, "step": 37969 }, { "epoch": 2.8219992567818655, "grad_norm": 2.0858631037794972, "learning_rate": 1.840736497595319e-07, "loss": 0.2515, "step": 37970 }, { "epoch": 2.822073578595318, "grad_norm": 2.2450532508338257, "learning_rate": 1.8392043973579298e-07, "loss": 0.2816, "step": 37971 }, { "epoch": 2.82214790040877, "grad_norm": 2.3836253420595326, "learning_rate": 1.837672929074641e-07, "loss": 0.2645, "step": 37972 }, { "epoch": 2.822222222222222, "grad_norm": 2.517121872608186, "learning_rate": 1.836142092755322e-07, "loss": 0.2897, "step": 37973 }, { "epoch": 2.8222965440356744, "grad_norm": 2.822071694508767, "learning_rate": 1.8346118884098317e-07, "loss": 0.3641, "step": 37974 }, { "epoch": 2.822370865849127, "grad_norm": 2.25274041809627, "learning_rate": 1.8330823160480183e-07, "loss": 0.2616, "step": 37975 }, { "epoch": 2.822445187662579, "grad_norm": 2.1955712222079007, "learning_rate": 1.8315533756797178e-07, "loss": 0.2558, "step": 37976 }, { "epoch": 2.8225195094760314, "grad_norm": 2.3878853450191633, "learning_rate": 1.8300250673148002e-07, "loss": 0.2444, "step": 37977 }, { "epoch": 2.8225938312894834, "grad_norm": 2.6439494601434466, "learning_rate": 1.8284973909630688e-07, "loss": 0.3004, "step": 37978 }, { "epoch": 2.822668153102936, "grad_norm": 2.383901117122304, "learning_rate": 1.8269703466343824e-07, "loss": 0.2798, "step": 37979 }, { "epoch": 2.822742474916388, "grad_norm": 2.224140180286365, "learning_rate": 1.8254439343385554e-07, "loss": 0.2907, "step": 37980 }, { "epoch": 2.8228167967298403, "grad_norm": 1.9442960305788843, "learning_rate": 1.8239181540854466e-07, "loss": 0.2434, "step": 37981 }, { "epoch": 2.8228911185432923, "grad_norm": 2.5444165515332147, "learning_rate": 1.8223930058848593e-07, "loss": 0.3091, "step": 37982 }, { "epoch": 2.822965440356745, "grad_norm": 1.7167685889078206, "learning_rate": 1.8208684897465968e-07, "loss": 0.1787, "step": 37983 }, { "epoch": 2.823039762170197, "grad_norm": 2.705752211703667, "learning_rate": 1.8193446056804954e-07, "loss": 0.3375, "step": 37984 }, { "epoch": 2.8231140839836493, "grad_norm": 2.3503324309523577, "learning_rate": 1.8178213536963585e-07, "loss": 0.2626, "step": 37985 }, { "epoch": 2.8231884057971013, "grad_norm": 2.3776696652899543, "learning_rate": 1.8162987338039896e-07, "loss": 0.3029, "step": 37986 }, { "epoch": 2.8232627276105537, "grad_norm": 2.4620993761499275, "learning_rate": 1.8147767460132027e-07, "loss": 0.2835, "step": 37987 }, { "epoch": 2.8233370494240058, "grad_norm": 2.39481426971428, "learning_rate": 1.8132553903337901e-07, "loss": 0.226, "step": 37988 }, { "epoch": 2.8234113712374582, "grad_norm": 3.6708668856929636, "learning_rate": 1.811734666775544e-07, "loss": 0.3592, "step": 37989 }, { "epoch": 2.8234856930509107, "grad_norm": 2.5535836372131886, "learning_rate": 1.8102145753482458e-07, "loss": 0.2066, "step": 37990 }, { "epoch": 2.8235600148643627, "grad_norm": 2.54924685377084, "learning_rate": 1.8086951160616984e-07, "loss": 0.2649, "step": 37991 }, { "epoch": 2.8236343366778147, "grad_norm": 1.8042655516820003, "learning_rate": 1.8071762889256715e-07, "loss": 0.2556, "step": 37992 }, { "epoch": 2.823708658491267, "grad_norm": 2.388342843017328, "learning_rate": 1.805658093949958e-07, "loss": 0.2233, "step": 37993 }, { "epoch": 2.8237829803047196, "grad_norm": 2.129442215718227, "learning_rate": 1.804140531144316e-07, "loss": 0.2517, "step": 37994 }, { "epoch": 2.8238573021181717, "grad_norm": 2.4670753686527354, "learning_rate": 1.802623600518516e-07, "loss": 0.3035, "step": 37995 }, { "epoch": 2.8239316239316237, "grad_norm": 3.5294794713325732, "learning_rate": 1.80110730208235e-07, "loss": 0.2919, "step": 37996 }, { "epoch": 2.824005945745076, "grad_norm": 3.117081816297329, "learning_rate": 1.7995916358455434e-07, "loss": 0.2404, "step": 37997 }, { "epoch": 2.8240802675585286, "grad_norm": 3.1166993917712547, "learning_rate": 1.7980766018178775e-07, "loss": 0.2633, "step": 37998 }, { "epoch": 2.8241545893719806, "grad_norm": 2.550141227027702, "learning_rate": 1.7965622000090999e-07, "loss": 0.2773, "step": 37999 }, { "epoch": 2.824228911185433, "grad_norm": 2.6318119541556486, "learning_rate": 1.7950484304289584e-07, "loss": 0.3056, "step": 38000 }, { "epoch": 2.824303232998885, "grad_norm": 1.9200399474061458, "learning_rate": 1.7935352930872007e-07, "loss": 0.213, "step": 38001 }, { "epoch": 2.8243775548123375, "grad_norm": 2.6159608676241417, "learning_rate": 1.7920227879935526e-07, "loss": 0.3109, "step": 38002 }, { "epoch": 2.8244518766257896, "grad_norm": 2.304380628827897, "learning_rate": 1.7905109151577837e-07, "loss": 0.2268, "step": 38003 }, { "epoch": 2.824526198439242, "grad_norm": 2.334737890094576, "learning_rate": 1.7889996745896087e-07, "loss": 0.3215, "step": 38004 }, { "epoch": 2.824600520252694, "grad_norm": 2.3851348895929227, "learning_rate": 1.787489066298742e-07, "loss": 0.2717, "step": 38005 }, { "epoch": 2.8246748420661465, "grad_norm": 2.6893027211167686, "learning_rate": 1.7859790902949425e-07, "loss": 0.3237, "step": 38006 }, { "epoch": 2.8247491638795985, "grad_norm": 2.025398494337072, "learning_rate": 1.7844697465879025e-07, "loss": 0.1495, "step": 38007 }, { "epoch": 2.824823485693051, "grad_norm": 2.537399827341728, "learning_rate": 1.7829610351873584e-07, "loss": 0.2807, "step": 38008 }, { "epoch": 2.8248978075065034, "grad_norm": 2.1918637316217664, "learning_rate": 1.7814529561030137e-07, "loss": 0.2266, "step": 38009 }, { "epoch": 2.8249721293199554, "grad_norm": 2.515979304625574, "learning_rate": 1.7799455093445827e-07, "loss": 0.2664, "step": 38010 }, { "epoch": 2.8250464511334075, "grad_norm": 2.109617284313859, "learning_rate": 1.7784386949217582e-07, "loss": 0.2735, "step": 38011 }, { "epoch": 2.82512077294686, "grad_norm": 2.4854638289576956, "learning_rate": 1.7769325128442428e-07, "loss": 0.2542, "step": 38012 }, { "epoch": 2.8251950947603124, "grad_norm": 2.9800200308319442, "learning_rate": 1.7754269631217513e-07, "loss": 0.3588, "step": 38013 }, { "epoch": 2.8252694165737644, "grad_norm": 1.6486287331260618, "learning_rate": 1.773922045763954e-07, "loss": 0.1435, "step": 38014 }, { "epoch": 2.8253437383872164, "grad_norm": 2.330514078218213, "learning_rate": 1.7724177607805647e-07, "loss": 0.2579, "step": 38015 }, { "epoch": 2.825418060200669, "grad_norm": 1.9148230622851194, "learning_rate": 1.770914108181243e-07, "loss": 0.2389, "step": 38016 }, { "epoch": 2.8254923820141213, "grad_norm": 1.8340259399930114, "learning_rate": 1.769411087975681e-07, "loss": 0.2147, "step": 38017 }, { "epoch": 2.8255667038275734, "grad_norm": 2.211684022656685, "learning_rate": 1.7679087001735596e-07, "loss": 0.2541, "step": 38018 }, { "epoch": 2.8256410256410254, "grad_norm": 2.291649387391349, "learning_rate": 1.766406944784538e-07, "loss": 0.2431, "step": 38019 }, { "epoch": 2.825715347454478, "grad_norm": 2.1027531333796476, "learning_rate": 1.7649058218182857e-07, "loss": 0.2629, "step": 38020 }, { "epoch": 2.8257896692679303, "grad_norm": 1.5471245131851172, "learning_rate": 1.7634053312844956e-07, "loss": 0.1603, "step": 38021 }, { "epoch": 2.8258639910813823, "grad_norm": 2.542998914185415, "learning_rate": 1.761905473192782e-07, "loss": 0.2602, "step": 38022 }, { "epoch": 2.8259383128948348, "grad_norm": 2.8197812845812837, "learning_rate": 1.760406247552837e-07, "loss": 0.2573, "step": 38023 }, { "epoch": 2.826012634708287, "grad_norm": 2.613560267771994, "learning_rate": 1.7589076543742866e-07, "loss": 0.3412, "step": 38024 }, { "epoch": 2.8260869565217392, "grad_norm": 2.1151887848139848, "learning_rate": 1.7574096936668007e-07, "loss": 0.3105, "step": 38025 }, { "epoch": 2.8261612783351913, "grad_norm": 2.3589829348248204, "learning_rate": 1.7559123654400046e-07, "loss": 0.2629, "step": 38026 }, { "epoch": 2.8262356001486437, "grad_norm": 2.3933097687370837, "learning_rate": 1.7544156697035576e-07, "loss": 0.302, "step": 38027 }, { "epoch": 2.8263099219620957, "grad_norm": 2.1960985221968667, "learning_rate": 1.752919606467074e-07, "loss": 0.2924, "step": 38028 }, { "epoch": 2.826384243775548, "grad_norm": 2.4894011642387395, "learning_rate": 1.7514241757402017e-07, "loss": 0.2659, "step": 38029 }, { "epoch": 2.826458565589, "grad_norm": 2.1955256062157447, "learning_rate": 1.7499293775325665e-07, "loss": 0.2132, "step": 38030 }, { "epoch": 2.8265328874024527, "grad_norm": 2.8247035894847277, "learning_rate": 1.748435211853794e-07, "loss": 0.2989, "step": 38031 }, { "epoch": 2.826607209215905, "grad_norm": 2.7946443982840194, "learning_rate": 1.7469416787134762e-07, "loss": 0.2935, "step": 38032 }, { "epoch": 2.826681531029357, "grad_norm": 2.098848317888324, "learning_rate": 1.7454487781212837e-07, "loss": 0.2656, "step": 38033 }, { "epoch": 2.826755852842809, "grad_norm": 2.4672804579771146, "learning_rate": 1.743956510086764e-07, "loss": 0.1993, "step": 38034 }, { "epoch": 2.8268301746562616, "grad_norm": 2.190728496562228, "learning_rate": 1.742464874619565e-07, "loss": 0.2306, "step": 38035 }, { "epoch": 2.826904496469714, "grad_norm": 2.1896244675642547, "learning_rate": 1.740973871729279e-07, "loss": 0.2481, "step": 38036 }, { "epoch": 2.826978818283166, "grad_norm": 2.7852020633749333, "learning_rate": 1.7394835014254984e-07, "loss": 0.3306, "step": 38037 }, { "epoch": 2.827053140096618, "grad_norm": 2.360008122101865, "learning_rate": 1.737993763717838e-07, "loss": 0.262, "step": 38038 }, { "epoch": 2.8271274619100706, "grad_norm": 1.9743670397345277, "learning_rate": 1.7365046586158562e-07, "loss": 0.2536, "step": 38039 }, { "epoch": 2.827201783723523, "grad_norm": 2.53572336399736, "learning_rate": 1.735016186129168e-07, "loss": 0.2522, "step": 38040 }, { "epoch": 2.827276105536975, "grad_norm": 1.78886629946255, "learning_rate": 1.7335283462673546e-07, "loss": 0.2216, "step": 38041 }, { "epoch": 2.827350427350427, "grad_norm": 2.261381375616967, "learning_rate": 1.732041139039975e-07, "loss": 0.3114, "step": 38042 }, { "epoch": 2.8274247491638795, "grad_norm": 2.569195578430157, "learning_rate": 1.7305545644566212e-07, "loss": 0.2613, "step": 38043 }, { "epoch": 2.827499070977332, "grad_norm": 2.5416191174058156, "learning_rate": 1.7290686225268637e-07, "loss": 0.307, "step": 38044 }, { "epoch": 2.827573392790784, "grad_norm": 2.781285842301214, "learning_rate": 1.7275833132602616e-07, "loss": 0.2801, "step": 38045 }, { "epoch": 2.8276477146042365, "grad_norm": 2.3979665025296577, "learning_rate": 1.7260986366663734e-07, "loss": 0.2531, "step": 38046 }, { "epoch": 2.8277220364176885, "grad_norm": 2.3875873058587773, "learning_rate": 1.7246145927547697e-07, "loss": 0.3123, "step": 38047 }, { "epoch": 2.827796358231141, "grad_norm": 3.1262514192091677, "learning_rate": 1.7231311815349983e-07, "loss": 0.2996, "step": 38048 }, { "epoch": 2.827870680044593, "grad_norm": 2.3533694502785893, "learning_rate": 1.721648403016596e-07, "loss": 0.2122, "step": 38049 }, { "epoch": 2.8279450018580454, "grad_norm": 2.5895191471797667, "learning_rate": 1.7201662572091328e-07, "loss": 0.3256, "step": 38050 }, { "epoch": 2.8280193236714974, "grad_norm": 2.3633731636869943, "learning_rate": 1.7186847441221344e-07, "loss": 0.2912, "step": 38051 }, { "epoch": 2.82809364548495, "grad_norm": 2.2373745971702634, "learning_rate": 1.7172038637651488e-07, "loss": 0.2811, "step": 38052 }, { "epoch": 2.828167967298402, "grad_norm": 2.1626651586134846, "learning_rate": 1.715723616147702e-07, "loss": 0.2387, "step": 38053 }, { "epoch": 2.8282422891118544, "grad_norm": 2.598866528204919, "learning_rate": 1.7142440012793303e-07, "loss": 0.2364, "step": 38054 }, { "epoch": 2.828316610925307, "grad_norm": 2.52291803339756, "learning_rate": 1.71276501916956e-07, "loss": 0.2497, "step": 38055 }, { "epoch": 2.828390932738759, "grad_norm": 1.966201543590003, "learning_rate": 1.7112866698279052e-07, "loss": 0.2315, "step": 38056 }, { "epoch": 2.828465254552211, "grad_norm": 2.980706535831049, "learning_rate": 1.7098089532638806e-07, "loss": 0.323, "step": 38057 }, { "epoch": 2.8285395763656633, "grad_norm": 1.7962310157935275, "learning_rate": 1.7083318694870122e-07, "loss": 0.2187, "step": 38058 }, { "epoch": 2.828613898179116, "grad_norm": 2.0667291369754235, "learning_rate": 1.7068554185068032e-07, "loss": 0.2127, "step": 38059 }, { "epoch": 2.828688219992568, "grad_norm": 2.704218060301259, "learning_rate": 1.7053796003327572e-07, "loss": 0.3485, "step": 38060 }, { "epoch": 2.82876254180602, "grad_norm": 3.421596400540906, "learning_rate": 1.7039044149743666e-07, "loss": 0.3534, "step": 38061 }, { "epoch": 2.8288368636194723, "grad_norm": 2.7679463714182284, "learning_rate": 1.7024298624411572e-07, "loss": 0.2454, "step": 38062 }, { "epoch": 2.8289111854329247, "grad_norm": 2.377120267783196, "learning_rate": 1.7009559427425992e-07, "loss": 0.2882, "step": 38063 }, { "epoch": 2.8289855072463768, "grad_norm": 2.6881469845998356, "learning_rate": 1.699482655888185e-07, "loss": 0.2915, "step": 38064 }, { "epoch": 2.8290598290598292, "grad_norm": 2.144821435520232, "learning_rate": 1.6980100018873956e-07, "loss": 0.2198, "step": 38065 }, { "epoch": 2.8291341508732812, "grad_norm": 2.2628284943027905, "learning_rate": 1.6965379807497128e-07, "loss": 0.1924, "step": 38066 }, { "epoch": 2.8292084726867337, "grad_norm": 2.1822396313463432, "learning_rate": 1.6950665924846398e-07, "loss": 0.3155, "step": 38067 }, { "epoch": 2.8292827945001857, "grad_norm": 2.814181107611922, "learning_rate": 1.6935958371016137e-07, "loss": 0.2816, "step": 38068 }, { "epoch": 2.829357116313638, "grad_norm": 2.590273130388877, "learning_rate": 1.6921257146101155e-07, "loss": 0.3328, "step": 38069 }, { "epoch": 2.82943143812709, "grad_norm": 1.8968316015437128, "learning_rate": 1.6906562250196156e-07, "loss": 0.16, "step": 38070 }, { "epoch": 2.8295057599405427, "grad_norm": 2.9804413154005287, "learning_rate": 1.6891873683395621e-07, "loss": 0.3575, "step": 38071 }, { "epoch": 2.8295800817539947, "grad_norm": 2.8743692512919674, "learning_rate": 1.687719144579425e-07, "loss": 0.3039, "step": 38072 }, { "epoch": 2.829654403567447, "grad_norm": 2.195211409075315, "learning_rate": 1.6862515537486523e-07, "loss": 0.2893, "step": 38073 }, { "epoch": 2.829728725380899, "grad_norm": 2.6185502485539125, "learning_rate": 1.684784595856692e-07, "loss": 0.2988, "step": 38074 }, { "epoch": 2.8298030471943516, "grad_norm": 2.748206833066698, "learning_rate": 1.6833182709129814e-07, "loss": 0.2999, "step": 38075 }, { "epoch": 2.8298773690078036, "grad_norm": 2.159788747212579, "learning_rate": 1.6818525789269678e-07, "loss": 0.2304, "step": 38076 }, { "epoch": 2.829951690821256, "grad_norm": 3.054359695190261, "learning_rate": 1.6803875199080889e-07, "loss": 0.3286, "step": 38077 }, { "epoch": 2.8300260126347085, "grad_norm": 1.7976723303012543, "learning_rate": 1.6789230938657807e-07, "loss": 0.2183, "step": 38078 }, { "epoch": 2.8301003344481606, "grad_norm": 2.14576181675265, "learning_rate": 1.6774593008094587e-07, "loss": 0.3104, "step": 38079 }, { "epoch": 2.8301746562616126, "grad_norm": 1.9501402300841795, "learning_rate": 1.675996140748548e-07, "loss": 0.2622, "step": 38080 }, { "epoch": 2.830248978075065, "grad_norm": 2.048297580713077, "learning_rate": 1.6745336136924638e-07, "loss": 0.2443, "step": 38081 }, { "epoch": 2.8303232998885175, "grad_norm": 2.2494495281018723, "learning_rate": 1.673071719650643e-07, "loss": 0.267, "step": 38082 }, { "epoch": 2.8303976217019695, "grad_norm": 2.0350342417199387, "learning_rate": 1.6716104586324778e-07, "loss": 0.2416, "step": 38083 }, { "epoch": 2.8304719435154215, "grad_norm": 2.0326282981293264, "learning_rate": 1.670149830647394e-07, "loss": 0.2268, "step": 38084 }, { "epoch": 2.830546265328874, "grad_norm": 3.016697318813614, "learning_rate": 1.6686898357047733e-07, "loss": 0.3186, "step": 38085 }, { "epoch": 2.8306205871423264, "grad_norm": 2.61177811687105, "learning_rate": 1.66723047381403e-07, "loss": 0.273, "step": 38086 }, { "epoch": 2.8306949089557785, "grad_norm": 2.6555475919675198, "learning_rate": 1.6657717449845567e-07, "loss": 0.2861, "step": 38087 }, { "epoch": 2.830769230769231, "grad_norm": 2.591561571267842, "learning_rate": 1.664313649225724e-07, "loss": 0.3207, "step": 38088 }, { "epoch": 2.830843552582683, "grad_norm": 2.647326106129193, "learning_rate": 1.6628561865469683e-07, "loss": 0.2215, "step": 38089 }, { "epoch": 2.8309178743961354, "grad_norm": 2.7492769097895327, "learning_rate": 1.661399356957627e-07, "loss": 0.2855, "step": 38090 }, { "epoch": 2.8309921962095874, "grad_norm": 2.8485208025887023, "learning_rate": 1.6599431604670924e-07, "loss": 0.2611, "step": 38091 }, { "epoch": 2.83106651802304, "grad_norm": 2.2632191277918543, "learning_rate": 1.6584875970847457e-07, "loss": 0.3, "step": 38092 }, { "epoch": 2.831140839836492, "grad_norm": 2.5877746833675688, "learning_rate": 1.6570326668199466e-07, "loss": 0.2973, "step": 38093 }, { "epoch": 2.8312151616499444, "grad_norm": 2.0386553500913442, "learning_rate": 1.655578369682076e-07, "loss": 0.273, "step": 38094 }, { "epoch": 2.8312894834633964, "grad_norm": 2.7420981457059193, "learning_rate": 1.654124705680471e-07, "loss": 0.2965, "step": 38095 }, { "epoch": 2.831363805276849, "grad_norm": 1.8357293798884218, "learning_rate": 1.652671674824524e-07, "loss": 0.1899, "step": 38096 }, { "epoch": 2.831438127090301, "grad_norm": 3.0219130702199433, "learning_rate": 1.6512192771235835e-07, "loss": 0.2739, "step": 38097 }, { "epoch": 2.8315124489037533, "grad_norm": 1.7529540638294883, "learning_rate": 1.649767512586975e-07, "loss": 0.1864, "step": 38098 }, { "epoch": 2.8315867707172053, "grad_norm": 1.960256886810099, "learning_rate": 1.648316381224069e-07, "loss": 0.2557, "step": 38099 }, { "epoch": 2.831661092530658, "grad_norm": 2.356178841031067, "learning_rate": 1.6468658830441907e-07, "loss": 0.2559, "step": 38100 }, { "epoch": 2.8317354143441102, "grad_norm": 2.4438236363967616, "learning_rate": 1.6454160180566891e-07, "loss": 0.2531, "step": 38101 }, { "epoch": 2.8318097361575623, "grad_norm": 2.744639492429862, "learning_rate": 1.6439667862709008e-07, "loss": 0.2944, "step": 38102 }, { "epoch": 2.8318840579710143, "grad_norm": 1.9473120909569968, "learning_rate": 1.6425181876961515e-07, "loss": 0.1835, "step": 38103 }, { "epoch": 2.8319583797844667, "grad_norm": 2.309930733510301, "learning_rate": 1.6410702223417563e-07, "loss": 0.2572, "step": 38104 }, { "epoch": 2.832032701597919, "grad_norm": 1.441828216668339, "learning_rate": 1.6396228902170518e-07, "loss": 0.0959, "step": 38105 }, { "epoch": 2.832107023411371, "grad_norm": 2.5831889924241067, "learning_rate": 1.6381761913313533e-07, "loss": 0.3507, "step": 38106 }, { "epoch": 2.8321813452248232, "grad_norm": 1.9377982794057684, "learning_rate": 1.636730125693975e-07, "loss": 0.1584, "step": 38107 }, { "epoch": 2.8322556670382757, "grad_norm": 2.249241596932944, "learning_rate": 1.635284693314221e-07, "loss": 0.2507, "step": 38108 }, { "epoch": 2.832329988851728, "grad_norm": 5.3440567405067885, "learning_rate": 1.6338398942014056e-07, "loss": 0.3286, "step": 38109 }, { "epoch": 2.83240431066518, "grad_norm": 2.7749647981410495, "learning_rate": 1.6323957283648107e-07, "loss": 0.3102, "step": 38110 }, { "epoch": 2.8324786324786326, "grad_norm": 2.543816105966969, "learning_rate": 1.630952195813762e-07, "loss": 0.2908, "step": 38111 }, { "epoch": 2.8325529542920846, "grad_norm": 2.3768613869527417, "learning_rate": 1.629509296557541e-07, "loss": 0.2396, "step": 38112 }, { "epoch": 2.832627276105537, "grad_norm": 2.498130831434124, "learning_rate": 1.6280670306054292e-07, "loss": 0.2669, "step": 38113 }, { "epoch": 2.832701597918989, "grad_norm": 2.3206890041093304, "learning_rate": 1.62662539796673e-07, "loss": 0.3012, "step": 38114 }, { "epoch": 2.8327759197324416, "grad_norm": 2.19785097655219, "learning_rate": 1.6251843986506922e-07, "loss": 0.2654, "step": 38115 }, { "epoch": 2.8328502415458936, "grad_norm": 1.983540805058718, "learning_rate": 1.6237440326666188e-07, "loss": 0.237, "step": 38116 }, { "epoch": 2.832924563359346, "grad_norm": 2.0839456084585546, "learning_rate": 1.6223043000237803e-07, "loss": 0.1999, "step": 38117 }, { "epoch": 2.832998885172798, "grad_norm": 2.2329741007792814, "learning_rate": 1.6208652007314473e-07, "loss": 0.2831, "step": 38118 }, { "epoch": 2.8330732069862505, "grad_norm": 2.5084281324723534, "learning_rate": 1.619426734798868e-07, "loss": 0.2909, "step": 38119 }, { "epoch": 2.8331475287997026, "grad_norm": 2.7031703997970937, "learning_rate": 1.6179889022353234e-07, "loss": 0.2639, "step": 38120 }, { "epoch": 2.833221850613155, "grad_norm": 1.9708980117643764, "learning_rate": 1.616551703050051e-07, "loss": 0.2699, "step": 38121 }, { "epoch": 2.833296172426607, "grad_norm": 2.343495179990753, "learning_rate": 1.6151151372523321e-07, "loss": 0.2457, "step": 38122 }, { "epoch": 2.8333704942400595, "grad_norm": 2.999979139210676, "learning_rate": 1.6136792048513817e-07, "loss": 0.3009, "step": 38123 }, { "epoch": 2.833444816053512, "grad_norm": 3.0320847009348046, "learning_rate": 1.6122439058564697e-07, "loss": 0.2572, "step": 38124 }, { "epoch": 2.833519137866964, "grad_norm": 2.699522353461043, "learning_rate": 1.6108092402768228e-07, "loss": 0.2747, "step": 38125 }, { "epoch": 2.833593459680416, "grad_norm": 1.88244801993576, "learning_rate": 1.6093752081216774e-07, "loss": 0.2489, "step": 38126 }, { "epoch": 2.8336677814938684, "grad_norm": 2.3101481485316953, "learning_rate": 1.6079418094002707e-07, "loss": 0.2848, "step": 38127 }, { "epoch": 2.833742103307321, "grad_norm": 2.4555766047635537, "learning_rate": 1.606509044121829e-07, "loss": 0.3518, "step": 38128 }, { "epoch": 2.833816425120773, "grad_norm": 2.518027302587624, "learning_rate": 1.6050769122955778e-07, "loss": 0.3845, "step": 38129 }, { "epoch": 2.833890746934225, "grad_norm": 2.733372593236852, "learning_rate": 1.6036454139307323e-07, "loss": 0.2248, "step": 38130 }, { "epoch": 2.8339650687476774, "grad_norm": 2.5407370271741603, "learning_rate": 1.6022145490365183e-07, "loss": 0.2829, "step": 38131 }, { "epoch": 2.83403939056113, "grad_norm": 3.2455608629532056, "learning_rate": 1.6007843176221394e-07, "loss": 0.3009, "step": 38132 }, { "epoch": 2.834113712374582, "grad_norm": 2.910142652469173, "learning_rate": 1.5993547196967995e-07, "loss": 0.3811, "step": 38133 }, { "epoch": 2.8341880341880343, "grad_norm": 2.7875321766854344, "learning_rate": 1.5979257552697247e-07, "loss": 0.2956, "step": 38134 }, { "epoch": 2.8342623560014863, "grad_norm": 2.4446613553621894, "learning_rate": 1.5964974243500853e-07, "loss": 0.2871, "step": 38135 }, { "epoch": 2.834336677814939, "grad_norm": 2.19888787751751, "learning_rate": 1.5950697269470961e-07, "loss": 0.1916, "step": 38136 }, { "epoch": 2.834410999628391, "grad_norm": 1.9601252025830391, "learning_rate": 1.5936426630699275e-07, "loss": 0.2181, "step": 38137 }, { "epoch": 2.8344853214418433, "grad_norm": 2.1512802229380776, "learning_rate": 1.5922162327277945e-07, "loss": 0.3085, "step": 38138 }, { "epoch": 2.8345596432552953, "grad_norm": 2.191554051770971, "learning_rate": 1.5907904359298676e-07, "loss": 0.2362, "step": 38139 }, { "epoch": 2.8346339650687478, "grad_norm": 1.643013155624987, "learning_rate": 1.589365272685317e-07, "loss": 0.1438, "step": 38140 }, { "epoch": 2.8347082868822, "grad_norm": 2.49791620362095, "learning_rate": 1.5879407430033355e-07, "loss": 0.2711, "step": 38141 }, { "epoch": 2.8347826086956522, "grad_norm": 2.4520065980718333, "learning_rate": 1.5865168468930824e-07, "loss": 0.3208, "step": 38142 }, { "epoch": 2.8348569305091043, "grad_norm": 2.6844449223829865, "learning_rate": 1.5850935843637283e-07, "loss": 0.2922, "step": 38143 }, { "epoch": 2.8349312523225567, "grad_norm": 2.6523355983827868, "learning_rate": 1.5836709554244324e-07, "loss": 0.2896, "step": 38144 }, { "epoch": 2.8350055741360087, "grad_norm": 2.8648097424192844, "learning_rate": 1.5822489600843539e-07, "loss": 0.3512, "step": 38145 }, { "epoch": 2.835079895949461, "grad_norm": 1.8940724960055728, "learning_rate": 1.5808275983526745e-07, "loss": 0.2289, "step": 38146 }, { "epoch": 2.8351542177629137, "grad_norm": 1.779263644015498, "learning_rate": 1.579406870238487e-07, "loss": 0.2374, "step": 38147 }, { "epoch": 2.8352285395763657, "grad_norm": 2.3196031860332385, "learning_rate": 1.5779867757509947e-07, "loss": 0.2193, "step": 38148 }, { "epoch": 2.8353028613898177, "grad_norm": 1.7992072857047468, "learning_rate": 1.576567314899302e-07, "loss": 0.238, "step": 38149 }, { "epoch": 2.83537718320327, "grad_norm": 2.5466760956427303, "learning_rate": 1.5751484876925682e-07, "loss": 0.2388, "step": 38150 }, { "epoch": 2.8354515050167226, "grad_norm": 3.0689681428466336, "learning_rate": 1.573730294139919e-07, "loss": 0.1833, "step": 38151 }, { "epoch": 2.8355258268301746, "grad_norm": 2.8764106020534346, "learning_rate": 1.5723127342504807e-07, "loss": 0.2584, "step": 38152 }, { "epoch": 2.8356001486436266, "grad_norm": 2.414453006337445, "learning_rate": 1.5708958080334013e-07, "loss": 0.3505, "step": 38153 }, { "epoch": 2.835674470457079, "grad_norm": 2.2510565360226322, "learning_rate": 1.5694795154977737e-07, "loss": 0.23, "step": 38154 }, { "epoch": 2.8357487922705316, "grad_norm": 2.1508958722843206, "learning_rate": 1.568063856652735e-07, "loss": 0.1969, "step": 38155 }, { "epoch": 2.8358231140839836, "grad_norm": 2.2089915179394763, "learning_rate": 1.5666488315074003e-07, "loss": 0.2003, "step": 38156 }, { "epoch": 2.835897435897436, "grad_norm": 2.397707756366676, "learning_rate": 1.565234440070873e-07, "loss": 0.3004, "step": 38157 }, { "epoch": 2.835971757710888, "grad_norm": 2.3215105831722034, "learning_rate": 1.5638206823522572e-07, "loss": 0.2379, "step": 38158 }, { "epoch": 2.8360460795243405, "grad_norm": 2.864941098086622, "learning_rate": 1.5624075583606457e-07, "loss": 0.3743, "step": 38159 }, { "epoch": 2.8361204013377925, "grad_norm": 2.1054626898215916, "learning_rate": 1.5609950681051533e-07, "loss": 0.1941, "step": 38160 }, { "epoch": 2.836194723151245, "grad_norm": 2.6029904104174784, "learning_rate": 1.5595832115948728e-07, "loss": 0.3527, "step": 38161 }, { "epoch": 2.836269044964697, "grad_norm": 2.6963895580127724, "learning_rate": 1.5581719888388746e-07, "loss": 0.2708, "step": 38162 }, { "epoch": 2.8363433667781495, "grad_norm": 2.388716751408047, "learning_rate": 1.5567613998462627e-07, "loss": 0.2038, "step": 38163 }, { "epoch": 2.8364176885916015, "grad_norm": 2.140032963512131, "learning_rate": 1.5553514446261188e-07, "loss": 0.2903, "step": 38164 }, { "epoch": 2.836492010405054, "grad_norm": 2.421685914733803, "learning_rate": 1.5539421231875128e-07, "loss": 0.2302, "step": 38165 }, { "epoch": 2.8365663322185064, "grad_norm": 2.212911407669733, "learning_rate": 1.552533435539516e-07, "loss": 0.2507, "step": 38166 }, { "epoch": 2.8366406540319584, "grad_norm": 2.5351677202492193, "learning_rate": 1.5511253816911985e-07, "loss": 0.2919, "step": 38167 }, { "epoch": 2.8367149758454104, "grad_norm": 2.4658114170029757, "learning_rate": 1.5497179616516422e-07, "loss": 0.3361, "step": 38168 }, { "epoch": 2.836789297658863, "grad_norm": 2.481420209168784, "learning_rate": 1.548311175429884e-07, "loss": 0.2763, "step": 38169 }, { "epoch": 2.8368636194723154, "grad_norm": 2.834648922567485, "learning_rate": 1.5469050230349835e-07, "loss": 0.342, "step": 38170 }, { "epoch": 2.8369379412857674, "grad_norm": 2.4518033146388367, "learning_rate": 1.5454995044760002e-07, "loss": 0.3113, "step": 38171 }, { "epoch": 2.8370122630992194, "grad_norm": 2.5675024096060963, "learning_rate": 1.5440946197619933e-07, "loss": 0.2502, "step": 38172 }, { "epoch": 2.837086584912672, "grad_norm": 2.629834173764556, "learning_rate": 1.542690368901989e-07, "loss": 0.2914, "step": 38173 }, { "epoch": 2.8371609067261243, "grad_norm": 2.2004903105960874, "learning_rate": 1.5412867519050356e-07, "loss": 0.2905, "step": 38174 }, { "epoch": 2.8372352285395763, "grad_norm": 2.3137239937291216, "learning_rate": 1.5398837687801816e-07, "loss": 0.2702, "step": 38175 }, { "epoch": 2.8373095503530283, "grad_norm": 2.277672226062451, "learning_rate": 1.5384814195364305e-07, "loss": 0.3045, "step": 38176 }, { "epoch": 2.837383872166481, "grad_norm": 2.2476224875097244, "learning_rate": 1.5370797041828312e-07, "loss": 0.2583, "step": 38177 }, { "epoch": 2.8374581939799333, "grad_norm": 2.682401807199492, "learning_rate": 1.5356786227284092e-07, "loss": 0.3041, "step": 38178 }, { "epoch": 2.8375325157933853, "grad_norm": 2.286283178798165, "learning_rate": 1.534278175182169e-07, "loss": 0.2322, "step": 38179 }, { "epoch": 2.8376068376068377, "grad_norm": 2.8487213099318507, "learning_rate": 1.5328783615531583e-07, "loss": 0.3335, "step": 38180 }, { "epoch": 2.8376811594202898, "grad_norm": 2.243076568158496, "learning_rate": 1.5314791818503483e-07, "loss": 0.2972, "step": 38181 }, { "epoch": 2.837755481233742, "grad_norm": 2.959382878150818, "learning_rate": 1.5300806360827758e-07, "loss": 0.2352, "step": 38182 }, { "epoch": 2.8378298030471942, "grad_norm": 3.238340952014351, "learning_rate": 1.5286827242594338e-07, "loss": 0.2402, "step": 38183 }, { "epoch": 2.8379041248606467, "grad_norm": 2.1174529317390474, "learning_rate": 1.527285446389315e-07, "loss": 0.254, "step": 38184 }, { "epoch": 2.8379784466740987, "grad_norm": 2.008214206672087, "learning_rate": 1.5258888024814344e-07, "loss": 0.198, "step": 38185 }, { "epoch": 2.838052768487551, "grad_norm": 2.1264777579033898, "learning_rate": 1.5244927925447628e-07, "loss": 0.2608, "step": 38186 }, { "epoch": 2.838127090301003, "grad_norm": 2.4331989629139867, "learning_rate": 1.5230974165883038e-07, "loss": 0.2721, "step": 38187 }, { "epoch": 2.8382014121144556, "grad_norm": 2.4351713579027536, "learning_rate": 1.5217026746210283e-07, "loss": 0.2396, "step": 38188 }, { "epoch": 2.838275733927908, "grad_norm": 2.5052643023459154, "learning_rate": 1.5203085666519178e-07, "loss": 0.2629, "step": 38189 }, { "epoch": 2.83835005574136, "grad_norm": 2.7402168809865795, "learning_rate": 1.5189150926899544e-07, "loss": 0.2631, "step": 38190 }, { "epoch": 2.838424377554812, "grad_norm": 2.480683437819212, "learning_rate": 1.517522252744119e-07, "loss": 0.2733, "step": 38191 }, { "epoch": 2.8384986993682646, "grad_norm": 2.184988653082601, "learning_rate": 1.5161300468233497e-07, "loss": 0.2549, "step": 38192 }, { "epoch": 2.838573021181717, "grad_norm": 2.5165335950881103, "learning_rate": 1.514738474936628e-07, "loss": 0.307, "step": 38193 }, { "epoch": 2.838647342995169, "grad_norm": 2.0598116679020513, "learning_rate": 1.5133475370929128e-07, "loss": 0.2801, "step": 38194 }, { "epoch": 2.838721664808621, "grad_norm": 2.48752077915578, "learning_rate": 1.5119572333011645e-07, "loss": 0.2654, "step": 38195 }, { "epoch": 2.8387959866220736, "grad_norm": 2.123850359056976, "learning_rate": 1.510567563570309e-07, "loss": 0.2214, "step": 38196 }, { "epoch": 2.838870308435526, "grad_norm": 2.132620524872331, "learning_rate": 1.5091785279093162e-07, "loss": 0.2094, "step": 38197 }, { "epoch": 2.838944630248978, "grad_norm": 4.104170116768843, "learning_rate": 1.5077901263271132e-07, "loss": 0.4093, "step": 38198 }, { "epoch": 2.8390189520624305, "grad_norm": 2.3380487434475823, "learning_rate": 1.5064023588326593e-07, "loss": 0.1916, "step": 38199 }, { "epoch": 2.8390932738758825, "grad_norm": 2.7525928554409607, "learning_rate": 1.5050152254348804e-07, "loss": 0.337, "step": 38200 }, { "epoch": 2.839167595689335, "grad_norm": 2.911656160367805, "learning_rate": 1.503628726142692e-07, "loss": 0.3062, "step": 38201 }, { "epoch": 2.839241917502787, "grad_norm": 2.0714189007294017, "learning_rate": 1.502242860965042e-07, "loss": 0.2573, "step": 38202 }, { "epoch": 2.8393162393162394, "grad_norm": 2.3850056357248737, "learning_rate": 1.5008576299108236e-07, "loss": 0.2569, "step": 38203 }, { "epoch": 2.8393905611296915, "grad_norm": 2.6212805249862097, "learning_rate": 1.499473032988996e-07, "loss": 0.2717, "step": 38204 }, { "epoch": 2.839464882943144, "grad_norm": 1.5699012900592568, "learning_rate": 1.4980890702084304e-07, "loss": 0.154, "step": 38205 }, { "epoch": 2.839539204756596, "grad_norm": 2.0852814392387153, "learning_rate": 1.496705741578064e-07, "loss": 0.3036, "step": 38206 }, { "epoch": 2.8396135265700484, "grad_norm": 2.033598889999737, "learning_rate": 1.4953230471068003e-07, "loss": 0.218, "step": 38207 }, { "epoch": 2.8396878483835004, "grad_norm": 2.8674503738128574, "learning_rate": 1.4939409868035214e-07, "loss": 0.3396, "step": 38208 }, { "epoch": 2.839762170196953, "grad_norm": 2.6300028298334803, "learning_rate": 1.4925595606771536e-07, "loss": 0.2744, "step": 38209 }, { "epoch": 2.839836492010405, "grad_norm": 2.5758040242146665, "learning_rate": 1.4911787687365675e-07, "loss": 0.3112, "step": 38210 }, { "epoch": 2.8399108138238573, "grad_norm": 2.3762427337178744, "learning_rate": 1.489798610990667e-07, "loss": 0.2924, "step": 38211 }, { "epoch": 2.83998513563731, "grad_norm": 1.9579870706073685, "learning_rate": 1.488419087448334e-07, "loss": 0.2438, "step": 38212 }, { "epoch": 2.840059457450762, "grad_norm": 2.143881810495747, "learning_rate": 1.487040198118439e-07, "loss": 0.2618, "step": 38213 }, { "epoch": 2.840133779264214, "grad_norm": 2.6289487552164807, "learning_rate": 1.4856619430098752e-07, "loss": 0.3346, "step": 38214 }, { "epoch": 2.8402081010776663, "grad_norm": 2.8626434091034785, "learning_rate": 1.4842843221315017e-07, "loss": 0.2451, "step": 38215 }, { "epoch": 2.8402824228911188, "grad_norm": 2.332406863992433, "learning_rate": 1.4829073354922008e-07, "loss": 0.3251, "step": 38216 }, { "epoch": 2.840356744704571, "grad_norm": 2.6548823892528612, "learning_rate": 1.4815309831008206e-07, "loss": 0.3091, "step": 38217 }, { "epoch": 2.840431066518023, "grad_norm": 1.6821553156681854, "learning_rate": 1.480155264966243e-07, "loss": 0.2139, "step": 38218 }, { "epoch": 2.8405053883314753, "grad_norm": 2.058136850770755, "learning_rate": 1.4787801810973056e-07, "loss": 0.2838, "step": 38219 }, { "epoch": 2.8405797101449277, "grad_norm": 2.252256456161397, "learning_rate": 1.477405731502868e-07, "loss": 0.3037, "step": 38220 }, { "epoch": 2.8406540319583797, "grad_norm": 2.1284008256508145, "learning_rate": 1.4760319161917892e-07, "loss": 0.237, "step": 38221 }, { "epoch": 2.840728353771832, "grad_norm": 2.410789075541243, "learning_rate": 1.474658735172907e-07, "loss": 0.2688, "step": 38222 }, { "epoch": 2.840802675585284, "grad_norm": 2.0053210134778365, "learning_rate": 1.4732861884550477e-07, "loss": 0.2101, "step": 38223 }, { "epoch": 2.8408769973987367, "grad_norm": 2.531046757629734, "learning_rate": 1.4719142760470706e-07, "loss": 0.2515, "step": 38224 }, { "epoch": 2.8409513192121887, "grad_norm": 2.373072572312679, "learning_rate": 1.4705429979577913e-07, "loss": 0.2752, "step": 38225 }, { "epoch": 2.841025641025641, "grad_norm": 2.2694721744130892, "learning_rate": 1.469172354196047e-07, "loss": 0.3236, "step": 38226 }, { "epoch": 2.841099962839093, "grad_norm": 2.899653762044544, "learning_rate": 1.4678023447706634e-07, "loss": 0.3004, "step": 38227 }, { "epoch": 2.8411742846525456, "grad_norm": 2.5176533982119635, "learning_rate": 1.4664329696904456e-07, "loss": 0.3103, "step": 38228 }, { "epoch": 2.8412486064659976, "grad_norm": 2.152834767362651, "learning_rate": 1.4650642289642193e-07, "loss": 0.3018, "step": 38229 }, { "epoch": 2.84132292827945, "grad_norm": 2.5704752398564117, "learning_rate": 1.4636961226007996e-07, "loss": 0.2921, "step": 38230 }, { "epoch": 2.841397250092902, "grad_norm": 2.8448832181924666, "learning_rate": 1.4623286506089907e-07, "loss": 0.3572, "step": 38231 }, { "epoch": 2.8414715719063546, "grad_norm": 2.3451764957642727, "learning_rate": 1.4609618129976079e-07, "loss": 0.2629, "step": 38232 }, { "epoch": 2.8415458937198066, "grad_norm": 2.3182561033551377, "learning_rate": 1.4595956097754215e-07, "loss": 0.2472, "step": 38233 }, { "epoch": 2.841620215533259, "grad_norm": 2.4207614948533793, "learning_rate": 1.4582300409512583e-07, "loss": 0.2709, "step": 38234 }, { "epoch": 2.8416945373467115, "grad_norm": 2.258956487064511, "learning_rate": 1.456865106533889e-07, "loss": 0.2688, "step": 38235 }, { "epoch": 2.8417688591601635, "grad_norm": 2.53411444275838, "learning_rate": 1.4555008065321286e-07, "loss": 0.2739, "step": 38236 }, { "epoch": 2.8418431809736155, "grad_norm": 2.3974321493230204, "learning_rate": 1.4541371409547255e-07, "loss": 0.26, "step": 38237 }, { "epoch": 2.841917502787068, "grad_norm": 2.7374662990575893, "learning_rate": 1.4527741098104731e-07, "loss": 0.2243, "step": 38238 }, { "epoch": 2.8419918246005205, "grad_norm": 2.7204692783946545, "learning_rate": 1.451411713108153e-07, "loss": 0.2629, "step": 38239 }, { "epoch": 2.8420661464139725, "grad_norm": 2.352805839676194, "learning_rate": 1.4500499508565248e-07, "loss": 0.2858, "step": 38240 }, { "epoch": 2.8421404682274245, "grad_norm": 1.9798198446399398, "learning_rate": 1.4486888230643703e-07, "loss": 0.2059, "step": 38241 }, { "epoch": 2.842214790040877, "grad_norm": 2.203492342635996, "learning_rate": 1.4473283297404273e-07, "loss": 0.2846, "step": 38242 }, { "epoch": 2.8422891118543294, "grad_norm": 2.745847933691677, "learning_rate": 1.4459684708934886e-07, "loss": 0.34, "step": 38243 }, { "epoch": 2.8423634336677814, "grad_norm": 1.9878637009345919, "learning_rate": 1.4446092465322914e-07, "loss": 0.2578, "step": 38244 }, { "epoch": 2.842437755481234, "grad_norm": 2.280532505558816, "learning_rate": 1.4432506566655735e-07, "loss": 0.2158, "step": 38245 }, { "epoch": 2.842512077294686, "grad_norm": 1.8475460719882855, "learning_rate": 1.4418927013020945e-07, "loss": 0.2214, "step": 38246 }, { "epoch": 2.8425863991081384, "grad_norm": 2.373257767644874, "learning_rate": 1.440535380450614e-07, "loss": 0.3124, "step": 38247 }, { "epoch": 2.8426607209215904, "grad_norm": 2.2977690234992894, "learning_rate": 1.439178694119836e-07, "loss": 0.2717, "step": 38248 }, { "epoch": 2.842735042735043, "grad_norm": 2.1535095903674284, "learning_rate": 1.4378226423185204e-07, "loss": 0.2633, "step": 38249 }, { "epoch": 2.842809364548495, "grad_norm": 1.942874518842425, "learning_rate": 1.4364672250553823e-07, "loss": 0.2584, "step": 38250 }, { "epoch": 2.8428836863619473, "grad_norm": 2.679783305448602, "learning_rate": 1.4351124423391594e-07, "loss": 0.3131, "step": 38251 }, { "epoch": 2.8429580081753993, "grad_norm": 2.2487694567094927, "learning_rate": 1.4337582941785556e-07, "loss": 0.2818, "step": 38252 }, { "epoch": 2.843032329988852, "grad_norm": 2.0782863495931165, "learning_rate": 1.4324047805823194e-07, "loss": 0.3021, "step": 38253 }, { "epoch": 2.843106651802304, "grad_norm": 2.131160466776768, "learning_rate": 1.431051901559144e-07, "loss": 0.2254, "step": 38254 }, { "epoch": 2.8431809736157563, "grad_norm": 2.013732163375605, "learning_rate": 1.4296996571177224e-07, "loss": 0.2589, "step": 38255 }, { "epoch": 2.8432552954292083, "grad_norm": 2.4651648117405047, "learning_rate": 1.428348047266803e-07, "loss": 0.2873, "step": 38256 }, { "epoch": 2.8433296172426608, "grad_norm": 2.1295313641095923, "learning_rate": 1.426997072015046e-07, "loss": 0.1997, "step": 38257 }, { "epoch": 2.843403939056113, "grad_norm": 2.1512504837544797, "learning_rate": 1.4256467313711887e-07, "loss": 0.2086, "step": 38258 }, { "epoch": 2.8434782608695652, "grad_norm": 2.5567711652216247, "learning_rate": 1.4242970253438903e-07, "loss": 0.394, "step": 38259 }, { "epoch": 2.8435525826830172, "grad_norm": 2.1270159699129625, "learning_rate": 1.4229479539418556e-07, "loss": 0.2034, "step": 38260 }, { "epoch": 2.8436269044964697, "grad_norm": 1.903316953163381, "learning_rate": 1.421599517173766e-07, "loss": 0.2092, "step": 38261 }, { "epoch": 2.843701226309922, "grad_norm": 3.1086431592104224, "learning_rate": 1.420251715048304e-07, "loss": 0.2934, "step": 38262 }, { "epoch": 2.843775548123374, "grad_norm": 2.588368132990088, "learning_rate": 1.4189045475741404e-07, "loss": 0.2786, "step": 38263 }, { "epoch": 2.843849869936826, "grad_norm": 2.5707182273172946, "learning_rate": 1.4175580147599565e-07, "loss": 0.2635, "step": 38264 }, { "epoch": 2.8439241917502787, "grad_norm": 2.13858520543903, "learning_rate": 1.4162121166144238e-07, "loss": 0.2234, "step": 38265 }, { "epoch": 2.843998513563731, "grad_norm": 2.2908382918971277, "learning_rate": 1.4148668531462128e-07, "loss": 0.2582, "step": 38266 }, { "epoch": 2.844072835377183, "grad_norm": 1.5973615989278436, "learning_rate": 1.4135222243639502e-07, "loss": 0.2226, "step": 38267 }, { "epoch": 2.8441471571906356, "grad_norm": 2.072444299380607, "learning_rate": 1.4121782302763287e-07, "loss": 0.2553, "step": 38268 }, { "epoch": 2.8442214790040876, "grad_norm": 2.237885069754767, "learning_rate": 1.4108348708919973e-07, "loss": 0.2745, "step": 38269 }, { "epoch": 2.84429580081754, "grad_norm": 2.074920047732761, "learning_rate": 1.409492146219582e-07, "loss": 0.2549, "step": 38270 }, { "epoch": 2.844370122630992, "grad_norm": 2.419336759518773, "learning_rate": 1.4081500562677542e-07, "loss": 0.2254, "step": 38271 }, { "epoch": 2.8444444444444446, "grad_norm": 2.395170465368246, "learning_rate": 1.4068086010451175e-07, "loss": 0.3054, "step": 38272 }, { "epoch": 2.8445187662578966, "grad_norm": 1.9332507023457541, "learning_rate": 1.4054677805603545e-07, "loss": 0.2749, "step": 38273 }, { "epoch": 2.844593088071349, "grad_norm": 2.5976809411360495, "learning_rate": 1.4041275948220577e-07, "loss": 0.3041, "step": 38274 }, { "epoch": 2.844667409884801, "grad_norm": 3.1355240653609235, "learning_rate": 1.4027880438388763e-07, "loss": 0.448, "step": 38275 }, { "epoch": 2.8447417316982535, "grad_norm": 2.288617495520643, "learning_rate": 1.4014491276194363e-07, "loss": 0.2236, "step": 38276 }, { "epoch": 2.8448160535117055, "grad_norm": 2.5432853495060477, "learning_rate": 1.4001108461723421e-07, "loss": 0.2898, "step": 38277 }, { "epoch": 2.844890375325158, "grad_norm": 2.686791446049089, "learning_rate": 1.3987731995062314e-07, "loss": 0.3822, "step": 38278 }, { "epoch": 2.84496469713861, "grad_norm": 2.079073335439815, "learning_rate": 1.397436187629686e-07, "loss": 0.2273, "step": 38279 }, { "epoch": 2.8450390189520625, "grad_norm": 2.2833143510456178, "learning_rate": 1.3960998105513436e-07, "loss": 0.2481, "step": 38280 }, { "epoch": 2.845113340765515, "grad_norm": 1.989263885739793, "learning_rate": 1.3947640682797859e-07, "loss": 0.1728, "step": 38281 }, { "epoch": 2.845187662578967, "grad_norm": 1.96968290041519, "learning_rate": 1.3934289608236284e-07, "loss": 0.1948, "step": 38282 }, { "epoch": 2.845261984392419, "grad_norm": 2.3991397296604937, "learning_rate": 1.3920944881914645e-07, "loss": 0.2319, "step": 38283 }, { "epoch": 2.8453363062058714, "grad_norm": 2.263216571777742, "learning_rate": 1.3907606503918646e-07, "loss": 0.2245, "step": 38284 }, { "epoch": 2.845410628019324, "grad_norm": 2.1315933631159134, "learning_rate": 1.3894274474334336e-07, "loss": 0.2843, "step": 38285 }, { "epoch": 2.845484949832776, "grad_norm": 1.743904070071192, "learning_rate": 1.3880948793247528e-07, "loss": 0.1828, "step": 38286 }, { "epoch": 2.845559271646228, "grad_norm": 2.4441559973346383, "learning_rate": 1.3867629460744046e-07, "loss": 0.2835, "step": 38287 }, { "epoch": 2.8456335934596804, "grad_norm": 2.314914837978324, "learning_rate": 1.3854316476909603e-07, "loss": 0.2836, "step": 38288 }, { "epoch": 2.845707915273133, "grad_norm": 2.3773590268264604, "learning_rate": 1.384100984182979e-07, "loss": 0.2849, "step": 38289 }, { "epoch": 2.845782237086585, "grad_norm": 2.4155418027778692, "learning_rate": 1.382770955559054e-07, "loss": 0.302, "step": 38290 }, { "epoch": 2.8458565589000373, "grad_norm": 3.217664377423535, "learning_rate": 1.3814415618277344e-07, "loss": 0.3193, "step": 38291 }, { "epoch": 2.8459308807134893, "grad_norm": 2.1777903564508483, "learning_rate": 1.3801128029975574e-07, "loss": 0.1654, "step": 38292 }, { "epoch": 2.846005202526942, "grad_norm": 2.01671269185005, "learning_rate": 1.3787846790771165e-07, "loss": 0.2474, "step": 38293 }, { "epoch": 2.846079524340394, "grad_norm": 2.592495253142869, "learning_rate": 1.377457190074938e-07, "loss": 0.251, "step": 38294 }, { "epoch": 2.8461538461538463, "grad_norm": 2.3065831168484223, "learning_rate": 1.3761303359995703e-07, "loss": 0.2848, "step": 38295 }, { "epoch": 2.8462281679672983, "grad_norm": 2.57941225922592, "learning_rate": 1.3748041168595517e-07, "loss": 0.2948, "step": 38296 }, { "epoch": 2.8463024897807507, "grad_norm": 2.9297357737590124, "learning_rate": 1.3734785326634415e-07, "loss": 0.3015, "step": 38297 }, { "epoch": 2.8463768115942027, "grad_norm": 1.8137954018457723, "learning_rate": 1.3721535834197441e-07, "loss": 0.1566, "step": 38298 }, { "epoch": 2.846451133407655, "grad_norm": 2.249709394764227, "learning_rate": 1.3708292691370085e-07, "loss": 0.2522, "step": 38299 }, { "epoch": 2.8465254552211077, "grad_norm": 2.291710843752321, "learning_rate": 1.3695055898237608e-07, "loss": 0.2894, "step": 38300 }, { "epoch": 2.8465997770345597, "grad_norm": 3.4567335445168608, "learning_rate": 1.3681825454885167e-07, "loss": 0.3059, "step": 38301 }, { "epoch": 2.8466740988480117, "grad_norm": 2.994111764656973, "learning_rate": 1.3668601361398025e-07, "loss": 0.2975, "step": 38302 }, { "epoch": 2.846748420661464, "grad_norm": 2.3366606073677834, "learning_rate": 1.3655383617861117e-07, "loss": 0.23, "step": 38303 }, { "epoch": 2.8468227424749166, "grad_norm": 2.4305454016133248, "learning_rate": 1.3642172224359818e-07, "loss": 0.308, "step": 38304 }, { "epoch": 2.8468970642883686, "grad_norm": 2.025001531063648, "learning_rate": 1.3628967180978947e-07, "loss": 0.2069, "step": 38305 }, { "epoch": 2.8469713861018207, "grad_norm": 2.74891738304943, "learning_rate": 1.3615768487803548e-07, "loss": 0.2236, "step": 38306 }, { "epoch": 2.847045707915273, "grad_norm": 2.2761701621505925, "learning_rate": 1.3602576144918778e-07, "loss": 0.2601, "step": 38307 }, { "epoch": 2.8471200297287256, "grad_norm": 2.443532375353966, "learning_rate": 1.3589390152409454e-07, "loss": 0.2883, "step": 38308 }, { "epoch": 2.8471943515421776, "grad_norm": 2.2811661932294687, "learning_rate": 1.3576210510360288e-07, "loss": 0.2501, "step": 38309 }, { "epoch": 2.8472686733556296, "grad_norm": 2.079906897236278, "learning_rate": 1.3563037218856433e-07, "loss": 0.237, "step": 38310 }, { "epoch": 2.847342995169082, "grad_norm": 2.4991898066206724, "learning_rate": 1.354987027798249e-07, "loss": 0.318, "step": 38311 }, { "epoch": 2.8474173169825345, "grad_norm": 2.5123594725531353, "learning_rate": 1.353670968782339e-07, "loss": 0.2597, "step": 38312 }, { "epoch": 2.8474916387959865, "grad_norm": 2.4401973540537805, "learning_rate": 1.3523555448463732e-07, "loss": 0.3477, "step": 38313 }, { "epoch": 2.847565960609439, "grad_norm": 2.3334620895434375, "learning_rate": 1.3510407559988114e-07, "loss": 0.2788, "step": 38314 }, { "epoch": 2.847640282422891, "grad_norm": 1.9166066763784098, "learning_rate": 1.349726602248147e-07, "loss": 0.2465, "step": 38315 }, { "epoch": 2.8477146042363435, "grad_norm": 2.3581079346935274, "learning_rate": 1.3484130836028175e-07, "loss": 0.2372, "step": 38316 }, { "epoch": 2.8477889260497955, "grad_norm": 2.1380694314074655, "learning_rate": 1.347100200071294e-07, "loss": 0.2401, "step": 38317 }, { "epoch": 2.847863247863248, "grad_norm": 2.5658263021234076, "learning_rate": 1.345787951662003e-07, "loss": 0.274, "step": 38318 }, { "epoch": 2.8479375696767, "grad_norm": 2.596849004254954, "learning_rate": 1.3444763383834269e-07, "loss": 0.2564, "step": 38319 }, { "epoch": 2.8480118914901524, "grad_norm": 2.541741985076745, "learning_rate": 1.343165360243992e-07, "loss": 0.3076, "step": 38320 }, { "epoch": 2.8480862133036045, "grad_norm": 2.5551846527307247, "learning_rate": 1.341855017252125e-07, "loss": 0.2463, "step": 38321 }, { "epoch": 2.848160535117057, "grad_norm": 4.390920108992872, "learning_rate": 1.3405453094162856e-07, "loss": 0.2414, "step": 38322 }, { "epoch": 2.8482348569305094, "grad_norm": 2.329640124867122, "learning_rate": 1.3392362367448896e-07, "loss": 0.322, "step": 38323 }, { "epoch": 2.8483091787439614, "grad_norm": 3.0119250046618338, "learning_rate": 1.3379277992463856e-07, "loss": 0.3447, "step": 38324 }, { "epoch": 2.8483835005574134, "grad_norm": 2.334556962065535, "learning_rate": 1.336619996929178e-07, "loss": 0.2808, "step": 38325 }, { "epoch": 2.848457822370866, "grad_norm": 2.139041604290064, "learning_rate": 1.3353128298016937e-07, "loss": 0.274, "step": 38326 }, { "epoch": 2.8485321441843183, "grad_norm": 2.3935572330649526, "learning_rate": 1.3340062978723477e-07, "loss": 0.31, "step": 38327 }, { "epoch": 2.8486064659977703, "grad_norm": 2.361822894746289, "learning_rate": 1.3327004011495337e-07, "loss": 0.27, "step": 38328 }, { "epoch": 2.8486807878112224, "grad_norm": 2.935127697302994, "learning_rate": 1.3313951396416892e-07, "loss": 0.2891, "step": 38329 }, { "epoch": 2.848755109624675, "grad_norm": 1.8676954563619326, "learning_rate": 1.3300905133572074e-07, "loss": 0.1287, "step": 38330 }, { "epoch": 2.8488294314381273, "grad_norm": 1.8860741060027262, "learning_rate": 1.3287865223044706e-07, "loss": 0.2128, "step": 38331 }, { "epoch": 2.8489037532515793, "grad_norm": 2.407432253430835, "learning_rate": 1.327483166491883e-07, "loss": 0.2489, "step": 38332 }, { "epoch": 2.8489780750650313, "grad_norm": 3.688486052747854, "learning_rate": 1.3261804459278492e-07, "loss": 0.3389, "step": 38333 }, { "epoch": 2.8490523968784838, "grad_norm": 2.1113199898125874, "learning_rate": 1.3248783606207404e-07, "loss": 0.2407, "step": 38334 }, { "epoch": 2.8491267186919362, "grad_norm": 2.8321380371677534, "learning_rate": 1.3235769105789498e-07, "loss": 0.2851, "step": 38335 }, { "epoch": 2.8492010405053882, "grad_norm": 2.4115799255115573, "learning_rate": 1.3222760958108482e-07, "loss": 0.3228, "step": 38336 }, { "epoch": 2.8492753623188407, "grad_norm": 2.6357650095267684, "learning_rate": 1.3209759163248183e-07, "loss": 0.3195, "step": 38337 }, { "epoch": 2.8493496841322927, "grad_norm": 2.5232012665324595, "learning_rate": 1.3196763721292194e-07, "loss": 0.281, "step": 38338 }, { "epoch": 2.849424005945745, "grad_norm": 2.2871233091772782, "learning_rate": 1.3183774632324341e-07, "loss": 0.2201, "step": 38339 }, { "epoch": 2.849498327759197, "grad_norm": 2.577500631853628, "learning_rate": 1.3170791896428004e-07, "loss": 0.2915, "step": 38340 }, { "epoch": 2.8495726495726497, "grad_norm": 2.4745307595769046, "learning_rate": 1.3157815513687e-07, "loss": 0.2797, "step": 38341 }, { "epoch": 2.8496469713861017, "grad_norm": 1.9409889384835253, "learning_rate": 1.314484548418471e-07, "loss": 0.2234, "step": 38342 }, { "epoch": 2.849721293199554, "grad_norm": 2.5215341810298804, "learning_rate": 1.3131881808004733e-07, "loss": 0.2764, "step": 38343 }, { "epoch": 2.849795615013006, "grad_norm": 2.8773248076725673, "learning_rate": 1.3118924485230555e-07, "loss": 0.3306, "step": 38344 }, { "epoch": 2.8498699368264586, "grad_norm": 2.6816769622552528, "learning_rate": 1.3105973515945337e-07, "loss": 0.2312, "step": 38345 }, { "epoch": 2.849944258639911, "grad_norm": 2.3353083885576935, "learning_rate": 1.3093028900232895e-07, "loss": 0.2079, "step": 38346 }, { "epoch": 2.850018580453363, "grad_norm": 2.5366935905009127, "learning_rate": 1.3080090638176168e-07, "loss": 0.2238, "step": 38347 }, { "epoch": 2.850092902266815, "grad_norm": 2.1828165570811495, "learning_rate": 1.3067158729858642e-07, "loss": 0.2612, "step": 38348 }, { "epoch": 2.8501672240802676, "grad_norm": 2.5134221503693635, "learning_rate": 1.3054233175363697e-07, "loss": 0.3563, "step": 38349 }, { "epoch": 2.85024154589372, "grad_norm": 2.188500194980373, "learning_rate": 1.3041313974774151e-07, "loss": 0.2823, "step": 38350 }, { "epoch": 2.850315867707172, "grad_norm": 2.8157900229076125, "learning_rate": 1.3028401128173607e-07, "loss": 0.3232, "step": 38351 }, { "epoch": 2.850390189520624, "grad_norm": 3.186492622150714, "learning_rate": 1.301549463564489e-07, "loss": 0.403, "step": 38352 }, { "epoch": 2.8504645113340765, "grad_norm": 2.4694123096787917, "learning_rate": 1.3002594497271258e-07, "loss": 0.2505, "step": 38353 }, { "epoch": 2.850538833147529, "grad_norm": 2.4250376815253665, "learning_rate": 1.2989700713135656e-07, "loss": 0.261, "step": 38354 }, { "epoch": 2.850613154960981, "grad_norm": 2.7309065105278374, "learning_rate": 1.2976813283321123e-07, "loss": 0.3557, "step": 38355 }, { "epoch": 2.8506874767744335, "grad_norm": 2.238977500241224, "learning_rate": 1.2963932207910702e-07, "loss": 0.2101, "step": 38356 }, { "epoch": 2.8507617985878855, "grad_norm": 2.7798722235544964, "learning_rate": 1.295105748698733e-07, "loss": 0.291, "step": 38357 }, { "epoch": 2.850836120401338, "grad_norm": 2.635078931518897, "learning_rate": 1.2938189120633716e-07, "loss": 0.3317, "step": 38358 }, { "epoch": 2.85091044221479, "grad_norm": 3.119673424290395, "learning_rate": 1.2925327108933016e-07, "loss": 0.2877, "step": 38359 }, { "epoch": 2.8509847640282424, "grad_norm": 2.8202123968634702, "learning_rate": 1.2912471451967722e-07, "loss": 0.3112, "step": 38360 }, { "epoch": 2.8510590858416944, "grad_norm": 2.2441749896157916, "learning_rate": 1.289962214982077e-07, "loss": 0.2245, "step": 38361 }, { "epoch": 2.851133407655147, "grad_norm": 2.4058439494630868, "learning_rate": 1.2886779202574639e-07, "loss": 0.251, "step": 38362 }, { "epoch": 2.851207729468599, "grad_norm": 2.7561755913484935, "learning_rate": 1.2873942610312384e-07, "loss": 0.2716, "step": 38363 }, { "epoch": 2.8512820512820514, "grad_norm": 2.452762936229621, "learning_rate": 1.286111237311649e-07, "loss": 0.2607, "step": 38364 }, { "epoch": 2.8513563730955034, "grad_norm": 2.771659877127883, "learning_rate": 1.284828849106945e-07, "loss": 0.2405, "step": 38365 }, { "epoch": 2.851430694908956, "grad_norm": 2.230464010527481, "learning_rate": 1.283547096425397e-07, "loss": 0.2663, "step": 38366 }, { "epoch": 2.851505016722408, "grad_norm": 1.7470109124918833, "learning_rate": 1.2822659792752434e-07, "loss": 0.2114, "step": 38367 }, { "epoch": 2.8515793385358603, "grad_norm": 2.31786386572364, "learning_rate": 1.280985497664744e-07, "loss": 0.2254, "step": 38368 }, { "epoch": 2.851653660349313, "grad_norm": 2.138224351996249, "learning_rate": 1.2797056516021367e-07, "loss": 0.2357, "step": 38369 }, { "epoch": 2.851727982162765, "grad_norm": 1.5333557557340476, "learning_rate": 1.2784264410956593e-07, "loss": 0.172, "step": 38370 }, { "epoch": 2.851802303976217, "grad_norm": 2.31292379090725, "learning_rate": 1.277147866153561e-07, "loss": 0.2923, "step": 38371 }, { "epoch": 2.8518766257896693, "grad_norm": 1.498375295451915, "learning_rate": 1.275869926784057e-07, "loss": 0.1569, "step": 38372 }, { "epoch": 2.8519509476031217, "grad_norm": 2.699075666498007, "learning_rate": 1.2745926229953743e-07, "loss": 0.2655, "step": 38373 }, { "epoch": 2.8520252694165737, "grad_norm": 2.195198374464666, "learning_rate": 1.2733159547957508e-07, "loss": 0.2145, "step": 38374 }, { "epoch": 2.8520995912300258, "grad_norm": 3.108387369939678, "learning_rate": 1.272039922193391e-07, "loss": 0.2935, "step": 38375 }, { "epoch": 2.8521739130434782, "grad_norm": 2.844513382086026, "learning_rate": 1.2707645251965105e-07, "loss": 0.2344, "step": 38376 }, { "epoch": 2.8522482348569307, "grad_norm": 2.480247390449129, "learning_rate": 1.2694897638133362e-07, "loss": 0.2842, "step": 38377 }, { "epoch": 2.8523225566703827, "grad_norm": 2.428715309039248, "learning_rate": 1.2682156380520506e-07, "loss": 0.3168, "step": 38378 }, { "epoch": 2.852396878483835, "grad_norm": 2.3992254211891653, "learning_rate": 1.2669421479208798e-07, "loss": 0.2292, "step": 38379 }, { "epoch": 2.852471200297287, "grad_norm": 2.087066728513221, "learning_rate": 1.2656692934280067e-07, "loss": 0.2224, "step": 38380 }, { "epoch": 2.8525455221107396, "grad_norm": 1.704162313320954, "learning_rate": 1.2643970745816468e-07, "loss": 0.1982, "step": 38381 }, { "epoch": 2.8526198439241917, "grad_norm": 2.421400421420341, "learning_rate": 1.2631254913899605e-07, "loss": 0.2278, "step": 38382 }, { "epoch": 2.852694165737644, "grad_norm": 2.2197891446895364, "learning_rate": 1.2618545438611517e-07, "loss": 0.2584, "step": 38383 }, { "epoch": 2.852768487551096, "grad_norm": 2.714859040946692, "learning_rate": 1.2605842320034034e-07, "loss": 0.3009, "step": 38384 }, { "epoch": 2.8528428093645486, "grad_norm": 2.4963317400280856, "learning_rate": 1.2593145558248865e-07, "loss": 0.2518, "step": 38385 }, { "epoch": 2.8529171311780006, "grad_norm": 3.0553665467508044, "learning_rate": 1.2580455153337833e-07, "loss": 0.3532, "step": 38386 }, { "epoch": 2.852991452991453, "grad_norm": 2.429757554821099, "learning_rate": 1.2567771105382543e-07, "loss": 0.2791, "step": 38387 }, { "epoch": 2.853065774804905, "grad_norm": 2.5410855759605075, "learning_rate": 1.2555093414464814e-07, "loss": 0.259, "step": 38388 }, { "epoch": 2.8531400966183575, "grad_norm": 2.7750609962073147, "learning_rate": 1.254242208066603e-07, "loss": 0.3579, "step": 38389 }, { "epoch": 2.8532144184318096, "grad_norm": 2.2336122146250403, "learning_rate": 1.2529757104068007e-07, "loss": 0.1888, "step": 38390 }, { "epoch": 2.853288740245262, "grad_norm": 2.794091235377383, "learning_rate": 1.2517098484752134e-07, "loss": 0.2925, "step": 38391 }, { "epoch": 2.8533630620587145, "grad_norm": 2.4404188869504764, "learning_rate": 1.2504446222799782e-07, "loss": 0.2865, "step": 38392 }, { "epoch": 2.8534373838721665, "grad_norm": 2.820235780103347, "learning_rate": 1.249180031829278e-07, "loss": 0.304, "step": 38393 }, { "epoch": 2.8535117056856185, "grad_norm": 2.4361011502613414, "learning_rate": 1.247916077131228e-07, "loss": 0.2405, "step": 38394 }, { "epoch": 2.853586027499071, "grad_norm": 2.3253216782801482, "learning_rate": 1.246652758193978e-07, "loss": 0.2742, "step": 38395 }, { "epoch": 2.8536603493125234, "grad_norm": 2.557704303930301, "learning_rate": 1.2453900750256431e-07, "loss": 0.2982, "step": 38396 }, { "epoch": 2.8537346711259755, "grad_norm": 2.84535640448042, "learning_rate": 1.2441280276343616e-07, "loss": 0.209, "step": 38397 }, { "epoch": 2.8538089929394275, "grad_norm": 2.3220115492109854, "learning_rate": 1.2428666160282598e-07, "loss": 0.2957, "step": 38398 }, { "epoch": 2.85388331475288, "grad_norm": 2.1727329413598797, "learning_rate": 1.2416058402154542e-07, "loss": 0.2219, "step": 38399 }, { "epoch": 2.8539576365663324, "grad_norm": 2.513822188805586, "learning_rate": 1.240345700204082e-07, "loss": 0.3227, "step": 38400 }, { "epoch": 2.8540319583797844, "grad_norm": 2.0928919366514167, "learning_rate": 1.239086196002226e-07, "loss": 0.2301, "step": 38401 }, { "epoch": 2.854106280193237, "grad_norm": 2.323666246132214, "learning_rate": 1.2378273276180131e-07, "loss": 0.2895, "step": 38402 }, { "epoch": 2.854180602006689, "grad_norm": 2.3295700734887355, "learning_rate": 1.2365690950595477e-07, "loss": 0.2483, "step": 38403 }, { "epoch": 2.8542549238201413, "grad_norm": 2.390930872957755, "learning_rate": 1.2353114983349123e-07, "loss": 0.229, "step": 38404 }, { "epoch": 2.8543292456335934, "grad_norm": 2.408507521395667, "learning_rate": 1.2340545374522339e-07, "loss": 0.2499, "step": 38405 }, { "epoch": 2.854403567447046, "grad_norm": 2.1150140969642797, "learning_rate": 1.2327982124195836e-07, "loss": 0.2232, "step": 38406 }, { "epoch": 2.854477889260498, "grad_norm": 2.31851652599108, "learning_rate": 1.231542523245044e-07, "loss": 0.2151, "step": 38407 }, { "epoch": 2.8545522110739503, "grad_norm": 2.60209841942809, "learning_rate": 1.2302874699367197e-07, "loss": 0.3054, "step": 38408 }, { "epoch": 2.8546265328874023, "grad_norm": 2.586608968692572, "learning_rate": 1.2290330525026705e-07, "loss": 0.2523, "step": 38409 }, { "epoch": 2.8547008547008548, "grad_norm": 2.4597141137653047, "learning_rate": 1.2277792709509905e-07, "loss": 0.3098, "step": 38410 }, { "epoch": 2.854775176514307, "grad_norm": 2.010780575810932, "learning_rate": 1.2265261252897398e-07, "loss": 0.2358, "step": 38411 }, { "epoch": 2.8548494983277592, "grad_norm": 2.3552269991626233, "learning_rate": 1.2252736155269896e-07, "loss": 0.257, "step": 38412 }, { "epoch": 2.8549238201412113, "grad_norm": 2.1472286491560184, "learning_rate": 1.2240217416708e-07, "loss": 0.2869, "step": 38413 }, { "epoch": 2.8549981419546637, "grad_norm": 2.763107493261791, "learning_rate": 1.2227705037292314e-07, "loss": 0.3182, "step": 38414 }, { "epoch": 2.855072463768116, "grad_norm": 2.6872668471673724, "learning_rate": 1.221519901710344e-07, "loss": 0.3277, "step": 38415 }, { "epoch": 2.855146785581568, "grad_norm": 3.1553243319321362, "learning_rate": 1.220269935622187e-07, "loss": 0.3495, "step": 38416 }, { "epoch": 2.85522110739502, "grad_norm": 2.7438107359564747, "learning_rate": 1.2190206054728094e-07, "loss": 0.2936, "step": 38417 }, { "epoch": 2.8552954292084727, "grad_norm": 2.458855967968882, "learning_rate": 1.217771911270249e-07, "loss": 0.2945, "step": 38418 }, { "epoch": 2.855369751021925, "grad_norm": 2.4818492823934313, "learning_rate": 1.2165238530225442e-07, "loss": 0.3235, "step": 38419 }, { "epoch": 2.855444072835377, "grad_norm": 2.5830102044152787, "learning_rate": 1.215276430737744e-07, "loss": 0.3016, "step": 38420 }, { "epoch": 2.855518394648829, "grad_norm": 2.2646371057347494, "learning_rate": 1.2140296444238532e-07, "loss": 0.2945, "step": 38421 }, { "epoch": 2.8555927164622816, "grad_norm": 2.650024014944745, "learning_rate": 1.2127834940889206e-07, "loss": 0.2802, "step": 38422 }, { "epoch": 2.855667038275734, "grad_norm": 2.3721103484897172, "learning_rate": 1.211537979740962e-07, "loss": 0.2235, "step": 38423 }, { "epoch": 2.855741360089186, "grad_norm": 2.226504227016836, "learning_rate": 1.2102931013879938e-07, "loss": 0.2891, "step": 38424 }, { "epoch": 2.8558156819026386, "grad_norm": 2.6361438793217515, "learning_rate": 1.2090488590380423e-07, "loss": 0.3265, "step": 38425 }, { "epoch": 2.8558900037160906, "grad_norm": 2.2820191151112645, "learning_rate": 1.2078052526991014e-07, "loss": 0.3557, "step": 38426 }, { "epoch": 2.855964325529543, "grad_norm": 2.9302880798693702, "learning_rate": 1.2065622823791866e-07, "loss": 0.3832, "step": 38427 }, { "epoch": 2.856038647342995, "grad_norm": 2.4484277237739187, "learning_rate": 1.205319948086292e-07, "loss": 0.2437, "step": 38428 }, { "epoch": 2.8561129691564475, "grad_norm": 2.6990966735323854, "learning_rate": 1.2040782498284332e-07, "loss": 0.3425, "step": 38429 }, { "epoch": 2.8561872909698995, "grad_norm": 2.37224837514555, "learning_rate": 1.2028371876135814e-07, "loss": 0.2209, "step": 38430 }, { "epoch": 2.856261612783352, "grad_norm": 1.9353029431625073, "learning_rate": 1.2015967614497414e-07, "loss": 0.1888, "step": 38431 }, { "epoch": 2.856335934596804, "grad_norm": 3.5364601808124494, "learning_rate": 1.200356971344896e-07, "loss": 0.3615, "step": 38432 }, { "epoch": 2.8564102564102565, "grad_norm": 2.9897397824681127, "learning_rate": 1.1991178173070273e-07, "loss": 0.2979, "step": 38433 }, { "epoch": 2.8564845782237085, "grad_norm": 2.129534298268237, "learning_rate": 1.197879299344107e-07, "loss": 0.2568, "step": 38434 }, { "epoch": 2.856558900037161, "grad_norm": 2.8384406846169568, "learning_rate": 1.1966414174641172e-07, "loss": 0.308, "step": 38435 }, { "epoch": 2.856633221850613, "grad_norm": 2.1536626794557647, "learning_rate": 1.1954041716750188e-07, "loss": 0.2387, "step": 38436 }, { "epoch": 2.8567075436640654, "grad_norm": 2.2583037063683444, "learning_rate": 1.1941675619847825e-07, "loss": 0.2455, "step": 38437 }, { "epoch": 2.856781865477518, "grad_norm": 2.63041204733991, "learning_rate": 1.1929315884013693e-07, "loss": 0.2529, "step": 38438 }, { "epoch": 2.85685618729097, "grad_norm": 2.3383494007306154, "learning_rate": 1.191696250932739e-07, "loss": 0.2715, "step": 38439 }, { "epoch": 2.856930509104422, "grad_norm": 2.457456174530416, "learning_rate": 1.1904615495868521e-07, "loss": 0.2874, "step": 38440 }, { "epoch": 2.8570048309178744, "grad_norm": 2.0398764150758897, "learning_rate": 1.1892274843716245e-07, "loss": 0.2703, "step": 38441 }, { "epoch": 2.857079152731327, "grad_norm": 3.1821618494162562, "learning_rate": 1.1879940552950386e-07, "loss": 0.3304, "step": 38442 }, { "epoch": 2.857153474544779, "grad_norm": 1.8089380661958911, "learning_rate": 1.1867612623650104e-07, "loss": 0.2083, "step": 38443 }, { "epoch": 2.857227796358231, "grad_norm": 1.657638920860747, "learning_rate": 1.1855291055894891e-07, "loss": 0.1825, "step": 38444 }, { "epoch": 2.8573021181716833, "grad_norm": 1.9722159652603977, "learning_rate": 1.1842975849764127e-07, "loss": 0.2209, "step": 38445 }, { "epoch": 2.857376439985136, "grad_norm": 2.3005587358951853, "learning_rate": 1.183066700533686e-07, "loss": 0.1904, "step": 38446 }, { "epoch": 2.857450761798588, "grad_norm": 2.251515565097619, "learning_rate": 1.1818364522692582e-07, "loss": 0.2391, "step": 38447 }, { "epoch": 2.8575250836120403, "grad_norm": 2.85215220917173, "learning_rate": 1.1806068401910342e-07, "loss": 0.2996, "step": 38448 }, { "epoch": 2.8575994054254923, "grad_norm": 1.965241613850423, "learning_rate": 1.1793778643069408e-07, "loss": 0.1939, "step": 38449 }, { "epoch": 2.8576737272389447, "grad_norm": 2.502437058439212, "learning_rate": 1.1781495246248942e-07, "loss": 0.282, "step": 38450 }, { "epoch": 2.8577480490523968, "grad_norm": 2.654679879217417, "learning_rate": 1.1769218211527767e-07, "loss": 0.3297, "step": 38451 }, { "epoch": 2.8578223708658492, "grad_norm": 2.723663749439339, "learning_rate": 1.1756947538985263e-07, "loss": 0.3198, "step": 38452 }, { "epoch": 2.8578966926793012, "grad_norm": 2.653255971427718, "learning_rate": 1.1744683228700038e-07, "loss": 0.2731, "step": 38453 }, { "epoch": 2.8579710144927537, "grad_norm": 2.315077047524863, "learning_rate": 1.1732425280751469e-07, "loss": 0.2514, "step": 38454 }, { "epoch": 2.8580453363062057, "grad_norm": 2.345321547570938, "learning_rate": 1.1720173695218162e-07, "loss": 0.2331, "step": 38455 }, { "epoch": 2.858119658119658, "grad_norm": 2.2021131088638324, "learning_rate": 1.1707928472179053e-07, "loss": 0.2529, "step": 38456 }, { "epoch": 2.8581939799331106, "grad_norm": 2.2436698410079337, "learning_rate": 1.169568961171319e-07, "loss": 0.2756, "step": 38457 }, { "epoch": 2.8582683017465627, "grad_norm": 2.4784825654159506, "learning_rate": 1.1683457113899066e-07, "loss": 0.306, "step": 38458 }, { "epoch": 2.8583426235600147, "grad_norm": 2.0830350758512424, "learning_rate": 1.1671230978815618e-07, "loss": 0.2318, "step": 38459 }, { "epoch": 2.858416945373467, "grad_norm": 2.6054767890538995, "learning_rate": 1.1659011206541448e-07, "loss": 0.2753, "step": 38460 }, { "epoch": 2.8584912671869196, "grad_norm": 2.7073206724210737, "learning_rate": 1.1646797797155274e-07, "loss": 0.2817, "step": 38461 }, { "epoch": 2.8585655890003716, "grad_norm": 1.8948608851889919, "learning_rate": 1.1634590750735808e-07, "loss": 0.2112, "step": 38462 }, { "epoch": 2.8586399108138236, "grad_norm": 2.7368501155288145, "learning_rate": 1.1622390067361544e-07, "loss": 0.3711, "step": 38463 }, { "epoch": 2.858714232627276, "grad_norm": 4.803405515901541, "learning_rate": 1.1610195747111086e-07, "loss": 0.249, "step": 38464 }, { "epoch": 2.8587885544407285, "grad_norm": 2.7821557858530466, "learning_rate": 1.1598007790062815e-07, "loss": 0.2858, "step": 38465 }, { "epoch": 2.8588628762541806, "grad_norm": 2.240907306346418, "learning_rate": 1.1585826196295336e-07, "loss": 0.2486, "step": 38466 }, { "epoch": 2.8589371980676326, "grad_norm": 2.3886044892343428, "learning_rate": 1.1573650965887029e-07, "loss": 0.2348, "step": 38467 }, { "epoch": 2.859011519881085, "grad_norm": 2.3598931629080937, "learning_rate": 1.1561482098916166e-07, "loss": 0.2864, "step": 38468 }, { "epoch": 2.8590858416945375, "grad_norm": 2.1737385002931364, "learning_rate": 1.154931959546135e-07, "loss": 0.2169, "step": 38469 }, { "epoch": 2.8591601635079895, "grad_norm": 2.1465217806378165, "learning_rate": 1.153716345560052e-07, "loss": 0.2338, "step": 38470 }, { "epoch": 2.859234485321442, "grad_norm": 2.305544479802818, "learning_rate": 1.1525013679412389e-07, "loss": 0.259, "step": 38471 }, { "epoch": 2.859308807134894, "grad_norm": 2.6757263334461396, "learning_rate": 1.1512870266974786e-07, "loss": 0.2548, "step": 38472 }, { "epoch": 2.8593831289483465, "grad_norm": 2.9198852337590187, "learning_rate": 1.1500733218366089e-07, "loss": 0.2589, "step": 38473 }, { "epoch": 2.8594574507617985, "grad_norm": 2.5866771548143466, "learning_rate": 1.1488602533664461e-07, "loss": 0.355, "step": 38474 }, { "epoch": 2.859531772575251, "grad_norm": 2.035548662238954, "learning_rate": 1.1476478212947728e-07, "loss": 0.1975, "step": 38475 }, { "epoch": 2.859606094388703, "grad_norm": 2.1483604908752176, "learning_rate": 1.1464360256294272e-07, "loss": 0.2081, "step": 38476 }, { "epoch": 2.8596804162021554, "grad_norm": 2.585095571789818, "learning_rate": 1.1452248663781917e-07, "loss": 0.2444, "step": 38477 }, { "epoch": 2.8597547380156074, "grad_norm": 3.2824218275339416, "learning_rate": 1.1440143435488716e-07, "loss": 0.3607, "step": 38478 }, { "epoch": 2.85982905982906, "grad_norm": 2.125070687583911, "learning_rate": 1.1428044571492602e-07, "loss": 0.2518, "step": 38479 }, { "epoch": 2.8599033816425123, "grad_norm": 2.391337255726652, "learning_rate": 1.1415952071871295e-07, "loss": 0.3039, "step": 38480 }, { "epoch": 2.8599777034559644, "grad_norm": 2.8340795480813905, "learning_rate": 1.1403865936702951e-07, "loss": 0.2907, "step": 38481 }, { "epoch": 2.8600520252694164, "grad_norm": 2.3795520327302278, "learning_rate": 1.1391786166065178e-07, "loss": 0.2936, "step": 38482 }, { "epoch": 2.860126347082869, "grad_norm": 1.5582831951742364, "learning_rate": 1.13797127600358e-07, "loss": 0.215, "step": 38483 }, { "epoch": 2.8602006688963213, "grad_norm": 2.3471889994463258, "learning_rate": 1.1367645718692533e-07, "loss": 0.2814, "step": 38484 }, { "epoch": 2.8602749907097733, "grad_norm": 2.290357773566011, "learning_rate": 1.1355585042113094e-07, "loss": 0.2545, "step": 38485 }, { "epoch": 2.8603493125232253, "grad_norm": 1.7684482004646027, "learning_rate": 1.1343530730375086e-07, "loss": 0.1682, "step": 38486 }, { "epoch": 2.860423634336678, "grad_norm": 3.115323123582033, "learning_rate": 1.1331482783556113e-07, "loss": 0.2565, "step": 38487 }, { "epoch": 2.8604979561501302, "grad_norm": 1.7855157604084388, "learning_rate": 1.1319441201733782e-07, "loss": 0.1585, "step": 38488 }, { "epoch": 2.8605722779635823, "grad_norm": 2.4881035992917533, "learning_rate": 1.1307405984985587e-07, "loss": 0.2398, "step": 38489 }, { "epoch": 2.8606465997770343, "grad_norm": 2.285124000505539, "learning_rate": 1.1295377133388907e-07, "loss": 0.2423, "step": 38490 }, { "epoch": 2.8607209215904867, "grad_norm": 1.6688133455169556, "learning_rate": 1.128335464702146e-07, "loss": 0.2048, "step": 38491 }, { "epoch": 2.860795243403939, "grad_norm": 2.891830716678014, "learning_rate": 1.1271338525960296e-07, "loss": 0.3103, "step": 38492 }, { "epoch": 2.860869565217391, "grad_norm": 2.6732280057434186, "learning_rate": 1.1259328770283128e-07, "loss": 0.2272, "step": 38493 }, { "epoch": 2.8609438870308437, "grad_norm": 2.640041661896043, "learning_rate": 1.1247325380067009e-07, "loss": 0.2773, "step": 38494 }, { "epoch": 2.8610182088442957, "grad_norm": 2.02988594983837, "learning_rate": 1.1235328355389208e-07, "loss": 0.2, "step": 38495 }, { "epoch": 2.861092530657748, "grad_norm": 2.850825373068179, "learning_rate": 1.122333769632733e-07, "loss": 0.2477, "step": 38496 }, { "epoch": 2.8611668524712, "grad_norm": 2.5598978187956276, "learning_rate": 1.1211353402958092e-07, "loss": 0.2854, "step": 38497 }, { "epoch": 2.8612411742846526, "grad_norm": 2.595027407830905, "learning_rate": 1.1199375475358876e-07, "loss": 0.361, "step": 38498 }, { "epoch": 2.8613154960981046, "grad_norm": 2.469571295807422, "learning_rate": 1.1187403913606843e-07, "loss": 0.362, "step": 38499 }, { "epoch": 2.861389817911557, "grad_norm": 2.1817830237801807, "learning_rate": 1.117543871777893e-07, "loss": 0.2791, "step": 38500 }, { "epoch": 2.861464139725009, "grad_norm": 2.2497727452230203, "learning_rate": 1.11634798879523e-07, "loss": 0.2682, "step": 38501 }, { "epoch": 2.8615384615384616, "grad_norm": 2.733525973470757, "learning_rate": 1.1151527424203779e-07, "loss": 0.2507, "step": 38502 }, { "epoch": 2.861612783351914, "grad_norm": 1.8869415062742627, "learning_rate": 1.1139581326610416e-07, "loss": 0.2458, "step": 38503 }, { "epoch": 2.861687105165366, "grad_norm": 2.1862442531639816, "learning_rate": 1.112764159524926e-07, "loss": 0.2338, "step": 38504 }, { "epoch": 2.861761426978818, "grad_norm": 2.160977406574395, "learning_rate": 1.111570823019692e-07, "loss": 0.192, "step": 38505 }, { "epoch": 2.8618357487922705, "grad_norm": 1.9965767189072747, "learning_rate": 1.1103781231530441e-07, "loss": 0.2076, "step": 38506 }, { "epoch": 2.861910070605723, "grad_norm": 1.9041276910033216, "learning_rate": 1.1091860599326432e-07, "loss": 0.2676, "step": 38507 }, { "epoch": 2.861984392419175, "grad_norm": 2.201616676879816, "learning_rate": 1.1079946333661718e-07, "loss": 0.2227, "step": 38508 }, { "epoch": 2.862058714232627, "grad_norm": 2.753011528050446, "learning_rate": 1.1068038434613015e-07, "loss": 0.2985, "step": 38509 }, { "epoch": 2.8621330360460795, "grad_norm": 2.190747071530989, "learning_rate": 1.1056136902256931e-07, "loss": 0.2061, "step": 38510 }, { "epoch": 2.862207357859532, "grad_norm": 2.212512307589629, "learning_rate": 1.104424173667018e-07, "loss": 0.2285, "step": 38511 }, { "epoch": 2.862281679672984, "grad_norm": 3.7820531305059504, "learning_rate": 1.1032352937929258e-07, "loss": 0.2545, "step": 38512 }, { "epoch": 2.8623560014864364, "grad_norm": 2.2441986549721156, "learning_rate": 1.102047050611077e-07, "loss": 0.3217, "step": 38513 }, { "epoch": 2.8624303232998884, "grad_norm": 2.1713926291179395, "learning_rate": 1.1008594441291098e-07, "loss": 0.2459, "step": 38514 }, { "epoch": 2.862504645113341, "grad_norm": 2.3789578795855615, "learning_rate": 1.099672474354685e-07, "loss": 0.3228, "step": 38515 }, { "epoch": 2.862578966926793, "grad_norm": 2.394915383545173, "learning_rate": 1.0984861412954295e-07, "loss": 0.27, "step": 38516 }, { "epoch": 2.8626532887402454, "grad_norm": 3.2869893198946953, "learning_rate": 1.0973004449589931e-07, "loss": 0.3195, "step": 38517 }, { "epoch": 2.8627276105536974, "grad_norm": 2.5892089207973266, "learning_rate": 1.0961153853530138e-07, "loss": 0.2817, "step": 38518 }, { "epoch": 2.86280193236715, "grad_norm": 2.240290805016967, "learning_rate": 1.0949309624850968e-07, "loss": 0.2315, "step": 38519 }, { "epoch": 2.862876254180602, "grad_norm": 2.2237533477461913, "learning_rate": 1.0937471763628916e-07, "loss": 0.245, "step": 38520 }, { "epoch": 2.8629505759940543, "grad_norm": 2.853745161814236, "learning_rate": 1.0925640269940142e-07, "loss": 0.2744, "step": 38521 }, { "epoch": 2.8630248978075064, "grad_norm": 2.154416543285937, "learning_rate": 1.0913815143860696e-07, "loss": 0.2118, "step": 38522 }, { "epoch": 2.863099219620959, "grad_norm": 2.517622717050742, "learning_rate": 1.090199638546685e-07, "loss": 0.2856, "step": 38523 }, { "epoch": 2.863173541434411, "grad_norm": 1.8501089240914887, "learning_rate": 1.0890183994834546e-07, "loss": 0.1951, "step": 38524 }, { "epoch": 2.8632478632478633, "grad_norm": 2.5251598931642834, "learning_rate": 1.0878377972039944e-07, "loss": 0.3001, "step": 38525 }, { "epoch": 2.8633221850613157, "grad_norm": 2.0329215162328476, "learning_rate": 1.0866578317158983e-07, "loss": 0.2158, "step": 38526 }, { "epoch": 2.8633965068747678, "grad_norm": 1.8107652775885525, "learning_rate": 1.0854785030267711e-07, "loss": 0.1693, "step": 38527 }, { "epoch": 2.86347082868822, "grad_norm": 2.1597976747805796, "learning_rate": 1.0842998111442071e-07, "loss": 0.2357, "step": 38528 }, { "epoch": 2.8635451505016722, "grad_norm": 2.1893906265666976, "learning_rate": 1.0831217560757779e-07, "loss": 0.2215, "step": 38529 }, { "epoch": 2.8636194723151247, "grad_norm": 2.87397502587138, "learning_rate": 1.0819443378290884e-07, "loss": 0.3381, "step": 38530 }, { "epoch": 2.8636937941285767, "grad_norm": 2.1258670058342934, "learning_rate": 1.0807675564116993e-07, "loss": 0.2582, "step": 38531 }, { "epoch": 2.8637681159420287, "grad_norm": 1.6425260380372582, "learning_rate": 1.0795914118312045e-07, "loss": 0.1906, "step": 38532 }, { "epoch": 2.863842437755481, "grad_norm": 2.0612238585181997, "learning_rate": 1.0784159040951648e-07, "loss": 0.2738, "step": 38533 }, { "epoch": 2.8639167595689337, "grad_norm": 2.6184047793351173, "learning_rate": 1.0772410332111516e-07, "loss": 0.2843, "step": 38534 }, { "epoch": 2.8639910813823857, "grad_norm": 3.6895029116994875, "learning_rate": 1.0760667991867258e-07, "loss": 0.3933, "step": 38535 }, { "epoch": 2.864065403195838, "grad_norm": 2.74311702296724, "learning_rate": 1.0748932020294478e-07, "loss": 0.2197, "step": 38536 }, { "epoch": 2.86413972500929, "grad_norm": 1.889727920076143, "learning_rate": 1.0737202417468784e-07, "loss": 0.1878, "step": 38537 }, { "epoch": 2.8642140468227426, "grad_norm": 1.9986938208361653, "learning_rate": 1.072547918346567e-07, "loss": 0.1892, "step": 38538 }, { "epoch": 2.8642883686361946, "grad_norm": 2.6648278959690717, "learning_rate": 1.0713762318360522e-07, "loss": 0.2915, "step": 38539 }, { "epoch": 2.864362690449647, "grad_norm": 2.2204040126154747, "learning_rate": 1.0702051822228831e-07, "loss": 0.2021, "step": 38540 }, { "epoch": 2.864437012263099, "grad_norm": 2.1687771404707523, "learning_rate": 1.0690347695146097e-07, "loss": 0.2535, "step": 38541 }, { "epoch": 2.8645113340765516, "grad_norm": 2.5490436848452105, "learning_rate": 1.0678649937187479e-07, "loss": 0.2814, "step": 38542 }, { "epoch": 2.8645856558900036, "grad_norm": 1.9328164110743553, "learning_rate": 1.0666958548428474e-07, "loss": 0.2492, "step": 38543 }, { "epoch": 2.864659977703456, "grad_norm": 2.621214556951947, "learning_rate": 1.0655273528944133e-07, "loss": 0.2703, "step": 38544 }, { "epoch": 2.864734299516908, "grad_norm": 2.7984698609269767, "learning_rate": 1.0643594878809949e-07, "loss": 0.2519, "step": 38545 }, { "epoch": 2.8648086213303605, "grad_norm": 1.9730961848406459, "learning_rate": 1.0631922598100863e-07, "loss": 0.2065, "step": 38546 }, { "epoch": 2.8648829431438125, "grad_norm": 2.2724878206936667, "learning_rate": 1.062025668689215e-07, "loss": 0.2887, "step": 38547 }, { "epoch": 2.864957264957265, "grad_norm": 3.263633481695083, "learning_rate": 1.0608597145258859e-07, "loss": 0.2493, "step": 38548 }, { "epoch": 2.8650315867707175, "grad_norm": 2.5960434892830238, "learning_rate": 1.0596943973276152e-07, "loss": 0.3249, "step": 38549 }, { "epoch": 2.8651059085841695, "grad_norm": 2.707337066736955, "learning_rate": 1.0585297171018971e-07, "loss": 0.3286, "step": 38550 }, { "epoch": 2.8651802303976215, "grad_norm": 2.404087001350985, "learning_rate": 1.0573656738562254e-07, "loss": 0.2463, "step": 38551 }, { "epoch": 2.865254552211074, "grad_norm": 1.6454799463911989, "learning_rate": 1.0562022675981054e-07, "loss": 0.156, "step": 38552 }, { "epoch": 2.8653288740245264, "grad_norm": 2.7696577580863937, "learning_rate": 1.0550394983350198e-07, "loss": 0.3266, "step": 38553 }, { "epoch": 2.8654031958379784, "grad_norm": 2.5646013627653734, "learning_rate": 1.0538773660744516e-07, "loss": 0.2731, "step": 38554 }, { "epoch": 2.8654775176514304, "grad_norm": 2.6496241668860177, "learning_rate": 1.052715870823895e-07, "loss": 0.2843, "step": 38555 }, { "epoch": 2.865551839464883, "grad_norm": 1.7172555257482198, "learning_rate": 1.0515550125908102e-07, "loss": 0.1844, "step": 38556 }, { "epoch": 2.8656261612783354, "grad_norm": 8.813973395264874, "learning_rate": 1.0503947913826918e-07, "loss": 0.2714, "step": 38557 }, { "epoch": 2.8657004830917874, "grad_norm": 2.8449677085677667, "learning_rate": 1.049235207206989e-07, "loss": 0.3056, "step": 38558 }, { "epoch": 2.86577480490524, "grad_norm": 2.1414271065579396, "learning_rate": 1.0480762600711735e-07, "loss": 0.2067, "step": 38559 }, { "epoch": 2.865849126718692, "grad_norm": 2.7697887991314665, "learning_rate": 1.0469179499827175e-07, "loss": 0.3454, "step": 38560 }, { "epoch": 2.8659234485321443, "grad_norm": 1.9885088418754198, "learning_rate": 1.0457602769490594e-07, "loss": 0.2372, "step": 38561 }, { "epoch": 2.8659977703455963, "grad_norm": 2.494377611470116, "learning_rate": 1.0446032409776708e-07, "loss": 0.2543, "step": 38562 }, { "epoch": 2.866072092159049, "grad_norm": 2.308863116888053, "learning_rate": 1.0434468420759902e-07, "loss": 0.2847, "step": 38563 }, { "epoch": 2.866146413972501, "grad_norm": 2.3909856410049217, "learning_rate": 1.0422910802514564e-07, "loss": 0.2813, "step": 38564 }, { "epoch": 2.8662207357859533, "grad_norm": 1.8965765393643803, "learning_rate": 1.0411359555115297e-07, "loss": 0.2118, "step": 38565 }, { "epoch": 2.8662950575994053, "grad_norm": 3.205401256023069, "learning_rate": 1.0399814678636267e-07, "loss": 0.2914, "step": 38566 }, { "epoch": 2.8663693794128577, "grad_norm": 1.8095343271896813, "learning_rate": 1.0388276173151967e-07, "loss": 0.1676, "step": 38567 }, { "epoch": 2.8664437012263098, "grad_norm": 1.9341275396121653, "learning_rate": 1.0376744038736453e-07, "loss": 0.24, "step": 38568 }, { "epoch": 2.866518023039762, "grad_norm": 2.3660962747783674, "learning_rate": 1.0365218275464217e-07, "loss": 0.2667, "step": 38569 }, { "epoch": 2.8665923448532142, "grad_norm": 2.4313697302939428, "learning_rate": 1.0353698883409425e-07, "loss": 0.2427, "step": 38570 }, { "epoch": 2.8666666666666667, "grad_norm": 1.737987828374998, "learning_rate": 1.0342185862646015e-07, "loss": 0.2265, "step": 38571 }, { "epoch": 2.866740988480119, "grad_norm": 2.2856731135860384, "learning_rate": 1.0330679213248373e-07, "loss": 0.2168, "step": 38572 }, { "epoch": 2.866815310293571, "grad_norm": 2.4479257753146575, "learning_rate": 1.0319178935290441e-07, "loss": 0.2562, "step": 38573 }, { "epoch": 2.866889632107023, "grad_norm": 3.023951870408962, "learning_rate": 1.0307685028846271e-07, "loss": 0.2983, "step": 38574 }, { "epoch": 2.8669639539204756, "grad_norm": 2.143838428163953, "learning_rate": 1.0296197493989913e-07, "loss": 0.2336, "step": 38575 }, { "epoch": 2.867038275733928, "grad_norm": 2.8423552396161877, "learning_rate": 1.0284716330795309e-07, "loss": 0.3396, "step": 38576 }, { "epoch": 2.86711259754738, "grad_norm": 2.7743694899129627, "learning_rate": 1.0273241539336287e-07, "loss": 0.3062, "step": 38577 }, { "epoch": 2.867186919360832, "grad_norm": 4.674196467011452, "learning_rate": 1.026177311968668e-07, "loss": 0.2096, "step": 38578 }, { "epoch": 2.8672612411742846, "grad_norm": 2.1681311146833546, "learning_rate": 1.0250311071920538e-07, "loss": 0.237, "step": 38579 }, { "epoch": 2.867335562987737, "grad_norm": 2.3860696330914166, "learning_rate": 1.023885539611158e-07, "loss": 0.2644, "step": 38580 }, { "epoch": 2.867409884801189, "grad_norm": 2.3030426197363574, "learning_rate": 1.0227406092333414e-07, "loss": 0.2714, "step": 38581 }, { "epoch": 2.8674842066146415, "grad_norm": 1.9687588306553279, "learning_rate": 1.0215963160659981e-07, "loss": 0.2115, "step": 38582 }, { "epoch": 2.8675585284280936, "grad_norm": 2.028976351213321, "learning_rate": 1.0204526601164666e-07, "loss": 0.2065, "step": 38583 }, { "epoch": 2.867632850241546, "grad_norm": 2.9990910557259767, "learning_rate": 1.0193096413921299e-07, "loss": 0.2688, "step": 38584 }, { "epoch": 2.867707172054998, "grad_norm": 2.813608004294282, "learning_rate": 1.018167259900349e-07, "loss": 0.2989, "step": 38585 }, { "epoch": 2.8677814938684505, "grad_norm": 2.160417283016098, "learning_rate": 1.0170255156484732e-07, "loss": 0.2218, "step": 38586 }, { "epoch": 2.8678558156819025, "grad_norm": 2.205759449384884, "learning_rate": 1.0158844086438413e-07, "loss": 0.2021, "step": 38587 }, { "epoch": 2.867930137495355, "grad_norm": 2.0481742695005876, "learning_rate": 1.014743938893814e-07, "loss": 0.238, "step": 38588 }, { "epoch": 2.868004459308807, "grad_norm": 2.117732265244792, "learning_rate": 1.01360410640573e-07, "loss": 0.2692, "step": 38589 }, { "epoch": 2.8680787811222594, "grad_norm": 3.6745057294657717, "learning_rate": 1.0124649111869278e-07, "loss": 0.3411, "step": 38590 }, { "epoch": 2.868153102935712, "grad_norm": 2.404705378881916, "learning_rate": 1.0113263532447348e-07, "loss": 0.2676, "step": 38591 }, { "epoch": 2.868227424749164, "grad_norm": 2.7669439245085745, "learning_rate": 1.0101884325864897e-07, "loss": 0.2454, "step": 38592 }, { "epoch": 2.868301746562616, "grad_norm": 2.4614203933895515, "learning_rate": 1.0090511492195198e-07, "loss": 0.3229, "step": 38593 }, { "epoch": 2.8683760683760684, "grad_norm": 2.594775690622227, "learning_rate": 1.0079145031511306e-07, "loss": 0.2622, "step": 38594 }, { "epoch": 2.868450390189521, "grad_norm": 1.9025269833129381, "learning_rate": 1.0067784943886605e-07, "loss": 0.2344, "step": 38595 }, { "epoch": 2.868524712002973, "grad_norm": 2.0180989692867226, "learning_rate": 1.0056431229394148e-07, "loss": 0.1916, "step": 38596 }, { "epoch": 2.868599033816425, "grad_norm": 2.3306974625007166, "learning_rate": 1.0045083888106988e-07, "loss": 0.3559, "step": 38597 }, { "epoch": 2.8686733556298774, "grad_norm": 1.9695627693950752, "learning_rate": 1.0033742920098288e-07, "loss": 0.1907, "step": 38598 }, { "epoch": 2.86874767744333, "grad_norm": 2.4431515193561886, "learning_rate": 1.002240832544088e-07, "loss": 0.3014, "step": 38599 }, { "epoch": 2.868821999256782, "grad_norm": 2.661871146007236, "learning_rate": 1.0011080104207927e-07, "loss": 0.3003, "step": 38600 }, { "epoch": 2.868896321070234, "grad_norm": 2.0581380426420393, "learning_rate": 9.999758256472258e-08, "loss": 0.1822, "step": 38601 }, { "epoch": 2.8689706428836863, "grad_norm": 2.3224786544070204, "learning_rate": 9.988442782306706e-08, "loss": 0.1756, "step": 38602 }, { "epoch": 2.8690449646971388, "grad_norm": 1.6567309193135757, "learning_rate": 9.977133681784213e-08, "loss": 0.2286, "step": 38603 }, { "epoch": 2.869119286510591, "grad_norm": 2.4365764856321017, "learning_rate": 9.965830954977607e-08, "loss": 0.3006, "step": 38604 }, { "epoch": 2.8691936083240432, "grad_norm": 4.018907383159633, "learning_rate": 9.954534601959498e-08, "loss": 0.2329, "step": 38605 }, { "epoch": 2.8692679301374953, "grad_norm": 2.2400019894406977, "learning_rate": 9.943244622802827e-08, "loss": 0.2224, "step": 38606 }, { "epoch": 2.8693422519509477, "grad_norm": 3.0144774686492233, "learning_rate": 9.931961017580205e-08, "loss": 0.3776, "step": 38607 }, { "epoch": 2.8694165737643997, "grad_norm": 2.0702302810916287, "learning_rate": 9.920683786364238e-08, "loss": 0.2692, "step": 38608 }, { "epoch": 2.869490895577852, "grad_norm": 2.7342423517269374, "learning_rate": 9.909412929227535e-08, "loss": 0.3332, "step": 38609 }, { "epoch": 2.869565217391304, "grad_norm": 2.6565267537814607, "learning_rate": 9.898148446242595e-08, "loss": 0.2481, "step": 38610 }, { "epoch": 2.8696395392047567, "grad_norm": 2.2891087786301565, "learning_rate": 9.886890337482025e-08, "loss": 0.2118, "step": 38611 }, { "epoch": 2.8697138610182087, "grad_norm": 2.2496878095687785, "learning_rate": 9.875638603018322e-08, "loss": 0.241, "step": 38612 }, { "epoch": 2.869788182831661, "grad_norm": 2.6328159943563487, "learning_rate": 9.864393242923875e-08, "loss": 0.3091, "step": 38613 }, { "epoch": 2.8698625046451136, "grad_norm": 1.9346138834008872, "learning_rate": 9.853154257271069e-08, "loss": 0.1617, "step": 38614 }, { "epoch": 2.8699368264585656, "grad_norm": 2.3354378640557236, "learning_rate": 9.841921646132291e-08, "loss": 0.2629, "step": 38615 }, { "epoch": 2.8700111482720176, "grad_norm": 2.284392066377596, "learning_rate": 9.830695409579816e-08, "loss": 0.29, "step": 38616 }, { "epoch": 2.87008547008547, "grad_norm": 2.2602914163971235, "learning_rate": 9.819475547685919e-08, "loss": 0.2576, "step": 38617 }, { "epoch": 2.8701597918989226, "grad_norm": 2.4925375538871557, "learning_rate": 9.808262060522989e-08, "loss": 0.2796, "step": 38618 }, { "epoch": 2.8702341137123746, "grad_norm": 2.1524608764292665, "learning_rate": 9.797054948162965e-08, "loss": 0.2587, "step": 38619 }, { "epoch": 2.8703084355258266, "grad_norm": 2.3100754617502233, "learning_rate": 9.785854210678236e-08, "loss": 0.2717, "step": 38620 }, { "epoch": 2.870382757339279, "grad_norm": 2.644272219876152, "learning_rate": 9.774659848140744e-08, "loss": 0.2343, "step": 38621 }, { "epoch": 2.8704570791527315, "grad_norm": 2.1621713329500056, "learning_rate": 9.763471860622542e-08, "loss": 0.2112, "step": 38622 }, { "epoch": 2.8705314009661835, "grad_norm": 2.116183454989864, "learning_rate": 9.752290248195794e-08, "loss": 0.2713, "step": 38623 }, { "epoch": 2.8706057227796355, "grad_norm": 2.379024787872802, "learning_rate": 9.741115010932445e-08, "loss": 0.2565, "step": 38624 }, { "epoch": 2.870680044593088, "grad_norm": 2.266097303960671, "learning_rate": 9.729946148904323e-08, "loss": 0.2913, "step": 38625 }, { "epoch": 2.8707543664065405, "grad_norm": 1.9666333576438717, "learning_rate": 9.718783662183595e-08, "loss": 0.2156, "step": 38626 }, { "epoch": 2.8708286882199925, "grad_norm": 2.2691912212513503, "learning_rate": 9.707627550841758e-08, "loss": 0.2369, "step": 38627 }, { "epoch": 2.870903010033445, "grad_norm": 1.732291044805824, "learning_rate": 9.696477814950977e-08, "loss": 0.1872, "step": 38628 }, { "epoch": 2.870977331846897, "grad_norm": 1.9082784502820624, "learning_rate": 9.685334454582862e-08, "loss": 0.2426, "step": 38629 }, { "epoch": 2.8710516536603494, "grad_norm": 2.7731959402909516, "learning_rate": 9.674197469809133e-08, "loss": 0.1619, "step": 38630 }, { "epoch": 2.8711259754738014, "grad_norm": 2.277928125173443, "learning_rate": 9.66306686070162e-08, "loss": 0.2518, "step": 38631 }, { "epoch": 2.871200297287254, "grad_norm": 2.5985138493275035, "learning_rate": 9.651942627331711e-08, "loss": 0.2603, "step": 38632 }, { "epoch": 2.871274619100706, "grad_norm": 2.4636240182530633, "learning_rate": 9.640824769771351e-08, "loss": 0.2164, "step": 38633 }, { "epoch": 2.8713489409141584, "grad_norm": 2.2463472545393985, "learning_rate": 9.629713288091924e-08, "loss": 0.2782, "step": 38634 }, { "epoch": 2.8714232627276104, "grad_norm": 2.296426902127922, "learning_rate": 9.618608182365152e-08, "loss": 0.2161, "step": 38635 }, { "epoch": 2.871497584541063, "grad_norm": 2.8112351255790005, "learning_rate": 9.607509452662311e-08, "loss": 0.3005, "step": 38636 }, { "epoch": 2.8715719063545153, "grad_norm": 2.3105131576648383, "learning_rate": 9.596417099054789e-08, "loss": 0.2935, "step": 38637 }, { "epoch": 2.8716462281679673, "grad_norm": 3.0609615643078207, "learning_rate": 9.585331121614306e-08, "loss": 0.371, "step": 38638 }, { "epoch": 2.8717205499814193, "grad_norm": 1.933941092278651, "learning_rate": 9.574251520412026e-08, "loss": 0.198, "step": 38639 }, { "epoch": 2.871794871794872, "grad_norm": 2.2370871595342816, "learning_rate": 9.563178295519337e-08, "loss": 0.2944, "step": 38640 }, { "epoch": 2.8718691936083243, "grad_norm": 2.327341094345081, "learning_rate": 9.552111447007518e-08, "loss": 0.2227, "step": 38641 }, { "epoch": 2.8719435154217763, "grad_norm": 2.9273783657072037, "learning_rate": 9.541050974947841e-08, "loss": 0.2322, "step": 38642 }, { "epoch": 2.8720178372352283, "grad_norm": 2.124949798156108, "learning_rate": 9.529996879411474e-08, "loss": 0.2357, "step": 38643 }, { "epoch": 2.8720921590486808, "grad_norm": 3.5430297917500857, "learning_rate": 9.518949160469582e-08, "loss": 0.4057, "step": 38644 }, { "epoch": 2.872166480862133, "grad_norm": 2.501634255936663, "learning_rate": 9.50790781819333e-08, "loss": 0.2943, "step": 38645 }, { "epoch": 2.8722408026755852, "grad_norm": 2.740019118605019, "learning_rate": 9.496872852653772e-08, "loss": 0.3134, "step": 38646 }, { "epoch": 2.8723151244890377, "grad_norm": 2.608472765001223, "learning_rate": 9.485844263921851e-08, "loss": 0.2984, "step": 38647 }, { "epoch": 2.8723894463024897, "grad_norm": 2.0477607620939513, "learning_rate": 9.474822052068844e-08, "loss": 0.2191, "step": 38648 }, { "epoch": 2.872463768115942, "grad_norm": 2.836406362898231, "learning_rate": 9.463806217165361e-08, "loss": 0.3268, "step": 38649 }, { "epoch": 2.872538089929394, "grad_norm": 2.295484160462363, "learning_rate": 9.452796759282568e-08, "loss": 0.2561, "step": 38650 }, { "epoch": 2.8726124117428466, "grad_norm": 3.7841954038794525, "learning_rate": 9.441793678491295e-08, "loss": 0.4311, "step": 38651 }, { "epoch": 2.8726867335562987, "grad_norm": 2.330632456963092, "learning_rate": 9.430796974862267e-08, "loss": 0.2585, "step": 38652 }, { "epoch": 2.872761055369751, "grad_norm": 2.1421626688914253, "learning_rate": 9.419806648466534e-08, "loss": 0.2498, "step": 38653 }, { "epoch": 2.872835377183203, "grad_norm": 2.4299811564050193, "learning_rate": 9.408822699374598e-08, "loss": 0.2552, "step": 38654 }, { "epoch": 2.8729096989966556, "grad_norm": 2.763001620553485, "learning_rate": 9.39784512765729e-08, "loss": 0.2436, "step": 38655 }, { "epoch": 2.8729840208101076, "grad_norm": 2.308306471301989, "learning_rate": 9.386873933385221e-08, "loss": 0.3012, "step": 38656 }, { "epoch": 2.87305834262356, "grad_norm": 2.1240426835458126, "learning_rate": 9.375909116629112e-08, "loss": 0.2426, "step": 38657 }, { "epoch": 2.873132664437012, "grad_norm": 2.5326041758775233, "learning_rate": 9.364950677459461e-08, "loss": 0.2818, "step": 38658 }, { "epoch": 2.8732069862504646, "grad_norm": 2.8535242351519425, "learning_rate": 9.35399861594677e-08, "loss": 0.3185, "step": 38659 }, { "epoch": 2.873281308063917, "grad_norm": 3.1818937398818283, "learning_rate": 9.343052932161756e-08, "loss": 0.2676, "step": 38660 }, { "epoch": 2.873355629877369, "grad_norm": 2.9638687644016266, "learning_rate": 9.3321136261747e-08, "loss": 0.3835, "step": 38661 }, { "epoch": 2.873429951690821, "grad_norm": 3.2304837643141004, "learning_rate": 9.321180698056098e-08, "loss": 0.3323, "step": 38662 }, { "epoch": 2.8735042735042735, "grad_norm": 2.4606176922256724, "learning_rate": 9.310254147876451e-08, "loss": 0.2724, "step": 38663 }, { "epoch": 2.873578595317726, "grad_norm": 2.450699376683231, "learning_rate": 9.299333975705816e-08, "loss": 0.2641, "step": 38664 }, { "epoch": 2.873652917131178, "grad_norm": 2.2312721692423585, "learning_rate": 9.2884201816148e-08, "loss": 0.2812, "step": 38665 }, { "epoch": 2.87372723894463, "grad_norm": 2.418034865162958, "learning_rate": 9.277512765673457e-08, "loss": 0.1987, "step": 38666 }, { "epoch": 2.8738015607580825, "grad_norm": 2.0370296719117085, "learning_rate": 9.266611727952068e-08, "loss": 0.1915, "step": 38667 }, { "epoch": 2.873875882571535, "grad_norm": 2.2273751731711307, "learning_rate": 9.255717068520908e-08, "loss": 0.2676, "step": 38668 }, { "epoch": 2.873950204384987, "grad_norm": 2.559592889438423, "learning_rate": 9.244828787449923e-08, "loss": 0.2422, "step": 38669 }, { "epoch": 2.8740245261984394, "grad_norm": 2.485761494809869, "learning_rate": 9.233946884809386e-08, "loss": 0.3069, "step": 38670 }, { "epoch": 2.8740988480118914, "grad_norm": 2.744158710948955, "learning_rate": 9.223071360669245e-08, "loss": 0.3067, "step": 38671 }, { "epoch": 2.874173169825344, "grad_norm": 1.9955505977014256, "learning_rate": 9.212202215099664e-08, "loss": 0.2022, "step": 38672 }, { "epoch": 2.874247491638796, "grad_norm": 2.5045427168181456, "learning_rate": 9.201339448170477e-08, "loss": 0.3157, "step": 38673 }, { "epoch": 2.8743218134522484, "grad_norm": 2.2213476022348355, "learning_rate": 9.190483059951627e-08, "loss": 0.2773, "step": 38674 }, { "epoch": 2.8743961352657004, "grad_norm": 2.545708970404116, "learning_rate": 9.179633050513059e-08, "loss": 0.3649, "step": 38675 }, { "epoch": 2.874470457079153, "grad_norm": 2.3495609630849397, "learning_rate": 9.168789419924496e-08, "loss": 0.2813, "step": 38676 }, { "epoch": 2.874544778892605, "grad_norm": 2.3977700175594374, "learning_rate": 9.157952168256101e-08, "loss": 0.2873, "step": 38677 }, { "epoch": 2.8746191007060573, "grad_norm": 2.605818510818246, "learning_rate": 9.147121295577154e-08, "loss": 0.2711, "step": 38678 }, { "epoch": 2.8746934225195093, "grad_norm": 3.1386654104100176, "learning_rate": 9.136296801957823e-08, "loss": 0.3006, "step": 38679 }, { "epoch": 2.874767744332962, "grad_norm": 2.683537398059016, "learning_rate": 9.125478687467492e-08, "loss": 0.2761, "step": 38680 }, { "epoch": 2.874842066146414, "grad_norm": 2.6443238046457034, "learning_rate": 9.114666952175888e-08, "loss": 0.2365, "step": 38681 }, { "epoch": 2.8749163879598663, "grad_norm": 2.654012474037382, "learning_rate": 9.10386159615273e-08, "loss": 0.2929, "step": 38682 }, { "epoch": 2.8749907097733187, "grad_norm": 2.240009295993088, "learning_rate": 9.093062619467407e-08, "loss": 0.2696, "step": 38683 }, { "epoch": 2.8750650315867707, "grad_norm": 1.8119115155401861, "learning_rate": 9.082270022189532e-08, "loss": 0.1737, "step": 38684 }, { "epoch": 2.8751393534002228, "grad_norm": 2.5498098626974737, "learning_rate": 9.071483804388714e-08, "loss": 0.2815, "step": 38685 }, { "epoch": 2.875213675213675, "grad_norm": 2.645726671152863, "learning_rate": 9.060703966134122e-08, "loss": 0.321, "step": 38686 }, { "epoch": 2.8752879970271277, "grad_norm": 2.481497780565003, "learning_rate": 9.049930507495363e-08, "loss": 0.2114, "step": 38687 }, { "epoch": 2.8753623188405797, "grad_norm": 2.1405232306843196, "learning_rate": 9.039163428541719e-08, "loss": 0.2256, "step": 38688 }, { "epoch": 2.8754366406540317, "grad_norm": 1.9840634750311958, "learning_rate": 9.028402729342577e-08, "loss": 0.2542, "step": 38689 }, { "epoch": 2.875510962467484, "grad_norm": 1.8965349187153746, "learning_rate": 9.017648409967106e-08, "loss": 0.2589, "step": 38690 }, { "epoch": 2.8755852842809366, "grad_norm": 2.475905626992268, "learning_rate": 9.006900470484581e-08, "loss": 0.3279, "step": 38691 }, { "epoch": 2.8756596060943886, "grad_norm": 3.0595776141507924, "learning_rate": 8.996158910964282e-08, "loss": 0.3314, "step": 38692 }, { "epoch": 2.875733927907841, "grad_norm": 2.1397418724885298, "learning_rate": 8.985423731475263e-08, "loss": 0.2631, "step": 38693 }, { "epoch": 2.875808249721293, "grad_norm": 1.731357377465882, "learning_rate": 8.974694932086691e-08, "loss": 0.1938, "step": 38694 }, { "epoch": 2.8758825715347456, "grad_norm": 2.1702416036970247, "learning_rate": 8.963972512867514e-08, "loss": 0.2648, "step": 38695 }, { "epoch": 2.8759568933481976, "grad_norm": 1.8418722755136236, "learning_rate": 8.953256473886895e-08, "loss": 0.2276, "step": 38696 }, { "epoch": 2.87603121516165, "grad_norm": 1.7303779472751326, "learning_rate": 8.942546815213892e-08, "loss": 0.207, "step": 38697 }, { "epoch": 2.876105536975102, "grad_norm": 2.9807688063543436, "learning_rate": 8.931843536917229e-08, "loss": 0.226, "step": 38698 }, { "epoch": 2.8761798587885545, "grad_norm": 2.015705250608891, "learning_rate": 8.92114663906607e-08, "loss": 0.2151, "step": 38699 }, { "epoch": 2.8762541806020065, "grad_norm": 2.90656660159242, "learning_rate": 8.910456121729027e-08, "loss": 0.2542, "step": 38700 }, { "epoch": 2.876328502415459, "grad_norm": 2.3771165818381723, "learning_rate": 8.899771984975048e-08, "loss": 0.2494, "step": 38701 }, { "epoch": 2.876402824228911, "grad_norm": 2.2874539148917363, "learning_rate": 8.889094228872963e-08, "loss": 0.3117, "step": 38702 }, { "epoch": 2.8764771460423635, "grad_norm": 2.0227546511472, "learning_rate": 8.878422853491387e-08, "loss": 0.1823, "step": 38703 }, { "epoch": 2.8765514678558155, "grad_norm": 2.523770626674222, "learning_rate": 8.86775785889915e-08, "loss": 0.2124, "step": 38704 }, { "epoch": 2.876625789669268, "grad_norm": 2.146072311958767, "learning_rate": 8.857099245164869e-08, "loss": 0.2733, "step": 38705 }, { "epoch": 2.8767001114827204, "grad_norm": 2.6833732533085164, "learning_rate": 8.846447012357151e-08, "loss": 0.2542, "step": 38706 }, { "epoch": 2.8767744332961724, "grad_norm": 2.277895698964865, "learning_rate": 8.83580116054461e-08, "loss": 0.2488, "step": 38707 }, { "epoch": 2.8768487551096245, "grad_norm": 2.486307620711018, "learning_rate": 8.825161689795635e-08, "loss": 0.3188, "step": 38708 }, { "epoch": 2.876923076923077, "grad_norm": 1.810021340061993, "learning_rate": 8.814528600178951e-08, "loss": 0.1936, "step": 38709 }, { "epoch": 2.8769973987365294, "grad_norm": 2.652236774862281, "learning_rate": 8.803901891762945e-08, "loss": 0.3644, "step": 38710 }, { "epoch": 2.8770717205499814, "grad_norm": 2.9066235448685633, "learning_rate": 8.793281564615897e-08, "loss": 0.3372, "step": 38711 }, { "epoch": 2.8771460423634334, "grad_norm": 2.1816195813453763, "learning_rate": 8.782667618806307e-08, "loss": 0.2049, "step": 38712 }, { "epoch": 2.877220364176886, "grad_norm": 2.3512207436167256, "learning_rate": 8.772060054402454e-08, "loss": 0.2912, "step": 38713 }, { "epoch": 2.8772946859903383, "grad_norm": 1.9510628441698639, "learning_rate": 8.761458871472728e-08, "loss": 0.2004, "step": 38714 }, { "epoch": 2.8773690078037903, "grad_norm": 2.962692499771293, "learning_rate": 8.750864070085185e-08, "loss": 0.3569, "step": 38715 }, { "epoch": 2.877443329617243, "grad_norm": 2.2729451872176853, "learning_rate": 8.740275650308105e-08, "loss": 0.2384, "step": 38716 }, { "epoch": 2.877517651430695, "grad_norm": 4.901855924082366, "learning_rate": 8.729693612209878e-08, "loss": 0.2048, "step": 38717 }, { "epoch": 2.8775919732441473, "grad_norm": 2.8928381816487043, "learning_rate": 8.719117955858336e-08, "loss": 0.3299, "step": 38718 }, { "epoch": 2.8776662950575993, "grad_norm": 1.7432041144819905, "learning_rate": 8.708548681321648e-08, "loss": 0.1971, "step": 38719 }, { "epoch": 2.8777406168710518, "grad_norm": 2.1969876675505238, "learning_rate": 8.697985788667873e-08, "loss": 0.2351, "step": 38720 }, { "epoch": 2.8778149386845038, "grad_norm": 2.2567528570011137, "learning_rate": 8.687429277965176e-08, "loss": 0.2313, "step": 38721 }, { "epoch": 2.8778892604979562, "grad_norm": 2.8391911668884444, "learning_rate": 8.67687914928128e-08, "loss": 0.3369, "step": 38722 }, { "epoch": 2.8779635823114083, "grad_norm": 2.277301095887905, "learning_rate": 8.666335402684245e-08, "loss": 0.3116, "step": 38723 }, { "epoch": 2.8780379041248607, "grad_norm": 2.740582323153172, "learning_rate": 8.655798038241902e-08, "loss": 0.3022, "step": 38724 }, { "epoch": 2.8781122259383127, "grad_norm": 2.100415339940696, "learning_rate": 8.645267056021978e-08, "loss": 0.2627, "step": 38725 }, { "epoch": 2.878186547751765, "grad_norm": 2.1024425447371073, "learning_rate": 8.634742456092527e-08, "loss": 0.2509, "step": 38726 }, { "epoch": 2.878260869565217, "grad_norm": 2.1222224917181753, "learning_rate": 8.624224238521162e-08, "loss": 0.2054, "step": 38727 }, { "epoch": 2.8783351913786697, "grad_norm": 2.6131247230898667, "learning_rate": 8.613712403375496e-08, "loss": 0.2574, "step": 38728 }, { "epoch": 2.878409513192122, "grad_norm": 2.642847809215684, "learning_rate": 8.603206950723363e-08, "loss": 0.3609, "step": 38729 }, { "epoch": 2.878483835005574, "grad_norm": 3.002945711202251, "learning_rate": 8.592707880632267e-08, "loss": 0.314, "step": 38730 }, { "epoch": 2.878558156819026, "grad_norm": 2.1924757845576894, "learning_rate": 8.58221519317004e-08, "loss": 0.2134, "step": 38731 }, { "epoch": 2.8786324786324786, "grad_norm": 2.6418767189585703, "learning_rate": 8.571728888403963e-08, "loss": 0.3192, "step": 38732 }, { "epoch": 2.878706800445931, "grad_norm": 2.06346659245275, "learning_rate": 8.561248966401647e-08, "loss": 0.2122, "step": 38733 }, { "epoch": 2.878781122259383, "grad_norm": 1.9942633649238304, "learning_rate": 8.550775427230706e-08, "loss": 0.2594, "step": 38734 }, { "epoch": 2.878855444072835, "grad_norm": 2.352258381459246, "learning_rate": 8.540308270958198e-08, "loss": 0.2495, "step": 38735 }, { "epoch": 2.8789297658862876, "grad_norm": 2.8145709341499194, "learning_rate": 8.529847497651955e-08, "loss": 0.2368, "step": 38736 }, { "epoch": 2.87900408769974, "grad_norm": 3.0406550815427087, "learning_rate": 8.519393107378926e-08, "loss": 0.2562, "step": 38737 }, { "epoch": 2.879078409513192, "grad_norm": 2.208092645503811, "learning_rate": 8.508945100206722e-08, "loss": 0.2577, "step": 38738 }, { "epoch": 2.8791527313266445, "grad_norm": 2.6077844129436047, "learning_rate": 8.498503476202512e-08, "loss": 0.2519, "step": 38739 }, { "epoch": 2.8792270531400965, "grad_norm": 2.3019022534295597, "learning_rate": 8.488068235433355e-08, "loss": 0.2244, "step": 38740 }, { "epoch": 2.879301374953549, "grad_norm": 2.0665696448949435, "learning_rate": 8.47763937796664e-08, "loss": 0.2297, "step": 38741 }, { "epoch": 2.879375696767001, "grad_norm": 2.2218331827140174, "learning_rate": 8.467216903869423e-08, "loss": 0.31, "step": 38742 }, { "epoch": 2.8794500185804535, "grad_norm": 2.7746681276970078, "learning_rate": 8.456800813208876e-08, "loss": 0.3562, "step": 38743 }, { "epoch": 2.8795243403939055, "grad_norm": 1.955800082465362, "learning_rate": 8.446391106051943e-08, "loss": 0.235, "step": 38744 }, { "epoch": 2.879598662207358, "grad_norm": 2.428803943605907, "learning_rate": 8.435987782465794e-08, "loss": 0.3197, "step": 38745 }, { "epoch": 2.87967298402081, "grad_norm": 2.077445833167013, "learning_rate": 8.425590842517262e-08, "loss": 0.2366, "step": 38746 }, { "epoch": 2.8797473058342624, "grad_norm": 2.6177691979071955, "learning_rate": 8.415200286273183e-08, "loss": 0.3308, "step": 38747 }, { "epoch": 2.879821627647715, "grad_norm": 2.6861661134292865, "learning_rate": 8.404816113800729e-08, "loss": 0.2872, "step": 38748 }, { "epoch": 2.879895949461167, "grad_norm": 3.437217151340795, "learning_rate": 8.394438325166621e-08, "loss": 0.2767, "step": 38749 }, { "epoch": 2.879970271274619, "grad_norm": 2.3727091227173385, "learning_rate": 8.384066920437583e-08, "loss": 0.3244, "step": 38750 }, { "epoch": 2.8800445930880714, "grad_norm": 2.3859993755930105, "learning_rate": 8.373701899680565e-08, "loss": 0.2986, "step": 38751 }, { "epoch": 2.880118914901524, "grad_norm": 2.3138134442228746, "learning_rate": 8.363343262962176e-08, "loss": 0.2226, "step": 38752 }, { "epoch": 2.880193236714976, "grad_norm": 1.8187095201721795, "learning_rate": 8.352991010349142e-08, "loss": 0.2439, "step": 38753 }, { "epoch": 2.880267558528428, "grad_norm": 2.5812086552330036, "learning_rate": 8.342645141908079e-08, "loss": 0.2452, "step": 38754 }, { "epoch": 2.8803418803418803, "grad_norm": 2.70589152778, "learning_rate": 8.332305657705597e-08, "loss": 0.2796, "step": 38755 }, { "epoch": 2.880416202155333, "grad_norm": 2.0366820289703917, "learning_rate": 8.321972557808311e-08, "loss": 0.3087, "step": 38756 }, { "epoch": 2.880490523968785, "grad_norm": 2.0518407688299893, "learning_rate": 8.311645842282612e-08, "loss": 0.2194, "step": 38757 }, { "epoch": 2.880564845782237, "grad_norm": 2.836869972668083, "learning_rate": 8.301325511195224e-08, "loss": 0.3458, "step": 38758 }, { "epoch": 2.8806391675956893, "grad_norm": 2.6059960697024853, "learning_rate": 8.291011564612316e-08, "loss": 0.3297, "step": 38759 }, { "epoch": 2.8807134894091417, "grad_norm": 2.9512675772044963, "learning_rate": 8.280704002600503e-08, "loss": 0.3569, "step": 38760 }, { "epoch": 2.8807878112225938, "grad_norm": 3.0590306909137084, "learning_rate": 8.270402825226065e-08, "loss": 0.3733, "step": 38761 }, { "epoch": 2.880862133036046, "grad_norm": 2.5849992566769693, "learning_rate": 8.260108032555281e-08, "loss": 0.3368, "step": 38762 }, { "epoch": 2.8809364548494982, "grad_norm": 2.0772278054010287, "learning_rate": 8.249819624654543e-08, "loss": 0.2176, "step": 38763 }, { "epoch": 2.8810107766629507, "grad_norm": 2.2453814924472657, "learning_rate": 8.239537601589798e-08, "loss": 0.2877, "step": 38764 }, { "epoch": 2.8810850984764027, "grad_norm": 2.170413353046065, "learning_rate": 8.229261963427659e-08, "loss": 0.2694, "step": 38765 }, { "epoch": 2.881159420289855, "grad_norm": 2.5250918929294524, "learning_rate": 8.218992710234075e-08, "loss": 0.2504, "step": 38766 }, { "epoch": 2.881233742103307, "grad_norm": 2.1107438598762696, "learning_rate": 8.208729842075103e-08, "loss": 0.2172, "step": 38767 }, { "epoch": 2.8813080639167596, "grad_norm": 2.295253686373789, "learning_rate": 8.198473359016801e-08, "loss": 0.3082, "step": 38768 }, { "epoch": 2.8813823857302117, "grad_norm": 2.2272943182959484, "learning_rate": 8.188223261125338e-08, "loss": 0.2883, "step": 38769 }, { "epoch": 2.881456707543664, "grad_norm": 2.6070387888967814, "learning_rate": 8.177979548466552e-08, "loss": 0.3647, "step": 38770 }, { "epoch": 2.8815310293571166, "grad_norm": 2.7303433644141535, "learning_rate": 8.167742221106611e-08, "loss": 0.3305, "step": 38771 }, { "epoch": 2.8816053511705686, "grad_norm": 2.3294632021783044, "learning_rate": 8.15751127911113e-08, "loss": 0.2589, "step": 38772 }, { "epoch": 2.8816796729840206, "grad_norm": 2.8111180143353747, "learning_rate": 8.147286722546277e-08, "loss": 0.3336, "step": 38773 }, { "epoch": 2.881753994797473, "grad_norm": 2.3918230852674593, "learning_rate": 8.137068551477556e-08, "loss": 0.252, "step": 38774 }, { "epoch": 2.8818283166109255, "grad_norm": 2.253160656760216, "learning_rate": 8.126856765971025e-08, "loss": 0.1731, "step": 38775 }, { "epoch": 2.8819026384243775, "grad_norm": 2.9716564343461784, "learning_rate": 8.11665136609241e-08, "loss": 0.351, "step": 38776 }, { "epoch": 2.8819769602378296, "grad_norm": 2.4294103010447103, "learning_rate": 8.106452351907212e-08, "loss": 0.2748, "step": 38777 }, { "epoch": 2.882051282051282, "grad_norm": 3.0527557221449393, "learning_rate": 8.09625972348127e-08, "loss": 0.3745, "step": 38778 }, { "epoch": 2.8821256038647345, "grad_norm": 2.400475871556617, "learning_rate": 8.086073480880086e-08, "loss": 0.2828, "step": 38779 }, { "epoch": 2.8821999256781865, "grad_norm": 1.8404229922015662, "learning_rate": 8.075893624169273e-08, "loss": 0.2189, "step": 38780 }, { "epoch": 2.8822742474916385, "grad_norm": 2.252584587935066, "learning_rate": 8.065720153414448e-08, "loss": 0.2874, "step": 38781 }, { "epoch": 2.882348569305091, "grad_norm": 3.345038419108947, "learning_rate": 8.055553068681e-08, "loss": 0.2673, "step": 38782 }, { "epoch": 2.8824228911185434, "grad_norm": 2.5534331532342254, "learning_rate": 8.045392370034543e-08, "loss": 0.2415, "step": 38783 }, { "epoch": 2.8824972129319955, "grad_norm": 1.7750925050190147, "learning_rate": 8.035238057540251e-08, "loss": 0.2197, "step": 38784 }, { "epoch": 2.882571534745448, "grad_norm": 2.7322790815847076, "learning_rate": 8.025090131263735e-08, "loss": 0.3018, "step": 38785 }, { "epoch": 2.8826458565589, "grad_norm": 2.1731468805409366, "learning_rate": 8.014948591270055e-08, "loss": 0.2451, "step": 38786 }, { "epoch": 2.8827201783723524, "grad_norm": 2.0738707482507284, "learning_rate": 8.004813437624825e-08, "loss": 0.2334, "step": 38787 }, { "epoch": 2.8827945001858044, "grad_norm": 2.4499140843817733, "learning_rate": 7.994684670393105e-08, "loss": 0.3052, "step": 38788 }, { "epoch": 2.882868821999257, "grad_norm": 2.7445539020517717, "learning_rate": 7.984562289640063e-08, "loss": 0.2912, "step": 38789 }, { "epoch": 2.882943143812709, "grad_norm": 2.766314170572844, "learning_rate": 7.974446295431092e-08, "loss": 0.2602, "step": 38790 }, { "epoch": 2.8830174656261613, "grad_norm": 2.163522771384194, "learning_rate": 7.964336687831031e-08, "loss": 0.2386, "step": 38791 }, { "epoch": 2.8830917874396134, "grad_norm": 2.6059525852419365, "learning_rate": 7.954233466905049e-08, "loss": 0.2384, "step": 38792 }, { "epoch": 2.883166109253066, "grad_norm": 2.47956893124057, "learning_rate": 7.944136632718424e-08, "loss": 0.2129, "step": 38793 }, { "epoch": 2.8832404310665183, "grad_norm": 3.096046563391904, "learning_rate": 7.934046185335775e-08, "loss": 0.3335, "step": 38794 }, { "epoch": 2.8833147528799703, "grad_norm": 2.3107022177614662, "learning_rate": 7.923962124822381e-08, "loss": 0.3429, "step": 38795 }, { "epoch": 2.8833890746934223, "grad_norm": 2.4819006081965815, "learning_rate": 7.91388445124297e-08, "loss": 0.3059, "step": 38796 }, { "epoch": 2.8834633965068748, "grad_norm": 2.8700460394648553, "learning_rate": 7.903813164662488e-08, "loss": 0.3628, "step": 38797 }, { "epoch": 2.8835377183203272, "grad_norm": 2.041278114227445, "learning_rate": 7.893748265145884e-08, "loss": 0.1941, "step": 38798 }, { "epoch": 2.8836120401337793, "grad_norm": 2.422216683088894, "learning_rate": 7.883689752757773e-08, "loss": 0.1722, "step": 38799 }, { "epoch": 2.8836863619472313, "grad_norm": 1.87642694946642, "learning_rate": 7.87363762756299e-08, "loss": 0.215, "step": 38800 }, { "epoch": 2.8837606837606837, "grad_norm": 2.37623521403058, "learning_rate": 7.863591889626376e-08, "loss": 0.2546, "step": 38801 }, { "epoch": 2.883835005574136, "grad_norm": 3.3802653143409573, "learning_rate": 7.853552539012321e-08, "loss": 0.3162, "step": 38802 }, { "epoch": 2.883909327387588, "grad_norm": 2.6412614669188046, "learning_rate": 7.843519575785664e-08, "loss": 0.3426, "step": 38803 }, { "epoch": 2.8839836492010407, "grad_norm": 2.521477876318259, "learning_rate": 7.833493000011017e-08, "loss": 0.3164, "step": 38804 }, { "epoch": 2.8840579710144927, "grad_norm": 2.543479828362248, "learning_rate": 7.823472811752775e-08, "loss": 0.2752, "step": 38805 }, { "epoch": 2.884132292827945, "grad_norm": 3.078892299525305, "learning_rate": 7.813459011075553e-08, "loss": 0.3544, "step": 38806 }, { "epoch": 2.884206614641397, "grad_norm": 2.5068205264549404, "learning_rate": 7.803451598043743e-08, "loss": 0.35, "step": 38807 }, { "epoch": 2.8842809364548496, "grad_norm": 2.7097658552137394, "learning_rate": 7.79345057272185e-08, "loss": 0.3316, "step": 38808 }, { "epoch": 2.8843552582683016, "grad_norm": 2.1117532186768804, "learning_rate": 7.783455935174378e-08, "loss": 0.1948, "step": 38809 }, { "epoch": 2.884429580081754, "grad_norm": 2.0839159592278484, "learning_rate": 7.773467685465385e-08, "loss": 0.2151, "step": 38810 }, { "epoch": 2.884503901895206, "grad_norm": 2.5874927710406923, "learning_rate": 7.763485823659378e-08, "loss": 0.3297, "step": 38811 }, { "epoch": 2.8845782237086586, "grad_norm": 3.213724702323216, "learning_rate": 7.753510349820637e-08, "loss": 0.428, "step": 38812 }, { "epoch": 2.8846525455221106, "grad_norm": 2.580398524965326, "learning_rate": 7.743541264013221e-08, "loss": 0.2542, "step": 38813 }, { "epoch": 2.884726867335563, "grad_norm": 2.8526029848689842, "learning_rate": 7.733578566301414e-08, "loss": 0.3374, "step": 38814 }, { "epoch": 2.884801189149015, "grad_norm": 2.4867831275824224, "learning_rate": 7.723622256749385e-08, "loss": 0.2358, "step": 38815 }, { "epoch": 2.8848755109624675, "grad_norm": 2.354016776140251, "learning_rate": 7.713672335421085e-08, "loss": 0.2734, "step": 38816 }, { "epoch": 2.88494983277592, "grad_norm": 3.02051174941993, "learning_rate": 7.703728802380794e-08, "loss": 0.4004, "step": 38817 }, { "epoch": 2.885024154589372, "grad_norm": 2.4292651066413513, "learning_rate": 7.693791657692351e-08, "loss": 0.2918, "step": 38818 }, { "epoch": 2.885098476402824, "grad_norm": 2.431991020423054, "learning_rate": 7.683860901419815e-08, "loss": 0.286, "step": 38819 }, { "epoch": 2.8851727982162765, "grad_norm": 2.2849558662190237, "learning_rate": 7.673936533627135e-08, "loss": 0.2464, "step": 38820 }, { "epoch": 2.885247120029729, "grad_norm": 1.9190414227079977, "learning_rate": 7.664018554378038e-08, "loss": 0.19, "step": 38821 }, { "epoch": 2.885321441843181, "grad_norm": 2.1050832296114574, "learning_rate": 7.654106963736585e-08, "loss": 0.2273, "step": 38822 }, { "epoch": 2.885395763656633, "grad_norm": 2.3095783875195797, "learning_rate": 7.644201761766501e-08, "loss": 0.2226, "step": 38823 }, { "epoch": 2.8854700854700854, "grad_norm": 2.2848308841066407, "learning_rate": 7.634302948531513e-08, "loss": 0.2969, "step": 38824 }, { "epoch": 2.885544407283538, "grad_norm": 2.411214543991396, "learning_rate": 7.62441052409546e-08, "loss": 0.2906, "step": 38825 }, { "epoch": 2.88561872909699, "grad_norm": 3.1911158635059484, "learning_rate": 7.614524488521957e-08, "loss": 0.3108, "step": 38826 }, { "epoch": 2.8856930509104424, "grad_norm": 2.6380091084114854, "learning_rate": 7.60464484187462e-08, "loss": 0.2687, "step": 38827 }, { "epoch": 2.8857673727238944, "grad_norm": 2.3909155319063125, "learning_rate": 7.594771584217065e-08, "loss": 0.2527, "step": 38828 }, { "epoch": 2.885841694537347, "grad_norm": 2.747412229150608, "learning_rate": 7.584904715612906e-08, "loss": 0.2784, "step": 38829 }, { "epoch": 2.885916016350799, "grad_norm": 1.9606126054521777, "learning_rate": 7.575044236125651e-08, "loss": 0.2097, "step": 38830 }, { "epoch": 2.8859903381642513, "grad_norm": 2.0624098385062624, "learning_rate": 7.56519014581869e-08, "loss": 0.2133, "step": 38831 }, { "epoch": 2.8860646599777033, "grad_norm": 2.708133535542739, "learning_rate": 7.555342444755642e-08, "loss": 0.256, "step": 38832 }, { "epoch": 2.886138981791156, "grad_norm": 2.147429706960687, "learning_rate": 7.545501132999788e-08, "loss": 0.2577, "step": 38833 }, { "epoch": 2.886213303604608, "grad_norm": 1.9520617652805805, "learning_rate": 7.535666210614412e-08, "loss": 0.2393, "step": 38834 }, { "epoch": 2.8862876254180603, "grad_norm": 2.3741778010208705, "learning_rate": 7.525837677663017e-08, "loss": 0.1933, "step": 38835 }, { "epoch": 2.8863619472315123, "grad_norm": 2.5438712445299996, "learning_rate": 7.516015534208775e-08, "loss": 0.3183, "step": 38836 }, { "epoch": 2.8864362690449648, "grad_norm": 2.655567711300289, "learning_rate": 7.506199780314971e-08, "loss": 0.3246, "step": 38837 }, { "epoch": 2.8865105908584168, "grad_norm": 2.5333349549668895, "learning_rate": 7.496390416044663e-08, "loss": 0.3238, "step": 38838 }, { "epoch": 2.8865849126718692, "grad_norm": 2.630924644353806, "learning_rate": 7.486587441461135e-08, "loss": 0.3425, "step": 38839 }, { "epoch": 2.8866592344853217, "grad_norm": 2.505015204248171, "learning_rate": 7.476790856627448e-08, "loss": 0.2619, "step": 38840 }, { "epoch": 2.8867335562987737, "grad_norm": 2.641755113248238, "learning_rate": 7.46700066160666e-08, "loss": 0.3031, "step": 38841 }, { "epoch": 2.8868078781122257, "grad_norm": 2.6208095657617454, "learning_rate": 7.457216856461835e-08, "loss": 0.2678, "step": 38842 }, { "epoch": 2.886882199925678, "grad_norm": 2.5257394642773443, "learning_rate": 7.447439441255921e-08, "loss": 0.3327, "step": 38843 }, { "epoch": 2.8869565217391306, "grad_norm": 2.9431234842535012, "learning_rate": 7.43766841605198e-08, "loss": 0.2927, "step": 38844 }, { "epoch": 2.8870308435525827, "grad_norm": 2.466289878588479, "learning_rate": 7.427903780912738e-08, "loss": 0.2362, "step": 38845 }, { "epoch": 2.8871051653660347, "grad_norm": 2.4321137616977198, "learning_rate": 7.418145535901255e-08, "loss": 0.2421, "step": 38846 }, { "epoch": 2.887179487179487, "grad_norm": 2.301562062732767, "learning_rate": 7.40839368108015e-08, "loss": 0.2638, "step": 38847 }, { "epoch": 2.8872538089929396, "grad_norm": 2.1523697456790285, "learning_rate": 7.398648216512371e-08, "loss": 0.2622, "step": 38848 }, { "epoch": 2.8873281308063916, "grad_norm": 2.0528338688781864, "learning_rate": 7.388909142260648e-08, "loss": 0.3031, "step": 38849 }, { "epoch": 2.887402452619844, "grad_norm": 3.5300787967845526, "learning_rate": 7.379176458387483e-08, "loss": 0.3544, "step": 38850 }, { "epoch": 2.887476774433296, "grad_norm": 2.226961358117592, "learning_rate": 7.369450164955716e-08, "loss": 0.3077, "step": 38851 }, { "epoch": 2.8875510962467485, "grad_norm": 3.280321231986741, "learning_rate": 7.359730262027964e-08, "loss": 0.4004, "step": 38852 }, { "epoch": 2.8876254180602006, "grad_norm": 2.6897762928415876, "learning_rate": 7.350016749666732e-08, "loss": 0.3383, "step": 38853 }, { "epoch": 2.887699739873653, "grad_norm": 1.653394421316739, "learning_rate": 7.340309627934639e-08, "loss": 0.2152, "step": 38854 }, { "epoch": 2.887774061687105, "grad_norm": 2.8802486552689484, "learning_rate": 7.330608896893965e-08, "loss": 0.3766, "step": 38855 }, { "epoch": 2.8878483835005575, "grad_norm": 2.4084386100938238, "learning_rate": 7.320914556607551e-08, "loss": 0.2272, "step": 38856 }, { "epoch": 2.8879227053140095, "grad_norm": 2.450895981386038, "learning_rate": 7.311226607137455e-08, "loss": 0.3124, "step": 38857 }, { "epoch": 2.887997027127462, "grad_norm": 2.2093992045145785, "learning_rate": 7.301545048546189e-08, "loss": 0.2883, "step": 38858 }, { "epoch": 2.888071348940914, "grad_norm": 2.72746921812727, "learning_rate": 7.29186988089614e-08, "loss": 0.3906, "step": 38859 }, { "epoch": 2.8881456707543665, "grad_norm": 2.2108863183799534, "learning_rate": 7.282201104249375e-08, "loss": 0.2576, "step": 38860 }, { "epoch": 2.8882199925678185, "grad_norm": 1.7377786076387511, "learning_rate": 7.272538718668398e-08, "loss": 0.2057, "step": 38861 }, { "epoch": 2.888294314381271, "grad_norm": 2.6282895345423576, "learning_rate": 7.262882724215269e-08, "loss": 0.23, "step": 38862 }, { "epoch": 2.8883686361947234, "grad_norm": 2.560786305684405, "learning_rate": 7.253233120952275e-08, "loss": 0.2768, "step": 38863 }, { "epoch": 2.8884429580081754, "grad_norm": 2.644567803159104, "learning_rate": 7.243589908941362e-08, "loss": 0.2832, "step": 38864 }, { "epoch": 2.8885172798216274, "grad_norm": 2.0417571517811655, "learning_rate": 7.233953088244705e-08, "loss": 0.2613, "step": 38865 }, { "epoch": 2.88859160163508, "grad_norm": 2.563664928039841, "learning_rate": 7.224322658924365e-08, "loss": 0.2681, "step": 38866 }, { "epoch": 2.8886659234485323, "grad_norm": 2.8061805489174247, "learning_rate": 7.214698621042294e-08, "loss": 0.1937, "step": 38867 }, { "epoch": 2.8887402452619844, "grad_norm": 2.2789056386282387, "learning_rate": 7.20508097466055e-08, "loss": 0.2198, "step": 38868 }, { "epoch": 2.8888145670754364, "grad_norm": 2.1210292091579563, "learning_rate": 7.195469719840975e-08, "loss": 0.306, "step": 38869 }, { "epoch": 2.888888888888889, "grad_norm": 2.027692980432747, "learning_rate": 7.185864856645519e-08, "loss": 0.2126, "step": 38870 }, { "epoch": 2.8889632107023413, "grad_norm": 2.3305269415964576, "learning_rate": 7.176266385135799e-08, "loss": 0.286, "step": 38871 }, { "epoch": 2.8890375325157933, "grad_norm": 2.373164506430958, "learning_rate": 7.166674305373877e-08, "loss": 0.2396, "step": 38872 }, { "epoch": 2.8891118543292458, "grad_norm": 2.242792615047279, "learning_rate": 7.157088617421371e-08, "loss": 0.2735, "step": 38873 }, { "epoch": 2.889186176142698, "grad_norm": 3.856364088283744, "learning_rate": 7.147509321340007e-08, "loss": 0.3763, "step": 38874 }, { "epoch": 2.8892604979561503, "grad_norm": 7.229057487948722, "learning_rate": 7.137936417191405e-08, "loss": 0.2321, "step": 38875 }, { "epoch": 2.8893348197696023, "grad_norm": 3.2265157876909982, "learning_rate": 7.128369905037292e-08, "loss": 0.3001, "step": 38876 }, { "epoch": 2.8894091415830547, "grad_norm": 2.3821075468558304, "learning_rate": 7.118809784939173e-08, "loss": 0.2607, "step": 38877 }, { "epoch": 2.8894834633965067, "grad_norm": 2.034393899089992, "learning_rate": 7.109256056958669e-08, "loss": 0.2096, "step": 38878 }, { "epoch": 2.889557785209959, "grad_norm": 3.748998127327431, "learning_rate": 7.099708721157284e-08, "loss": 0.3188, "step": 38879 }, { "epoch": 2.8896321070234112, "grad_norm": 3.148040692365169, "learning_rate": 7.090167777596413e-08, "loss": 0.3483, "step": 38880 }, { "epoch": 2.8897064288368637, "grad_norm": 2.3815427423171562, "learning_rate": 7.080633226337564e-08, "loss": 0.2695, "step": 38881 }, { "epoch": 2.889780750650316, "grad_norm": 2.863333818064091, "learning_rate": 7.07110506744213e-08, "loss": 0.2675, "step": 38882 }, { "epoch": 2.889855072463768, "grad_norm": 2.5191388110519037, "learning_rate": 7.061583300971287e-08, "loss": 0.3439, "step": 38883 }, { "epoch": 2.88992939427722, "grad_norm": 2.308053813888905, "learning_rate": 7.052067926986427e-08, "loss": 0.2409, "step": 38884 }, { "epoch": 2.8900037160906726, "grad_norm": 3.4486464824250134, "learning_rate": 7.04255894554895e-08, "loss": 0.3667, "step": 38885 }, { "epoch": 2.890078037904125, "grad_norm": 2.85803562791502, "learning_rate": 7.033056356719913e-08, "loss": 0.3102, "step": 38886 }, { "epoch": 2.890152359717577, "grad_norm": 2.182616120651009, "learning_rate": 7.023560160560605e-08, "loss": 0.2739, "step": 38887 }, { "epoch": 2.890226681531029, "grad_norm": 3.477467854306767, "learning_rate": 7.014070357131974e-08, "loss": 0.2811, "step": 38888 }, { "epoch": 2.8903010033444816, "grad_norm": 2.611224945806734, "learning_rate": 7.004586946495307e-08, "loss": 0.2977, "step": 38889 }, { "epoch": 2.890375325157934, "grad_norm": 1.936779676628008, "learning_rate": 6.995109928711552e-08, "loss": 0.2458, "step": 38890 }, { "epoch": 2.890449646971386, "grad_norm": 2.642831992462245, "learning_rate": 6.985639303841773e-08, "loss": 0.3621, "step": 38891 }, { "epoch": 2.890523968784838, "grad_norm": 2.2429080679595126, "learning_rate": 6.976175071946922e-08, "loss": 0.2721, "step": 38892 }, { "epoch": 2.8905982905982905, "grad_norm": 1.8771803600040204, "learning_rate": 6.96671723308795e-08, "loss": 0.3131, "step": 38893 }, { "epoch": 2.890672612411743, "grad_norm": 2.182931195981879, "learning_rate": 6.957265787325695e-08, "loss": 0.2522, "step": 38894 }, { "epoch": 2.890746934225195, "grad_norm": 2.4200798189331922, "learning_rate": 6.947820734721112e-08, "loss": 0.2212, "step": 38895 }, { "epoch": 2.8908212560386475, "grad_norm": 3.789654525114093, "learning_rate": 6.938382075334816e-08, "loss": 0.3082, "step": 38896 }, { "epoch": 2.8908955778520995, "grad_norm": 2.3909235032446747, "learning_rate": 6.928949809227758e-08, "loss": 0.3135, "step": 38897 }, { "epoch": 2.890969899665552, "grad_norm": 2.836150270296168, "learning_rate": 6.91952393646056e-08, "loss": 0.3138, "step": 38898 }, { "epoch": 2.891044221479004, "grad_norm": 2.4177712329106904, "learning_rate": 6.910104457093947e-08, "loss": 0.3043, "step": 38899 }, { "epoch": 2.8911185432924564, "grad_norm": 2.0267950374613104, "learning_rate": 6.90069137118865e-08, "loss": 0.3327, "step": 38900 }, { "epoch": 2.8911928651059084, "grad_norm": 2.2992665131955183, "learning_rate": 6.891284678805066e-08, "loss": 0.2601, "step": 38901 }, { "epoch": 2.891267186919361, "grad_norm": 1.9555536862183822, "learning_rate": 6.881884380003923e-08, "loss": 0.266, "step": 38902 }, { "epoch": 2.891341508732813, "grad_norm": 1.4394969940101525, "learning_rate": 6.872490474845616e-08, "loss": 0.1621, "step": 38903 }, { "epoch": 2.8914158305462654, "grad_norm": 2.5144865872707443, "learning_rate": 6.863102963390767e-08, "loss": 0.2809, "step": 38904 }, { "epoch": 2.891490152359718, "grad_norm": 2.1031218991640452, "learning_rate": 6.853721845699657e-08, "loss": 0.2585, "step": 38905 }, { "epoch": 2.89156447417317, "grad_norm": 2.0749167709920586, "learning_rate": 6.844347121832794e-08, "loss": 0.1941, "step": 38906 }, { "epoch": 2.891638795986622, "grad_norm": 2.5584168335327786, "learning_rate": 6.834978791850466e-08, "loss": 0.2879, "step": 38907 }, { "epoch": 2.8917131178000743, "grad_norm": 2.7260724472237556, "learning_rate": 6.825616855813067e-08, "loss": 0.3806, "step": 38908 }, { "epoch": 2.891787439613527, "grad_norm": 2.1602413711408635, "learning_rate": 6.816261313780658e-08, "loss": 0.1733, "step": 38909 }, { "epoch": 2.891861761426979, "grad_norm": 2.507026374092078, "learning_rate": 6.806912165813751e-08, "loss": 0.3535, "step": 38910 }, { "epoch": 2.891936083240431, "grad_norm": 3.0723414957099546, "learning_rate": 6.797569411972405e-08, "loss": 0.3017, "step": 38911 }, { "epoch": 2.8920104050538833, "grad_norm": 2.6657040185746332, "learning_rate": 6.788233052316795e-08, "loss": 0.3381, "step": 38912 }, { "epoch": 2.8920847268673358, "grad_norm": 2.1745850747489475, "learning_rate": 6.778903086906985e-08, "loss": 0.2758, "step": 38913 }, { "epoch": 2.8921590486807878, "grad_norm": 2.5895042377510187, "learning_rate": 6.769579515803038e-08, "loss": 0.3166, "step": 38914 }, { "epoch": 2.89223337049424, "grad_norm": 2.265330128296087, "learning_rate": 6.760262339065016e-08, "loss": 0.2998, "step": 38915 }, { "epoch": 2.8923076923076922, "grad_norm": 2.4902553632129116, "learning_rate": 6.750951556752871e-08, "loss": 0.2371, "step": 38916 }, { "epoch": 2.8923820141211447, "grad_norm": 2.8128981072898225, "learning_rate": 6.741647168926668e-08, "loss": 0.3301, "step": 38917 }, { "epoch": 2.8924563359345967, "grad_norm": 2.859194994401318, "learning_rate": 6.732349175646136e-08, "loss": 0.2704, "step": 38918 }, { "epoch": 2.892530657748049, "grad_norm": 2.4580130282736667, "learning_rate": 6.723057576971114e-08, "loss": 0.2618, "step": 38919 }, { "epoch": 2.892604979561501, "grad_norm": 2.191738653658264, "learning_rate": 6.713772372961558e-08, "loss": 0.2438, "step": 38920 }, { "epoch": 2.8926793013749537, "grad_norm": 2.264991588469383, "learning_rate": 6.704493563677195e-08, "loss": 0.2807, "step": 38921 }, { "epoch": 2.8927536231884057, "grad_norm": 2.0467604066577207, "learning_rate": 6.695221149177866e-08, "loss": 0.2326, "step": 38922 }, { "epoch": 2.892827945001858, "grad_norm": 2.091711341172382, "learning_rate": 6.685955129523081e-08, "loss": 0.1566, "step": 38923 }, { "epoch": 2.89290226681531, "grad_norm": 1.878871700187062, "learning_rate": 6.676695504772567e-08, "loss": 0.2027, "step": 38924 }, { "epoch": 2.8929765886287626, "grad_norm": 2.5268579102740643, "learning_rate": 6.667442274986057e-08, "loss": 0.369, "step": 38925 }, { "epoch": 2.8930509104422146, "grad_norm": 2.130795263776624, "learning_rate": 6.658195440222836e-08, "loss": 0.2303, "step": 38926 }, { "epoch": 2.893125232255667, "grad_norm": 2.228541941454349, "learning_rate": 6.648955000542745e-08, "loss": 0.2913, "step": 38927 }, { "epoch": 2.8931995540691196, "grad_norm": 1.9773493320961566, "learning_rate": 6.639720956005069e-08, "loss": 0.2355, "step": 38928 }, { "epoch": 2.8932738758825716, "grad_norm": 2.19299884974565, "learning_rate": 6.630493306669317e-08, "loss": 0.2233, "step": 38929 }, { "epoch": 2.8933481976960236, "grad_norm": 2.144136197418545, "learning_rate": 6.621272052594996e-08, "loss": 0.3098, "step": 38930 }, { "epoch": 2.893422519509476, "grad_norm": 4.267971011521471, "learning_rate": 6.612057193841281e-08, "loss": 0.3427, "step": 38931 }, { "epoch": 2.8934968413229285, "grad_norm": 3.088519454448627, "learning_rate": 6.602848730467682e-08, "loss": 0.3631, "step": 38932 }, { "epoch": 2.8935711631363805, "grad_norm": 3.158254469213346, "learning_rate": 6.59364666253326e-08, "loss": 0.344, "step": 38933 }, { "epoch": 2.8936454849498325, "grad_norm": 2.3508118522429178, "learning_rate": 6.584450990097525e-08, "loss": 0.2422, "step": 38934 }, { "epoch": 2.893719806763285, "grad_norm": 3.2169503436360625, "learning_rate": 6.575261713219538e-08, "loss": 0.3405, "step": 38935 }, { "epoch": 2.8937941285767375, "grad_norm": 2.013007762456434, "learning_rate": 6.566078831958478e-08, "loss": 0.2642, "step": 38936 }, { "epoch": 2.8938684503901895, "grad_norm": 1.771050589199359, "learning_rate": 6.556902346373517e-08, "loss": 0.2576, "step": 38937 }, { "epoch": 2.893942772203642, "grad_norm": 2.03821050444675, "learning_rate": 6.547732256523497e-08, "loss": 0.2368, "step": 38938 }, { "epoch": 2.894017094017094, "grad_norm": 2.5530823512316196, "learning_rate": 6.538568562467818e-08, "loss": 0.2709, "step": 38939 }, { "epoch": 2.8940914158305464, "grad_norm": 2.1041834013590472, "learning_rate": 6.529411264265206e-08, "loss": 0.278, "step": 38940 }, { "epoch": 2.8941657376439984, "grad_norm": 2.214661228052226, "learning_rate": 6.520260361974617e-08, "loss": 0.2164, "step": 38941 }, { "epoch": 2.894240059457451, "grad_norm": 1.618089202408973, "learning_rate": 6.511115855655115e-08, "loss": 0.1489, "step": 38942 }, { "epoch": 2.894314381270903, "grad_norm": 2.6676699842136924, "learning_rate": 6.501977745365428e-08, "loss": 0.3331, "step": 38943 }, { "epoch": 2.8943887030843554, "grad_norm": 2.4349638022363136, "learning_rate": 6.492846031164624e-08, "loss": 0.3073, "step": 38944 }, { "epoch": 2.8944630248978074, "grad_norm": 2.6333884328657042, "learning_rate": 6.483720713111208e-08, "loss": 0.2963, "step": 38945 }, { "epoch": 2.89453734671126, "grad_norm": 1.8003266467346644, "learning_rate": 6.474601791264024e-08, "loss": 0.1937, "step": 38946 }, { "epoch": 2.894611668524712, "grad_norm": 2.0243185054839086, "learning_rate": 6.465489265681912e-08, "loss": 0.2148, "step": 38947 }, { "epoch": 2.8946859903381643, "grad_norm": 2.862588827732781, "learning_rate": 6.456383136423383e-08, "loss": 0.2964, "step": 38948 }, { "epoch": 2.8947603121516163, "grad_norm": 2.6081096205414127, "learning_rate": 6.447283403547055e-08, "loss": 0.3003, "step": 38949 }, { "epoch": 2.894834633965069, "grad_norm": 2.4408991130029567, "learning_rate": 6.438190067111549e-08, "loss": 0.3036, "step": 38950 }, { "epoch": 2.8949089557785213, "grad_norm": 2.6189878163866225, "learning_rate": 6.429103127175485e-08, "loss": 0.2914, "step": 38951 }, { "epoch": 2.8949832775919733, "grad_norm": 2.256734320185936, "learning_rate": 6.420022583797369e-08, "loss": 0.309, "step": 38952 }, { "epoch": 2.8950575994054253, "grad_norm": 2.269785686515515, "learning_rate": 6.410948437035492e-08, "loss": 0.2291, "step": 38953 }, { "epoch": 2.8951319212188777, "grad_norm": 1.9162490010313091, "learning_rate": 6.401880686948358e-08, "loss": 0.2363, "step": 38954 }, { "epoch": 2.89520624303233, "grad_norm": 2.505935584532258, "learning_rate": 6.39281933359448e-08, "loss": 0.3069, "step": 38955 }, { "epoch": 2.8952805648457822, "grad_norm": 2.292153353230702, "learning_rate": 6.38376437703192e-08, "loss": 0.2013, "step": 38956 }, { "epoch": 2.8953548866592342, "grad_norm": 2.1004893886676808, "learning_rate": 6.374715817319298e-08, "loss": 0.284, "step": 38957 }, { "epoch": 2.8954292084726867, "grad_norm": 3.0530766574524315, "learning_rate": 6.36567365451457e-08, "loss": 0.2654, "step": 38958 }, { "epoch": 2.895503530286139, "grad_norm": 2.3344730724143568, "learning_rate": 6.356637888676132e-08, "loss": 0.2582, "step": 38959 }, { "epoch": 2.895577852099591, "grad_norm": 2.3876937389150528, "learning_rate": 6.347608519862047e-08, "loss": 0.2243, "step": 38960 }, { "epoch": 2.8956521739130436, "grad_norm": 2.5345969303646885, "learning_rate": 6.338585548130605e-08, "loss": 0.2698, "step": 38961 }, { "epoch": 2.8957264957264957, "grad_norm": 3.338929162921496, "learning_rate": 6.329568973539757e-08, "loss": 0.3519, "step": 38962 }, { "epoch": 2.895800817539948, "grad_norm": 1.9379849971443148, "learning_rate": 6.320558796147569e-08, "loss": 0.234, "step": 38963 }, { "epoch": 2.8958751393534, "grad_norm": 2.784718983162564, "learning_rate": 6.311555016011995e-08, "loss": 0.3074, "step": 38964 }, { "epoch": 2.8959494611668526, "grad_norm": 2.855956611464133, "learning_rate": 6.302557633191098e-08, "loss": 0.3037, "step": 38965 }, { "epoch": 2.8960237829803046, "grad_norm": 1.9888930846610202, "learning_rate": 6.293566647742833e-08, "loss": 0.2422, "step": 38966 }, { "epoch": 2.896098104793757, "grad_norm": 2.401317962066396, "learning_rate": 6.284582059725042e-08, "loss": 0.2389, "step": 38967 }, { "epoch": 2.896172426607209, "grad_norm": 2.660542221481051, "learning_rate": 6.275603869195457e-08, "loss": 0.3119, "step": 38968 }, { "epoch": 2.8962467484206615, "grad_norm": 2.648775770040638, "learning_rate": 6.266632076212031e-08, "loss": 0.2455, "step": 38969 }, { "epoch": 2.8963210702341136, "grad_norm": 2.1561244851804977, "learning_rate": 6.257666680832497e-08, "loss": 0.2636, "step": 38970 }, { "epoch": 2.896395392047566, "grad_norm": 2.2110195949905544, "learning_rate": 6.248707683114586e-08, "loss": 0.2161, "step": 38971 }, { "epoch": 2.896469713861018, "grad_norm": 1.95984740039454, "learning_rate": 6.239755083115806e-08, "loss": 0.2373, "step": 38972 }, { "epoch": 2.8965440356744705, "grad_norm": 2.9033009094055564, "learning_rate": 6.230808880894002e-08, "loss": 0.2987, "step": 38973 }, { "epoch": 2.896618357487923, "grad_norm": 2.182828514166494, "learning_rate": 6.221869076506681e-08, "loss": 0.3376, "step": 38974 }, { "epoch": 2.896692679301375, "grad_norm": 2.6430933856928536, "learning_rate": 6.212935670011355e-08, "loss": 0.3083, "step": 38975 }, { "epoch": 2.896767001114827, "grad_norm": 2.7531131214400792, "learning_rate": 6.204008661465532e-08, "loss": 0.2633, "step": 38976 }, { "epoch": 2.8968413229282794, "grad_norm": 1.7911552753747568, "learning_rate": 6.195088050926834e-08, "loss": 0.2006, "step": 38977 }, { "epoch": 2.896915644741732, "grad_norm": 2.492372825108353, "learning_rate": 6.186173838452547e-08, "loss": 0.2814, "step": 38978 }, { "epoch": 2.896989966555184, "grad_norm": 2.3291582170905523, "learning_rate": 6.177266024100182e-08, "loss": 0.2451, "step": 38979 }, { "epoch": 2.897064288368636, "grad_norm": 2.285093880809517, "learning_rate": 6.168364607926913e-08, "loss": 0.3062, "step": 38980 }, { "epoch": 2.8971386101820884, "grad_norm": 2.3229430169781966, "learning_rate": 6.159469589990253e-08, "loss": 0.254, "step": 38981 }, { "epoch": 2.897212931995541, "grad_norm": 6.559693691603552, "learning_rate": 6.150580970347265e-08, "loss": 0.3483, "step": 38982 }, { "epoch": 2.897287253808993, "grad_norm": 3.8137608404036416, "learning_rate": 6.141698749055347e-08, "loss": 0.3734, "step": 38983 }, { "epoch": 2.8973615756224453, "grad_norm": 2.3432386266063996, "learning_rate": 6.132822926171566e-08, "loss": 0.2388, "step": 38984 }, { "epoch": 2.8974358974358974, "grad_norm": 1.7376160115183141, "learning_rate": 6.123953501753099e-08, "loss": 0.2403, "step": 38985 }, { "epoch": 2.89751021924935, "grad_norm": 2.34529839079573, "learning_rate": 6.115090475857122e-08, "loss": 0.3041, "step": 38986 }, { "epoch": 2.897584541062802, "grad_norm": 2.708140658675095, "learning_rate": 6.106233848540477e-08, "loss": 0.2358, "step": 38987 }, { "epoch": 2.8976588628762543, "grad_norm": 2.673026762649875, "learning_rate": 6.097383619860453e-08, "loss": 0.2241, "step": 38988 }, { "epoch": 2.8977331846897063, "grad_norm": 2.5758470544839147, "learning_rate": 6.088539789873893e-08, "loss": 0.2908, "step": 38989 }, { "epoch": 2.8978075065031588, "grad_norm": 2.172577278098124, "learning_rate": 6.07970235863764e-08, "loss": 0.2462, "step": 38990 }, { "epoch": 2.897881828316611, "grad_norm": 2.529749628180981, "learning_rate": 6.070871326208761e-08, "loss": 0.3346, "step": 38991 }, { "epoch": 2.8979561501300632, "grad_norm": 3.793924672707843, "learning_rate": 6.062046692643986e-08, "loss": 0.3506, "step": 38992 }, { "epoch": 2.8980304719435153, "grad_norm": 4.985148175010818, "learning_rate": 6.053228458000383e-08, "loss": 0.2413, "step": 38993 }, { "epoch": 2.8981047937569677, "grad_norm": 2.7004112084181977, "learning_rate": 6.044416622334348e-08, "loss": 0.2984, "step": 38994 }, { "epoch": 2.8981791155704197, "grad_norm": 2.505278389036884, "learning_rate": 6.035611185702839e-08, "loss": 0.311, "step": 38995 }, { "epoch": 2.898253437383872, "grad_norm": 2.7297312901886612, "learning_rate": 6.026812148162476e-08, "loss": 0.1915, "step": 38996 }, { "epoch": 2.8983277591973247, "grad_norm": 2.286275566370038, "learning_rate": 6.018019509769878e-08, "loss": 0.2706, "step": 38997 }, { "epoch": 2.8984020810107767, "grad_norm": 2.3073159591769485, "learning_rate": 6.009233270581782e-08, "loss": 0.3272, "step": 38998 }, { "epoch": 2.8984764028242287, "grad_norm": 2.7323754852256066, "learning_rate": 6.000453430654585e-08, "loss": 0.1984, "step": 38999 }, { "epoch": 2.898550724637681, "grad_norm": 2.8564502892824777, "learning_rate": 5.991679990044907e-08, "loss": 0.363, "step": 39000 }, { "epoch": 2.8986250464511336, "grad_norm": 2.0579095798015565, "learning_rate": 5.98291294880926e-08, "loss": 0.2004, "step": 39001 }, { "epoch": 2.8986993682645856, "grad_norm": 2.4863706118737805, "learning_rate": 5.974152307003932e-08, "loss": 0.2469, "step": 39002 }, { "epoch": 2.8987736900780376, "grad_norm": 2.4346191300634796, "learning_rate": 5.965398064685546e-08, "loss": 0.2719, "step": 39003 }, { "epoch": 2.89884801189149, "grad_norm": 2.161000073182318, "learning_rate": 5.956650221910387e-08, "loss": 0.2114, "step": 39004 }, { "epoch": 2.8989223337049426, "grad_norm": 2.93141746919888, "learning_rate": 5.9479087787347456e-08, "loss": 0.2919, "step": 39005 }, { "epoch": 2.8989966555183946, "grad_norm": 2.2659696086851664, "learning_rate": 5.9391737352147984e-08, "loss": 0.1957, "step": 39006 }, { "epoch": 2.899070977331847, "grad_norm": 2.380136670078325, "learning_rate": 5.9304450914068336e-08, "loss": 0.2707, "step": 39007 }, { "epoch": 2.899145299145299, "grad_norm": 3.328013136933186, "learning_rate": 5.921722847367139e-08, "loss": 0.3154, "step": 39008 }, { "epoch": 2.8992196209587515, "grad_norm": 2.6317633255825377, "learning_rate": 5.913007003151783e-08, "loss": 0.3072, "step": 39009 }, { "epoch": 2.8992939427722035, "grad_norm": 2.4803835868039665, "learning_rate": 5.904297558817052e-08, "loss": 0.2463, "step": 39010 }, { "epoch": 2.899368264585656, "grad_norm": 3.288542275468557, "learning_rate": 5.89559451441879e-08, "loss": 0.3789, "step": 39011 }, { "epoch": 2.899442586399108, "grad_norm": 1.7633643533819894, "learning_rate": 5.886897870013064e-08, "loss": 0.1975, "step": 39012 }, { "epoch": 2.8995169082125605, "grad_norm": 2.521589756945347, "learning_rate": 5.8782076256559405e-08, "loss": 0.2722, "step": 39013 }, { "epoch": 2.8995912300260125, "grad_norm": 2.9036603786175528, "learning_rate": 5.869523781403263e-08, "loss": 0.3736, "step": 39014 }, { "epoch": 2.899665551839465, "grad_norm": 2.911928792353064, "learning_rate": 5.860846337311099e-08, "loss": 0.3264, "step": 39015 }, { "epoch": 2.899739873652917, "grad_norm": 2.1031361103408077, "learning_rate": 5.852175293435181e-08, "loss": 0.1886, "step": 39016 }, { "epoch": 2.8998141954663694, "grad_norm": 2.2723992999141305, "learning_rate": 5.843510649831463e-08, "loss": 0.2644, "step": 39017 }, { "epoch": 2.8998885172798214, "grad_norm": 2.422824738105679, "learning_rate": 5.834852406555569e-08, "loss": 0.2057, "step": 39018 }, { "epoch": 2.899962839093274, "grad_norm": 2.174581944927764, "learning_rate": 5.826200563663231e-08, "loss": 0.28, "step": 39019 }, { "epoch": 2.9000371609067264, "grad_norm": 2.5839004877205065, "learning_rate": 5.8175551212104055e-08, "loss": 0.3113, "step": 39020 }, { "epoch": 2.9001114827201784, "grad_norm": 2.4514556132465106, "learning_rate": 5.808916079252491e-08, "loss": 0.2732, "step": 39021 }, { "epoch": 2.9001858045336304, "grad_norm": 2.7137952341191363, "learning_rate": 5.800283437845111e-08, "loss": 0.2397, "step": 39022 }, { "epoch": 2.900260126347083, "grad_norm": 2.2825328487048955, "learning_rate": 5.7916571970439984e-08, "loss": 0.266, "step": 39023 }, { "epoch": 2.9003344481605353, "grad_norm": 2.180499119931682, "learning_rate": 5.783037356904553e-08, "loss": 0.2715, "step": 39024 }, { "epoch": 2.9004087699739873, "grad_norm": 2.238345120085793, "learning_rate": 5.774423917482397e-08, "loss": 0.2445, "step": 39025 }, { "epoch": 2.9004830917874393, "grad_norm": 2.2701810916835345, "learning_rate": 5.76581687883293e-08, "loss": 0.2847, "step": 39026 }, { "epoch": 2.900557413600892, "grad_norm": 2.3489576408395383, "learning_rate": 5.757216241011554e-08, "loss": 0.2045, "step": 39027 }, { "epoch": 2.9006317354143443, "grad_norm": 1.9075976720640315, "learning_rate": 5.748622004073556e-08, "loss": 0.2397, "step": 39028 }, { "epoch": 2.9007060572277963, "grad_norm": 2.4301442622230707, "learning_rate": 5.740034168074338e-08, "loss": 0.2958, "step": 39029 }, { "epoch": 2.9007803790412487, "grad_norm": 2.875042167352843, "learning_rate": 5.7314527330692984e-08, "loss": 0.3661, "step": 39030 }, { "epoch": 2.9008547008547008, "grad_norm": 4.311697003642341, "learning_rate": 5.722877699113394e-08, "loss": 0.4026, "step": 39031 }, { "epoch": 2.9009290226681532, "grad_norm": 2.1480863427608043, "learning_rate": 5.714309066262136e-08, "loss": 0.2408, "step": 39032 }, { "epoch": 2.9010033444816052, "grad_norm": 1.9707161881747453, "learning_rate": 5.705746834570591e-08, "loss": 0.18, "step": 39033 }, { "epoch": 2.9010776662950577, "grad_norm": 2.4899143746054464, "learning_rate": 5.6971910040938274e-08, "loss": 0.3008, "step": 39034 }, { "epoch": 2.9011519881085097, "grad_norm": 2.303998224198669, "learning_rate": 5.688641574886911e-08, "loss": 0.232, "step": 39035 }, { "epoch": 2.901226309921962, "grad_norm": 2.2275872447063243, "learning_rate": 5.68009854700502e-08, "loss": 0.2115, "step": 39036 }, { "epoch": 2.901300631735414, "grad_norm": 2.7534168237170054, "learning_rate": 5.6715619205029993e-08, "loss": 0.3143, "step": 39037 }, { "epoch": 2.9013749535488667, "grad_norm": 3.059135471045298, "learning_rate": 5.663031695435917e-08, "loss": 0.2748, "step": 39038 }, { "epoch": 2.901449275362319, "grad_norm": 2.1195194720719086, "learning_rate": 5.6545078718586164e-08, "loss": 0.2749, "step": 39039 }, { "epoch": 2.901523597175771, "grad_norm": 2.270717029989466, "learning_rate": 5.645990449825944e-08, "loss": 0.2598, "step": 39040 }, { "epoch": 2.901597918989223, "grad_norm": 2.3162158139963913, "learning_rate": 5.637479429392856e-08, "loss": 0.2453, "step": 39041 }, { "epoch": 2.9016722408026756, "grad_norm": 2.417741744011078, "learning_rate": 5.6289748106140854e-08, "loss": 0.2948, "step": 39042 }, { "epoch": 2.901746562616128, "grad_norm": 1.9134375652195912, "learning_rate": 5.620476593544366e-08, "loss": 0.1765, "step": 39043 }, { "epoch": 2.90182088442958, "grad_norm": 2.3969299945239424, "learning_rate": 5.611984778238433e-08, "loss": 0.2278, "step": 39044 }, { "epoch": 2.901895206243032, "grad_norm": 2.02273029458234, "learning_rate": 5.603499364750908e-08, "loss": 0.2614, "step": 39045 }, { "epoch": 2.9019695280564846, "grad_norm": 2.9548985565834234, "learning_rate": 5.595020353136415e-08, "loss": 0.3486, "step": 39046 }, { "epoch": 2.902043849869937, "grad_norm": 2.145307838744929, "learning_rate": 5.586547743449689e-08, "loss": 0.2251, "step": 39047 }, { "epoch": 2.902118171683389, "grad_norm": 1.8505075834111362, "learning_rate": 5.578081535745128e-08, "loss": 0.1973, "step": 39048 }, { "epoch": 2.902192493496841, "grad_norm": 2.322623733043001, "learning_rate": 5.569621730077357e-08, "loss": 0.3178, "step": 39049 }, { "epoch": 2.9022668153102935, "grad_norm": 2.569804813760652, "learning_rate": 5.5611683265006655e-08, "loss": 0.2486, "step": 39050 }, { "epoch": 2.902341137123746, "grad_norm": 2.2150735440989453, "learning_rate": 5.552721325069677e-08, "loss": 0.2126, "step": 39051 }, { "epoch": 2.902415458937198, "grad_norm": 2.325891986283915, "learning_rate": 5.544280725838569e-08, "loss": 0.2795, "step": 39052 }, { "epoch": 2.9024897807506504, "grad_norm": 2.5472336858624707, "learning_rate": 5.5358465288617435e-08, "loss": 0.2992, "step": 39053 }, { "epoch": 2.9025641025641025, "grad_norm": 2.4726642334555473, "learning_rate": 5.5274187341936017e-08, "loss": 0.2483, "step": 39054 }, { "epoch": 2.902638424377555, "grad_norm": 2.521973241256204, "learning_rate": 5.518997341888432e-08, "loss": 0.2252, "step": 39055 }, { "epoch": 2.902712746191007, "grad_norm": 2.2915868562660884, "learning_rate": 5.5105823520001934e-08, "loss": 0.2074, "step": 39056 }, { "epoch": 2.9027870680044594, "grad_norm": 2.662311677771648, "learning_rate": 5.502173764583396e-08, "loss": 0.2959, "step": 39057 }, { "epoch": 2.9028613898179114, "grad_norm": 2.1060902709420852, "learning_rate": 5.493771579691776e-08, "loss": 0.1938, "step": 39058 }, { "epoch": 2.902935711631364, "grad_norm": 2.2115984876406243, "learning_rate": 5.485375797379844e-08, "loss": 0.2273, "step": 39059 }, { "epoch": 2.903010033444816, "grad_norm": 2.4587550042822026, "learning_rate": 5.4769864177014464e-08, "loss": 0.2249, "step": 39060 }, { "epoch": 2.9030843552582684, "grad_norm": 2.8117919480129734, "learning_rate": 5.4686034407105404e-08, "loss": 0.3829, "step": 39061 }, { "epoch": 2.903158677071721, "grad_norm": 1.674853642651149, "learning_rate": 5.4602268664611936e-08, "loss": 0.2212, "step": 39062 }, { "epoch": 2.903232998885173, "grad_norm": 2.9848502856555204, "learning_rate": 5.4518566950072514e-08, "loss": 0.2398, "step": 39063 }, { "epoch": 2.903307320698625, "grad_norm": 2.8320722638723286, "learning_rate": 5.4434929264027824e-08, "loss": 0.3028, "step": 39064 }, { "epoch": 2.9033816425120773, "grad_norm": 2.2569074591958476, "learning_rate": 5.4351355607014103e-08, "loss": 0.257, "step": 39065 }, { "epoch": 2.9034559643255298, "grad_norm": 1.9576779823536705, "learning_rate": 5.42678459795698e-08, "loss": 0.1921, "step": 39066 }, { "epoch": 2.903530286138982, "grad_norm": 2.1293099287966752, "learning_rate": 5.418440038223449e-08, "loss": 0.2456, "step": 39067 }, { "epoch": 2.903604607952434, "grad_norm": 1.8166780291116795, "learning_rate": 5.4101018815542196e-08, "loss": 0.2094, "step": 39068 }, { "epoch": 2.9036789297658863, "grad_norm": 2.1548937011949616, "learning_rate": 5.4017701280032476e-08, "loss": 0.2065, "step": 39069 }, { "epoch": 2.9037532515793387, "grad_norm": 2.3376103566852695, "learning_rate": 5.393444777624046e-08, "loss": 0.242, "step": 39070 }, { "epoch": 2.9038275733927907, "grad_norm": 2.107984043173819, "learning_rate": 5.3851258304702394e-08, "loss": 0.2967, "step": 39071 }, { "epoch": 2.9039018952062428, "grad_norm": 2.3365981057977256, "learning_rate": 5.37681328659545e-08, "loss": 0.2604, "step": 39072 }, { "epoch": 2.903976217019695, "grad_norm": 2.7859991526280683, "learning_rate": 5.3685071460530814e-08, "loss": 0.354, "step": 39073 }, { "epoch": 2.9040505388331477, "grad_norm": 2.080301392798304, "learning_rate": 5.360207408896645e-08, "loss": 0.22, "step": 39074 }, { "epoch": 2.9041248606465997, "grad_norm": 2.6041033689776354, "learning_rate": 5.3519140751796536e-08, "loss": 0.2378, "step": 39075 }, { "epoch": 2.904199182460052, "grad_norm": 2.8868389173887277, "learning_rate": 5.343627144955399e-08, "loss": 0.2631, "step": 39076 }, { "epoch": 2.904273504273504, "grad_norm": 1.9149975163830184, "learning_rate": 5.335346618277171e-08, "loss": 0.2383, "step": 39077 }, { "epoch": 2.9043478260869566, "grad_norm": 2.849663558945071, "learning_rate": 5.327072495198482e-08, "loss": 0.3207, "step": 39078 }, { "epoch": 2.9044221479004086, "grad_norm": 2.4369999464707104, "learning_rate": 5.3188047757724015e-08, "loss": 0.2596, "step": 39079 }, { "epoch": 2.904496469713861, "grad_norm": 2.729293567681964, "learning_rate": 5.310543460052331e-08, "loss": 0.3669, "step": 39080 }, { "epoch": 2.904570791527313, "grad_norm": 2.442102731725447, "learning_rate": 5.302288548091339e-08, "loss": 0.237, "step": 39081 }, { "epoch": 2.9046451133407656, "grad_norm": 2.866201263948876, "learning_rate": 5.2940400399426054e-08, "loss": 0.2946, "step": 39082 }, { "epoch": 2.9047194351542176, "grad_norm": 2.5942119608853487, "learning_rate": 5.285797935659198e-08, "loss": 0.2476, "step": 39083 }, { "epoch": 2.90479375696767, "grad_norm": 2.607582893167961, "learning_rate": 5.277562235294298e-08, "loss": 0.299, "step": 39084 }, { "epoch": 2.9048680787811225, "grad_norm": 3.1461968809049603, "learning_rate": 5.269332938900751e-08, "loss": 0.2936, "step": 39085 }, { "epoch": 2.9049424005945745, "grad_norm": 2.3932982049950926, "learning_rate": 5.261110046531626e-08, "loss": 0.2519, "step": 39086 }, { "epoch": 2.9050167224080266, "grad_norm": 2.642177099129318, "learning_rate": 5.25289355823988e-08, "loss": 0.2533, "step": 39087 }, { "epoch": 2.905091044221479, "grad_norm": 2.3490324170362076, "learning_rate": 5.24468347407836e-08, "loss": 0.2876, "step": 39088 }, { "epoch": 2.9051653660349315, "grad_norm": 2.3012692145467457, "learning_rate": 5.2364797941000245e-08, "loss": 0.2841, "step": 39089 }, { "epoch": 2.9052396878483835, "grad_norm": 2.4313754133802603, "learning_rate": 5.228282518357608e-08, "loss": 0.2106, "step": 39090 }, { "epoch": 2.9053140096618355, "grad_norm": 2.9391468631840425, "learning_rate": 5.2200916469038464e-08, "loss": 0.3994, "step": 39091 }, { "epoch": 2.905388331475288, "grad_norm": 2.637709230421922, "learning_rate": 5.211907179791587e-08, "loss": 0.2518, "step": 39092 }, { "epoch": 2.9054626532887404, "grad_norm": 2.090779145112338, "learning_rate": 5.2037291170733415e-08, "loss": 0.2353, "step": 39093 }, { "epoch": 2.9055369751021924, "grad_norm": 2.543728101494887, "learning_rate": 5.1955574588019585e-08, "loss": 0.2934, "step": 39094 }, { "epoch": 2.905611296915645, "grad_norm": 1.9853450392106455, "learning_rate": 5.187392205029951e-08, "loss": 0.2215, "step": 39095 }, { "epoch": 2.905685618729097, "grad_norm": 2.549672286084417, "learning_rate": 5.179233355809832e-08, "loss": 0.2941, "step": 39096 }, { "epoch": 2.9057599405425494, "grad_norm": 2.530533515319715, "learning_rate": 5.171080911194226e-08, "loss": 0.2951, "step": 39097 }, { "epoch": 2.9058342623560014, "grad_norm": 2.0092386525462382, "learning_rate": 5.162934871235647e-08, "loss": 0.1985, "step": 39098 }, { "epoch": 2.905908584169454, "grad_norm": 2.596812189892561, "learning_rate": 5.154795235986387e-08, "loss": 0.2909, "step": 39099 }, { "epoch": 2.905982905982906, "grad_norm": 2.3484573950056817, "learning_rate": 5.146662005498959e-08, "loss": 0.2435, "step": 39100 }, { "epoch": 2.9060572277963583, "grad_norm": 1.731027329646839, "learning_rate": 5.1385351798257656e-08, "loss": 0.1772, "step": 39101 }, { "epoch": 2.9061315496098103, "grad_norm": 1.9297584053265522, "learning_rate": 5.130414759018987e-08, "loss": 0.2218, "step": 39102 }, { "epoch": 2.906205871423263, "grad_norm": 2.3418753568535733, "learning_rate": 5.1223007431310256e-08, "loss": 0.2939, "step": 39103 }, { "epoch": 2.906280193236715, "grad_norm": 2.018327854346335, "learning_rate": 5.114193132214062e-08, "loss": 0.2395, "step": 39104 }, { "epoch": 2.9063545150501673, "grad_norm": 8.333854807098538, "learning_rate": 5.1060919263202777e-08, "loss": 0.2829, "step": 39105 }, { "epoch": 2.9064288368636193, "grad_norm": 2.332349635346506, "learning_rate": 5.0979971255019635e-08, "loss": 0.2404, "step": 39106 }, { "epoch": 2.9065031586770718, "grad_norm": 2.4682541047778144, "learning_rate": 5.089908729810966e-08, "loss": 0.2399, "step": 39107 }, { "epoch": 2.9065774804905242, "grad_norm": 2.7876428056213247, "learning_rate": 5.081826739299578e-08, "loss": 0.2959, "step": 39108 }, { "epoch": 2.9066518023039762, "grad_norm": 2.3273830499391024, "learning_rate": 5.073751154019757e-08, "loss": 0.2725, "step": 39109 }, { "epoch": 2.9067261241174283, "grad_norm": 2.09306914265413, "learning_rate": 5.065681974023462e-08, "loss": 0.2813, "step": 39110 }, { "epoch": 2.9068004459308807, "grad_norm": 2.5592093617027882, "learning_rate": 5.057619199362762e-08, "loss": 0.2586, "step": 39111 }, { "epoch": 2.906874767744333, "grad_norm": 2.2100428521883666, "learning_rate": 5.049562830089394e-08, "loss": 0.2084, "step": 39112 }, { "epoch": 2.906949089557785, "grad_norm": 2.4628396947657176, "learning_rate": 5.0415128662554266e-08, "loss": 0.4033, "step": 39113 }, { "epoch": 2.907023411371237, "grad_norm": 2.2778429837121257, "learning_rate": 5.0334693079124867e-08, "loss": 0.2458, "step": 39114 }, { "epoch": 2.9070977331846897, "grad_norm": 1.7970164824546446, "learning_rate": 5.025432155112531e-08, "loss": 0.2162, "step": 39115 }, { "epoch": 2.907172054998142, "grad_norm": 2.4288961353794463, "learning_rate": 5.017401407907185e-08, "loss": 0.2778, "step": 39116 }, { "epoch": 2.907246376811594, "grad_norm": 1.9285778071098114, "learning_rate": 5.009377066348075e-08, "loss": 0.2487, "step": 39117 }, { "epoch": 2.9073206986250466, "grad_norm": 2.2782521224467174, "learning_rate": 5.00135913048716e-08, "loss": 0.2816, "step": 39118 }, { "epoch": 2.9073950204384986, "grad_norm": 1.7354214102008891, "learning_rate": 4.9933476003757307e-08, "loss": 0.1772, "step": 39119 }, { "epoch": 2.907469342251951, "grad_norm": 2.511712035590729, "learning_rate": 4.9853424760655246e-08, "loss": 0.2667, "step": 39120 }, { "epoch": 2.907543664065403, "grad_norm": 2.3896464695513226, "learning_rate": 4.977343757608055e-08, "loss": 0.2733, "step": 39121 }, { "epoch": 2.9076179858788556, "grad_norm": 2.2797739750649293, "learning_rate": 4.969351445054838e-08, "loss": 0.2315, "step": 39122 }, { "epoch": 2.9076923076923076, "grad_norm": 6.729024898438026, "learning_rate": 4.961365538457275e-08, "loss": 0.299, "step": 39123 }, { "epoch": 2.90776662950576, "grad_norm": 1.8461094159843718, "learning_rate": 4.9533860378668806e-08, "loss": 0.2326, "step": 39124 }, { "epoch": 2.907840951319212, "grad_norm": 2.1108042263918096, "learning_rate": 4.9454129433348374e-08, "loss": 0.2206, "step": 39125 }, { "epoch": 2.9079152731326645, "grad_norm": 2.8822491402851944, "learning_rate": 4.93744625491277e-08, "loss": 0.3067, "step": 39126 }, { "epoch": 2.9079895949461165, "grad_norm": 2.360092376799695, "learning_rate": 4.9294859726516375e-08, "loss": 0.2399, "step": 39127 }, { "epoch": 2.908063916759569, "grad_norm": 2.3457874805241277, "learning_rate": 4.9215320966029546e-08, "loss": 0.2727, "step": 39128 }, { "epoch": 2.908138238573021, "grad_norm": 1.978312049150381, "learning_rate": 4.913584626817791e-08, "loss": 0.2429, "step": 39129 }, { "epoch": 2.9082125603864735, "grad_norm": 2.503214456997661, "learning_rate": 4.9056435633473285e-08, "loss": 0.2803, "step": 39130 }, { "epoch": 2.908286882199926, "grad_norm": 1.9942237060953638, "learning_rate": 4.8977089062426376e-08, "loss": 0.2431, "step": 39131 }, { "epoch": 2.908361204013378, "grad_norm": 2.633577687980292, "learning_rate": 4.889780655554899e-08, "loss": 0.2885, "step": 39132 }, { "epoch": 2.90843552582683, "grad_norm": 2.7605358841559107, "learning_rate": 4.881858811335183e-08, "loss": 0.366, "step": 39133 }, { "epoch": 2.9085098476402824, "grad_norm": 3.0547882788493452, "learning_rate": 4.8739433736343376e-08, "loss": 0.3596, "step": 39134 }, { "epoch": 2.908584169453735, "grad_norm": 2.239752805018744, "learning_rate": 4.866034342503434e-08, "loss": 0.2252, "step": 39135 }, { "epoch": 2.908658491267187, "grad_norm": 2.5962768865599584, "learning_rate": 4.8581317179934304e-08, "loss": 0.2849, "step": 39136 }, { "epoch": 2.908732813080639, "grad_norm": 1.7868485694952954, "learning_rate": 4.850235500155065e-08, "loss": 0.2131, "step": 39137 }, { "epoch": 2.9088071348940914, "grad_norm": 2.0197965984493136, "learning_rate": 4.8423456890392964e-08, "loss": 0.2199, "step": 39138 }, { "epoch": 2.908881456707544, "grad_norm": 2.8122314434670765, "learning_rate": 4.8344622846967506e-08, "loss": 0.3473, "step": 39139 }, { "epoch": 2.908955778520996, "grad_norm": 1.9014261641785721, "learning_rate": 4.826585287178498e-08, "loss": 0.2824, "step": 39140 }, { "epoch": 2.9090301003344483, "grad_norm": 3.2788217788011447, "learning_rate": 4.818714696534943e-08, "loss": 0.2292, "step": 39141 }, { "epoch": 2.9091044221479003, "grad_norm": 2.3983273609433637, "learning_rate": 4.810850512816822e-08, "loss": 0.2238, "step": 39142 }, { "epoch": 2.909178743961353, "grad_norm": 2.4643678409201284, "learning_rate": 4.802992736074874e-08, "loss": 0.213, "step": 39143 }, { "epoch": 2.909253065774805, "grad_norm": 2.3955431697473135, "learning_rate": 4.7951413663596124e-08, "loss": 0.3477, "step": 39144 }, { "epoch": 2.9093273875882573, "grad_norm": 2.6536606295968483, "learning_rate": 4.787296403721664e-08, "loss": 0.2737, "step": 39145 }, { "epoch": 2.9094017094017093, "grad_norm": 2.8602020213493944, "learning_rate": 4.779457848211322e-08, "loss": 0.3564, "step": 39146 }, { "epoch": 2.9094760312151617, "grad_norm": 2.12173443495429, "learning_rate": 4.771625699879323e-08, "loss": 0.229, "step": 39147 }, { "epoch": 2.9095503530286138, "grad_norm": 2.121950645322829, "learning_rate": 4.76379995877585e-08, "loss": 0.2344, "step": 39148 }, { "epoch": 2.909624674842066, "grad_norm": 2.793335962159959, "learning_rate": 4.7559806249514176e-08, "loss": 0.3096, "step": 39149 }, { "epoch": 2.9096989966555182, "grad_norm": 2.12238926525227, "learning_rate": 4.7481676984564294e-08, "loss": 0.2532, "step": 39150 }, { "epoch": 2.9097733184689707, "grad_norm": 3.0279217689181492, "learning_rate": 4.7403611793410683e-08, "loss": 0.3985, "step": 39151 }, { "epoch": 2.9098476402824227, "grad_norm": 2.83769343377522, "learning_rate": 4.732561067655517e-08, "loss": 0.253, "step": 39152 }, { "epoch": 2.909921962095875, "grad_norm": 2.0610496392193327, "learning_rate": 4.724767363450178e-08, "loss": 0.209, "step": 39153 }, { "epoch": 2.9099962839093276, "grad_norm": 2.7663724145428144, "learning_rate": 4.716980066775123e-08, "loss": 0.3146, "step": 39154 }, { "epoch": 2.9100706057227796, "grad_norm": 2.143687820414062, "learning_rate": 4.709199177680534e-08, "loss": 0.2698, "step": 39155 }, { "epoch": 2.9101449275362317, "grad_norm": 2.3187196818578335, "learning_rate": 4.7014246962164834e-08, "loss": 0.2405, "step": 39156 }, { "epoch": 2.910219249349684, "grad_norm": 2.363775153054688, "learning_rate": 4.69365662243304e-08, "loss": 0.2573, "step": 39157 }, { "epoch": 2.9102935711631366, "grad_norm": 3.5125233137187406, "learning_rate": 4.6858949563801657e-08, "loss": 0.3734, "step": 39158 }, { "epoch": 2.9103678929765886, "grad_norm": 2.7053775758741865, "learning_rate": 4.6781396981078195e-08, "loss": 0.2738, "step": 39159 }, { "epoch": 2.9104422147900406, "grad_norm": 2.7664548155961666, "learning_rate": 4.670390847665962e-08, "loss": 0.2527, "step": 39160 }, { "epoch": 2.910516536603493, "grad_norm": 2.193339621137016, "learning_rate": 4.662648405104442e-08, "loss": 0.2512, "step": 39161 }, { "epoch": 2.9105908584169455, "grad_norm": 2.666409327875863, "learning_rate": 4.65491237047333e-08, "loss": 0.2736, "step": 39162 }, { "epoch": 2.9106651802303976, "grad_norm": 2.3634224543962383, "learning_rate": 4.647182743822143e-08, "loss": 0.3175, "step": 39163 }, { "epoch": 2.91073950204385, "grad_norm": 2.3552634108837704, "learning_rate": 4.639459525200729e-08, "loss": 0.2946, "step": 39164 }, { "epoch": 2.910813823857302, "grad_norm": 2.6149880243274803, "learning_rate": 4.631742714658827e-08, "loss": 0.341, "step": 39165 }, { "epoch": 2.9108881456707545, "grad_norm": 2.2633903874298826, "learning_rate": 4.624032312246063e-08, "loss": 0.2389, "step": 39166 }, { "epoch": 2.9109624674842065, "grad_norm": 2.9735401067770333, "learning_rate": 4.616328318012286e-08, "loss": 0.2472, "step": 39167 }, { "epoch": 2.911036789297659, "grad_norm": 2.6543822668102393, "learning_rate": 4.608630732006902e-08, "loss": 0.3378, "step": 39168 }, { "epoch": 2.911111111111111, "grad_norm": 1.8167310410887378, "learning_rate": 4.6009395542794266e-08, "loss": 0.1653, "step": 39169 }, { "epoch": 2.9111854329245634, "grad_norm": 2.709838498629632, "learning_rate": 4.5932547848794865e-08, "loss": 0.2963, "step": 39170 }, { "epoch": 2.9112597547380155, "grad_norm": 3.10117084046822, "learning_rate": 4.585576423856486e-08, "loss": 0.2926, "step": 39171 }, { "epoch": 2.911334076551468, "grad_norm": 2.1433408940363092, "learning_rate": 4.577904471259942e-08, "loss": 0.2145, "step": 39172 }, { "epoch": 2.9114083983649204, "grad_norm": 2.5525587633220117, "learning_rate": 4.5702389271391477e-08, "loss": 0.3559, "step": 39173 }, { "epoch": 2.9114827201783724, "grad_norm": 2.863798851332607, "learning_rate": 4.56257979154362e-08, "loss": 0.3419, "step": 39174 }, { "epoch": 2.9115570419918244, "grad_norm": 2.8965114421752127, "learning_rate": 4.554927064522541e-08, "loss": 0.2657, "step": 39175 }, { "epoch": 2.911631363805277, "grad_norm": 3.0141524743842925, "learning_rate": 4.547280746125093e-08, "loss": 0.2766, "step": 39176 }, { "epoch": 2.9117056856187293, "grad_norm": 2.252799765411515, "learning_rate": 4.5396408364005715e-08, "loss": 0.2814, "step": 39177 }, { "epoch": 2.9117800074321813, "grad_norm": 2.7802033598481013, "learning_rate": 4.53200733539827e-08, "loss": 0.3279, "step": 39178 }, { "epoch": 2.9118543292456334, "grad_norm": 1.6835840745616972, "learning_rate": 4.524380243167259e-08, "loss": 0.1931, "step": 39179 }, { "epoch": 2.911928651059086, "grad_norm": 2.424510760879427, "learning_rate": 4.516759559756612e-08, "loss": 0.2871, "step": 39180 }, { "epoch": 2.9120029728725383, "grad_norm": 2.432052050332721, "learning_rate": 4.509145285215399e-08, "loss": 0.309, "step": 39181 }, { "epoch": 2.9120772946859903, "grad_norm": 2.3075558742075764, "learning_rate": 4.501537419592694e-08, "loss": 0.3133, "step": 39182 }, { "epoch": 2.9121516164994423, "grad_norm": 2.0844575389532087, "learning_rate": 4.4939359629374565e-08, "loss": 0.1756, "step": 39183 }, { "epoch": 2.912225938312895, "grad_norm": 2.479224864156677, "learning_rate": 4.486340915298537e-08, "loss": 0.3439, "step": 39184 }, { "epoch": 2.9123002601263472, "grad_norm": 2.0987836137773876, "learning_rate": 4.478752276725007e-08, "loss": 0.2732, "step": 39185 }, { "epoch": 2.9123745819397993, "grad_norm": 3.172174525888161, "learning_rate": 4.471170047265605e-08, "loss": 0.3039, "step": 39186 }, { "epoch": 2.9124489037532517, "grad_norm": 2.545258370686535, "learning_rate": 4.46359422696907e-08, "loss": 0.2878, "step": 39187 }, { "epoch": 2.9125232255667037, "grad_norm": 2.6606189052940383, "learning_rate": 4.456024815884252e-08, "loss": 0.3113, "step": 39188 }, { "epoch": 2.912597547380156, "grad_norm": 1.4430470412054164, "learning_rate": 4.448461814060001e-08, "loss": 0.1309, "step": 39189 }, { "epoch": 2.912671869193608, "grad_norm": 2.2048662987666376, "learning_rate": 4.440905221544834e-08, "loss": 0.2628, "step": 39190 }, { "epoch": 2.9127461910070607, "grad_norm": 2.100723729800072, "learning_rate": 4.433355038387377e-08, "loss": 0.2416, "step": 39191 }, { "epoch": 2.9128205128205127, "grad_norm": 2.7132052729294815, "learning_rate": 4.4258112646364815e-08, "loss": 0.2085, "step": 39192 }, { "epoch": 2.912894834633965, "grad_norm": 2.873349794152491, "learning_rate": 4.4182739003404416e-08, "loss": 0.2647, "step": 39193 }, { "epoch": 2.912969156447417, "grad_norm": 2.05626336823591, "learning_rate": 4.410742945547886e-08, "loss": 0.3014, "step": 39194 }, { "epoch": 2.9130434782608696, "grad_norm": 2.082738513274255, "learning_rate": 4.4032184003074405e-08, "loss": 0.2294, "step": 39195 }, { "epoch": 2.913117800074322, "grad_norm": 2.4672925597629973, "learning_rate": 4.395700264667291e-08, "loss": 0.3431, "step": 39196 }, { "epoch": 2.913192121887774, "grad_norm": 2.4731769168043836, "learning_rate": 4.388188538676064e-08, "loss": 0.2451, "step": 39197 }, { "epoch": 2.913266443701226, "grad_norm": 2.8723489275500866, "learning_rate": 4.3806832223819426e-08, "loss": 0.3019, "step": 39198 }, { "epoch": 2.9133407655146786, "grad_norm": 1.9470886394592672, "learning_rate": 4.3731843158333345e-08, "loss": 0.2402, "step": 39199 }, { "epoch": 2.913415087328131, "grad_norm": 2.4383968770549154, "learning_rate": 4.365691819078532e-08, "loss": 0.2828, "step": 39200 }, { "epoch": 2.913489409141583, "grad_norm": 2.514530569960645, "learning_rate": 4.358205732165721e-08, "loss": 0.2712, "step": 39201 }, { "epoch": 2.913563730955035, "grad_norm": 1.9437119514439432, "learning_rate": 4.350726055143084e-08, "loss": 0.1972, "step": 39202 }, { "epoch": 2.9136380527684875, "grad_norm": 2.6084264637944443, "learning_rate": 4.343252788058916e-08, "loss": 0.3122, "step": 39203 }, { "epoch": 2.91371237458194, "grad_norm": 1.8140988928936468, "learning_rate": 4.3357859309611786e-08, "loss": 0.1752, "step": 39204 }, { "epoch": 2.913786696395392, "grad_norm": 3.189536532534115, "learning_rate": 4.328325483897944e-08, "loss": 0.4425, "step": 39205 }, { "epoch": 2.913861018208844, "grad_norm": 2.7789997864580807, "learning_rate": 4.3208714469172855e-08, "loss": 0.3057, "step": 39206 }, { "epoch": 2.9139353400222965, "grad_norm": 2.526670530333431, "learning_rate": 4.313423820067275e-08, "loss": 0.3598, "step": 39207 }, { "epoch": 2.914009661835749, "grad_norm": 2.2111385847152834, "learning_rate": 4.3059826033957644e-08, "loss": 0.261, "step": 39208 }, { "epoch": 2.914083983649201, "grad_norm": 2.5662916357416883, "learning_rate": 4.298547796950603e-08, "loss": 0.2834, "step": 39209 }, { "epoch": 2.9141583054626534, "grad_norm": 2.6663327270056825, "learning_rate": 4.291119400779753e-08, "loss": 0.3449, "step": 39210 }, { "epoch": 2.9142326272761054, "grad_norm": 2.285979086901986, "learning_rate": 4.2836974149310653e-08, "loss": 0.245, "step": 39211 }, { "epoch": 2.914306949089558, "grad_norm": 2.3539100540632325, "learning_rate": 4.2762818394522786e-08, "loss": 0.21, "step": 39212 }, { "epoch": 2.91438127090301, "grad_norm": 1.9581471422630503, "learning_rate": 4.2688726743910224e-08, "loss": 0.2446, "step": 39213 }, { "epoch": 2.9144555927164624, "grad_norm": 2.456980676676215, "learning_rate": 4.261469919795147e-08, "loss": 0.2568, "step": 39214 }, { "epoch": 2.9145299145299144, "grad_norm": 2.436122323167548, "learning_rate": 4.254073575712281e-08, "loss": 0.3298, "step": 39215 }, { "epoch": 2.914604236343367, "grad_norm": 2.547661248399531, "learning_rate": 4.2466836421900526e-08, "loss": 0.2725, "step": 39216 }, { "epoch": 2.914678558156819, "grad_norm": 2.595089396409764, "learning_rate": 4.2393001192759796e-08, "loss": 0.2889, "step": 39217 }, { "epoch": 2.9147528799702713, "grad_norm": 2.3269639874836936, "learning_rate": 4.23192300701758e-08, "loss": 0.2476, "step": 39218 }, { "epoch": 2.914827201783724, "grad_norm": 2.7226073567473987, "learning_rate": 4.2245523054624814e-08, "loss": 0.2578, "step": 39219 }, { "epoch": 2.914901523597176, "grad_norm": 2.7606372235995225, "learning_rate": 4.21718801465798e-08, "loss": 0.3186, "step": 39220 }, { "epoch": 2.914975845410628, "grad_norm": 3.6946990187394664, "learning_rate": 4.2098301346515935e-08, "loss": 0.3404, "step": 39221 }, { "epoch": 2.9150501672240803, "grad_norm": 2.332618039926382, "learning_rate": 4.202478665490617e-08, "loss": 0.2765, "step": 39222 }, { "epoch": 2.9151244890375327, "grad_norm": 2.266442474119833, "learning_rate": 4.195133607222346e-08, "loss": 0.2516, "step": 39223 }, { "epoch": 2.9151988108509848, "grad_norm": 2.0200720208953773, "learning_rate": 4.187794959894298e-08, "loss": 0.2448, "step": 39224 }, { "epoch": 2.9152731326644368, "grad_norm": 2.0633022934146568, "learning_rate": 4.180462723553436e-08, "loss": 0.2294, "step": 39225 }, { "epoch": 2.9153474544778892, "grad_norm": 2.3073063351879473, "learning_rate": 4.173136898247165e-08, "loss": 0.2536, "step": 39226 }, { "epoch": 2.9154217762913417, "grad_norm": 2.1580144797048257, "learning_rate": 4.1658174840225606e-08, "loss": 0.2843, "step": 39227 }, { "epoch": 2.9154960981047937, "grad_norm": 2.1910232485702354, "learning_rate": 4.158504480926695e-08, "loss": 0.2604, "step": 39228 }, { "epoch": 2.915570419918246, "grad_norm": 1.864127065273859, "learning_rate": 4.151197889006753e-08, "loss": 0.2467, "step": 39229 }, { "epoch": 2.915644741731698, "grad_norm": 2.7240960303338544, "learning_rate": 4.1438977083096964e-08, "loss": 0.2206, "step": 39230 }, { "epoch": 2.9157190635451506, "grad_norm": 2.4364187734508316, "learning_rate": 4.1366039388826e-08, "loss": 0.3073, "step": 39231 }, { "epoch": 2.9157933853586027, "grad_norm": 2.1217269101342517, "learning_rate": 4.1293165807723135e-08, "loss": 0.2983, "step": 39232 }, { "epoch": 2.915867707172055, "grad_norm": 1.9939914979084532, "learning_rate": 4.122035634025801e-08, "loss": 0.2543, "step": 39233 }, { "epoch": 2.915942028985507, "grad_norm": 2.5480419285695475, "learning_rate": 4.1147610986900236e-08, "loss": 0.2324, "step": 39234 }, { "epoch": 2.9160163507989596, "grad_norm": 3.0695717715113187, "learning_rate": 4.107492974811722e-08, "loss": 0.3154, "step": 39235 }, { "epoch": 2.9160906726124116, "grad_norm": 2.5368280722295022, "learning_rate": 4.100231262437637e-08, "loss": 0.2212, "step": 39236 }, { "epoch": 2.916164994425864, "grad_norm": 1.9695956731125994, "learning_rate": 4.09297596161462e-08, "loss": 0.1879, "step": 39237 }, { "epoch": 2.916239316239316, "grad_norm": 2.823251434811741, "learning_rate": 4.0857270723894116e-08, "loss": 0.3263, "step": 39238 }, { "epoch": 2.9163136380527686, "grad_norm": 1.8601307101272238, "learning_rate": 4.0784845948086404e-08, "loss": 0.1645, "step": 39239 }, { "epoch": 2.9163879598662206, "grad_norm": 2.3580176191542805, "learning_rate": 4.071248528918825e-08, "loss": 0.2801, "step": 39240 }, { "epoch": 2.916462281679673, "grad_norm": 1.8710795600540089, "learning_rate": 4.064018874766706e-08, "loss": 0.2075, "step": 39241 }, { "epoch": 2.9165366034931255, "grad_norm": 1.8873838296190366, "learning_rate": 4.0567956323986915e-08, "loss": 0.2174, "step": 39242 }, { "epoch": 2.9166109253065775, "grad_norm": 2.1546339937774475, "learning_rate": 4.049578801861409e-08, "loss": 0.2521, "step": 39243 }, { "epoch": 2.9166852471200295, "grad_norm": 2.58342402281663, "learning_rate": 4.042368383201156e-08, "loss": 0.2827, "step": 39244 }, { "epoch": 2.916759568933482, "grad_norm": 2.7643763613627974, "learning_rate": 4.035164376464562e-08, "loss": 0.2638, "step": 39245 }, { "epoch": 2.9168338907469344, "grad_norm": 2.59130954239037, "learning_rate": 4.027966781697923e-08, "loss": 0.2527, "step": 39246 }, { "epoch": 2.9169082125603865, "grad_norm": 2.1674463789729748, "learning_rate": 4.020775598947535e-08, "loss": 0.2468, "step": 39247 }, { "epoch": 2.9169825343738385, "grad_norm": 2.2663568936604293, "learning_rate": 4.0135908282596946e-08, "loss": 0.3689, "step": 39248 }, { "epoch": 2.917056856187291, "grad_norm": 2.751790796636159, "learning_rate": 4.006412469680587e-08, "loss": 0.2772, "step": 39249 }, { "epoch": 2.9171311780007434, "grad_norm": 3.478094241129682, "learning_rate": 3.99924052325662e-08, "loss": 0.375, "step": 39250 }, { "epoch": 2.9172054998141954, "grad_norm": 2.3996751370073675, "learning_rate": 3.992074989033867e-08, "loss": 0.225, "step": 39251 }, { "epoch": 2.917279821627648, "grad_norm": 2.2151974252712963, "learning_rate": 3.984915867058403e-08, "loss": 0.2244, "step": 39252 }, { "epoch": 2.9173541434411, "grad_norm": 2.4743438846057684, "learning_rate": 3.977763157376413e-08, "loss": 0.2611, "step": 39253 }, { "epoch": 2.9174284652545523, "grad_norm": 2.158381490628328, "learning_rate": 3.970616860033971e-08, "loss": 0.2447, "step": 39254 }, { "epoch": 2.9175027870680044, "grad_norm": 2.4345616901654155, "learning_rate": 3.96347697507693e-08, "loss": 0.2691, "step": 39255 }, { "epoch": 2.917577108881457, "grad_norm": 2.464494389595515, "learning_rate": 3.956343502551363e-08, "loss": 0.2398, "step": 39256 }, { "epoch": 2.917651430694909, "grad_norm": 7.868664824142911, "learning_rate": 3.949216442503123e-08, "loss": 0.3804, "step": 39257 }, { "epoch": 2.9177257525083613, "grad_norm": 2.693243003569619, "learning_rate": 3.942095794978173e-08, "loss": 0.3426, "step": 39258 }, { "epoch": 2.9178000743218133, "grad_norm": 2.2732127943749134, "learning_rate": 3.934981560022366e-08, "loss": 0.2748, "step": 39259 }, { "epoch": 2.917874396135266, "grad_norm": 1.9531079339656103, "learning_rate": 3.927873737681442e-08, "loss": 0.1745, "step": 39260 }, { "epoch": 2.917948717948718, "grad_norm": 1.8344351829050736, "learning_rate": 3.9207723280011434e-08, "loss": 0.1984, "step": 39261 }, { "epoch": 2.9180230397621703, "grad_norm": 3.328436920778671, "learning_rate": 3.913677331027321e-08, "loss": 0.2904, "step": 39262 }, { "epoch": 2.9180973615756223, "grad_norm": 2.508924858133791, "learning_rate": 3.9065887468054956e-08, "loss": 0.3586, "step": 39263 }, { "epoch": 2.9181716833890747, "grad_norm": 2.33124075925549, "learning_rate": 3.8995065753814063e-08, "loss": 0.3137, "step": 39264 }, { "epoch": 2.918246005202527, "grad_norm": 2.636921345818926, "learning_rate": 3.892430816800574e-08, "loss": 0.3726, "step": 39265 }, { "epoch": 2.918320327015979, "grad_norm": 2.6104717669273105, "learning_rate": 3.8853614711086284e-08, "loss": 0.2754, "step": 39266 }, { "epoch": 2.9183946488294312, "grad_norm": 2.1502550456244016, "learning_rate": 3.878298538350977e-08, "loss": 0.2905, "step": 39267 }, { "epoch": 2.9184689706428837, "grad_norm": 1.8412578996480604, "learning_rate": 3.871242018573251e-08, "loss": 0.1813, "step": 39268 }, { "epoch": 2.918543292456336, "grad_norm": 2.779254513806132, "learning_rate": 3.864191911820636e-08, "loss": 0.2794, "step": 39269 }, { "epoch": 2.918617614269788, "grad_norm": 3.1219844521529954, "learning_rate": 3.857148218138762e-08, "loss": 0.3519, "step": 39270 }, { "epoch": 2.91869193608324, "grad_norm": 1.9338856834489113, "learning_rate": 3.8501109375729264e-08, "loss": 0.1816, "step": 39271 }, { "epoch": 2.9187662578966926, "grad_norm": 1.9382268944758425, "learning_rate": 3.843080070168314e-08, "loss": 0.2197, "step": 39272 }, { "epoch": 2.918840579710145, "grad_norm": 3.3772716294433405, "learning_rate": 3.8360556159703356e-08, "loss": 0.3863, "step": 39273 }, { "epoch": 2.918914901523597, "grad_norm": 2.019356227650411, "learning_rate": 3.829037575024064e-08, "loss": 0.2572, "step": 39274 }, { "epoch": 2.9189892233370496, "grad_norm": 2.0085100713650186, "learning_rate": 3.822025947374797e-08, "loss": 0.1979, "step": 39275 }, { "epoch": 2.9190635451505016, "grad_norm": 2.3329034613464796, "learning_rate": 3.8150207330676095e-08, "loss": 0.2364, "step": 39276 }, { "epoch": 2.919137866963954, "grad_norm": 1.872688507962992, "learning_rate": 3.808021932147688e-08, "loss": 0.2209, "step": 39277 }, { "epoch": 2.919212188777406, "grad_norm": 2.5089317267398625, "learning_rate": 3.8010295446599953e-08, "loss": 0.2825, "step": 39278 }, { "epoch": 2.9192865105908585, "grad_norm": 2.3404035344196767, "learning_rate": 3.7940435706496083e-08, "loss": 0.2416, "step": 39279 }, { "epoch": 2.9193608324043105, "grad_norm": 2.402884252674971, "learning_rate": 3.7870640101614896e-08, "loss": 0.2332, "step": 39280 }, { "epoch": 2.919435154217763, "grad_norm": 2.656346787173325, "learning_rate": 3.780090863240493e-08, "loss": 0.3072, "step": 39281 }, { "epoch": 2.919509476031215, "grad_norm": 3.4441041111080697, "learning_rate": 3.7731241299315824e-08, "loss": 0.2116, "step": 39282 }, { "epoch": 2.9195837978446675, "grad_norm": 2.387965546787511, "learning_rate": 3.7661638102797216e-08, "loss": 0.3017, "step": 39283 }, { "epoch": 2.9196581196581195, "grad_norm": 2.8493903898260373, "learning_rate": 3.759209904329431e-08, "loss": 0.3767, "step": 39284 }, { "epoch": 2.919732441471572, "grad_norm": 2.1790675449725514, "learning_rate": 3.7522624121257846e-08, "loss": 0.3146, "step": 39285 }, { "epoch": 2.919806763285024, "grad_norm": 2.3742119365775842, "learning_rate": 3.745321333713303e-08, "loss": 0.2745, "step": 39286 }, { "epoch": 2.9198810850984764, "grad_norm": 2.1650384869518553, "learning_rate": 3.738386669136951e-08, "loss": 0.2514, "step": 39287 }, { "epoch": 2.919955406911929, "grad_norm": 2.6805251990126044, "learning_rate": 3.7314584184409144e-08, "loss": 0.297, "step": 39288 }, { "epoch": 2.920029728725381, "grad_norm": 3.2351488544618907, "learning_rate": 3.724536581670157e-08, "loss": 0.311, "step": 39289 }, { "epoch": 2.920104050538833, "grad_norm": 2.202176647588604, "learning_rate": 3.7176211588692e-08, "loss": 0.2458, "step": 39290 }, { "epoch": 2.9201783723522854, "grad_norm": 1.9469033376725533, "learning_rate": 3.7107121500824514e-08, "loss": 0.1904, "step": 39291 }, { "epoch": 2.920252694165738, "grad_norm": 2.6343140206324764, "learning_rate": 3.703809555354432e-08, "loss": 0.2742, "step": 39292 }, { "epoch": 2.92032701597919, "grad_norm": 2.4319153081599585, "learning_rate": 3.69691337472966e-08, "loss": 0.2795, "step": 39293 }, { "epoch": 2.920401337792642, "grad_norm": 2.4835844367632083, "learning_rate": 3.6900236082523246e-08, "loss": 0.2779, "step": 39294 }, { "epoch": 2.9204756596060943, "grad_norm": 2.322318052129733, "learning_rate": 3.683140255967055e-08, "loss": 0.2252, "step": 39295 }, { "epoch": 2.920549981419547, "grad_norm": 2.375360726854931, "learning_rate": 3.6762633179179274e-08, "loss": 0.312, "step": 39296 }, { "epoch": 2.920624303232999, "grad_norm": 2.8781987823541133, "learning_rate": 3.669392794149462e-08, "loss": 0.261, "step": 39297 }, { "epoch": 2.9206986250464513, "grad_norm": 2.7344581868449804, "learning_rate": 3.662528684705624e-08, "loss": 0.2932, "step": 39298 }, { "epoch": 2.9207729468599033, "grad_norm": 2.300573099483593, "learning_rate": 3.655670989630822e-08, "loss": 0.2578, "step": 39299 }, { "epoch": 2.9208472686733558, "grad_norm": 2.329860956231076, "learning_rate": 3.6488197089690203e-08, "loss": 0.3608, "step": 39300 }, { "epoch": 2.9209215904868078, "grad_norm": 2.208405007796447, "learning_rate": 3.6419748427645174e-08, "loss": 0.2855, "step": 39301 }, { "epoch": 2.9209959123002602, "grad_norm": 2.2394824665057373, "learning_rate": 3.635136391061167e-08, "loss": 0.2697, "step": 39302 }, { "epoch": 2.9210702341137122, "grad_norm": 4.360665351482616, "learning_rate": 3.6283043539031556e-08, "loss": 0.2094, "step": 39303 }, { "epoch": 2.9211445559271647, "grad_norm": 2.387239996018327, "learning_rate": 3.621478731334449e-08, "loss": 0.2903, "step": 39304 }, { "epoch": 2.9212188777406167, "grad_norm": 2.2073155939389832, "learning_rate": 3.614659523399011e-08, "loss": 0.2744, "step": 39305 }, { "epoch": 2.921293199554069, "grad_norm": 2.198493963629237, "learning_rate": 3.607846730140585e-08, "loss": 0.2515, "step": 39306 }, { "epoch": 2.921367521367521, "grad_norm": 2.4392492016689102, "learning_rate": 3.6010403516032465e-08, "loss": 0.2833, "step": 39307 }, { "epoch": 2.9214418431809737, "grad_norm": 2.5989879051711835, "learning_rate": 3.5942403878306275e-08, "loss": 0.2506, "step": 39308 }, { "epoch": 2.9215161649944257, "grad_norm": 2.7814806382592687, "learning_rate": 3.587446838866693e-08, "loss": 0.2922, "step": 39309 }, { "epoch": 2.921590486807878, "grad_norm": 2.578576143631791, "learning_rate": 3.580659704754963e-08, "loss": 0.2877, "step": 39310 }, { "epoch": 2.9216648086213306, "grad_norm": 2.465656063638098, "learning_rate": 3.573878985539292e-08, "loss": 0.3039, "step": 39311 }, { "epoch": 2.9217391304347826, "grad_norm": 2.70076707346134, "learning_rate": 3.5671046812633117e-08, "loss": 0.2276, "step": 39312 }, { "epoch": 2.9218134522482346, "grad_norm": 2.4170934379763516, "learning_rate": 3.560336791970542e-08, "loss": 0.1699, "step": 39313 }, { "epoch": 2.921887774061687, "grad_norm": 2.307198233236665, "learning_rate": 3.5535753177046164e-08, "loss": 0.274, "step": 39314 }, { "epoch": 2.9219620958751396, "grad_norm": 2.745231313680308, "learning_rate": 3.546820258509165e-08, "loss": 0.3473, "step": 39315 }, { "epoch": 2.9220364176885916, "grad_norm": 2.5234159708450297, "learning_rate": 3.540071614427487e-08, "loss": 0.2856, "step": 39316 }, { "epoch": 2.9221107395020436, "grad_norm": 2.5175266758817196, "learning_rate": 3.533329385503215e-08, "loss": 0.3163, "step": 39317 }, { "epoch": 2.922185061315496, "grad_norm": 2.6810218529374437, "learning_rate": 3.526593571779646e-08, "loss": 0.3056, "step": 39318 }, { "epoch": 2.9222593831289485, "grad_norm": 2.2680066928511002, "learning_rate": 3.5198641733000804e-08, "loss": 0.2035, "step": 39319 }, { "epoch": 2.9223337049424005, "grad_norm": 3.123328243693195, "learning_rate": 3.513141190108038e-08, "loss": 0.3713, "step": 39320 }, { "epoch": 2.922408026755853, "grad_norm": 2.009383448739977, "learning_rate": 3.506424622246596e-08, "loss": 0.1931, "step": 39321 }, { "epoch": 2.922482348569305, "grad_norm": 2.141484905165254, "learning_rate": 3.499714469759163e-08, "loss": 0.212, "step": 39322 }, { "epoch": 2.9225566703827575, "grad_norm": 2.242288172757138, "learning_rate": 3.493010732688817e-08, "loss": 0.2716, "step": 39323 }, { "epoch": 2.9226309921962095, "grad_norm": 1.8768164501688256, "learning_rate": 3.486313411078746e-08, "loss": 0.1707, "step": 39324 }, { "epoch": 2.922705314009662, "grad_norm": 3.064463570830399, "learning_rate": 3.4796225049721355e-08, "loss": 0.291, "step": 39325 }, { "epoch": 2.922779635823114, "grad_norm": 1.8642912274593029, "learning_rate": 3.472938014411953e-08, "loss": 0.2505, "step": 39326 }, { "epoch": 2.9228539576365664, "grad_norm": 2.393101918474403, "learning_rate": 3.4662599394413856e-08, "loss": 0.2378, "step": 39327 }, { "epoch": 2.9229282794500184, "grad_norm": 2.248888681838517, "learning_rate": 3.459588280103177e-08, "loss": 0.2661, "step": 39328 }, { "epoch": 2.923002601263471, "grad_norm": 2.23213694651846, "learning_rate": 3.452923036440625e-08, "loss": 0.2469, "step": 39329 }, { "epoch": 2.9230769230769234, "grad_norm": 2.6577906740565123, "learning_rate": 3.446264208496253e-08, "loss": 0.2884, "step": 39330 }, { "epoch": 2.9231512448903754, "grad_norm": 2.3052372976473325, "learning_rate": 3.439611796313247e-08, "loss": 0.2848, "step": 39331 }, { "epoch": 2.9232255667038274, "grad_norm": 2.5188487407902347, "learning_rate": 3.432965799934351e-08, "loss": 0.2526, "step": 39332 }, { "epoch": 2.92329988851728, "grad_norm": 2.1056683569298706, "learning_rate": 3.426326219402199e-08, "loss": 0.2849, "step": 39333 }, { "epoch": 2.9233742103307323, "grad_norm": 2.675830989722443, "learning_rate": 3.419693054759754e-08, "loss": 0.2689, "step": 39334 }, { "epoch": 2.9234485321441843, "grad_norm": 2.5606030371648423, "learning_rate": 3.4130663060494284e-08, "loss": 0.28, "step": 39335 }, { "epoch": 2.9235228539576363, "grad_norm": 2.7097273595847935, "learning_rate": 3.4064459733142986e-08, "loss": 0.2052, "step": 39336 }, { "epoch": 2.923597175771089, "grad_norm": 2.350417812105948, "learning_rate": 3.399832056596664e-08, "loss": 0.2417, "step": 39337 }, { "epoch": 2.9236714975845413, "grad_norm": 2.5811525848881316, "learning_rate": 3.393224555939156e-08, "loss": 0.2379, "step": 39338 }, { "epoch": 2.9237458193979933, "grad_norm": 2.233367864031029, "learning_rate": 3.386623471384409e-08, "loss": 0.3374, "step": 39339 }, { "epoch": 2.9238201412114453, "grad_norm": 2.1420819299481364, "learning_rate": 3.380028802974833e-08, "loss": 0.1803, "step": 39340 }, { "epoch": 2.9238944630248977, "grad_norm": 2.4560868272098237, "learning_rate": 3.3734405507529486e-08, "loss": 0.2459, "step": 39341 }, { "epoch": 2.92396878483835, "grad_norm": 1.9504006022283396, "learning_rate": 3.3668587147611677e-08, "loss": 0.2144, "step": 39342 }, { "epoch": 2.9240431066518022, "grad_norm": 1.967680527281792, "learning_rate": 3.3602832950417884e-08, "loss": 0.2322, "step": 39343 }, { "epoch": 2.9241174284652547, "grad_norm": 2.5002595718863683, "learning_rate": 3.3537142916372226e-08, "loss": 0.3145, "step": 39344 }, { "epoch": 2.9241917502787067, "grad_norm": 2.248195597244316, "learning_rate": 3.347151704589768e-08, "loss": 0.2907, "step": 39345 }, { "epoch": 2.924266072092159, "grad_norm": 1.7084132808161223, "learning_rate": 3.340595533941504e-08, "loss": 0.1833, "step": 39346 }, { "epoch": 2.924340393905611, "grad_norm": 1.9370223771360051, "learning_rate": 3.33404577973484e-08, "loss": 0.1821, "step": 39347 }, { "epoch": 2.9244147157190636, "grad_norm": 2.4492816447684196, "learning_rate": 3.327502442011965e-08, "loss": 0.2661, "step": 39348 }, { "epoch": 2.9244890375325157, "grad_norm": 2.0631220295935826, "learning_rate": 3.320965520814845e-08, "loss": 0.1698, "step": 39349 }, { "epoch": 2.924563359345968, "grad_norm": 2.1122411448101013, "learning_rate": 3.314435016185669e-08, "loss": 0.2616, "step": 39350 }, { "epoch": 2.92463768115942, "grad_norm": 2.5248281924471945, "learning_rate": 3.3079109281664024e-08, "loss": 0.2978, "step": 39351 }, { "epoch": 2.9247120029728726, "grad_norm": 2.3561960878754684, "learning_rate": 3.3013932567991236e-08, "loss": 0.2946, "step": 39352 }, { "epoch": 2.924786324786325, "grad_norm": 3.0768856954877264, "learning_rate": 3.2948820021256876e-08, "loss": 0.3638, "step": 39353 }, { "epoch": 2.924860646599777, "grad_norm": 2.8048946085278317, "learning_rate": 3.288377164188172e-08, "loss": 0.365, "step": 39354 }, { "epoch": 2.924934968413229, "grad_norm": 1.911234323915924, "learning_rate": 3.2818787430283214e-08, "loss": 0.2332, "step": 39355 }, { "epoch": 2.9250092902266815, "grad_norm": 2.665174708008457, "learning_rate": 3.275386738688102e-08, "loss": 0.2968, "step": 39356 }, { "epoch": 2.925083612040134, "grad_norm": 1.8433878042231908, "learning_rate": 3.268901151209147e-08, "loss": 0.2606, "step": 39357 }, { "epoch": 2.925157933853586, "grad_norm": 2.1657305864489547, "learning_rate": 3.262421980633312e-08, "loss": 0.2502, "step": 39358 }, { "epoch": 2.925232255667038, "grad_norm": 2.295487900994625, "learning_rate": 3.255949227002231e-08, "loss": 0.2546, "step": 39359 }, { "epoch": 2.9253065774804905, "grad_norm": 2.562183038746979, "learning_rate": 3.249482890357647e-08, "loss": 0.2683, "step": 39360 }, { "epoch": 2.925380899293943, "grad_norm": 1.7639888452104222, "learning_rate": 3.2430229707411944e-08, "loss": 0.1706, "step": 39361 }, { "epoch": 2.925455221107395, "grad_norm": 2.443937485523205, "learning_rate": 3.2365694681945056e-08, "loss": 0.2385, "step": 39362 }, { "epoch": 2.925529542920847, "grad_norm": 2.584652702912999, "learning_rate": 3.2301223827589935e-08, "loss": 0.2927, "step": 39363 }, { "epoch": 2.9256038647342995, "grad_norm": 3.187223871775656, "learning_rate": 3.22368171447629e-08, "loss": 0.2358, "step": 39364 }, { "epoch": 2.925678186547752, "grad_norm": 2.1228497313698234, "learning_rate": 3.217247463387696e-08, "loss": 0.28, "step": 39365 }, { "epoch": 2.925752508361204, "grad_norm": 2.4839752473388743, "learning_rate": 3.2108196295348446e-08, "loss": 0.2378, "step": 39366 }, { "epoch": 2.9258268301746564, "grad_norm": 2.332089684453966, "learning_rate": 3.204398212958926e-08, "loss": 0.2587, "step": 39367 }, { "epoch": 2.9259011519881084, "grad_norm": 2.5282837379221537, "learning_rate": 3.197983213701461e-08, "loss": 0.3137, "step": 39368 }, { "epoch": 2.925975473801561, "grad_norm": 2.5161466073524923, "learning_rate": 3.19157463180364e-08, "loss": 0.2572, "step": 39369 }, { "epoch": 2.926049795615013, "grad_norm": 2.422510040517772, "learning_rate": 3.185172467306763e-08, "loss": 0.3141, "step": 39370 }, { "epoch": 2.9261241174284653, "grad_norm": 2.5808640164503784, "learning_rate": 3.178776720251908e-08, "loss": 0.2896, "step": 39371 }, { "epoch": 2.9261984392419174, "grad_norm": 2.8985708605196288, "learning_rate": 3.172387390680487e-08, "loss": 0.3273, "step": 39372 }, { "epoch": 2.92627276105537, "grad_norm": 2.104781474007724, "learning_rate": 3.1660044786334667e-08, "loss": 0.2001, "step": 39373 }, { "epoch": 2.926347082868822, "grad_norm": 2.1920781604481308, "learning_rate": 3.159627984152036e-08, "loss": 0.2308, "step": 39374 }, { "epoch": 2.9264214046822743, "grad_norm": 2.4317073581421806, "learning_rate": 3.153257907277163e-08, "loss": 0.2891, "step": 39375 }, { "epoch": 2.9264957264957268, "grad_norm": 1.9428518144995077, "learning_rate": 3.1468942480498144e-08, "loss": 0.2007, "step": 39376 }, { "epoch": 2.9265700483091788, "grad_norm": 3.0002398762354625, "learning_rate": 3.1405370065111794e-08, "loss": 0.3641, "step": 39377 }, { "epoch": 2.926644370122631, "grad_norm": 2.0738685034912607, "learning_rate": 3.1341861827020036e-08, "loss": 0.1734, "step": 39378 }, { "epoch": 2.9267186919360832, "grad_norm": 2.4565927072119083, "learning_rate": 3.127841776663143e-08, "loss": 0.3414, "step": 39379 }, { "epoch": 2.9267930137495357, "grad_norm": 2.7459823270094144, "learning_rate": 3.121503788435565e-08, "loss": 0.3044, "step": 39380 }, { "epoch": 2.9268673355629877, "grad_norm": 1.7545047616510572, "learning_rate": 3.115172218060014e-08, "loss": 0.2241, "step": 39381 }, { "epoch": 2.9269416573764397, "grad_norm": 2.2771976838854004, "learning_rate": 3.108847065577236e-08, "loss": 0.2497, "step": 39382 }, { "epoch": 2.927015979189892, "grad_norm": 2.932092903384951, "learning_rate": 3.1025283310279765e-08, "loss": 0.3902, "step": 39383 }, { "epoch": 2.9270903010033447, "grad_norm": 1.9108377869919972, "learning_rate": 3.0962160144529795e-08, "loss": 0.2354, "step": 39384 }, { "epoch": 2.9271646228167967, "grad_norm": 2.8850010563797963, "learning_rate": 3.0899101158927693e-08, "loss": 0.339, "step": 39385 }, { "epoch": 2.927238944630249, "grad_norm": 2.7034419171365482, "learning_rate": 3.083610635387979e-08, "loss": 0.3367, "step": 39386 }, { "epoch": 2.927313266443701, "grad_norm": 3.431392916276663, "learning_rate": 3.0773175729791325e-08, "loss": 0.3205, "step": 39387 }, { "epoch": 2.9273875882571536, "grad_norm": 2.1814499534355103, "learning_rate": 3.071030928706864e-08, "loss": 0.2221, "step": 39388 }, { "epoch": 2.9274619100706056, "grad_norm": 2.241664626739926, "learning_rate": 3.0647507026115855e-08, "loss": 0.229, "step": 39389 }, { "epoch": 2.927536231884058, "grad_norm": 2.066689970826122, "learning_rate": 3.0584768947337084e-08, "loss": 0.2282, "step": 39390 }, { "epoch": 2.92761055369751, "grad_norm": 2.0539261008807443, "learning_rate": 3.052209505113535e-08, "loss": 0.2213, "step": 39391 }, { "epoch": 2.9276848755109626, "grad_norm": 2.3747582860535363, "learning_rate": 3.0459485337915874e-08, "loss": 0.2546, "step": 39392 }, { "epoch": 2.9277591973244146, "grad_norm": 2.5825712843988775, "learning_rate": 3.039693980808167e-08, "loss": 0.3033, "step": 39393 }, { "epoch": 2.927833519137867, "grad_norm": 3.425275739770417, "learning_rate": 3.0334458462033536e-08, "loss": 0.2745, "step": 39394 }, { "epoch": 2.927907840951319, "grad_norm": 2.5455231189943848, "learning_rate": 3.0272041300175584e-08, "loss": 0.2906, "step": 39395 }, { "epoch": 2.9279821627647715, "grad_norm": 3.3488664585670858, "learning_rate": 3.0209688322908604e-08, "loss": 0.3738, "step": 39396 }, { "epoch": 2.9280564845782235, "grad_norm": 2.580756885307338, "learning_rate": 3.0147399530633395e-08, "loss": 0.2791, "step": 39397 }, { "epoch": 2.928130806391676, "grad_norm": 1.9141139979165458, "learning_rate": 3.008517492375296e-08, "loss": 0.2464, "step": 39398 }, { "epoch": 2.9282051282051285, "grad_norm": 2.285011753182299, "learning_rate": 3.002301450266698e-08, "loss": 0.3367, "step": 39399 }, { "epoch": 2.9282794500185805, "grad_norm": 2.2676819943015616, "learning_rate": 2.996091826777514e-08, "loss": 0.2308, "step": 39400 }, { "epoch": 2.9283537718320325, "grad_norm": 2.0649244947713163, "learning_rate": 2.9898886219478226e-08, "loss": 0.2206, "step": 39401 }, { "epoch": 2.928428093645485, "grad_norm": 1.8938207608055386, "learning_rate": 2.9836918358174815e-08, "loss": 0.2258, "step": 39402 }, { "epoch": 2.9285024154589374, "grad_norm": 2.340107896971518, "learning_rate": 2.977501468426347e-08, "loss": 0.3499, "step": 39403 }, { "epoch": 2.9285767372723894, "grad_norm": 1.993056674950745, "learning_rate": 2.9713175198142764e-08, "loss": 0.3, "step": 39404 }, { "epoch": 2.9286510590858414, "grad_norm": 2.8661011706782777, "learning_rate": 2.9651399900212376e-08, "loss": 0.2855, "step": 39405 }, { "epoch": 2.928725380899294, "grad_norm": 2.5102473371861347, "learning_rate": 2.9589688790868653e-08, "loss": 0.3071, "step": 39406 }, { "epoch": 2.9287997027127464, "grad_norm": 2.5411121341512213, "learning_rate": 2.9528041870509062e-08, "loss": 0.2924, "step": 39407 }, { "epoch": 2.9288740245261984, "grad_norm": 2.469845745786476, "learning_rate": 2.9466459139531057e-08, "loss": 0.3221, "step": 39408 }, { "epoch": 2.928948346339651, "grad_norm": 2.448429977033252, "learning_rate": 2.9404940598329878e-08, "loss": 0.2907, "step": 39409 }, { "epoch": 2.929022668153103, "grad_norm": 2.33206468154336, "learning_rate": 2.9343486247302988e-08, "loss": 0.2558, "step": 39410 }, { "epoch": 2.9290969899665553, "grad_norm": 2.217087022879225, "learning_rate": 2.9282096086844513e-08, "loss": 0.2533, "step": 39411 }, { "epoch": 2.9291713117800073, "grad_norm": 3.832163254666845, "learning_rate": 2.9220770117351915e-08, "loss": 0.3708, "step": 39412 }, { "epoch": 2.92924563359346, "grad_norm": 3.0263832236035664, "learning_rate": 2.91595083392171e-08, "loss": 0.3401, "step": 39413 }, { "epoch": 2.929319955406912, "grad_norm": 2.001479294714865, "learning_rate": 2.9098310752837533e-08, "loss": 0.2131, "step": 39414 }, { "epoch": 2.9293942772203643, "grad_norm": 2.633110378741476, "learning_rate": 2.9037177358605117e-08, "loss": 0.2636, "step": 39415 }, { "epoch": 2.9294685990338163, "grad_norm": 2.5619019317148117, "learning_rate": 2.8976108156912876e-08, "loss": 0.3006, "step": 39416 }, { "epoch": 2.9295429208472687, "grad_norm": 2.6019566729484183, "learning_rate": 2.8915103148157154e-08, "loss": 0.2924, "step": 39417 }, { "epoch": 2.9296172426607208, "grad_norm": 2.189581664013287, "learning_rate": 2.885416233272764e-08, "loss": 0.218, "step": 39418 }, { "epoch": 2.9296915644741732, "grad_norm": 2.6007752117106215, "learning_rate": 2.879328571101736e-08, "loss": 0.2328, "step": 39419 }, { "epoch": 2.9297658862876252, "grad_norm": 2.0075392697589223, "learning_rate": 2.8732473283418217e-08, "loss": 0.2224, "step": 39420 }, { "epoch": 2.9298402081010777, "grad_norm": 2.93676235559274, "learning_rate": 2.867172505032212e-08, "loss": 0.3345, "step": 39421 }, { "epoch": 2.92991452991453, "grad_norm": 2.7754203877089756, "learning_rate": 2.8611041012120976e-08, "loss": 0.2924, "step": 39422 }, { "epoch": 2.929988851727982, "grad_norm": 2.6980373527511685, "learning_rate": 2.855042116920448e-08, "loss": 0.3412, "step": 39423 }, { "epoch": 2.930063173541434, "grad_norm": 2.5566533179816617, "learning_rate": 2.848986552196231e-08, "loss": 0.2904, "step": 39424 }, { "epoch": 2.9301374953548867, "grad_norm": 2.845810164126964, "learning_rate": 2.8429374070786387e-08, "loss": 0.2317, "step": 39425 }, { "epoch": 2.930211817168339, "grad_norm": 2.264370790811007, "learning_rate": 2.836894681606306e-08, "loss": 0.2127, "step": 39426 }, { "epoch": 2.930286138981791, "grad_norm": 2.1491803201739863, "learning_rate": 2.830858375818424e-08, "loss": 0.2197, "step": 39427 }, { "epoch": 2.930360460795243, "grad_norm": 2.4259829744411463, "learning_rate": 2.824828489753628e-08, "loss": 0.2962, "step": 39428 }, { "epoch": 2.9304347826086956, "grad_norm": 2.3265580545891504, "learning_rate": 2.818805023450888e-08, "loss": 0.2257, "step": 39429 }, { "epoch": 2.930509104422148, "grad_norm": 2.5483954361439243, "learning_rate": 2.8127879769489496e-08, "loss": 0.3235, "step": 39430 }, { "epoch": 2.9305834262356, "grad_norm": 2.5678149694244308, "learning_rate": 2.8067773502865604e-08, "loss": 0.2768, "step": 39431 }, { "epoch": 2.9306577480490525, "grad_norm": 2.7136534356168287, "learning_rate": 2.8007731435023554e-08, "loss": 0.3301, "step": 39432 }, { "epoch": 2.9307320698625046, "grad_norm": 2.7059570120846983, "learning_rate": 2.7947753566350823e-08, "loss": 0.3528, "step": 39433 }, { "epoch": 2.930806391675957, "grad_norm": 2.474630292624732, "learning_rate": 2.788783989723265e-08, "loss": 0.2171, "step": 39434 }, { "epoch": 2.930880713489409, "grad_norm": 2.4449739060405014, "learning_rate": 2.7827990428055395e-08, "loss": 0.2767, "step": 39435 }, { "epoch": 2.9309550353028615, "grad_norm": 2.6526114510150913, "learning_rate": 2.776820515920431e-08, "loss": 0.3175, "step": 39436 }, { "epoch": 2.9310293571163135, "grad_norm": 2.713482079655595, "learning_rate": 2.7708484091063524e-08, "loss": 0.3325, "step": 39437 }, { "epoch": 2.931103678929766, "grad_norm": 2.2930399240185126, "learning_rate": 2.7648827224018294e-08, "loss": 0.2825, "step": 39438 }, { "epoch": 2.931178000743218, "grad_norm": 2.3940195726402616, "learning_rate": 2.7589234558452748e-08, "loss": 0.3122, "step": 39439 }, { "epoch": 2.9312523225566705, "grad_norm": 3.9869605781274537, "learning_rate": 2.752970609474992e-08, "loss": 0.2918, "step": 39440 }, { "epoch": 2.9313266443701225, "grad_norm": 2.7441367540864037, "learning_rate": 2.7470241833293942e-08, "loss": 0.2814, "step": 39441 }, { "epoch": 2.931400966183575, "grad_norm": 2.5061355639400267, "learning_rate": 2.7410841774466733e-08, "loss": 0.2737, "step": 39442 }, { "epoch": 2.931475287997027, "grad_norm": 2.611863471825101, "learning_rate": 2.7351505918651323e-08, "loss": 0.2672, "step": 39443 }, { "epoch": 2.9315496098104794, "grad_norm": 1.9025926099394947, "learning_rate": 2.7292234266228513e-08, "loss": 0.2044, "step": 39444 }, { "epoch": 2.931623931623932, "grad_norm": 2.36510723699713, "learning_rate": 2.7233026817582444e-08, "loss": 0.2845, "step": 39445 }, { "epoch": 2.931698253437384, "grad_norm": 2.4384326621787613, "learning_rate": 2.7173883573091697e-08, "loss": 0.3055, "step": 39446 }, { "epoch": 2.931772575250836, "grad_norm": 2.55907477044483, "learning_rate": 2.7114804533138196e-08, "loss": 0.2504, "step": 39447 }, { "epoch": 2.9318468970642884, "grad_norm": 2.4286609626323794, "learning_rate": 2.7055789698101632e-08, "loss": 0.2685, "step": 39448 }, { "epoch": 2.931921218877741, "grad_norm": 2.250958570117343, "learning_rate": 2.6996839068363922e-08, "loss": 0.2287, "step": 39449 }, { "epoch": 2.931995540691193, "grad_norm": 2.2176315762346968, "learning_rate": 2.6937952644301435e-08, "loss": 0.2432, "step": 39450 }, { "epoch": 2.932069862504645, "grad_norm": 2.9078264077108815, "learning_rate": 2.6879130426296085e-08, "loss": 0.3405, "step": 39451 }, { "epoch": 2.9321441843180973, "grad_norm": 2.2089193013612918, "learning_rate": 2.682037241472424e-08, "loss": 0.2633, "step": 39452 }, { "epoch": 2.9322185061315498, "grad_norm": 2.375712306365319, "learning_rate": 2.67616786099667e-08, "loss": 0.2521, "step": 39453 }, { "epoch": 2.932292827945002, "grad_norm": 2.345215007190657, "learning_rate": 2.670304901239873e-08, "loss": 0.283, "step": 39454 }, { "epoch": 2.9323671497584543, "grad_norm": 2.106072242697811, "learning_rate": 2.664448362240002e-08, "loss": 0.195, "step": 39455 }, { "epoch": 2.9324414715719063, "grad_norm": 2.1012582999377973, "learning_rate": 2.6585982440346937e-08, "loss": 0.1911, "step": 39456 }, { "epoch": 2.9325157933853587, "grad_norm": 2.101578449852061, "learning_rate": 2.6527545466614736e-08, "loss": 0.2791, "step": 39457 }, { "epoch": 2.9325901151988107, "grad_norm": 2.336297572348635, "learning_rate": 2.6469172701580892e-08, "loss": 0.2022, "step": 39458 }, { "epoch": 2.932664437012263, "grad_norm": 2.489261859276839, "learning_rate": 2.6410864145621772e-08, "loss": 0.2172, "step": 39459 }, { "epoch": 2.932738758825715, "grad_norm": 3.3693814729971767, "learning_rate": 2.6352619799111524e-08, "loss": 0.2234, "step": 39460 }, { "epoch": 2.9328130806391677, "grad_norm": 2.5210746581190584, "learning_rate": 2.6294439662425397e-08, "loss": 0.2413, "step": 39461 }, { "epoch": 2.9328874024526197, "grad_norm": 2.754380780226285, "learning_rate": 2.623632373593865e-08, "loss": 0.2801, "step": 39462 }, { "epoch": 2.932961724266072, "grad_norm": 2.5565629836607484, "learning_rate": 2.617827202002432e-08, "loss": 0.2786, "step": 39463 }, { "epoch": 2.9330360460795246, "grad_norm": 2.3185066013753355, "learning_rate": 2.6120284515056548e-08, "loss": 0.2789, "step": 39464 }, { "epoch": 2.9331103678929766, "grad_norm": 2.1301873817829837, "learning_rate": 2.6062361221409484e-08, "loss": 0.2474, "step": 39465 }, { "epoch": 2.9331846897064286, "grad_norm": 2.325534458855201, "learning_rate": 2.6004502139456157e-08, "loss": 0.2581, "step": 39466 }, { "epoch": 2.933259011519881, "grad_norm": 2.2597395041154926, "learning_rate": 2.5946707269567385e-08, "loss": 0.3167, "step": 39467 }, { "epoch": 2.9333333333333336, "grad_norm": 2.766911457245396, "learning_rate": 2.58889766121162e-08, "loss": 0.2772, "step": 39468 }, { "epoch": 2.9334076551467856, "grad_norm": 2.5824340239730055, "learning_rate": 2.583131016747453e-08, "loss": 0.3205, "step": 39469 }, { "epoch": 2.9334819769602376, "grad_norm": 2.066901508111732, "learning_rate": 2.577370793601319e-08, "loss": 0.2314, "step": 39470 }, { "epoch": 2.93355629877369, "grad_norm": 2.290364932422956, "learning_rate": 2.5716169918102996e-08, "loss": 0.251, "step": 39471 }, { "epoch": 2.9336306205871425, "grad_norm": 2.7408032474389596, "learning_rate": 2.5658696114114758e-08, "loss": 0.2715, "step": 39472 }, { "epoch": 2.9337049424005945, "grad_norm": 2.172041116023759, "learning_rate": 2.5601286524419287e-08, "loss": 0.3014, "step": 39473 }, { "epoch": 2.9337792642140466, "grad_norm": 2.750266408514135, "learning_rate": 2.554394114938408e-08, "loss": 0.2042, "step": 39474 }, { "epoch": 2.933853586027499, "grad_norm": 2.2898455053337203, "learning_rate": 2.5486659989379937e-08, "loss": 0.2941, "step": 39475 }, { "epoch": 2.9339279078409515, "grad_norm": 2.570300985779667, "learning_rate": 2.5429443044774348e-08, "loss": 0.2634, "step": 39476 }, { "epoch": 2.9340022296544035, "grad_norm": 2.1246700281464173, "learning_rate": 2.537229031593702e-08, "loss": 0.2775, "step": 39477 }, { "epoch": 2.934076551467856, "grad_norm": 2.30468404619652, "learning_rate": 2.5315201803235434e-08, "loss": 0.2909, "step": 39478 }, { "epoch": 2.934150873281308, "grad_norm": 2.093026365275838, "learning_rate": 2.5258177507037073e-08, "loss": 0.2437, "step": 39479 }, { "epoch": 2.9342251950947604, "grad_norm": 2.142899473591508, "learning_rate": 2.5201217427708312e-08, "loss": 0.2673, "step": 39480 }, { "epoch": 2.9342995169082124, "grad_norm": 2.2916123406760067, "learning_rate": 2.5144321565617745e-08, "loss": 0.2383, "step": 39481 }, { "epoch": 2.934373838721665, "grad_norm": 2.018343220302557, "learning_rate": 2.5087489921129525e-08, "loss": 0.2231, "step": 39482 }, { "epoch": 2.934448160535117, "grad_norm": 2.238938324830848, "learning_rate": 2.5030722494611137e-08, "loss": 0.2614, "step": 39483 }, { "epoch": 2.9345224823485694, "grad_norm": 2.707582254184144, "learning_rate": 2.497401928642673e-08, "loss": 0.3623, "step": 39484 }, { "epoch": 2.9345968041620214, "grad_norm": 3.317998855492273, "learning_rate": 2.4917380296941574e-08, "loss": 0.3217, "step": 39485 }, { "epoch": 2.934671125975474, "grad_norm": 2.479886252695728, "learning_rate": 2.4860805526522037e-08, "loss": 0.2516, "step": 39486 }, { "epoch": 2.9347454477889263, "grad_norm": 2.2698512690533543, "learning_rate": 2.480429497553005e-08, "loss": 0.2994, "step": 39487 }, { "epoch": 2.9348197696023783, "grad_norm": 2.11740445650133, "learning_rate": 2.4747848644330884e-08, "loss": 0.2268, "step": 39488 }, { "epoch": 2.9348940914158304, "grad_norm": 2.8644960940321362, "learning_rate": 2.4691466533287578e-08, "loss": 0.3743, "step": 39489 }, { "epoch": 2.934968413229283, "grad_norm": 2.404303411512265, "learning_rate": 2.4635148642763173e-08, "loss": 0.3006, "step": 39490 }, { "epoch": 2.9350427350427353, "grad_norm": 1.7948431713366457, "learning_rate": 2.4578894973119606e-08, "loss": 0.2023, "step": 39491 }, { "epoch": 2.9351170568561873, "grad_norm": 2.157943134603117, "learning_rate": 2.4522705524719913e-08, "loss": 0.2441, "step": 39492 }, { "epoch": 2.9351913786696393, "grad_norm": 1.6836473351004577, "learning_rate": 2.4466580297926033e-08, "loss": 0.1851, "step": 39493 }, { "epoch": 2.9352657004830918, "grad_norm": 1.7371184981978391, "learning_rate": 2.4410519293097677e-08, "loss": 0.1813, "step": 39494 }, { "epoch": 2.9353400222965442, "grad_norm": 1.9027406654880192, "learning_rate": 2.435452251059678e-08, "loss": 0.2278, "step": 39495 }, { "epoch": 2.9354143441099962, "grad_norm": 2.1303775601628976, "learning_rate": 2.4298589950784156e-08, "loss": 0.2698, "step": 39496 }, { "epoch": 2.9354886659234483, "grad_norm": 2.4240174006387964, "learning_rate": 2.4242721614019525e-08, "loss": 0.3067, "step": 39497 }, { "epoch": 2.9355629877369007, "grad_norm": 1.5204053347265312, "learning_rate": 2.41869175006626e-08, "loss": 0.1721, "step": 39498 }, { "epoch": 2.935637309550353, "grad_norm": 2.083901750644646, "learning_rate": 2.4131177611073086e-08, "loss": 0.2219, "step": 39499 }, { "epoch": 2.935711631363805, "grad_norm": 1.632596365284614, "learning_rate": 2.4075501945609593e-08, "loss": 0.2351, "step": 39500 }, { "epoch": 2.9357859531772577, "grad_norm": 2.0740121036455124, "learning_rate": 2.4019890504630715e-08, "loss": 0.2321, "step": 39501 }, { "epoch": 2.9358602749907097, "grad_norm": 2.05890080570226, "learning_rate": 2.3964343288492843e-08, "loss": 0.2661, "step": 39502 }, { "epoch": 2.935934596804162, "grad_norm": 1.8938495730475482, "learning_rate": 2.390886029755568e-08, "loss": 0.2083, "step": 39503 }, { "epoch": 2.936008918617614, "grad_norm": 2.4225275331401206, "learning_rate": 2.3853441532175614e-08, "loss": 0.2816, "step": 39504 }, { "epoch": 2.9360832404310666, "grad_norm": 2.1311729449195074, "learning_rate": 2.3798086992709025e-08, "loss": 0.2662, "step": 39505 }, { "epoch": 2.9361575622445186, "grad_norm": 2.3329014912429824, "learning_rate": 2.3742796679513402e-08, "loss": 0.2614, "step": 39506 }, { "epoch": 2.936231884057971, "grad_norm": 2.9808450884224467, "learning_rate": 2.368757059294291e-08, "loss": 0.3504, "step": 39507 }, { "epoch": 2.936306205871423, "grad_norm": 1.7509055502642805, "learning_rate": 2.3632408733355038e-08, "loss": 0.2195, "step": 39508 }, { "epoch": 2.9363805276848756, "grad_norm": 1.7447377367939254, "learning_rate": 2.357731110110395e-08, "loss": 0.2132, "step": 39509 }, { "epoch": 2.936454849498328, "grad_norm": 3.596177094507878, "learning_rate": 2.3522277696544914e-08, "loss": 0.3727, "step": 39510 }, { "epoch": 2.93652917131178, "grad_norm": 1.933353066967744, "learning_rate": 2.3467308520030984e-08, "loss": 0.2214, "step": 39511 }, { "epoch": 2.936603493125232, "grad_norm": 2.22675954617472, "learning_rate": 2.341240357191743e-08, "loss": 0.2354, "step": 39512 }, { "epoch": 2.9366778149386845, "grad_norm": 2.627495919513236, "learning_rate": 2.3357562852557302e-08, "loss": 0.3044, "step": 39513 }, { "epoch": 2.936752136752137, "grad_norm": 2.188308032530539, "learning_rate": 2.3302786362303654e-08, "loss": 0.2746, "step": 39514 }, { "epoch": 2.936826458565589, "grad_norm": 2.455991051163688, "learning_rate": 2.3248074101508423e-08, "loss": 0.2374, "step": 39515 }, { "epoch": 2.936900780379041, "grad_norm": 2.1933224031133047, "learning_rate": 2.319342607052466e-08, "loss": 0.3075, "step": 39516 }, { "epoch": 2.9369751021924935, "grad_norm": 1.660752374067886, "learning_rate": 2.313884226970431e-08, "loss": 0.1742, "step": 39517 }, { "epoch": 2.937049424005946, "grad_norm": 2.2178988240548674, "learning_rate": 2.30843226993982e-08, "loss": 0.2744, "step": 39518 }, { "epoch": 2.937123745819398, "grad_norm": 2.4478294911662375, "learning_rate": 2.302986735995716e-08, "loss": 0.2527, "step": 39519 }, { "epoch": 2.9371980676328504, "grad_norm": 1.9728026672504468, "learning_rate": 2.297547625173202e-08, "loss": 0.2484, "step": 39520 }, { "epoch": 2.9372723894463024, "grad_norm": 2.1176089968279803, "learning_rate": 2.2921149375073616e-08, "loss": 0.2606, "step": 39521 }, { "epoch": 2.937346711259755, "grad_norm": 2.6193280656080113, "learning_rate": 2.2866886730331662e-08, "loss": 0.261, "step": 39522 }, { "epoch": 2.937421033073207, "grad_norm": 1.9746265067740978, "learning_rate": 2.281268831785366e-08, "loss": 0.2215, "step": 39523 }, { "epoch": 2.9374953548866594, "grad_norm": 1.9421532346298613, "learning_rate": 2.2758554137991553e-08, "loss": 0.2049, "step": 39524 }, { "epoch": 2.9375696767001114, "grad_norm": 1.8993856750982423, "learning_rate": 2.2704484191090614e-08, "loss": 0.2234, "step": 39525 }, { "epoch": 2.937643998513564, "grad_norm": 2.1249343090938315, "learning_rate": 2.265047847750168e-08, "loss": 0.2171, "step": 39526 }, { "epoch": 2.937718320327016, "grad_norm": 2.850874009643008, "learning_rate": 2.2596536997571138e-08, "loss": 0.2739, "step": 39527 }, { "epoch": 2.9377926421404683, "grad_norm": 2.469694192488893, "learning_rate": 2.254265975164538e-08, "loss": 0.2544, "step": 39528 }, { "epoch": 2.9378669639539203, "grad_norm": 2.3705235468475103, "learning_rate": 2.2488846740073012e-08, "loss": 0.2727, "step": 39529 }, { "epoch": 2.937941285767373, "grad_norm": 2.919179312359972, "learning_rate": 2.2435097963199314e-08, "loss": 0.3247, "step": 39530 }, { "epoch": 2.938015607580825, "grad_norm": 2.55901103263823, "learning_rate": 2.238141342137179e-08, "loss": 0.2595, "step": 39531 }, { "epoch": 2.9380899293942773, "grad_norm": 3.8729329045095784, "learning_rate": 2.2327793114934605e-08, "loss": 0.3277, "step": 39532 }, { "epoch": 2.9381642512077297, "grad_norm": 2.4233513792949917, "learning_rate": 2.2274237044233037e-08, "loss": 0.2882, "step": 39533 }, { "epoch": 2.9382385730211817, "grad_norm": 2.308199329305671, "learning_rate": 2.222074520961237e-08, "loss": 0.2951, "step": 39534 }, { "epoch": 2.9383128948346338, "grad_norm": 2.984825437985795, "learning_rate": 2.216731761141677e-08, "loss": 0.2908, "step": 39535 }, { "epoch": 2.938387216648086, "grad_norm": 2.2115617803958134, "learning_rate": 2.211395424999041e-08, "loss": 0.2606, "step": 39536 }, { "epoch": 2.9384615384615387, "grad_norm": 2.519907142317755, "learning_rate": 2.2060655125676344e-08, "loss": 0.2941, "step": 39537 }, { "epoch": 2.9385358602749907, "grad_norm": 2.5533049292626013, "learning_rate": 2.2007420238817635e-08, "loss": 0.2476, "step": 39538 }, { "epoch": 2.9386101820884427, "grad_norm": 2.688813517022145, "learning_rate": 2.1954249589758448e-08, "loss": 0.3227, "step": 39539 }, { "epoch": 2.938684503901895, "grad_norm": 2.513256133523086, "learning_rate": 2.1901143178839623e-08, "loss": 0.289, "step": 39540 }, { "epoch": 2.9387588257153476, "grad_norm": 2.0093305169236078, "learning_rate": 2.184810100640422e-08, "loss": 0.2661, "step": 39541 }, { "epoch": 2.9388331475287996, "grad_norm": 2.3440638962439433, "learning_rate": 2.1795123072791967e-08, "loss": 0.2512, "step": 39542 }, { "epoch": 2.938907469342252, "grad_norm": 2.139310215418732, "learning_rate": 2.1742209378345925e-08, "loss": 0.2546, "step": 39543 }, { "epoch": 2.938981791155704, "grad_norm": 2.548715595984802, "learning_rate": 2.168935992340471e-08, "loss": 0.3088, "step": 39544 }, { "epoch": 2.9390561129691566, "grad_norm": 2.4819162059346427, "learning_rate": 2.1636574708310266e-08, "loss": 0.2735, "step": 39545 }, { "epoch": 2.9391304347826086, "grad_norm": 2.1173929188052223, "learning_rate": 2.1583853733402325e-08, "loss": 0.2857, "step": 39546 }, { "epoch": 2.939204756596061, "grad_norm": 2.6348603096378955, "learning_rate": 2.153119699901951e-08, "loss": 0.3335, "step": 39547 }, { "epoch": 2.939279078409513, "grad_norm": 2.6177507065262944, "learning_rate": 2.1478604505500435e-08, "loss": 0.2445, "step": 39548 }, { "epoch": 2.9393534002229655, "grad_norm": 2.36139144680256, "learning_rate": 2.1426076253184825e-08, "loss": 0.276, "step": 39549 }, { "epoch": 2.9394277220364176, "grad_norm": 2.755814774324816, "learning_rate": 2.137361224241019e-08, "loss": 0.2324, "step": 39550 }, { "epoch": 2.93950204384987, "grad_norm": 2.1946967926875662, "learning_rate": 2.132121247351404e-08, "loss": 0.174, "step": 39551 }, { "epoch": 2.939576365663322, "grad_norm": 2.100217683772348, "learning_rate": 2.126887694683499e-08, "loss": 0.2583, "step": 39552 }, { "epoch": 2.9396506874767745, "grad_norm": 2.298192351342658, "learning_rate": 2.1216605662709443e-08, "loss": 0.2221, "step": 39553 }, { "epoch": 2.9397250092902265, "grad_norm": 2.5157499121496687, "learning_rate": 2.116439862147268e-08, "loss": 0.3197, "step": 39554 }, { "epoch": 2.939799331103679, "grad_norm": 2.1718913242921105, "learning_rate": 2.1112255823462213e-08, "loss": 0.2636, "step": 39555 }, { "epoch": 2.9398736529171314, "grad_norm": 2.1531090811109133, "learning_rate": 2.1060177269013328e-08, "loss": 0.2588, "step": 39556 }, { "epoch": 2.9399479747305834, "grad_norm": 2.7901649418179795, "learning_rate": 2.100816295846131e-08, "loss": 0.3532, "step": 39557 }, { "epoch": 2.9400222965440355, "grad_norm": 5.5621864880918634, "learning_rate": 2.0956212892140337e-08, "loss": 0.3784, "step": 39558 }, { "epoch": 2.940096618357488, "grad_norm": 2.179045862214729, "learning_rate": 2.09043270703857e-08, "loss": 0.2874, "step": 39559 }, { "epoch": 2.9401709401709404, "grad_norm": 2.056784442694257, "learning_rate": 2.085250549353157e-08, "loss": 0.2053, "step": 39560 }, { "epoch": 2.9402452619843924, "grad_norm": 2.689842283604442, "learning_rate": 2.080074816191102e-08, "loss": 0.3302, "step": 39561 }, { "epoch": 2.9403195837978444, "grad_norm": 2.2065793494819257, "learning_rate": 2.0749055075858226e-08, "loss": 0.225, "step": 39562 }, { "epoch": 2.940393905611297, "grad_norm": 3.5794709315285473, "learning_rate": 2.0697426235704034e-08, "loss": 0.273, "step": 39563 }, { "epoch": 2.9404682274247493, "grad_norm": 2.9970164652710203, "learning_rate": 2.0645861641782617e-08, "loss": 0.2965, "step": 39564 }, { "epoch": 2.9405425492382014, "grad_norm": 2.067844168255939, "learning_rate": 2.0594361294425934e-08, "loss": 0.2641, "step": 39565 }, { "epoch": 2.940616871051654, "grad_norm": 2.4388519622349665, "learning_rate": 2.0542925193963726e-08, "loss": 0.2654, "step": 39566 }, { "epoch": 2.940691192865106, "grad_norm": 3.1524182926489046, "learning_rate": 2.0491553340729054e-08, "loss": 0.3242, "step": 39567 }, { "epoch": 2.9407655146785583, "grad_norm": 2.742142769079851, "learning_rate": 2.0440245735051656e-08, "loss": 0.3233, "step": 39568 }, { "epoch": 2.9408398364920103, "grad_norm": 2.407184382274867, "learning_rate": 2.038900237726238e-08, "loss": 0.339, "step": 39569 }, { "epoch": 2.9409141583054628, "grad_norm": 2.1438471216649777, "learning_rate": 2.033782326769096e-08, "loss": 0.2281, "step": 39570 }, { "epoch": 2.940988480118915, "grad_norm": 2.1958363501904916, "learning_rate": 2.0286708406667133e-08, "loss": 0.2497, "step": 39571 }, { "epoch": 2.9410628019323672, "grad_norm": 2.387792961088862, "learning_rate": 2.0235657794519525e-08, "loss": 0.2893, "step": 39572 }, { "epoch": 2.9411371237458193, "grad_norm": 2.2195848099805158, "learning_rate": 2.0184671431575654e-08, "loss": 0.2073, "step": 39573 }, { "epoch": 2.9412114455592717, "grad_norm": 2.8111674987038495, "learning_rate": 2.0133749318166363e-08, "loss": 0.2361, "step": 39574 }, { "epoch": 2.9412857673727237, "grad_norm": 2.593977483849959, "learning_rate": 2.0082891454618058e-08, "loss": 0.2733, "step": 39575 }, { "epoch": 2.941360089186176, "grad_norm": 3.0794205092937976, "learning_rate": 2.003209784125826e-08, "loss": 0.346, "step": 39576 }, { "epoch": 2.941434410999628, "grad_norm": 2.499611185508483, "learning_rate": 1.9981368478413364e-08, "loss": 0.3094, "step": 39577 }, { "epoch": 2.9415087328130807, "grad_norm": 2.213238016040995, "learning_rate": 1.9930703366410898e-08, "loss": 0.2578, "step": 39578 }, { "epoch": 2.941583054626533, "grad_norm": 2.124119980914511, "learning_rate": 1.988010250557726e-08, "loss": 0.2274, "step": 39579 }, { "epoch": 2.941657376439985, "grad_norm": 2.433656334650459, "learning_rate": 1.9829565896237746e-08, "loss": 0.274, "step": 39580 }, { "epoch": 2.941731698253437, "grad_norm": 3.0387395551206553, "learning_rate": 1.9779093538717654e-08, "loss": 0.3226, "step": 39581 }, { "epoch": 2.9418060200668896, "grad_norm": 2.3906407198815254, "learning_rate": 1.9728685433341165e-08, "loss": 0.2766, "step": 39582 }, { "epoch": 2.941880341880342, "grad_norm": 2.6151166471323637, "learning_rate": 1.967834158043358e-08, "loss": 0.3145, "step": 39583 }, { "epoch": 2.941954663693794, "grad_norm": 2.7737232842552006, "learning_rate": 1.962806198031908e-08, "loss": 0.337, "step": 39584 }, { "epoch": 2.942028985507246, "grad_norm": 2.043336864354857, "learning_rate": 1.9577846633321852e-08, "loss": 0.2198, "step": 39585 }, { "epoch": 2.9421033073206986, "grad_norm": 3.4803274676606306, "learning_rate": 1.9527695539763858e-08, "loss": 0.3987, "step": 39586 }, { "epoch": 2.942177629134151, "grad_norm": 2.3074565776177667, "learning_rate": 1.9477608699969285e-08, "loss": 0.2845, "step": 39587 }, { "epoch": 2.942251950947603, "grad_norm": 2.7107160930055776, "learning_rate": 1.94275861142601e-08, "loss": 0.3613, "step": 39588 }, { "epoch": 2.9423262727610555, "grad_norm": 2.567803756738831, "learning_rate": 1.9377627782958263e-08, "loss": 0.2763, "step": 39589 }, { "epoch": 2.9424005945745075, "grad_norm": 2.791648600755901, "learning_rate": 1.9327733706384632e-08, "loss": 0.2196, "step": 39590 }, { "epoch": 2.94247491638796, "grad_norm": 2.3068100210268967, "learning_rate": 1.9277903884861172e-08, "loss": 0.2495, "step": 39591 }, { "epoch": 2.942549238201412, "grad_norm": 2.8427289077675693, "learning_rate": 1.9228138318709844e-08, "loss": 0.2916, "step": 39592 }, { "epoch": 2.9426235600148645, "grad_norm": 3.6196531354796435, "learning_rate": 1.9178437008249285e-08, "loss": 0.2737, "step": 39593 }, { "epoch": 2.9426978818283165, "grad_norm": 3.423740165260341, "learning_rate": 1.912879995380035e-08, "loss": 0.2566, "step": 39594 }, { "epoch": 2.942772203641769, "grad_norm": 2.439576879216394, "learning_rate": 1.907922715568167e-08, "loss": 0.3073, "step": 39595 }, { "epoch": 2.942846525455221, "grad_norm": 2.5454415461863844, "learning_rate": 1.902971861421299e-08, "loss": 0.2622, "step": 39596 }, { "epoch": 2.9429208472686734, "grad_norm": 1.800980034270584, "learning_rate": 1.8980274329714053e-08, "loss": 0.1654, "step": 39597 }, { "epoch": 2.9429951690821254, "grad_norm": 2.79298188466345, "learning_rate": 1.893089430250128e-08, "loss": 0.3064, "step": 39598 }, { "epoch": 2.943069490895578, "grad_norm": 2.8320915024265867, "learning_rate": 1.8881578532894408e-08, "loss": 0.2834, "step": 39599 }, { "epoch": 2.94314381270903, "grad_norm": 2.466718639632458, "learning_rate": 1.883232702120874e-08, "loss": 0.2145, "step": 39600 }, { "epoch": 2.9432181345224824, "grad_norm": 2.22727718353655, "learning_rate": 1.8783139767764025e-08, "loss": 0.2035, "step": 39601 }, { "epoch": 2.943292456335935, "grad_norm": 2.5968510539827894, "learning_rate": 1.8734016772875563e-08, "loss": 0.3236, "step": 39602 }, { "epoch": 2.943366778149387, "grad_norm": 2.2900561491537026, "learning_rate": 1.8684958036858657e-08, "loss": 0.2106, "step": 39603 }, { "epoch": 2.943441099962839, "grad_norm": 2.504271534922968, "learning_rate": 1.8635963560030835e-08, "loss": 0.2433, "step": 39604 }, { "epoch": 2.9435154217762913, "grad_norm": 2.4388762148878573, "learning_rate": 1.8587033342706285e-08, "loss": 0.2789, "step": 39605 }, { "epoch": 2.943589743589744, "grad_norm": 2.2080465584682147, "learning_rate": 1.8538167385200314e-08, "loss": 0.2301, "step": 39606 }, { "epoch": 2.943664065403196, "grad_norm": 3.1062784236527086, "learning_rate": 1.848936568782822e-08, "loss": 0.3026, "step": 39607 }, { "epoch": 2.943738387216648, "grad_norm": 1.8338512642467213, "learning_rate": 1.8440628250904202e-08, "loss": 0.1987, "step": 39608 }, { "epoch": 2.9438127090301003, "grad_norm": 2.5809358388077204, "learning_rate": 1.839195507474023e-08, "loss": 0.2623, "step": 39609 }, { "epoch": 2.9438870308435527, "grad_norm": 2.8771520497320915, "learning_rate": 1.834334615965161e-08, "loss": 0.2978, "step": 39610 }, { "epoch": 2.9439613526570048, "grad_norm": 2.947109365580149, "learning_rate": 1.829480150595031e-08, "loss": 0.2182, "step": 39611 }, { "epoch": 2.944035674470457, "grad_norm": 2.2229955749418857, "learning_rate": 1.8246321113949417e-08, "loss": 0.263, "step": 39612 }, { "epoch": 2.9441099962839092, "grad_norm": 2.0144987536971946, "learning_rate": 1.81979049839609e-08, "loss": 0.2262, "step": 39613 }, { "epoch": 2.9441843180973617, "grad_norm": 2.183905644818574, "learning_rate": 1.8149553116296737e-08, "loss": 0.3174, "step": 39614 }, { "epoch": 2.9442586399108137, "grad_norm": 2.0194358477341163, "learning_rate": 1.8101265511267786e-08, "loss": 0.2219, "step": 39615 }, { "epoch": 2.944332961724266, "grad_norm": 2.067259699786697, "learning_rate": 1.80530421691838e-08, "loss": 0.2344, "step": 39616 }, { "epoch": 2.944407283537718, "grad_norm": 2.3492890698020363, "learning_rate": 1.8004883090357862e-08, "loss": 0.2486, "step": 39617 }, { "epoch": 2.9444816053511706, "grad_norm": 2.4549993178950658, "learning_rate": 1.7956788275097503e-08, "loss": 0.3163, "step": 39618 }, { "epoch": 2.9445559271646227, "grad_norm": 2.276408938646771, "learning_rate": 1.7908757723713586e-08, "loss": 0.367, "step": 39619 }, { "epoch": 2.944630248978075, "grad_norm": 2.1850282769500415, "learning_rate": 1.7860791436515868e-08, "loss": 0.2686, "step": 39620 }, { "epoch": 2.9447045707915276, "grad_norm": 2.49650478087247, "learning_rate": 1.7812889413811875e-08, "loss": 0.2882, "step": 39621 }, { "epoch": 2.9447788926049796, "grad_norm": 2.7580568901556277, "learning_rate": 1.7765051655910247e-08, "loss": 0.24, "step": 39622 }, { "epoch": 2.9448532144184316, "grad_norm": 2.710786107234988, "learning_rate": 1.7717278163119635e-08, "loss": 0.2191, "step": 39623 }, { "epoch": 2.944927536231884, "grad_norm": 3.6877631031653295, "learning_rate": 1.7669568935747562e-08, "loss": 0.3851, "step": 39624 }, { "epoch": 2.9450018580453365, "grad_norm": 2.643070624928012, "learning_rate": 1.7621923974099343e-08, "loss": 0.2806, "step": 39625 }, { "epoch": 2.9450761798587886, "grad_norm": 2.193958376491278, "learning_rate": 1.757434327848473e-08, "loss": 0.2237, "step": 39626 }, { "epoch": 2.9451505016722406, "grad_norm": 2.4051935316959354, "learning_rate": 1.7526826849207922e-08, "loss": 0.2569, "step": 39627 }, { "epoch": 2.945224823485693, "grad_norm": 2.6374351545416626, "learning_rate": 1.7479374686576456e-08, "loss": 0.2968, "step": 39628 }, { "epoch": 2.9452991452991455, "grad_norm": 2.86303503589845, "learning_rate": 1.743198679089342e-08, "loss": 0.2605, "step": 39629 }, { "epoch": 2.9453734671125975, "grad_norm": 2.3297574125715856, "learning_rate": 1.738466316246634e-08, "loss": 0.2644, "step": 39630 }, { "epoch": 2.9454477889260495, "grad_norm": 2.629155853197215, "learning_rate": 1.733740380159832e-08, "loss": 0.2421, "step": 39631 }, { "epoch": 2.945522110739502, "grad_norm": 2.6078416336618955, "learning_rate": 1.7290208708594658e-08, "loss": 0.2925, "step": 39632 }, { "epoch": 2.9455964325529544, "grad_norm": 2.429007723448842, "learning_rate": 1.7243077883758452e-08, "loss": 0.259, "step": 39633 }, { "epoch": 2.9456707543664065, "grad_norm": 2.059979307532063, "learning_rate": 1.7196011327392793e-08, "loss": 0.2304, "step": 39634 }, { "epoch": 2.945745076179859, "grad_norm": 2.3107758046850004, "learning_rate": 1.714900903980188e-08, "loss": 0.1926, "step": 39635 }, { "epoch": 2.945819397993311, "grad_norm": 2.938054967268751, "learning_rate": 1.7102071021286583e-08, "loss": 0.2713, "step": 39636 }, { "epoch": 2.9458937198067634, "grad_norm": 2.90255241117263, "learning_rate": 1.7055197272151103e-08, "loss": 0.2914, "step": 39637 }, { "epoch": 2.9459680416202154, "grad_norm": 1.96974080246207, "learning_rate": 1.7008387792696313e-08, "loss": 0.2494, "step": 39638 }, { "epoch": 2.946042363433668, "grad_norm": 2.6379846224417687, "learning_rate": 1.696164258322308e-08, "loss": 0.3415, "step": 39639 }, { "epoch": 2.94611668524712, "grad_norm": 2.5430975488547336, "learning_rate": 1.691496164403339e-08, "loss": 0.2845, "step": 39640 }, { "epoch": 2.9461910070605724, "grad_norm": 2.5342176716668994, "learning_rate": 1.6868344975427e-08, "loss": 0.2054, "step": 39641 }, { "epoch": 2.9462653288740244, "grad_norm": 2.572585835519603, "learning_rate": 1.6821792577703668e-08, "loss": 0.3866, "step": 39642 }, { "epoch": 2.946339650687477, "grad_norm": 2.7112214544324096, "learning_rate": 1.6775304451164265e-08, "loss": 0.3206, "step": 39643 }, { "epoch": 2.9464139725009293, "grad_norm": 2.681260420028606, "learning_rate": 1.6728880596107446e-08, "loss": 0.3024, "step": 39644 }, { "epoch": 2.9464882943143813, "grad_norm": 2.2739531352866647, "learning_rate": 1.6682521012831855e-08, "loss": 0.2751, "step": 39645 }, { "epoch": 2.9465626161278333, "grad_norm": 2.5636846955533565, "learning_rate": 1.6636225701636145e-08, "loss": 0.2914, "step": 39646 }, { "epoch": 2.946636937941286, "grad_norm": 2.204299020527316, "learning_rate": 1.6589994662818964e-08, "loss": 0.2741, "step": 39647 }, { "epoch": 2.9467112597547382, "grad_norm": 1.7637300883795834, "learning_rate": 1.6543827896676744e-08, "loss": 0.2388, "step": 39648 }, { "epoch": 2.9467855815681903, "grad_norm": 2.3780090405434, "learning_rate": 1.6497725403508134e-08, "loss": 0.3118, "step": 39649 }, { "epoch": 2.9468599033816423, "grad_norm": 2.512009370009433, "learning_rate": 1.6451687183608457e-08, "loss": 0.3151, "step": 39650 }, { "epoch": 2.9469342251950947, "grad_norm": 1.8665975080706885, "learning_rate": 1.6405713237275246e-08, "loss": 0.2177, "step": 39651 }, { "epoch": 2.947008547008547, "grad_norm": 2.613853062521887, "learning_rate": 1.635980356480382e-08, "loss": 0.2176, "step": 39652 }, { "epoch": 2.947082868821999, "grad_norm": 2.871004793097396, "learning_rate": 1.6313958166489507e-08, "loss": 0.2802, "step": 39653 }, { "epoch": 2.9471571906354512, "grad_norm": 2.0441876486410395, "learning_rate": 1.626817704262873e-08, "loss": 0.22, "step": 39654 }, { "epoch": 2.9472315124489037, "grad_norm": 2.6313699229002356, "learning_rate": 1.622246019351459e-08, "loss": 0.3042, "step": 39655 }, { "epoch": 2.947305834262356, "grad_norm": 1.7354819373244337, "learning_rate": 1.6176807619442402e-08, "loss": 0.207, "step": 39656 }, { "epoch": 2.947380156075808, "grad_norm": 2.2832213948024123, "learning_rate": 1.613121932070638e-08, "loss": 0.2402, "step": 39657 }, { "epoch": 2.9474544778892606, "grad_norm": 2.6726025354590934, "learning_rate": 1.6085695297599624e-08, "loss": 0.362, "step": 39658 }, { "epoch": 2.9475287997027126, "grad_norm": 3.5681842500774272, "learning_rate": 1.6040235550414118e-08, "loss": 0.3342, "step": 39659 }, { "epoch": 2.947603121516165, "grad_norm": 3.34161600495299, "learning_rate": 1.5994840079445183e-08, "loss": 0.3187, "step": 39660 }, { "epoch": 2.947677443329617, "grad_norm": 2.7583942050011494, "learning_rate": 1.594950888498148e-08, "loss": 0.3341, "step": 39661 }, { "epoch": 2.9477517651430696, "grad_norm": 2.791626187951171, "learning_rate": 1.5904241967318325e-08, "loss": 0.3115, "step": 39662 }, { "epoch": 2.9478260869565216, "grad_norm": 2.3359272833676403, "learning_rate": 1.5859039326744375e-08, "loss": 0.3089, "step": 39663 }, { "epoch": 2.947900408769974, "grad_norm": 2.9783962214630613, "learning_rate": 1.581390096355273e-08, "loss": 0.3035, "step": 39664 }, { "epoch": 2.947974730583426, "grad_norm": 2.2414883029574755, "learning_rate": 1.576882687803316e-08, "loss": 0.2528, "step": 39665 }, { "epoch": 2.9480490523968785, "grad_norm": 2.4141710631506146, "learning_rate": 1.5723817070475434e-08, "loss": 0.3367, "step": 39666 }, { "epoch": 2.948123374210331, "grad_norm": 2.0203392533232885, "learning_rate": 1.5678871541170425e-08, "loss": 0.2321, "step": 39667 }, { "epoch": 2.948197696023783, "grad_norm": 2.8039867259741644, "learning_rate": 1.5633990290406797e-08, "loss": 0.3929, "step": 39668 }, { "epoch": 2.948272017837235, "grad_norm": 2.1031174125270327, "learning_rate": 1.5589173318472097e-08, "loss": 0.2645, "step": 39669 }, { "epoch": 2.9483463396506875, "grad_norm": 2.9463426890690423, "learning_rate": 1.5544420625657196e-08, "loss": 0.2996, "step": 39670 }, { "epoch": 2.94842066146414, "grad_norm": 2.194060213248093, "learning_rate": 1.5499732212249652e-08, "loss": 0.2373, "step": 39671 }, { "epoch": 2.948494983277592, "grad_norm": 2.3020740559030353, "learning_rate": 1.5455108078537007e-08, "loss": 0.2558, "step": 39672 }, { "epoch": 2.948569305091044, "grad_norm": 2.279609056114108, "learning_rate": 1.541054822480459e-08, "loss": 0.2704, "step": 39673 }, { "epoch": 2.9486436269044964, "grad_norm": 2.4878024559865963, "learning_rate": 1.5366052651342168e-08, "loss": 0.2628, "step": 39674 }, { "epoch": 2.948717948717949, "grad_norm": 1.9107931448353312, "learning_rate": 1.532162135843507e-08, "loss": 0.2373, "step": 39675 }, { "epoch": 2.948792270531401, "grad_norm": 2.4776639699364558, "learning_rate": 1.527725434636973e-08, "loss": 0.292, "step": 39676 }, { "epoch": 2.9488665923448534, "grad_norm": 2.774232947570552, "learning_rate": 1.523295161543037e-08, "loss": 0.3725, "step": 39677 }, { "epoch": 2.9489409141583054, "grad_norm": 2.1885094330833494, "learning_rate": 1.5188713165903422e-08, "loss": 0.2033, "step": 39678 }, { "epoch": 2.949015235971758, "grad_norm": 2.4074462132804353, "learning_rate": 1.514453899807422e-08, "loss": 0.2769, "step": 39679 }, { "epoch": 2.94908955778521, "grad_norm": 2.757895862382995, "learning_rate": 1.5100429112225867e-08, "loss": 0.3422, "step": 39680 }, { "epoch": 2.9491638795986623, "grad_norm": 2.8057702336459167, "learning_rate": 1.505638350864258e-08, "loss": 0.3454, "step": 39681 }, { "epoch": 2.9492382014121143, "grad_norm": 2.7791853509360176, "learning_rate": 1.5012402187608578e-08, "loss": 0.2766, "step": 39682 }, { "epoch": 2.949312523225567, "grad_norm": 1.6623520309865665, "learning_rate": 1.4968485149406965e-08, "loss": 0.1607, "step": 39683 }, { "epoch": 2.949386845039019, "grad_norm": 2.9051944617791245, "learning_rate": 1.492463239431974e-08, "loss": 0.3025, "step": 39684 }, { "epoch": 2.9494611668524713, "grad_norm": 2.0142589958089983, "learning_rate": 1.4880843922628896e-08, "loss": 0.1846, "step": 39685 }, { "epoch": 2.9495354886659233, "grad_norm": 2.208078088184423, "learning_rate": 1.4837119734618654e-08, "loss": 0.206, "step": 39686 }, { "epoch": 2.9496098104793758, "grad_norm": 2.301707615867861, "learning_rate": 1.479345983056768e-08, "loss": 0.3011, "step": 39687 }, { "epoch": 2.9496841322928278, "grad_norm": 3.560667460436628, "learning_rate": 1.474986421075908e-08, "loss": 0.2462, "step": 39688 }, { "epoch": 2.9497584541062802, "grad_norm": 2.4372349812827503, "learning_rate": 1.4706332875471519e-08, "loss": 0.257, "step": 39689 }, { "epoch": 2.9498327759197327, "grad_norm": 2.315738494157775, "learning_rate": 1.4662865824988104e-08, "loss": 0.2255, "step": 39690 }, { "epoch": 2.9499070977331847, "grad_norm": 2.429557132270082, "learning_rate": 1.4619463059586393e-08, "loss": 0.2953, "step": 39691 }, { "epoch": 2.9499814195466367, "grad_norm": 2.066530003070924, "learning_rate": 1.4576124579546159e-08, "loss": 0.2252, "step": 39692 }, { "epoch": 2.950055741360089, "grad_norm": 1.6437951073944672, "learning_rate": 1.4532850385147178e-08, "loss": 0.1781, "step": 39693 }, { "epoch": 2.9501300631735417, "grad_norm": 2.574960185873171, "learning_rate": 1.448964047666701e-08, "loss": 0.2791, "step": 39694 }, { "epoch": 2.9502043849869937, "grad_norm": 1.6387096556886174, "learning_rate": 1.444649485438543e-08, "loss": 0.1469, "step": 39695 }, { "epoch": 2.9502787068004457, "grad_norm": 1.8837174309045794, "learning_rate": 1.440341351857777e-08, "loss": 0.246, "step": 39696 }, { "epoch": 2.950353028613898, "grad_norm": 2.4067199053906294, "learning_rate": 1.4360396469523807e-08, "loss": 0.2762, "step": 39697 }, { "epoch": 2.9504273504273506, "grad_norm": 2.4042123850922463, "learning_rate": 1.431744370749999e-08, "loss": 0.213, "step": 39698 }, { "epoch": 2.9505016722408026, "grad_norm": 2.2310066215491844, "learning_rate": 1.427455523278165e-08, "loss": 0.2555, "step": 39699 }, { "epoch": 2.950575994054255, "grad_norm": 2.758563428348517, "learning_rate": 1.4231731045645236e-08, "loss": 0.2365, "step": 39700 }, { "epoch": 2.950650315867707, "grad_norm": 2.1482651231527354, "learning_rate": 1.4188971146367192e-08, "loss": 0.2644, "step": 39701 }, { "epoch": 2.9507246376811596, "grad_norm": 2.162577844597467, "learning_rate": 1.4146275535222853e-08, "loss": 0.2322, "step": 39702 }, { "epoch": 2.9507989594946116, "grad_norm": 2.359132508894688, "learning_rate": 1.4103644212485334e-08, "loss": 0.2784, "step": 39703 }, { "epoch": 2.950873281308064, "grad_norm": 1.8981340241619316, "learning_rate": 1.4061077178432193e-08, "loss": 0.282, "step": 39704 }, { "epoch": 2.950947603121516, "grad_norm": 2.9343005060428053, "learning_rate": 1.401857443333432e-08, "loss": 0.2576, "step": 39705 }, { "epoch": 2.9510219249349685, "grad_norm": 2.2825109065857276, "learning_rate": 1.3976135977467053e-08, "loss": 0.2773, "step": 39706 }, { "epoch": 2.9510962467484205, "grad_norm": 2.7399140958647417, "learning_rate": 1.3933761811103508e-08, "loss": 0.3248, "step": 39707 }, { "epoch": 2.951170568561873, "grad_norm": 2.575722483930816, "learning_rate": 1.3891451934516798e-08, "loss": 0.3376, "step": 39708 }, { "epoch": 2.951244890375325, "grad_norm": 2.3588932251855077, "learning_rate": 1.3849206347977817e-08, "loss": 0.3128, "step": 39709 }, { "epoch": 2.9513192121887775, "grad_norm": 2.3321840973626276, "learning_rate": 1.3807025051759681e-08, "loss": 0.2358, "step": 39710 }, { "epoch": 2.9513935340022295, "grad_norm": 2.5587343851216704, "learning_rate": 1.3764908046134396e-08, "loss": 0.2638, "step": 39711 }, { "epoch": 2.951467855815682, "grad_norm": 2.333226424716061, "learning_rate": 1.3722855331371743e-08, "loss": 0.2369, "step": 39712 }, { "epoch": 2.9515421776291344, "grad_norm": 2.299417171144011, "learning_rate": 1.3680866907743728e-08, "loss": 0.2703, "step": 39713 }, { "epoch": 2.9516164994425864, "grad_norm": 2.4524235477525185, "learning_rate": 1.3638942775520136e-08, "loss": 0.2728, "step": 39714 }, { "epoch": 2.9516908212560384, "grad_norm": 1.8737018947374673, "learning_rate": 1.359708293497075e-08, "loss": 0.2741, "step": 39715 }, { "epoch": 2.951765143069491, "grad_norm": 2.302666240068621, "learning_rate": 1.3555287386365357e-08, "loss": 0.2378, "step": 39716 }, { "epoch": 2.9518394648829434, "grad_norm": 2.8183975120630693, "learning_rate": 1.3513556129972627e-08, "loss": 0.3284, "step": 39717 }, { "epoch": 2.9519137866963954, "grad_norm": 4.998014888805311, "learning_rate": 1.3471889166061235e-08, "loss": 0.3575, "step": 39718 }, { "epoch": 2.9519881085098474, "grad_norm": 2.4119184564750085, "learning_rate": 1.3430286494899858e-08, "loss": 0.2494, "step": 39719 }, { "epoch": 2.9520624303233, "grad_norm": 2.9356158196328686, "learning_rate": 1.3388748116756056e-08, "loss": 0.3053, "step": 39720 }, { "epoch": 2.9521367521367523, "grad_norm": 2.901770250520768, "learning_rate": 1.3347274031897395e-08, "loss": 0.3513, "step": 39721 }, { "epoch": 2.9522110739502043, "grad_norm": 2.5915868781766593, "learning_rate": 1.3305864240590327e-08, "loss": 0.2392, "step": 39722 }, { "epoch": 2.952285395763657, "grad_norm": 1.9254017104479018, "learning_rate": 1.3264518743102417e-08, "loss": 0.2291, "step": 39723 }, { "epoch": 2.952359717577109, "grad_norm": 2.499914303954517, "learning_rate": 1.3223237539700118e-08, "loss": 0.3092, "step": 39724 }, { "epoch": 2.9524340393905613, "grad_norm": 2.358770128171136, "learning_rate": 1.3182020630646552e-08, "loss": 0.266, "step": 39725 }, { "epoch": 2.9525083612040133, "grad_norm": 2.536944380152436, "learning_rate": 1.3140868016210395e-08, "loss": 0.2643, "step": 39726 }, { "epoch": 2.9525826830174657, "grad_norm": 4.313307138114066, "learning_rate": 1.3099779696654768e-08, "loss": 0.272, "step": 39727 }, { "epoch": 2.9526570048309178, "grad_norm": 2.864772502168675, "learning_rate": 1.3058755672245016e-08, "loss": 0.2383, "step": 39728 }, { "epoch": 2.95273132664437, "grad_norm": 2.9957700783859975, "learning_rate": 1.301779594324426e-08, "loss": 0.2967, "step": 39729 }, { "epoch": 2.9528056484578222, "grad_norm": 2.545318666297095, "learning_rate": 1.2976900509916735e-08, "loss": 0.3031, "step": 39730 }, { "epoch": 2.9528799702712747, "grad_norm": 2.262620373553399, "learning_rate": 1.2936069372526671e-08, "loss": 0.2688, "step": 39731 }, { "epoch": 2.9529542920847267, "grad_norm": 2.547173333775452, "learning_rate": 1.2895302531334974e-08, "loss": 0.2711, "step": 39732 }, { "epoch": 2.953028613898179, "grad_norm": 2.643181438688097, "learning_rate": 1.2854599986604766e-08, "loss": 0.3102, "step": 39733 }, { "epoch": 2.953102935711631, "grad_norm": 3.89711423946869, "learning_rate": 1.2813961738599168e-08, "loss": 0.3513, "step": 39734 }, { "epoch": 2.9531772575250836, "grad_norm": 3.200566583638802, "learning_rate": 1.2773387787579083e-08, "loss": 0.3162, "step": 39735 }, { "epoch": 2.953251579338536, "grad_norm": 2.981307516829487, "learning_rate": 1.2732878133805416e-08, "loss": 0.3048, "step": 39736 }, { "epoch": 2.953325901151988, "grad_norm": 3.5353500008155074, "learning_rate": 1.2692432777540176e-08, "loss": 0.2547, "step": 39737 }, { "epoch": 2.95340022296544, "grad_norm": 3.064528319029249, "learning_rate": 1.2652051719042046e-08, "loss": 0.319, "step": 39738 }, { "epoch": 2.9534745447788926, "grad_norm": 2.628291080798028, "learning_rate": 1.261173495857193e-08, "loss": 0.3239, "step": 39739 }, { "epoch": 2.953548866592345, "grad_norm": 2.673845838572069, "learning_rate": 1.257148249638962e-08, "loss": 0.2983, "step": 39740 }, { "epoch": 2.953623188405797, "grad_norm": 2.887883164607652, "learning_rate": 1.2531294332752686e-08, "loss": 0.27, "step": 39741 }, { "epoch": 2.953697510219249, "grad_norm": 2.4549164192579513, "learning_rate": 1.249117046792203e-08, "loss": 0.2938, "step": 39742 }, { "epoch": 2.9537718320327015, "grad_norm": 3.462052758877037, "learning_rate": 1.2451110902155228e-08, "loss": 0.272, "step": 39743 }, { "epoch": 2.953846153846154, "grad_norm": 2.091535923485151, "learning_rate": 1.2411115635709847e-08, "loss": 0.2363, "step": 39744 }, { "epoch": 2.953920475659606, "grad_norm": 2.329415293324145, "learning_rate": 1.2371184668842351e-08, "loss": 0.3035, "step": 39745 }, { "epoch": 2.9539947974730585, "grad_norm": 2.1059447033857595, "learning_rate": 1.2331318001812532e-08, "loss": 0.2154, "step": 39746 }, { "epoch": 2.9540691192865105, "grad_norm": 2.072105200719983, "learning_rate": 1.2291515634874629e-08, "loss": 0.2643, "step": 39747 }, { "epoch": 2.954143441099963, "grad_norm": 2.7432904060800687, "learning_rate": 1.2251777568286216e-08, "loss": 0.2778, "step": 39748 }, { "epoch": 2.954217762913415, "grad_norm": 2.559373921359333, "learning_rate": 1.2212103802301533e-08, "loss": 0.2394, "step": 39749 }, { "epoch": 2.9542920847268674, "grad_norm": 2.3186606368128007, "learning_rate": 1.2172494337178154e-08, "loss": 0.2993, "step": 39750 }, { "epoch": 2.9543664065403195, "grad_norm": 2.2244554183340224, "learning_rate": 1.2132949173169206e-08, "loss": 0.2835, "step": 39751 }, { "epoch": 2.954440728353772, "grad_norm": 2.6654761659682644, "learning_rate": 1.2093468310531154e-08, "loss": 0.2962, "step": 39752 }, { "epoch": 2.954515050167224, "grad_norm": 2.7693496611644357, "learning_rate": 1.2054051749516016e-08, "loss": 0.2483, "step": 39753 }, { "epoch": 2.9545893719806764, "grad_norm": 2.730067814690313, "learning_rate": 1.2014699490379146e-08, "loss": 0.3138, "step": 39754 }, { "epoch": 2.954663693794129, "grad_norm": 2.139970323757908, "learning_rate": 1.197541153337367e-08, "loss": 0.2233, "step": 39755 }, { "epoch": 2.954738015607581, "grad_norm": 2.5874521485332895, "learning_rate": 1.1936187878752725e-08, "loss": 0.2676, "step": 39756 }, { "epoch": 2.954812337421033, "grad_norm": 2.0424278790090815, "learning_rate": 1.1897028526767219e-08, "loss": 0.2347, "step": 39757 }, { "epoch": 2.9548866592344853, "grad_norm": 2.6927722649556984, "learning_rate": 1.185793347767028e-08, "loss": 0.2578, "step": 39758 }, { "epoch": 2.954960981047938, "grad_norm": 2.3670702664974144, "learning_rate": 1.1818902731713933e-08, "loss": 0.2593, "step": 39759 }, { "epoch": 2.95503530286139, "grad_norm": 1.9525713696141318, "learning_rate": 1.1779936289150196e-08, "loss": 0.1925, "step": 39760 }, { "epoch": 2.955109624674842, "grad_norm": 2.193740805360909, "learning_rate": 1.174103415022887e-08, "loss": 0.2738, "step": 39761 }, { "epoch": 2.9551839464882943, "grad_norm": 2.6644617697118163, "learning_rate": 1.1702196315199754e-08, "loss": 0.3, "step": 39762 }, { "epoch": 2.9552582683017468, "grad_norm": 2.2478153000017618, "learning_rate": 1.1663422784313761e-08, "loss": 0.2614, "step": 39763 }, { "epoch": 2.9553325901151988, "grad_norm": 2.1102200878795356, "learning_rate": 1.1624713557820689e-08, "loss": 0.2902, "step": 39764 }, { "epoch": 2.955406911928651, "grad_norm": 2.2329669895783284, "learning_rate": 1.1586068635969228e-08, "loss": 0.2762, "step": 39765 }, { "epoch": 2.9554812337421033, "grad_norm": 2.6967236691349004, "learning_rate": 1.1547488019008069e-08, "loss": 0.3585, "step": 39766 }, { "epoch": 2.9555555555555557, "grad_norm": 2.3787246014143415, "learning_rate": 1.1508971707185902e-08, "loss": 0.2412, "step": 39767 }, { "epoch": 2.9556298773690077, "grad_norm": 2.4314117150514702, "learning_rate": 1.1470519700751414e-08, "loss": 0.2421, "step": 39768 }, { "epoch": 2.95570419918246, "grad_norm": 2.6545186476576186, "learning_rate": 1.1432131999951079e-08, "loss": 0.2485, "step": 39769 }, { "epoch": 2.955778520995912, "grad_norm": 2.3501575533169166, "learning_rate": 1.1393808605032475e-08, "loss": 0.2767, "step": 39770 }, { "epoch": 2.9558528428093647, "grad_norm": 2.0697350040810223, "learning_rate": 1.135554951624207e-08, "loss": 0.2188, "step": 39771 }, { "epoch": 2.9559271646228167, "grad_norm": 2.9478025223039936, "learning_rate": 1.1317354733826335e-08, "loss": 0.1945, "step": 39772 }, { "epoch": 2.956001486436269, "grad_norm": 2.5735003699213426, "learning_rate": 1.1279224258031741e-08, "loss": 0.3296, "step": 39773 }, { "epoch": 2.956075808249721, "grad_norm": 1.9843016687832735, "learning_rate": 1.1241158089102533e-08, "loss": 0.2462, "step": 39774 }, { "epoch": 2.9561501300631736, "grad_norm": 2.417662753274276, "learning_rate": 1.1203156227285184e-08, "loss": 0.2545, "step": 39775 }, { "epoch": 2.9562244518766256, "grad_norm": 2.0937739868904055, "learning_rate": 1.116521867282283e-08, "loss": 0.2412, "step": 39776 }, { "epoch": 2.956298773690078, "grad_norm": 2.3215795703705795, "learning_rate": 1.1127345425960834e-08, "loss": 0.2903, "step": 39777 }, { "epoch": 2.9563730955035306, "grad_norm": 2.4773392722961636, "learning_rate": 1.1089536486943442e-08, "loss": 0.3105, "step": 39778 }, { "epoch": 2.9564474173169826, "grad_norm": 2.2325339285570363, "learning_rate": 1.1051791856012684e-08, "loss": 0.3206, "step": 39779 }, { "epoch": 2.9565217391304346, "grad_norm": 2.8706617755035713, "learning_rate": 1.1014111533411697e-08, "loss": 0.2868, "step": 39780 }, { "epoch": 2.956596060943887, "grad_norm": 1.8953715892630612, "learning_rate": 1.0976495519384734e-08, "loss": 0.2307, "step": 39781 }, { "epoch": 2.9566703827573395, "grad_norm": 2.6373562555349404, "learning_rate": 1.0938943814171599e-08, "loss": 0.336, "step": 39782 }, { "epoch": 2.9567447045707915, "grad_norm": 1.9169049572076966, "learning_rate": 1.0901456418015432e-08, "loss": 0.1915, "step": 39783 }, { "epoch": 2.9568190263842435, "grad_norm": 2.455677164261969, "learning_rate": 1.0864033331158264e-08, "loss": 0.2732, "step": 39784 }, { "epoch": 2.956893348197696, "grad_norm": 2.491153561334728, "learning_rate": 1.08266745538399e-08, "loss": 0.3034, "step": 39785 }, { "epoch": 2.9569676700111485, "grad_norm": 1.9875057067875057, "learning_rate": 1.0789380086300149e-08, "loss": 0.21, "step": 39786 }, { "epoch": 2.9570419918246005, "grad_norm": 2.913340771516945, "learning_rate": 1.0752149928781042e-08, "loss": 0.3204, "step": 39787 }, { "epoch": 2.9571163136380525, "grad_norm": 2.058549517282247, "learning_rate": 1.0714984081521273e-08, "loss": 0.2624, "step": 39788 }, { "epoch": 2.957190635451505, "grad_norm": 2.175877945721571, "learning_rate": 1.0677882544759544e-08, "loss": 0.2755, "step": 39789 }, { "epoch": 2.9572649572649574, "grad_norm": 2.126891014874014, "learning_rate": 1.064084531873566e-08, "loss": 0.2527, "step": 39790 }, { "epoch": 2.9573392790784094, "grad_norm": 2.085841933481094, "learning_rate": 1.060387240368721e-08, "loss": 0.2475, "step": 39791 }, { "epoch": 2.957413600891862, "grad_norm": 2.734888250387839, "learning_rate": 1.0566963799854002e-08, "loss": 0.2137, "step": 39792 }, { "epoch": 2.957487922705314, "grad_norm": 2.412180705008586, "learning_rate": 1.0530119507471404e-08, "loss": 0.2808, "step": 39793 }, { "epoch": 2.9575622445187664, "grad_norm": 2.638324014547226, "learning_rate": 1.0493339526777002e-08, "loss": 0.2912, "step": 39794 }, { "epoch": 2.9576365663322184, "grad_norm": 1.8625781666561574, "learning_rate": 1.0456623858009496e-08, "loss": 0.2038, "step": 39795 }, { "epoch": 2.957710888145671, "grad_norm": 2.0887026619716407, "learning_rate": 1.0419972501403141e-08, "loss": 0.2027, "step": 39796 }, { "epoch": 2.957785209959123, "grad_norm": 3.3421891411193156, "learning_rate": 1.0383385457194417e-08, "loss": 0.4261, "step": 39797 }, { "epoch": 2.9578595317725753, "grad_norm": 2.3205349746786603, "learning_rate": 1.03468627256198e-08, "loss": 0.2356, "step": 39798 }, { "epoch": 2.9579338535860273, "grad_norm": 2.6383871381121162, "learning_rate": 1.0310404306913547e-08, "loss": 0.3262, "step": 39799 }, { "epoch": 2.95800817539948, "grad_norm": 2.9466561289947686, "learning_rate": 1.0274010201309915e-08, "loss": 0.2436, "step": 39800 }, { "epoch": 2.9580824972129323, "grad_norm": 2.731571022396517, "learning_rate": 1.023768040904427e-08, "loss": 0.3161, "step": 39801 }, { "epoch": 2.9581568190263843, "grad_norm": 1.9916456017689534, "learning_rate": 1.0201414930350873e-08, "loss": 0.199, "step": 39802 }, { "epoch": 2.9582311408398363, "grad_norm": 3.0732216628201816, "learning_rate": 1.0165213765461757e-08, "loss": 0.3149, "step": 39803 }, { "epoch": 2.9583054626532888, "grad_norm": 2.0678270595887014, "learning_rate": 1.012907691461007e-08, "loss": 0.2157, "step": 39804 }, { "epoch": 2.958379784466741, "grad_norm": 2.4828001757528315, "learning_rate": 1.009300437803007e-08, "loss": 0.3032, "step": 39805 }, { "epoch": 2.9584541062801932, "grad_norm": 2.425256975526545, "learning_rate": 1.0056996155952681e-08, "loss": 0.2696, "step": 39806 }, { "epoch": 2.9585284280936452, "grad_norm": 2.8577320439761777, "learning_rate": 1.0021052248609941e-08, "loss": 0.3174, "step": 39807 }, { "epoch": 2.9586027499070977, "grad_norm": 2.5000927548954, "learning_rate": 9.985172656233887e-09, "loss": 0.2747, "step": 39808 }, { "epoch": 2.95867707172055, "grad_norm": 2.3977126151298847, "learning_rate": 9.949357379054336e-09, "loss": 0.2679, "step": 39809 }, { "epoch": 2.958751393534002, "grad_norm": 2.638662655972933, "learning_rate": 9.913606417302214e-09, "loss": 0.2983, "step": 39810 }, { "epoch": 2.9588257153474546, "grad_norm": 2.1740153401172924, "learning_rate": 9.877919771208444e-09, "loss": 0.2229, "step": 39811 }, { "epoch": 2.9589000371609067, "grad_norm": 2.3386971484777628, "learning_rate": 9.842297441001735e-09, "loss": 0.2931, "step": 39812 }, { "epoch": 2.958974358974359, "grad_norm": 2.1741023209789367, "learning_rate": 9.806739426911904e-09, "loss": 0.2629, "step": 39813 }, { "epoch": 2.959048680787811, "grad_norm": 1.6874578627044865, "learning_rate": 9.771245729168765e-09, "loss": 0.2072, "step": 39814 }, { "epoch": 2.9591230026012636, "grad_norm": 2.7884047397194847, "learning_rate": 9.735816347999916e-09, "loss": 0.3035, "step": 39815 }, { "epoch": 2.9591973244147156, "grad_norm": 2.21416698238169, "learning_rate": 9.70045128363295e-09, "loss": 0.2191, "step": 39816 }, { "epoch": 2.959271646228168, "grad_norm": 2.251138082331472, "learning_rate": 9.665150536296575e-09, "loss": 0.2649, "step": 39817 }, { "epoch": 2.95934596804162, "grad_norm": 2.489410288988751, "learning_rate": 9.629914106217275e-09, "loss": 0.3186, "step": 39818 }, { "epoch": 2.9594202898550726, "grad_norm": 2.147868627207177, "learning_rate": 9.594741993621537e-09, "loss": 0.2475, "step": 39819 }, { "epoch": 2.9594946116685246, "grad_norm": 2.5472618225217656, "learning_rate": 9.559634198738067e-09, "loss": 0.2758, "step": 39820 }, { "epoch": 2.959568933481977, "grad_norm": 2.0530148094967817, "learning_rate": 9.524590721790016e-09, "loss": 0.2381, "step": 39821 }, { "epoch": 2.959643255295429, "grad_norm": 1.7291926414452583, "learning_rate": 9.489611563004985e-09, "loss": 0.1772, "step": 39822 }, { "epoch": 2.9597175771088815, "grad_norm": 2.3102907788920226, "learning_rate": 9.454696722607237e-09, "loss": 0.2817, "step": 39823 }, { "epoch": 2.959791898922334, "grad_norm": 1.5564672089080585, "learning_rate": 9.419846200821036e-09, "loss": 0.1696, "step": 39824 }, { "epoch": 2.959866220735786, "grad_norm": 1.5055426804023297, "learning_rate": 9.385059997872869e-09, "loss": 0.1747, "step": 39825 }, { "epoch": 2.959940542549238, "grad_norm": 2.709517532663133, "learning_rate": 9.35033811398478e-09, "loss": 0.3313, "step": 39826 }, { "epoch": 2.9600148643626905, "grad_norm": 2.0985678106991683, "learning_rate": 9.315680549379924e-09, "loss": 0.2495, "step": 39827 }, { "epoch": 2.960089186176143, "grad_norm": 2.234964780525373, "learning_rate": 9.281087304283676e-09, "loss": 0.2624, "step": 39828 }, { "epoch": 2.960163507989595, "grad_norm": 2.645778846028688, "learning_rate": 9.24655837891697e-09, "loss": 0.3408, "step": 39829 }, { "epoch": 2.960237829803047, "grad_norm": 2.0682448349219005, "learning_rate": 9.212093773501852e-09, "loss": 0.1983, "step": 39830 }, { "epoch": 2.9603121516164994, "grad_norm": 2.2185166626905257, "learning_rate": 9.177693488261474e-09, "loss": 0.2631, "step": 39831 }, { "epoch": 2.960386473429952, "grad_norm": 2.541871031165693, "learning_rate": 9.143357523416774e-09, "loss": 0.3236, "step": 39832 }, { "epoch": 2.960460795243404, "grad_norm": 2.3804584915665092, "learning_rate": 9.109085879188683e-09, "loss": 0.2816, "step": 39833 }, { "epoch": 2.9605351170568563, "grad_norm": 2.00885567127302, "learning_rate": 9.07487855579814e-09, "loss": 0.2546, "step": 39834 }, { "epoch": 2.9606094388703084, "grad_norm": 2.793589432518583, "learning_rate": 9.040735553464963e-09, "loss": 0.3238, "step": 39835 }, { "epoch": 2.960683760683761, "grad_norm": 2.5202307261264605, "learning_rate": 9.006656872408981e-09, "loss": 0.2535, "step": 39836 }, { "epoch": 2.960758082497213, "grad_norm": 2.3448114336469112, "learning_rate": 8.972642512850016e-09, "loss": 0.2069, "step": 39837 }, { "epoch": 2.9608324043106653, "grad_norm": 2.2016751004751876, "learning_rate": 8.938692475006784e-09, "loss": 0.3115, "step": 39838 }, { "epoch": 2.9609067261241173, "grad_norm": 2.6307118226541544, "learning_rate": 8.904806759096884e-09, "loss": 0.2788, "step": 39839 }, { "epoch": 2.9609810479375698, "grad_norm": 3.062210069255181, "learning_rate": 8.870985365340145e-09, "loss": 0.2068, "step": 39840 }, { "epoch": 2.961055369751022, "grad_norm": 2.639770115226236, "learning_rate": 8.83722829395417e-09, "loss": 0.2, "step": 39841 }, { "epoch": 2.9611296915644743, "grad_norm": 2.2292445365049383, "learning_rate": 8.80353554515545e-09, "loss": 0.2688, "step": 39842 }, { "epoch": 2.9612040133779263, "grad_norm": 2.1310627074670756, "learning_rate": 8.769907119160481e-09, "loss": 0.2953, "step": 39843 }, { "epoch": 2.9612783351913787, "grad_norm": 2.214812040742675, "learning_rate": 8.736343016186866e-09, "loss": 0.2782, "step": 39844 }, { "epoch": 2.9613526570048307, "grad_norm": 2.318651158699141, "learning_rate": 8.702843236449987e-09, "loss": 0.303, "step": 39845 }, { "epoch": 2.961426978818283, "grad_norm": 3.1129817361755365, "learning_rate": 8.669407780166339e-09, "loss": 0.3409, "step": 39846 }, { "epoch": 2.9615013006317357, "grad_norm": 3.783620029724562, "learning_rate": 8.636036647550195e-09, "loss": 0.298, "step": 39847 }, { "epoch": 2.9615756224451877, "grad_norm": 3.330840975548734, "learning_rate": 8.602729838816937e-09, "loss": 0.2822, "step": 39848 }, { "epoch": 2.9616499442586397, "grad_norm": 2.0203830996867738, "learning_rate": 8.569487354180838e-09, "loss": 0.2438, "step": 39849 }, { "epoch": 2.961724266072092, "grad_norm": 2.4178835230002615, "learning_rate": 8.536309193856174e-09, "loss": 0.2364, "step": 39850 }, { "epoch": 2.9617985878855446, "grad_norm": 2.122742603220545, "learning_rate": 8.503195358056104e-09, "loss": 0.231, "step": 39851 }, { "epoch": 2.9618729096989966, "grad_norm": 2.0918116664653605, "learning_rate": 8.470145846994904e-09, "loss": 0.2725, "step": 39852 }, { "epoch": 2.9619472315124487, "grad_norm": 1.902141556710353, "learning_rate": 8.437160660883514e-09, "loss": 0.1924, "step": 39853 }, { "epoch": 2.962021553325901, "grad_norm": 1.9352461171553763, "learning_rate": 8.404239799935098e-09, "loss": 0.2263, "step": 39854 }, { "epoch": 2.9620958751393536, "grad_norm": 2.71656047343186, "learning_rate": 8.37138326436282e-09, "loss": 0.2961, "step": 39855 }, { "epoch": 2.9621701969528056, "grad_norm": 2.264610289468931, "learning_rate": 8.33859105437651e-09, "loss": 0.2586, "step": 39856 }, { "epoch": 2.962244518766258, "grad_norm": 2.2264548864720846, "learning_rate": 8.305863170187112e-09, "loss": 0.224, "step": 39857 }, { "epoch": 2.96231884057971, "grad_norm": 2.463929774155451, "learning_rate": 8.273199612006677e-09, "loss": 0.2911, "step": 39858 }, { "epoch": 2.9623931623931625, "grad_norm": 2.5891355915416625, "learning_rate": 8.240600380045038e-09, "loss": 0.3051, "step": 39859 }, { "epoch": 2.9624674842066145, "grad_norm": 2.200595448136898, "learning_rate": 8.208065474512028e-09, "loss": 0.2842, "step": 39860 }, { "epoch": 2.962541806020067, "grad_norm": 2.390683405846637, "learning_rate": 8.175594895617478e-09, "loss": 0.2904, "step": 39861 }, { "epoch": 2.962616127833519, "grad_norm": 2.5381918550151417, "learning_rate": 8.14318864357011e-09, "loss": 0.2515, "step": 39862 }, { "epoch": 2.9626904496469715, "grad_norm": 2.2443071424217336, "learning_rate": 8.110846718577537e-09, "loss": 0.2774, "step": 39863 }, { "epoch": 2.9627647714604235, "grad_norm": 3.9042915195119896, "learning_rate": 8.07856912084959e-09, "loss": 0.2756, "step": 39864 }, { "epoch": 2.962839093273876, "grad_norm": 2.3975593385503178, "learning_rate": 8.046355850592769e-09, "loss": 0.276, "step": 39865 }, { "epoch": 2.962913415087328, "grad_norm": 2.3428666846831834, "learning_rate": 8.014206908015798e-09, "loss": 0.2511, "step": 39866 }, { "epoch": 2.9629877369007804, "grad_norm": 2.175032124670506, "learning_rate": 7.982122293324068e-09, "loss": 0.3276, "step": 39867 }, { "epoch": 2.9630620587142324, "grad_norm": 2.449467931903386, "learning_rate": 7.950102006726301e-09, "loss": 0.2933, "step": 39868 }, { "epoch": 2.963136380527685, "grad_norm": 2.0788251002699396, "learning_rate": 7.918146048425668e-09, "loss": 0.1947, "step": 39869 }, { "epoch": 2.9632107023411374, "grad_norm": 1.927436112091783, "learning_rate": 7.88625441863089e-09, "loss": 0.2414, "step": 39870 }, { "epoch": 2.9632850241545894, "grad_norm": 1.9760965282777174, "learning_rate": 7.854427117545139e-09, "loss": 0.2142, "step": 39871 }, { "epoch": 2.9633593459680414, "grad_norm": 2.9098449882485715, "learning_rate": 7.822664145373803e-09, "loss": 0.3435, "step": 39872 }, { "epoch": 2.963433667781494, "grad_norm": 2.5998088665485426, "learning_rate": 7.790965502323389e-09, "loss": 0.2663, "step": 39873 }, { "epoch": 2.9635079895949463, "grad_norm": 2.1366413859406816, "learning_rate": 7.759331188594844e-09, "loss": 0.207, "step": 39874 }, { "epoch": 2.9635823114083983, "grad_norm": 2.165505465867608, "learning_rate": 7.727761204393558e-09, "loss": 0.2673, "step": 39875 }, { "epoch": 2.9636566332218504, "grad_norm": 1.8191482812315913, "learning_rate": 7.696255549922704e-09, "loss": 0.2376, "step": 39876 }, { "epoch": 2.963730955035303, "grad_norm": 2.4631688288409355, "learning_rate": 7.664814225384343e-09, "loss": 0.2407, "step": 39877 }, { "epoch": 2.9638052768487553, "grad_norm": 2.1757758242887433, "learning_rate": 7.633437230981645e-09, "loss": 0.276, "step": 39878 }, { "epoch": 2.9638795986622073, "grad_norm": 2.1431486146682563, "learning_rate": 7.602124566916669e-09, "loss": 0.2264, "step": 39879 }, { "epoch": 2.9639539204756598, "grad_norm": 2.9663224711450615, "learning_rate": 7.570876233390367e-09, "loss": 0.2531, "step": 39880 }, { "epoch": 2.9640282422891118, "grad_norm": 2.3322684760027603, "learning_rate": 7.5396922306048e-09, "loss": 0.2612, "step": 39881 }, { "epoch": 2.9641025641025642, "grad_norm": 3.092256916722357, "learning_rate": 7.508572558759808e-09, "loss": 0.3168, "step": 39882 }, { "epoch": 2.9641768859160162, "grad_norm": 2.392108949731775, "learning_rate": 7.477517218056341e-09, "loss": 0.2659, "step": 39883 }, { "epoch": 2.9642512077294687, "grad_norm": 2.3406429755156606, "learning_rate": 7.446526208694238e-09, "loss": 0.2557, "step": 39884 }, { "epoch": 2.9643255295429207, "grad_norm": 2.367839215434961, "learning_rate": 7.415599530873341e-09, "loss": 0.2602, "step": 39885 }, { "epoch": 2.964399851356373, "grad_norm": 2.3184928615748506, "learning_rate": 7.384737184791269e-09, "loss": 0.2465, "step": 39886 }, { "epoch": 2.964474173169825, "grad_norm": 2.2601128098005434, "learning_rate": 7.353939170648971e-09, "loss": 0.26, "step": 39887 }, { "epoch": 2.9645484949832777, "grad_norm": 2.2705053418656425, "learning_rate": 7.32320548864296e-09, "loss": 0.2447, "step": 39888 }, { "epoch": 2.9646228167967297, "grad_norm": 2.3975425281435863, "learning_rate": 7.2925361389708516e-09, "loss": 0.2626, "step": 39889 }, { "epoch": 2.964697138610182, "grad_norm": 2.524054600366541, "learning_rate": 7.261931121832489e-09, "loss": 0.2965, "step": 39890 }, { "epoch": 2.964771460423634, "grad_norm": 2.350853321035371, "learning_rate": 7.231390437422159e-09, "loss": 0.2908, "step": 39891 }, { "epoch": 2.9648457822370866, "grad_norm": 2.6731665393762043, "learning_rate": 7.200914085937482e-09, "loss": 0.2428, "step": 39892 }, { "epoch": 2.964920104050539, "grad_norm": 2.107515058834091, "learning_rate": 7.1705020675760795e-09, "loss": 0.2559, "step": 39893 }, { "epoch": 2.964994425863991, "grad_norm": 2.5138957631356993, "learning_rate": 7.1401543825311285e-09, "loss": 0.3038, "step": 39894 }, { "epoch": 2.965068747677443, "grad_norm": 2.571904465426984, "learning_rate": 7.1098710310002485e-09, "loss": 0.2832, "step": 39895 }, { "epoch": 2.9651430694908956, "grad_norm": 2.2960462408033697, "learning_rate": 7.0796520131777294e-09, "loss": 0.3328, "step": 39896 }, { "epoch": 2.965217391304348, "grad_norm": 2.0348851531165995, "learning_rate": 7.049497329256749e-09, "loss": 0.2473, "step": 39897 }, { "epoch": 2.9652917131178, "grad_norm": 2.6177080666581882, "learning_rate": 7.019406979433818e-09, "loss": 0.2879, "step": 39898 }, { "epoch": 2.965366034931252, "grad_norm": 2.093225459861376, "learning_rate": 6.989380963901005e-09, "loss": 0.2558, "step": 39899 }, { "epoch": 2.9654403567447045, "grad_norm": 2.708596348097293, "learning_rate": 6.959419282851487e-09, "loss": 0.2928, "step": 39900 }, { "epoch": 2.965514678558157, "grad_norm": 2.8001266066470953, "learning_rate": 6.9295219364795555e-09, "loss": 0.3585, "step": 39901 }, { "epoch": 2.965589000371609, "grad_norm": 2.751268186832536, "learning_rate": 6.899688924976167e-09, "loss": 0.3449, "step": 39902 }, { "epoch": 2.9656633221850615, "grad_norm": 2.774212231449642, "learning_rate": 6.8699202485345005e-09, "loss": 0.2903, "step": 39903 }, { "epoch": 2.9657376439985135, "grad_norm": 2.2753962257978553, "learning_rate": 6.840215907345516e-09, "loss": 0.2335, "step": 39904 }, { "epoch": 2.965811965811966, "grad_norm": 2.5538013804025215, "learning_rate": 6.81057590160128e-09, "loss": 0.2097, "step": 39905 }, { "epoch": 2.965886287625418, "grad_norm": 1.8025458043565077, "learning_rate": 6.781000231491642e-09, "loss": 0.1911, "step": 39906 }, { "epoch": 2.9659606094388704, "grad_norm": 2.2010731240751698, "learning_rate": 6.751488897207559e-09, "loss": 0.2096, "step": 39907 }, { "epoch": 2.9660349312523224, "grad_norm": 2.4363657621507913, "learning_rate": 6.72204189893888e-09, "loss": 0.256, "step": 39908 }, { "epoch": 2.966109253065775, "grad_norm": 2.680253412860973, "learning_rate": 6.692659236874344e-09, "loss": 0.2964, "step": 39909 }, { "epoch": 2.966183574879227, "grad_norm": 3.563444839015973, "learning_rate": 6.663340911204908e-09, "loss": 0.2798, "step": 39910 }, { "epoch": 2.9662578966926794, "grad_norm": 2.2680701848550413, "learning_rate": 6.6340869221181996e-09, "loss": 0.2056, "step": 39911 }, { "epoch": 2.966332218506132, "grad_norm": 1.9758964499881693, "learning_rate": 6.604897269802957e-09, "loss": 0.2091, "step": 39912 }, { "epoch": 2.966406540319584, "grad_norm": 3.152071577466209, "learning_rate": 6.575771954445698e-09, "loss": 0.2871, "step": 39913 }, { "epoch": 2.966480862133036, "grad_norm": 2.4263307273621675, "learning_rate": 6.54671097623627e-09, "loss": 0.2611, "step": 39914 }, { "epoch": 2.9665551839464883, "grad_norm": 2.290524929824351, "learning_rate": 6.517714335360081e-09, "loss": 0.2833, "step": 39915 }, { "epoch": 2.9666295057599408, "grad_norm": 2.4066990817260776, "learning_rate": 6.488782032004759e-09, "loss": 0.2522, "step": 39916 }, { "epoch": 2.966703827573393, "grad_norm": 2.3651281119151237, "learning_rate": 6.45991406635571e-09, "loss": 0.2715, "step": 39917 }, { "epoch": 2.966778149386845, "grad_norm": 2.384786149179619, "learning_rate": 6.431110438599453e-09, "loss": 0.2538, "step": 39918 }, { "epoch": 2.9668524712002973, "grad_norm": 1.861816046153388, "learning_rate": 6.402371148921394e-09, "loss": 0.2465, "step": 39919 }, { "epoch": 2.9669267930137497, "grad_norm": 2.6060995139400633, "learning_rate": 6.37369619750583e-09, "loss": 0.2521, "step": 39920 }, { "epoch": 2.9670011148272017, "grad_norm": 2.7825393726907364, "learning_rate": 6.345085584538169e-09, "loss": 0.2753, "step": 39921 }, { "epoch": 2.9670754366406538, "grad_norm": 2.197908569315609, "learning_rate": 6.316539310202707e-09, "loss": 0.2246, "step": 39922 }, { "epoch": 2.9671497584541062, "grad_norm": 2.0637664316752193, "learning_rate": 6.288057374681522e-09, "loss": 0.2288, "step": 39923 }, { "epoch": 2.9672240802675587, "grad_norm": 1.6707763777221927, "learning_rate": 6.25963977816002e-09, "loss": 0.1998, "step": 39924 }, { "epoch": 2.9672984020810107, "grad_norm": 2.310028014943883, "learning_rate": 6.231286520820279e-09, "loss": 0.178, "step": 39925 }, { "epoch": 2.967372723894463, "grad_norm": 2.332489935095687, "learning_rate": 6.202997602845484e-09, "loss": 0.2403, "step": 39926 }, { "epoch": 2.967447045707915, "grad_norm": 2.4241887260286163, "learning_rate": 6.174773024416603e-09, "loss": 0.3412, "step": 39927 }, { "epoch": 2.9675213675213676, "grad_norm": 2.34102429949416, "learning_rate": 6.146612785716821e-09, "loss": 0.3018, "step": 39928 }, { "epoch": 2.9675956893348197, "grad_norm": 2.5008155226771036, "learning_rate": 6.118516886925996e-09, "loss": 0.3506, "step": 39929 }, { "epoch": 2.967670011148272, "grad_norm": 2.2622222302099533, "learning_rate": 6.090485328226204e-09, "loss": 0.2497, "step": 39930 }, { "epoch": 2.967744332961724, "grad_norm": 2.075432495895055, "learning_rate": 6.06251810979619e-09, "loss": 0.259, "step": 39931 }, { "epoch": 2.9678186547751766, "grad_norm": 2.45329099417696, "learning_rate": 6.0346152318180305e-09, "loss": 0.2907, "step": 39932 }, { "epoch": 2.9678929765886286, "grad_norm": 2.1342138546173093, "learning_rate": 6.006776694470473e-09, "loss": 0.2513, "step": 39933 }, { "epoch": 2.967967298402081, "grad_norm": 2.324763898571921, "learning_rate": 5.979002497932263e-09, "loss": 0.2655, "step": 39934 }, { "epoch": 2.9680416202155335, "grad_norm": 3.057229784828978, "learning_rate": 5.951292642383255e-09, "loss": 0.3814, "step": 39935 }, { "epoch": 2.9681159420289855, "grad_norm": 2.777731700295226, "learning_rate": 5.923647128001087e-09, "loss": 0.3105, "step": 39936 }, { "epoch": 2.9681902638424376, "grad_norm": 1.9121187469112602, "learning_rate": 5.896065954963393e-09, "loss": 0.2115, "step": 39937 }, { "epoch": 2.96826458565589, "grad_norm": 2.921560428738427, "learning_rate": 5.868549123448919e-09, "loss": 0.2945, "step": 39938 }, { "epoch": 2.9683389074693425, "grad_norm": 2.847159103822611, "learning_rate": 5.841096633633081e-09, "loss": 0.302, "step": 39939 }, { "epoch": 2.9684132292827945, "grad_norm": 2.276489979782878, "learning_rate": 5.813708485694625e-09, "loss": 0.2931, "step": 39940 }, { "epoch": 2.9684875510962465, "grad_norm": 2.0338873153445407, "learning_rate": 5.786384679808965e-09, "loss": 0.2303, "step": 39941 }, { "epoch": 2.968561872909699, "grad_norm": 2.4540685137095535, "learning_rate": 5.759125216152628e-09, "loss": 0.2175, "step": 39942 }, { "epoch": 2.9686361947231514, "grad_norm": 2.636839695120892, "learning_rate": 5.7319300948988075e-09, "loss": 0.1684, "step": 39943 }, { "epoch": 2.9687105165366034, "grad_norm": 2.2535500448169112, "learning_rate": 5.7047993162251404e-09, "loss": 0.2055, "step": 39944 }, { "epoch": 2.9687848383500555, "grad_norm": 3.2553545417629937, "learning_rate": 5.67773288030593e-09, "loss": 0.3179, "step": 39945 }, { "epoch": 2.968859160163508, "grad_norm": 2.1260482785576564, "learning_rate": 5.650730787314374e-09, "loss": 0.2992, "step": 39946 }, { "epoch": 2.9689334819769604, "grad_norm": 2.005651819778124, "learning_rate": 5.623793037424774e-09, "loss": 0.2506, "step": 39947 }, { "epoch": 2.9690078037904124, "grad_norm": 2.475766511975315, "learning_rate": 5.596919630810327e-09, "loss": 0.21, "step": 39948 }, { "epoch": 2.969082125603865, "grad_norm": 1.9798098054782727, "learning_rate": 5.570110567645337e-09, "loss": 0.1988, "step": 39949 }, { "epoch": 2.969156447417317, "grad_norm": 2.1686206592827433, "learning_rate": 5.543365848100779e-09, "loss": 0.2411, "step": 39950 }, { "epoch": 2.9692307692307693, "grad_norm": 2.462315736055959, "learning_rate": 5.516685472348737e-09, "loss": 0.2681, "step": 39951 }, { "epoch": 2.9693050910442214, "grad_norm": 1.9872268937871014, "learning_rate": 5.4900694405624065e-09, "loss": 0.2181, "step": 39952 }, { "epoch": 2.969379412857674, "grad_norm": 1.6386241810593465, "learning_rate": 5.463517752912761e-09, "loss": 0.179, "step": 39953 }, { "epoch": 2.969453734671126, "grad_norm": 2.216782295506433, "learning_rate": 5.437030409570776e-09, "loss": 0.2652, "step": 39954 }, { "epoch": 2.9695280564845783, "grad_norm": 2.5169900761886757, "learning_rate": 5.410607410705205e-09, "loss": 0.2931, "step": 39955 }, { "epoch": 2.9696023782980303, "grad_norm": 2.5286357186770556, "learning_rate": 5.3842487564881304e-09, "loss": 0.254, "step": 39956 }, { "epoch": 2.9696767001114828, "grad_norm": 2.327614495060768, "learning_rate": 5.35795444708942e-09, "loss": 0.2345, "step": 39957 }, { "epoch": 2.9697510219249352, "grad_norm": 2.2325863814348557, "learning_rate": 5.331724482676715e-09, "loss": 0.2634, "step": 39958 }, { "epoch": 2.9698253437383872, "grad_norm": 2.3350749376430113, "learning_rate": 5.305558863419879e-09, "loss": 0.3409, "step": 39959 }, { "epoch": 2.9698996655518393, "grad_norm": 2.1605871018111795, "learning_rate": 5.2794575894876684e-09, "loss": 0.2714, "step": 39960 }, { "epoch": 2.9699739873652917, "grad_norm": 2.448813381464581, "learning_rate": 5.253420661047726e-09, "loss": 0.294, "step": 39961 }, { "epoch": 2.970048309178744, "grad_norm": 2.2750764031142072, "learning_rate": 5.227448078266584e-09, "loss": 0.2402, "step": 39962 }, { "epoch": 2.970122630992196, "grad_norm": 2.5926036389735363, "learning_rate": 5.201539841314107e-09, "loss": 0.2186, "step": 39963 }, { "epoch": 2.970196952805648, "grad_norm": 2.3069011490441516, "learning_rate": 5.175695950354609e-09, "loss": 0.3242, "step": 39964 }, { "epoch": 2.9702712746191007, "grad_norm": 2.0212396978850657, "learning_rate": 5.1499164055557326e-09, "loss": 0.2449, "step": 39965 }, { "epoch": 2.970345596432553, "grad_norm": 2.3484121773432984, "learning_rate": 5.124201207082902e-09, "loss": 0.275, "step": 39966 }, { "epoch": 2.970419918246005, "grad_norm": 2.479913698758587, "learning_rate": 5.098550355101539e-09, "loss": 0.2777, "step": 39967 }, { "epoch": 2.9704942400594576, "grad_norm": 3.2318771905998385, "learning_rate": 5.0729638497781785e-09, "loss": 0.3828, "step": 39968 }, { "epoch": 2.9705685618729096, "grad_norm": 2.1603118520953446, "learning_rate": 5.047441691276023e-09, "loss": 0.2399, "step": 39969 }, { "epoch": 2.970642883686362, "grad_norm": 2.2234576787414664, "learning_rate": 5.021983879759384e-09, "loss": 0.2402, "step": 39970 }, { "epoch": 2.970717205499814, "grad_norm": 2.0574385591685536, "learning_rate": 4.996590415392577e-09, "loss": 0.1635, "step": 39971 }, { "epoch": 2.9707915273132666, "grad_norm": 2.8397983431079665, "learning_rate": 4.971261298338803e-09, "loss": 0.3239, "step": 39972 }, { "epoch": 2.9708658491267186, "grad_norm": 2.523569604766329, "learning_rate": 4.945996528762376e-09, "loss": 0.3104, "step": 39973 }, { "epoch": 2.970940170940171, "grad_norm": 2.963107637754793, "learning_rate": 4.9207961068242775e-09, "loss": 0.3011, "step": 39974 }, { "epoch": 2.971014492753623, "grad_norm": 3.1854242537945825, "learning_rate": 4.89566003268771e-09, "loss": 0.3361, "step": 39975 }, { "epoch": 2.9710888145670755, "grad_norm": 2.1733150669739594, "learning_rate": 4.8705883065147675e-09, "loss": 0.3052, "step": 39976 }, { "epoch": 2.9711631363805275, "grad_norm": 2.016645486974069, "learning_rate": 4.845580928466431e-09, "loss": 0.1777, "step": 39977 }, { "epoch": 2.97123745819398, "grad_norm": 2.1856020401526464, "learning_rate": 4.820637898702574e-09, "loss": 0.2009, "step": 39978 }, { "epoch": 2.971311780007432, "grad_norm": 2.065517567623751, "learning_rate": 4.795759217386397e-09, "loss": 0.2105, "step": 39979 }, { "epoch": 2.9713861018208845, "grad_norm": 2.1466425986428446, "learning_rate": 4.770944884675554e-09, "loss": 0.2242, "step": 39980 }, { "epoch": 2.971460423634337, "grad_norm": 1.9548174568069967, "learning_rate": 4.746194900729917e-09, "loss": 0.2399, "step": 39981 }, { "epoch": 2.971534745447789, "grad_norm": 2.3944029842886967, "learning_rate": 4.721509265710467e-09, "loss": 0.3265, "step": 39982 }, { "epoch": 2.971609067261241, "grad_norm": 2.1212662205860453, "learning_rate": 4.6968879797759656e-09, "loss": 0.221, "step": 39983 }, { "epoch": 2.9716833890746934, "grad_norm": 2.059497782506909, "learning_rate": 4.672331043084066e-09, "loss": 0.2771, "step": 39984 }, { "epoch": 2.971757710888146, "grad_norm": 2.404732110074742, "learning_rate": 4.647838455792419e-09, "loss": 0.2325, "step": 39985 }, { "epoch": 2.971832032701598, "grad_norm": 2.7899333499688503, "learning_rate": 4.6234102180597875e-09, "loss": 0.2649, "step": 39986 }, { "epoch": 2.97190635451505, "grad_norm": 2.2906206562723814, "learning_rate": 4.599046330042711e-09, "loss": 0.2531, "step": 39987 }, { "epoch": 2.9719806763285024, "grad_norm": 2.665934482729459, "learning_rate": 4.574746791897733e-09, "loss": 0.2765, "step": 39988 }, { "epoch": 2.972054998141955, "grad_norm": 2.4069971586589785, "learning_rate": 4.550511603783614e-09, "loss": 0.3147, "step": 39989 }, { "epoch": 2.972129319955407, "grad_norm": 2.7704145976442662, "learning_rate": 4.5263407658535655e-09, "loss": 0.3557, "step": 39990 }, { "epoch": 2.9722036417688593, "grad_norm": 1.868805136554273, "learning_rate": 4.5022342782641285e-09, "loss": 0.1762, "step": 39991 }, { "epoch": 2.9722779635823113, "grad_norm": 2.601311348552715, "learning_rate": 4.478192141170734e-09, "loss": 0.2564, "step": 39992 }, { "epoch": 2.972352285395764, "grad_norm": 2.3771778672174784, "learning_rate": 4.454214354728814e-09, "loss": 0.2512, "step": 39993 }, { "epoch": 2.972426607209216, "grad_norm": 2.3125583638251306, "learning_rate": 4.430300919091579e-09, "loss": 0.2214, "step": 39994 }, { "epoch": 2.9725009290226683, "grad_norm": 2.6250934542553828, "learning_rate": 4.40645183441335e-09, "loss": 0.3377, "step": 39995 }, { "epoch": 2.9725752508361203, "grad_norm": 2.38398720784748, "learning_rate": 4.3826671008484475e-09, "loss": 0.1966, "step": 39996 }, { "epoch": 2.9726495726495727, "grad_norm": 2.306417143085513, "learning_rate": 4.3589467185478625e-09, "loss": 0.261, "step": 39997 }, { "epoch": 2.9727238944630248, "grad_norm": 2.410333571493631, "learning_rate": 4.335290687667026e-09, "loss": 0.2387, "step": 39998 }, { "epoch": 2.9727982162764772, "grad_norm": 2.406127249568632, "learning_rate": 4.311699008356929e-09, "loss": 0.2365, "step": 39999 }, { "epoch": 2.9728725380899292, "grad_norm": 1.9141989242284079, "learning_rate": 4.2881716807685605e-09, "loss": 0.2531, "step": 40000 }, { "epoch": 2.9729468599033817, "grad_norm": 2.2073767919191116, "learning_rate": 4.264708705055132e-09, "loss": 0.2603, "step": 40001 }, { "epoch": 2.9730211817168337, "grad_norm": 2.086829422059998, "learning_rate": 4.2413100813665235e-09, "loss": 0.261, "step": 40002 }, { "epoch": 2.973095503530286, "grad_norm": 2.7067286265537347, "learning_rate": 4.217975809853725e-09, "loss": 0.2835, "step": 40003 }, { "epoch": 2.9731698253437386, "grad_norm": 2.4380658452259345, "learning_rate": 4.194705890666617e-09, "loss": 0.2742, "step": 40004 }, { "epoch": 2.9732441471571907, "grad_norm": 2.377259544922095, "learning_rate": 4.17150032395619e-09, "loss": 0.2323, "step": 40005 }, { "epoch": 2.9733184689706427, "grad_norm": 2.205077144441777, "learning_rate": 4.148359109870104e-09, "loss": 0.2181, "step": 40006 }, { "epoch": 2.973392790784095, "grad_norm": 2.6827848912584087, "learning_rate": 4.125282248558238e-09, "loss": 0.4036, "step": 40007 }, { "epoch": 2.9734671125975476, "grad_norm": 2.3745851765778823, "learning_rate": 4.102269740169362e-09, "loss": 0.3104, "step": 40008 }, { "epoch": 2.9735414344109996, "grad_norm": 2.4996497467389687, "learning_rate": 4.079321584852247e-09, "loss": 0.2795, "step": 40009 }, { "epoch": 2.9736157562244516, "grad_norm": 2.5418254970795946, "learning_rate": 4.056437782753442e-09, "loss": 0.2927, "step": 40010 }, { "epoch": 2.973690078037904, "grad_norm": 1.7683726729708653, "learning_rate": 4.033618334019496e-09, "loss": 0.2218, "step": 40011 }, { "epoch": 2.9737643998513565, "grad_norm": 2.328118826207384, "learning_rate": 4.0108632387991785e-09, "loss": 0.2862, "step": 40012 }, { "epoch": 2.9738387216648086, "grad_norm": 2.594487928630145, "learning_rate": 3.98817249723904e-09, "loss": 0.2877, "step": 40013 }, { "epoch": 2.973913043478261, "grad_norm": 2.9596441494355394, "learning_rate": 3.965546109483409e-09, "loss": 0.2844, "step": 40014 }, { "epoch": 2.973987365291713, "grad_norm": 2.1337728468790527, "learning_rate": 3.942984075679945e-09, "loss": 0.2679, "step": 40015 }, { "epoch": 2.9740616871051655, "grad_norm": 2.370679523003944, "learning_rate": 3.920486395971868e-09, "loss": 0.2501, "step": 40016 }, { "epoch": 2.9741360089186175, "grad_norm": 2.660334959892635, "learning_rate": 3.8980530705046145e-09, "loss": 0.2793, "step": 40017 }, { "epoch": 2.97421033073207, "grad_norm": 2.651071584818343, "learning_rate": 3.875684099424737e-09, "loss": 0.2792, "step": 40018 }, { "epoch": 2.974284652545522, "grad_norm": 2.692392668535048, "learning_rate": 3.853379482873232e-09, "loss": 0.2364, "step": 40019 }, { "epoch": 2.9743589743589745, "grad_norm": 2.0595073898670475, "learning_rate": 3.831139220995539e-09, "loss": 0.2191, "step": 40020 }, { "epoch": 2.9744332961724265, "grad_norm": 2.6828702300261322, "learning_rate": 3.808963313933767e-09, "loss": 0.3105, "step": 40021 }, { "epoch": 2.974507617985879, "grad_norm": 2.5801058947458513, "learning_rate": 3.786851761832244e-09, "loss": 0.2945, "step": 40022 }, { "epoch": 2.974581939799331, "grad_norm": 2.114424468469458, "learning_rate": 3.764804564830859e-09, "loss": 0.243, "step": 40023 }, { "epoch": 2.9746562616127834, "grad_norm": 2.763379648211291, "learning_rate": 3.742821723073942e-09, "loss": 0.3506, "step": 40024 }, { "epoch": 2.9747305834262354, "grad_norm": 1.9930221184361456, "learning_rate": 3.720903236702489e-09, "loss": 0.259, "step": 40025 }, { "epoch": 2.974804905239688, "grad_norm": 2.6833259639291187, "learning_rate": 3.699049105856389e-09, "loss": 0.2895, "step": 40026 }, { "epoch": 2.9748792270531403, "grad_norm": 2.4140762384939034, "learning_rate": 3.6772593306777516e-09, "loss": 0.2323, "step": 40027 }, { "epoch": 2.9749535488665924, "grad_norm": 2.57075857719652, "learning_rate": 3.6555339113053534e-09, "loss": 0.251, "step": 40028 }, { "epoch": 2.9750278706800444, "grad_norm": 2.3024926628236244, "learning_rate": 3.6338728478801934e-09, "loss": 0.2548, "step": 40029 }, { "epoch": 2.975102192493497, "grad_norm": 2.3248505937164, "learning_rate": 3.612276140542159e-09, "loss": 0.2369, "step": 40030 }, { "epoch": 2.9751765143069493, "grad_norm": 2.5394186509847048, "learning_rate": 3.5907437894289187e-09, "loss": 0.245, "step": 40031 }, { "epoch": 2.9752508361204013, "grad_norm": 2.649518023054501, "learning_rate": 3.5692757946803603e-09, "loss": 0.2403, "step": 40032 }, { "epoch": 2.9753251579338533, "grad_norm": 2.2798322193933025, "learning_rate": 3.547872156434151e-09, "loss": 0.2688, "step": 40033 }, { "epoch": 2.975399479747306, "grad_norm": 2.542910027097938, "learning_rate": 3.526532874826849e-09, "loss": 0.2885, "step": 40034 }, { "epoch": 2.9754738015607582, "grad_norm": 2.5655126689616794, "learning_rate": 3.505257949998342e-09, "loss": 0.2649, "step": 40035 }, { "epoch": 2.9755481233742103, "grad_norm": 2.7538161264124073, "learning_rate": 3.484047382084077e-09, "loss": 0.321, "step": 40036 }, { "epoch": 2.9756224451876627, "grad_norm": 2.1578205456134, "learning_rate": 3.462901171220612e-09, "loss": 0.2271, "step": 40037 }, { "epoch": 2.9756967670011147, "grad_norm": 2.3143382673379023, "learning_rate": 3.4418193175445035e-09, "loss": 0.2596, "step": 40038 }, { "epoch": 2.975771088814567, "grad_norm": 2.09175132595941, "learning_rate": 3.4208018211912e-09, "loss": 0.2638, "step": 40039 }, { "epoch": 2.975845410628019, "grad_norm": 1.9197159584960874, "learning_rate": 3.3998486822961474e-09, "loss": 0.2031, "step": 40040 }, { "epoch": 2.9759197324414717, "grad_norm": 2.0537030918592465, "learning_rate": 3.3789599009936834e-09, "loss": 0.2424, "step": 40041 }, { "epoch": 2.9759940542549237, "grad_norm": 2.5122570330033542, "learning_rate": 3.3581354774192555e-09, "loss": 0.3508, "step": 40042 }, { "epoch": 2.976068376068376, "grad_norm": 2.474498515660122, "learning_rate": 3.3373754117060897e-09, "loss": 0.3048, "step": 40043 }, { "epoch": 2.976142697881828, "grad_norm": 2.3715912163002377, "learning_rate": 3.3166797039885235e-09, "loss": 0.2379, "step": 40044 }, { "epoch": 2.9762170196952806, "grad_norm": 2.5747641985421317, "learning_rate": 3.2960483543997835e-09, "loss": 0.2263, "step": 40045 }, { "epoch": 2.976291341508733, "grad_norm": 2.766404626052043, "learning_rate": 3.275481363071986e-09, "loss": 0.3196, "step": 40046 }, { "epoch": 2.976365663322185, "grad_norm": 2.8179022239421325, "learning_rate": 3.2549787301383586e-09, "loss": 0.3084, "step": 40047 }, { "epoch": 2.976439985135637, "grad_norm": 2.5449216343875642, "learning_rate": 3.2345404557299066e-09, "loss": 0.3222, "step": 40048 }, { "epoch": 2.9765143069490896, "grad_norm": 3.1929813403165173, "learning_rate": 3.214166539978747e-09, "loss": 0.2945, "step": 40049 }, { "epoch": 2.976588628762542, "grad_norm": 2.1808235378464884, "learning_rate": 3.193856983015886e-09, "loss": 0.2761, "step": 40050 }, { "epoch": 2.976662950575994, "grad_norm": 1.9540319600549567, "learning_rate": 3.1736117849734406e-09, "loss": 0.2298, "step": 40051 }, { "epoch": 2.976737272389446, "grad_norm": 2.7342841325548033, "learning_rate": 3.1534309459790856e-09, "loss": 0.3048, "step": 40052 }, { "epoch": 2.9768115942028985, "grad_norm": 2.3531339569323153, "learning_rate": 3.1333144661649385e-09, "loss": 0.3088, "step": 40053 }, { "epoch": 2.976885916016351, "grad_norm": 2.6997741863513856, "learning_rate": 3.1132623456608946e-09, "loss": 0.3201, "step": 40054 }, { "epoch": 2.976960237829803, "grad_norm": 1.982815047676164, "learning_rate": 3.0932745845935198e-09, "loss": 0.2059, "step": 40055 }, { "epoch": 2.977034559643255, "grad_norm": 2.8691344140014214, "learning_rate": 3.07335118309382e-09, "loss": 0.3082, "step": 40056 }, { "epoch": 2.9771088814567075, "grad_norm": 2.2473136517211305, "learning_rate": 3.053492141288361e-09, "loss": 0.2892, "step": 40057 }, { "epoch": 2.97718320327016, "grad_norm": 2.4275628471984905, "learning_rate": 3.0336974593059287e-09, "loss": 0.3008, "step": 40058 }, { "epoch": 2.977257525083612, "grad_norm": 2.428714229751299, "learning_rate": 3.0139671372741985e-09, "loss": 0.2702, "step": 40059 }, { "epoch": 2.9773318468970644, "grad_norm": 3.0484575080034353, "learning_rate": 2.9943011753197358e-09, "loss": 0.3346, "step": 40060 }, { "epoch": 2.9774061687105164, "grad_norm": 1.9728338498220803, "learning_rate": 2.9746995735691065e-09, "loss": 0.3211, "step": 40061 }, { "epoch": 2.977480490523969, "grad_norm": 2.9551109716319806, "learning_rate": 2.9551623321488752e-09, "loss": 0.283, "step": 40062 }, { "epoch": 2.977554812337421, "grad_norm": 2.179255672963011, "learning_rate": 2.935689451184498e-09, "loss": 0.2532, "step": 40063 }, { "epoch": 2.9776291341508734, "grad_norm": 2.5923551344566875, "learning_rate": 2.9162809308014296e-09, "loss": 0.3088, "step": 40064 }, { "epoch": 2.9777034559643254, "grad_norm": 2.2561208332914258, "learning_rate": 2.896936771125125e-09, "loss": 0.2675, "step": 40065 }, { "epoch": 2.977777777777778, "grad_norm": 2.724774116778207, "learning_rate": 2.8776569722788194e-09, "loss": 0.3065, "step": 40066 }, { "epoch": 2.97785209959123, "grad_norm": 2.7594043699479514, "learning_rate": 2.858441534387968e-09, "loss": 0.3002, "step": 40067 }, { "epoch": 2.9779264214046823, "grad_norm": 2.2204241848545956, "learning_rate": 2.839290457575805e-09, "loss": 0.2543, "step": 40068 }, { "epoch": 2.978000743218135, "grad_norm": 2.4805046718841144, "learning_rate": 2.820203741965566e-09, "loss": 0.2748, "step": 40069 }, { "epoch": 2.978075065031587, "grad_norm": 2.199822889289772, "learning_rate": 2.801181387679375e-09, "loss": 0.2643, "step": 40070 }, { "epoch": 2.978149386845039, "grad_norm": 2.232100747349315, "learning_rate": 2.7822233948415766e-09, "loss": 0.226, "step": 40071 }, { "epoch": 2.9782237086584913, "grad_norm": 2.156587069678227, "learning_rate": 2.7633297635720756e-09, "loss": 0.2434, "step": 40072 }, { "epoch": 2.9782980304719437, "grad_norm": 2.1498956507584763, "learning_rate": 2.7445004939941066e-09, "loss": 0.2368, "step": 40073 }, { "epoch": 2.9783723522853958, "grad_norm": 2.205191627865577, "learning_rate": 2.725735586228684e-09, "loss": 0.2625, "step": 40074 }, { "epoch": 2.978446674098848, "grad_norm": 2.297195779850602, "learning_rate": 2.7070350403957112e-09, "loss": 0.2756, "step": 40075 }, { "epoch": 2.9785209959123002, "grad_norm": 3.7361567870775607, "learning_rate": 2.6883988566162033e-09, "loss": 0.2591, "step": 40076 }, { "epoch": 2.9785953177257527, "grad_norm": 2.7588683038810218, "learning_rate": 2.669827035010064e-09, "loss": 0.3073, "step": 40077 }, { "epoch": 2.9786696395392047, "grad_norm": 2.213662799301513, "learning_rate": 2.6513195756960876e-09, "loss": 0.2243, "step": 40078 }, { "epoch": 2.9787439613526567, "grad_norm": 3.0212316712365963, "learning_rate": 2.6328764787952877e-09, "loss": 0.2591, "step": 40079 }, { "epoch": 2.978818283166109, "grad_norm": 1.655916574217922, "learning_rate": 2.6144977444253483e-09, "loss": 0.1678, "step": 40080 }, { "epoch": 2.9788926049795617, "grad_norm": 2.714781995576299, "learning_rate": 2.5961833727039533e-09, "loss": 0.3314, "step": 40081 }, { "epoch": 2.9789669267930137, "grad_norm": 1.876678472367609, "learning_rate": 2.577933363749896e-09, "loss": 0.2039, "step": 40082 }, { "epoch": 2.979041248606466, "grad_norm": 2.232130891087683, "learning_rate": 2.5597477176797503e-09, "loss": 0.2239, "step": 40083 }, { "epoch": 2.979115570419918, "grad_norm": 3.0101587634089717, "learning_rate": 2.54162643461231e-09, "loss": 0.3567, "step": 40084 }, { "epoch": 2.9791898922333706, "grad_norm": 2.3479451802672164, "learning_rate": 2.5235695146619278e-09, "loss": 0.2359, "step": 40085 }, { "epoch": 2.9792642140468226, "grad_norm": 2.318701712559331, "learning_rate": 2.5055769579473977e-09, "loss": 0.226, "step": 40086 }, { "epoch": 2.979338535860275, "grad_norm": 2.753352933025003, "learning_rate": 2.4876487645819623e-09, "loss": 0.2445, "step": 40087 }, { "epoch": 2.979412857673727, "grad_norm": 2.0046928080893993, "learning_rate": 2.469784934683306e-09, "loss": 0.2588, "step": 40088 }, { "epoch": 2.9794871794871796, "grad_norm": 2.200187906385626, "learning_rate": 2.4519854683646704e-09, "loss": 0.2601, "step": 40089 }, { "epoch": 2.9795615013006316, "grad_norm": 2.153788464569223, "learning_rate": 2.4342503657415197e-09, "loss": 0.2816, "step": 40090 }, { "epoch": 2.979635823114084, "grad_norm": 1.9769660000970852, "learning_rate": 2.4165796269293163e-09, "loss": 0.2161, "step": 40091 }, { "epoch": 2.9797101449275365, "grad_norm": 3.0045282403268168, "learning_rate": 2.3989732520390828e-09, "loss": 0.3395, "step": 40092 }, { "epoch": 2.9797844667409885, "grad_norm": 2.2690537206065113, "learning_rate": 2.3814312411851727e-09, "loss": 0.2357, "step": 40093 }, { "epoch": 2.9798587885544405, "grad_norm": 2.5667893545278946, "learning_rate": 2.363953594481938e-09, "loss": 0.2444, "step": 40094 }, { "epoch": 2.979933110367893, "grad_norm": 2.2259341402285178, "learning_rate": 2.346540312040402e-09, "loss": 0.2782, "step": 40095 }, { "epoch": 2.9800074321813455, "grad_norm": 1.9363326768981077, "learning_rate": 2.3291913939726963e-09, "loss": 0.2281, "step": 40096 }, { "epoch": 2.9800817539947975, "grad_norm": 2.0702983608365164, "learning_rate": 2.311906840390954e-09, "loss": 0.1911, "step": 40097 }, { "epoch": 2.9801560758082495, "grad_norm": 2.48086538547052, "learning_rate": 2.2946866514073074e-09, "loss": 0.2627, "step": 40098 }, { "epoch": 2.980230397621702, "grad_norm": 3.037778376928117, "learning_rate": 2.2775308271305587e-09, "loss": 0.3079, "step": 40099 }, { "epoch": 2.9803047194351544, "grad_norm": 2.1764225617408894, "learning_rate": 2.26043936767284e-09, "loss": 0.2527, "step": 40100 }, { "epoch": 2.9803790412486064, "grad_norm": 2.827688795618221, "learning_rate": 2.2434122731440632e-09, "loss": 0.3621, "step": 40101 }, { "epoch": 2.980453363062059, "grad_norm": 2.4592035082698103, "learning_rate": 2.226449543653031e-09, "loss": 0.2394, "step": 40102 }, { "epoch": 2.980527684875511, "grad_norm": 2.7328182638636376, "learning_rate": 2.209551179309655e-09, "loss": 0.3105, "step": 40103 }, { "epoch": 2.9806020066889634, "grad_norm": 1.8433571387420258, "learning_rate": 2.192717180221626e-09, "loss": 0.2435, "step": 40104 }, { "epoch": 2.9806763285024154, "grad_norm": 1.9932014353862235, "learning_rate": 2.1759475464988577e-09, "loss": 0.1871, "step": 40105 }, { "epoch": 2.980750650315868, "grad_norm": 2.443175818601453, "learning_rate": 2.1592422782479304e-09, "loss": 0.2876, "step": 40106 }, { "epoch": 2.98082497212932, "grad_norm": 2.131059086041711, "learning_rate": 2.1426013755776463e-09, "loss": 0.2878, "step": 40107 }, { "epoch": 2.9808992939427723, "grad_norm": 2.2739782636658252, "learning_rate": 2.126024838594587e-09, "loss": 0.2771, "step": 40108 }, { "epoch": 2.9809736157562243, "grad_norm": 2.4489231196663113, "learning_rate": 2.1095126674053336e-09, "loss": 0.2995, "step": 40109 }, { "epoch": 2.981047937569677, "grad_norm": 1.6663706989852962, "learning_rate": 2.0930648621164673e-09, "loss": 0.1697, "step": 40110 }, { "epoch": 2.981122259383129, "grad_norm": 3.1645909869892495, "learning_rate": 2.0766814228334597e-09, "loss": 0.3644, "step": 40111 }, { "epoch": 2.9811965811965813, "grad_norm": 4.452597940173077, "learning_rate": 2.060362349661782e-09, "loss": 0.3448, "step": 40112 }, { "epoch": 2.9812709030100333, "grad_norm": 2.9360037384807676, "learning_rate": 2.0441076427069053e-09, "loss": 0.2853, "step": 40113 }, { "epoch": 2.9813452248234857, "grad_norm": 2.270626438822549, "learning_rate": 2.02791730207319e-09, "loss": 0.2335, "step": 40114 }, { "epoch": 2.981419546636938, "grad_norm": 1.7929899924470727, "learning_rate": 2.011791327866108e-09, "loss": 0.235, "step": 40115 }, { "epoch": 2.98149386845039, "grad_norm": 2.702203016762974, "learning_rate": 1.9957297201866898e-09, "loss": 0.3899, "step": 40116 }, { "epoch": 2.9815681902638422, "grad_norm": 2.6486480618504293, "learning_rate": 1.979732479140406e-09, "loss": 0.3532, "step": 40117 }, { "epoch": 2.9816425120772947, "grad_norm": 2.4601682888572545, "learning_rate": 1.9637996048305074e-09, "loss": 0.3439, "step": 40118 }, { "epoch": 2.981716833890747, "grad_norm": 2.7194472431060395, "learning_rate": 1.9479310973591348e-09, "loss": 0.2872, "step": 40119 }, { "epoch": 2.981791155704199, "grad_norm": 2.0898329492705527, "learning_rate": 1.9321269568273183e-09, "loss": 0.2349, "step": 40120 }, { "epoch": 2.981865477517651, "grad_norm": 2.6080271046177947, "learning_rate": 1.9163871833383087e-09, "loss": 0.2721, "step": 40121 }, { "epoch": 2.9819397993311036, "grad_norm": 2.7843622138123933, "learning_rate": 1.9007117769920258e-09, "loss": 0.3262, "step": 40122 }, { "epoch": 2.982014121144556, "grad_norm": 2.0421596316984245, "learning_rate": 1.8851007378906105e-09, "loss": 0.1899, "step": 40123 }, { "epoch": 2.982088442958008, "grad_norm": 3.251713583759432, "learning_rate": 1.869554066133983e-09, "loss": 0.3197, "step": 40124 }, { "epoch": 2.9821627647714606, "grad_norm": 2.4378981359510195, "learning_rate": 1.854071761823173e-09, "loss": 0.2951, "step": 40125 }, { "epoch": 2.9822370865849126, "grad_norm": 2.1223646202822875, "learning_rate": 1.8386538250569907e-09, "loss": 0.256, "step": 40126 }, { "epoch": 2.982311408398365, "grad_norm": 2.025584972209539, "learning_rate": 1.823300255934246e-09, "loss": 0.1996, "step": 40127 }, { "epoch": 2.982385730211817, "grad_norm": 2.2132146246468603, "learning_rate": 1.8080110545537488e-09, "loss": 0.2361, "step": 40128 }, { "epoch": 2.9824600520252695, "grad_norm": 2.5738365351890584, "learning_rate": 1.792786221015419e-09, "loss": 0.2901, "step": 40129 }, { "epoch": 2.9825343738387216, "grad_norm": 2.0445058724720124, "learning_rate": 1.777625755415846e-09, "loss": 0.2235, "step": 40130 }, { "epoch": 2.982608695652174, "grad_norm": 2.3627728513916546, "learning_rate": 1.7625296578538398e-09, "loss": 0.2911, "step": 40131 }, { "epoch": 2.982683017465626, "grad_norm": 2.2478393110364583, "learning_rate": 1.7474979284259895e-09, "loss": 0.251, "step": 40132 }, { "epoch": 2.9827573392790785, "grad_norm": 2.1711626339911567, "learning_rate": 1.7325305672277748e-09, "loss": 0.2772, "step": 40133 }, { "epoch": 2.9828316610925305, "grad_norm": 2.4162218965838256, "learning_rate": 1.7176275743580051e-09, "loss": 0.2391, "step": 40134 }, { "epoch": 2.982905982905983, "grad_norm": 2.3644419970593513, "learning_rate": 1.7027889499110495e-09, "loss": 0.2791, "step": 40135 }, { "epoch": 2.982980304719435, "grad_norm": 3.51237270150491, "learning_rate": 1.688014693982387e-09, "loss": 0.273, "step": 40136 }, { "epoch": 2.9830546265328874, "grad_norm": 2.449242547743849, "learning_rate": 1.6733048066674973e-09, "loss": 0.2511, "step": 40137 }, { "epoch": 2.98312894834634, "grad_norm": 2.4461179975691496, "learning_rate": 1.6586592880607488e-09, "loss": 0.2875, "step": 40138 }, { "epoch": 2.983203270159792, "grad_norm": 2.324593336150043, "learning_rate": 1.644078138257621e-09, "loss": 0.3041, "step": 40139 }, { "epoch": 2.983277591973244, "grad_norm": 2.7723444852159655, "learning_rate": 1.6295613573513724e-09, "loss": 0.2803, "step": 40140 }, { "epoch": 2.9833519137866964, "grad_norm": 2.3304099300709766, "learning_rate": 1.6151089454341517e-09, "loss": 0.3011, "step": 40141 }, { "epoch": 2.983426235600149, "grad_norm": 2.173304860415263, "learning_rate": 1.6007209026003278e-09, "loss": 0.2191, "step": 40142 }, { "epoch": 2.983500557413601, "grad_norm": 2.5672881111975756, "learning_rate": 1.5863972289431596e-09, "loss": 0.2574, "step": 40143 }, { "epoch": 2.983574879227053, "grad_norm": 2.4029105326237894, "learning_rate": 1.5721379245525748e-09, "loss": 0.2345, "step": 40144 }, { "epoch": 2.9836492010405053, "grad_norm": 2.241872073704306, "learning_rate": 1.5579429895229426e-09, "loss": 0.2397, "step": 40145 }, { "epoch": 2.983723522853958, "grad_norm": 2.681883655851187, "learning_rate": 1.543812423944191e-09, "loss": 0.3554, "step": 40146 }, { "epoch": 2.98379784466741, "grad_norm": 3.536981685760354, "learning_rate": 1.5297462279073583e-09, "loss": 0.3046, "step": 40147 }, { "epoch": 2.9838721664808623, "grad_norm": 2.276481841496306, "learning_rate": 1.5157444015034828e-09, "loss": 0.2591, "step": 40148 }, { "epoch": 2.9839464882943143, "grad_norm": 2.653623876575113, "learning_rate": 1.5018069448224926e-09, "loss": 0.3408, "step": 40149 }, { "epoch": 2.9840208101077668, "grad_norm": 2.8375392486986306, "learning_rate": 1.4879338579543156e-09, "loss": 0.3283, "step": 40150 }, { "epoch": 2.984095131921219, "grad_norm": 2.681466339326837, "learning_rate": 1.4741251409877699e-09, "loss": 0.3074, "step": 40151 }, { "epoch": 2.9841694537346712, "grad_norm": 1.967828713587912, "learning_rate": 1.4603807940116732e-09, "loss": 0.2332, "step": 40152 }, { "epoch": 2.9842437755481233, "grad_norm": 2.302098518160699, "learning_rate": 1.4467008171148434e-09, "loss": 0.277, "step": 40153 }, { "epoch": 2.9843180973615757, "grad_norm": 2.719183954634463, "learning_rate": 1.4330852103860982e-09, "loss": 0.3222, "step": 40154 }, { "epoch": 2.9843924191750277, "grad_norm": 2.5907749794119157, "learning_rate": 1.419533973912035e-09, "loss": 0.3364, "step": 40155 }, { "epoch": 2.98446674098848, "grad_norm": 2.3237431776668536, "learning_rate": 1.4060471077792515e-09, "loss": 0.2629, "step": 40156 }, { "epoch": 2.984541062801932, "grad_norm": 2.137488160970774, "learning_rate": 1.3926246120765651e-09, "loss": 0.2364, "step": 40157 }, { "epoch": 2.9846153846153847, "grad_norm": 2.4539160668211255, "learning_rate": 1.3792664868894634e-09, "loss": 0.2778, "step": 40158 }, { "epoch": 2.9846897064288367, "grad_norm": 2.3566175618916834, "learning_rate": 1.3659727323034333e-09, "loss": 0.2636, "step": 40159 }, { "epoch": 2.984764028242289, "grad_norm": 2.8548915969834017, "learning_rate": 1.3527433484050722e-09, "loss": 0.3291, "step": 40160 }, { "epoch": 2.9848383500557416, "grad_norm": 3.0979616876389526, "learning_rate": 1.339578335277647e-09, "loss": 0.3383, "step": 40161 }, { "epoch": 2.9849126718691936, "grad_norm": 1.901118330615189, "learning_rate": 1.3264776930088651e-09, "loss": 0.2207, "step": 40162 }, { "epoch": 2.9849869936826456, "grad_norm": 2.2727011367965444, "learning_rate": 1.3134414216797731e-09, "loss": 0.328, "step": 40163 }, { "epoch": 2.985061315496098, "grad_norm": 2.1695890837460556, "learning_rate": 1.300469521376968e-09, "loss": 0.2662, "step": 40164 }, { "epoch": 2.9851356373095506, "grad_norm": 2.37644345776792, "learning_rate": 1.2875619921826066e-09, "loss": 0.2553, "step": 40165 }, { "epoch": 2.9852099591230026, "grad_norm": 1.9461819120546, "learning_rate": 1.2747188341810658e-09, "loss": 0.2388, "step": 40166 }, { "epoch": 2.9852842809364546, "grad_norm": 1.6596127649906791, "learning_rate": 1.2619400474533916e-09, "loss": 0.1915, "step": 40167 }, { "epoch": 2.985358602749907, "grad_norm": 1.8556599228288277, "learning_rate": 1.2492256320817408e-09, "loss": 0.2134, "step": 40168 }, { "epoch": 2.9854329245633595, "grad_norm": 2.002251835147929, "learning_rate": 1.2365755881493801e-09, "loss": 0.23, "step": 40169 }, { "epoch": 2.9855072463768115, "grad_norm": 4.2775359003890605, "learning_rate": 1.2239899157373558e-09, "loss": 0.3014, "step": 40170 }, { "epoch": 2.985581568190264, "grad_norm": 2.4671002890544043, "learning_rate": 1.2114686149267142e-09, "loss": 0.2712, "step": 40171 }, { "epoch": 2.985655890003716, "grad_norm": 2.7413660585433055, "learning_rate": 1.199011685797391e-09, "loss": 0.3013, "step": 40172 }, { "epoch": 2.9857302118171685, "grad_norm": 2.8016054130668437, "learning_rate": 1.1866191284304329e-09, "loss": 0.2569, "step": 40173 }, { "epoch": 2.9858045336306205, "grad_norm": 1.8817916910073178, "learning_rate": 1.1742909429046655e-09, "loss": 0.2222, "step": 40174 }, { "epoch": 2.985878855444073, "grad_norm": 2.313635117790195, "learning_rate": 1.162027129300025e-09, "loss": 0.2957, "step": 40175 }, { "epoch": 2.985953177257525, "grad_norm": 3.4866659122920423, "learning_rate": 1.1498276876964476e-09, "loss": 0.3654, "step": 40176 }, { "epoch": 2.9860274990709774, "grad_norm": 2.24481632975338, "learning_rate": 1.1376926181705384e-09, "loss": 0.2815, "step": 40177 }, { "epoch": 2.9861018208844294, "grad_norm": 2.0518777992047355, "learning_rate": 1.1256219208011231e-09, "loss": 0.1623, "step": 40178 }, { "epoch": 2.986176142697882, "grad_norm": 2.584901886851931, "learning_rate": 1.113615595667028e-09, "loss": 0.2878, "step": 40179 }, { "epoch": 2.986250464511334, "grad_norm": 2.32823790531232, "learning_rate": 1.101673642843748e-09, "loss": 0.2086, "step": 40180 }, { "epoch": 2.9863247863247864, "grad_norm": 2.183772870889278, "learning_rate": 1.089796062410109e-09, "loss": 0.2603, "step": 40181 }, { "epoch": 2.9863991081382384, "grad_norm": 3.1711377540138046, "learning_rate": 1.0779828544404957e-09, "loss": 0.2564, "step": 40182 }, { "epoch": 2.986473429951691, "grad_norm": 3.0806892324389312, "learning_rate": 1.0662340190126241e-09, "loss": 0.3558, "step": 40183 }, { "epoch": 2.9865477517651433, "grad_norm": 2.49349024126879, "learning_rate": 1.0545495562019892e-09, "loss": 0.2744, "step": 40184 }, { "epoch": 2.9866220735785953, "grad_norm": 2.0426831530908403, "learning_rate": 1.042929466082976e-09, "loss": 0.2617, "step": 40185 }, { "epoch": 2.9866963953920473, "grad_norm": 2.2555418280793633, "learning_rate": 1.031373748729969e-09, "loss": 0.2848, "step": 40186 }, { "epoch": 2.9867707172055, "grad_norm": 2.4422785227587993, "learning_rate": 1.019882404218464e-09, "loss": 0.2812, "step": 40187 }, { "epoch": 2.9868450390189523, "grad_norm": 2.341649388609416, "learning_rate": 1.0084554326228458e-09, "loss": 0.2447, "step": 40188 }, { "epoch": 2.9869193608324043, "grad_norm": 3.2262755762047246, "learning_rate": 9.970928340152785e-10, "loss": 0.3175, "step": 40189 }, { "epoch": 2.9869936826458563, "grad_norm": 2.3881008637582095, "learning_rate": 9.857946084701475e-10, "loss": 0.2451, "step": 40190 }, { "epoch": 2.9870680044593088, "grad_norm": 2.229513098444318, "learning_rate": 9.74560756059617e-10, "loss": 0.2511, "step": 40191 }, { "epoch": 2.987142326272761, "grad_norm": 2.7156130179335976, "learning_rate": 9.633912768558518e-10, "loss": 0.3482, "step": 40192 }, { "epoch": 2.9872166480862132, "grad_norm": 2.2670968841330885, "learning_rate": 9.522861709310161e-10, "loss": 0.2608, "step": 40193 }, { "epoch": 2.9872909698996657, "grad_norm": 2.420136570103883, "learning_rate": 9.412454383561642e-10, "loss": 0.2348, "step": 40194 }, { "epoch": 2.9873652917131177, "grad_norm": 2.0648076590167084, "learning_rate": 9.302690792034608e-10, "loss": 0.2112, "step": 40195 }, { "epoch": 2.98743961352657, "grad_norm": 2.0970208361153237, "learning_rate": 9.193570935428497e-10, "loss": 0.2236, "step": 40196 }, { "epoch": 2.987513935340022, "grad_norm": 2.1818864615313265, "learning_rate": 9.085094814442752e-10, "loss": 0.217, "step": 40197 }, { "epoch": 2.9875882571534746, "grad_norm": 2.5243208982555165, "learning_rate": 8.977262429776812e-10, "loss": 0.2718, "step": 40198 }, { "epoch": 2.9876625789669267, "grad_norm": 2.57690866695747, "learning_rate": 8.870073782130118e-10, "loss": 0.284, "step": 40199 }, { "epoch": 2.987736900780379, "grad_norm": 2.5416148249716453, "learning_rate": 8.763528872191007e-10, "loss": 0.3396, "step": 40200 }, { "epoch": 2.987811222593831, "grad_norm": 2.3201702172413405, "learning_rate": 8.657627700636717e-10, "loss": 0.2373, "step": 40201 }, { "epoch": 2.9878855444072836, "grad_norm": 2.893413889635484, "learning_rate": 8.552370268166688e-10, "loss": 0.2799, "step": 40202 }, { "epoch": 2.987959866220736, "grad_norm": 2.3819367276941663, "learning_rate": 8.44775657543595e-10, "loss": 0.2531, "step": 40203 }, { "epoch": 2.988034188034188, "grad_norm": 2.2380620355439076, "learning_rate": 8.343786623143946e-10, "loss": 0.1985, "step": 40204 }, { "epoch": 2.98810850984764, "grad_norm": 2.6120729040490565, "learning_rate": 8.240460411934603e-10, "loss": 0.239, "step": 40205 }, { "epoch": 2.9881828316610926, "grad_norm": 2.4291972556026846, "learning_rate": 8.137777942496261e-10, "loss": 0.2618, "step": 40206 }, { "epoch": 2.988257153474545, "grad_norm": 2.616923839838277, "learning_rate": 8.035739215472849e-10, "loss": 0.3296, "step": 40207 }, { "epoch": 2.988331475287997, "grad_norm": 1.7938264568724642, "learning_rate": 7.934344231519397e-10, "loss": 0.2133, "step": 40208 }, { "epoch": 2.988405797101449, "grad_norm": 2.3223296839718515, "learning_rate": 7.833592991313144e-10, "loss": 0.2759, "step": 40209 }, { "epoch": 2.9884801189149015, "grad_norm": 2.874500875458084, "learning_rate": 7.733485495475812e-10, "loss": 0.4187, "step": 40210 }, { "epoch": 2.988554440728354, "grad_norm": 1.8873320218144614, "learning_rate": 7.634021744662435e-10, "loss": 0.2538, "step": 40211 }, { "epoch": 2.988628762541806, "grad_norm": 3.1172577572987703, "learning_rate": 7.53520173951694e-10, "loss": 0.2989, "step": 40212 }, { "epoch": 2.988703084355258, "grad_norm": 2.3324684807863245, "learning_rate": 7.437025480661053e-10, "loss": 0.2416, "step": 40213 }, { "epoch": 2.9887774061687105, "grad_norm": 1.7856683622333753, "learning_rate": 7.339492968749806e-10, "loss": 0.1859, "step": 40214 }, { "epoch": 2.988851727982163, "grad_norm": 2.7520927801364645, "learning_rate": 7.242604204393822e-10, "loss": 0.3391, "step": 40215 }, { "epoch": 2.988926049795615, "grad_norm": 2.4557196418309144, "learning_rate": 7.146359188214824e-10, "loss": 0.2954, "step": 40216 }, { "epoch": 2.9890003716090674, "grad_norm": 2.0773974638164545, "learning_rate": 7.050757920845641e-10, "loss": 0.2293, "step": 40217 }, { "epoch": 2.9890746934225194, "grad_norm": 2.9804049853158308, "learning_rate": 6.955800402896895e-10, "loss": 0.3292, "step": 40218 }, { "epoch": 2.989149015235972, "grad_norm": 1.9054508679864575, "learning_rate": 6.861486634979209e-10, "loss": 0.2051, "step": 40219 }, { "epoch": 2.989223337049424, "grad_norm": 2.8516040206034456, "learning_rate": 6.767816617692103e-10, "loss": 0.3423, "step": 40220 }, { "epoch": 2.9892976588628764, "grad_norm": 2.5750755592081025, "learning_rate": 6.674790351657301e-10, "loss": 0.2644, "step": 40221 }, { "epoch": 2.9893719806763284, "grad_norm": 2.502787344741603, "learning_rate": 6.582407837452121e-10, "loss": 0.2787, "step": 40222 }, { "epoch": 2.989446302489781, "grad_norm": 2.2289029262962883, "learning_rate": 6.490669075687184e-10, "loss": 0.2505, "step": 40223 }, { "epoch": 2.989520624303233, "grad_norm": 1.907405673123655, "learning_rate": 6.399574066939806e-10, "loss": 0.181, "step": 40224 }, { "epoch": 2.9895949461166853, "grad_norm": 2.699583952031315, "learning_rate": 6.309122811809509e-10, "loss": 0.3263, "step": 40225 }, { "epoch": 2.9896692679301378, "grad_norm": 2.4148922329337883, "learning_rate": 6.219315310873608e-10, "loss": 0.2798, "step": 40226 }, { "epoch": 2.98974358974359, "grad_norm": 2.5086464887455473, "learning_rate": 6.130151564698316e-10, "loss": 0.3098, "step": 40227 }, { "epoch": 2.989817911557042, "grad_norm": 3.04666873033881, "learning_rate": 6.041631573883156e-10, "loss": 0.2544, "step": 40228 }, { "epoch": 2.9898922333704943, "grad_norm": 2.0596976989789213, "learning_rate": 5.953755338972134e-10, "loss": 0.259, "step": 40229 }, { "epoch": 2.9899665551839467, "grad_norm": 2.1130876007063644, "learning_rate": 5.866522860553669e-10, "loss": 0.2354, "step": 40230 }, { "epoch": 2.9900408769973987, "grad_norm": 2.278192361083804, "learning_rate": 5.779934139171773e-10, "loss": 0.2718, "step": 40231 }, { "epoch": 2.9901151988108507, "grad_norm": 2.7301454252793906, "learning_rate": 5.693989175392655e-10, "loss": 0.3057, "step": 40232 }, { "epoch": 2.990189520624303, "grad_norm": 2.5545798541656075, "learning_rate": 5.608687969760329e-10, "loss": 0.2956, "step": 40233 }, { "epoch": 2.9902638424377557, "grad_norm": 2.259791560399331, "learning_rate": 5.524030522841007e-10, "loss": 0.3204, "step": 40234 }, { "epoch": 2.9903381642512077, "grad_norm": 2.4205887839375926, "learning_rate": 5.440016835167594e-10, "loss": 0.1908, "step": 40235 }, { "epoch": 2.9904124860646597, "grad_norm": 2.710975348643602, "learning_rate": 5.356646907273e-10, "loss": 0.352, "step": 40236 }, { "epoch": 2.990486807878112, "grad_norm": 2.298100053134309, "learning_rate": 5.273920739712335e-10, "loss": 0.3288, "step": 40237 }, { "epoch": 2.9905611296915646, "grad_norm": 2.467141429094685, "learning_rate": 5.191838333007404e-10, "loss": 0.2238, "step": 40238 }, { "epoch": 2.9906354515050166, "grad_norm": 3.3078538670183764, "learning_rate": 5.110399687691114e-10, "loss": 0.3955, "step": 40239 }, { "epoch": 2.990709773318469, "grad_norm": 2.2315997054336387, "learning_rate": 5.029604804285271e-10, "loss": 0.2571, "step": 40240 }, { "epoch": 2.990784095131921, "grad_norm": 2.1555401920940853, "learning_rate": 4.949453683311678e-10, "loss": 0.2621, "step": 40241 }, { "epoch": 2.9908584169453736, "grad_norm": 1.920667734164621, "learning_rate": 4.869946325281039e-10, "loss": 0.2403, "step": 40242 }, { "epoch": 2.9909327387588256, "grad_norm": 2.204726477173359, "learning_rate": 4.791082730715158e-10, "loss": 0.2506, "step": 40243 }, { "epoch": 2.991007060572278, "grad_norm": 2.4520689265205564, "learning_rate": 4.712862900113635e-10, "loss": 0.2771, "step": 40244 }, { "epoch": 2.99108138238573, "grad_norm": 2.462976419264463, "learning_rate": 4.6352868339760716e-10, "loss": 0.2966, "step": 40245 }, { "epoch": 2.9911557041991825, "grad_norm": 2.1385447959634476, "learning_rate": 4.5583545328131697e-10, "loss": 0.2549, "step": 40246 }, { "epoch": 2.9912300260126345, "grad_norm": 2.476300719643502, "learning_rate": 4.482065997113427e-10, "loss": 0.3136, "step": 40247 }, { "epoch": 2.991304347826087, "grad_norm": 2.409275660101979, "learning_rate": 4.4064212273764453e-10, "loss": 0.2552, "step": 40248 }, { "epoch": 2.9913786696395395, "grad_norm": 2.4795234640397403, "learning_rate": 4.3314202240796187e-10, "loss": 0.2362, "step": 40249 }, { "epoch": 2.9914529914529915, "grad_norm": 2.597788929080394, "learning_rate": 4.257062987700344e-10, "loss": 0.2821, "step": 40250 }, { "epoch": 2.9915273132664435, "grad_norm": 2.5075137246224877, "learning_rate": 4.1833495187382224e-10, "loss": 0.3, "step": 40251 }, { "epoch": 2.991601635079896, "grad_norm": 2.4520402255779934, "learning_rate": 4.1102798176484435e-10, "loss": 0.2568, "step": 40252 }, { "epoch": 2.9916759568933484, "grad_norm": 3.573705733785666, "learning_rate": 4.0378538849084045e-10, "loss": 0.4262, "step": 40253 }, { "epoch": 2.9917502787068004, "grad_norm": 2.4546707945160122, "learning_rate": 3.966071720984399e-10, "loss": 0.3586, "step": 40254 }, { "epoch": 2.9918246005202525, "grad_norm": 2.5664091413650723, "learning_rate": 3.894933326331618e-10, "loss": 0.267, "step": 40255 }, { "epoch": 2.991898922333705, "grad_norm": 2.004958531296946, "learning_rate": 3.8244387014274573e-10, "loss": 0.2154, "step": 40256 }, { "epoch": 2.9919732441471574, "grad_norm": 2.5453268725803495, "learning_rate": 3.7545878467049046e-10, "loss": 0.296, "step": 40257 }, { "epoch": 2.9920475659606094, "grad_norm": 2.585628433270433, "learning_rate": 3.685380762630253e-10, "loss": 0.2805, "step": 40258 }, { "epoch": 2.992121887774062, "grad_norm": 2.457830319778102, "learning_rate": 3.616817449625387e-10, "loss": 0.2741, "step": 40259 }, { "epoch": 2.992196209587514, "grad_norm": 2.4060069921222187, "learning_rate": 3.548897908156601e-10, "loss": 0.2952, "step": 40260 }, { "epoch": 2.9922705314009663, "grad_norm": 2.00150958368844, "learning_rate": 3.4816221386568816e-10, "loss": 0.2508, "step": 40261 }, { "epoch": 2.9923448532144183, "grad_norm": 3.355521018519241, "learning_rate": 3.4149901415481137e-10, "loss": 0.3037, "step": 40262 }, { "epoch": 2.992419175027871, "grad_norm": 2.9552439655923157, "learning_rate": 3.349001917263284e-10, "loss": 0.2895, "step": 40263 }, { "epoch": 2.992493496841323, "grad_norm": 2.069877286171995, "learning_rate": 3.2836574662353793e-10, "loss": 0.2479, "step": 40264 }, { "epoch": 2.9925678186547753, "grad_norm": 2.3490299040444995, "learning_rate": 3.2189567888751826e-10, "loss": 0.3342, "step": 40265 }, { "epoch": 2.9926421404682273, "grad_norm": 2.8359681295379358, "learning_rate": 3.1548998855934763e-10, "loss": 0.3581, "step": 40266 }, { "epoch": 2.9927164622816798, "grad_norm": 2.311901015181544, "learning_rate": 3.0914867568232475e-10, "loss": 0.2373, "step": 40267 }, { "epoch": 2.9927907840951318, "grad_norm": 2.2960637563892163, "learning_rate": 3.0287174029641765e-10, "loss": 0.342, "step": 40268 }, { "epoch": 2.9928651059085842, "grad_norm": 2.7531695754174472, "learning_rate": 2.966591824415943e-10, "loss": 0.2923, "step": 40269 }, { "epoch": 2.9929394277220362, "grad_norm": 2.381711368585733, "learning_rate": 2.905110021578228e-10, "loss": 0.2767, "step": 40270 }, { "epoch": 2.9930137495354887, "grad_norm": 2.173004194120739, "learning_rate": 2.844271994839609e-10, "loss": 0.3025, "step": 40271 }, { "epoch": 2.993088071348941, "grad_norm": 2.7254624158188476, "learning_rate": 2.784077744610869e-10, "loss": 0.2665, "step": 40272 }, { "epoch": 2.993162393162393, "grad_norm": 2.457255643768156, "learning_rate": 2.7245272712694834e-10, "loss": 0.312, "step": 40273 }, { "epoch": 2.993236714975845, "grad_norm": 2.3587160996303984, "learning_rate": 2.6656205751929287e-10, "loss": 0.2676, "step": 40274 }, { "epoch": 2.9933110367892977, "grad_norm": 2.9838669933776627, "learning_rate": 2.6073576567808847e-10, "loss": 0.2578, "step": 40275 }, { "epoch": 2.99338535860275, "grad_norm": 2.6877085763542494, "learning_rate": 2.54973851637752e-10, "loss": 0.3274, "step": 40276 }, { "epoch": 2.993459680416202, "grad_norm": 2.678664991609586, "learning_rate": 2.492763154382516e-10, "loss": 0.3426, "step": 40277 }, { "epoch": 2.993534002229654, "grad_norm": 2.3157711207923453, "learning_rate": 2.436431571140041e-10, "loss": 0.2537, "step": 40278 }, { "epoch": 2.9936083240431066, "grad_norm": 1.9967573059872223, "learning_rate": 2.380743767027571e-10, "loss": 0.2449, "step": 40279 }, { "epoch": 2.993682645856559, "grad_norm": 2.6229188166682933, "learning_rate": 2.3256997424003779e-10, "loss": 0.2617, "step": 40280 }, { "epoch": 2.993756967670011, "grad_norm": 2.624761054432499, "learning_rate": 2.2712994976137326e-10, "loss": 0.3421, "step": 40281 }, { "epoch": 2.9938312894834636, "grad_norm": 2.4867081505245383, "learning_rate": 2.2175430330229064e-10, "loss": 0.228, "step": 40282 }, { "epoch": 2.9939056112969156, "grad_norm": 2.4330470816443412, "learning_rate": 2.164430348949864e-10, "loss": 0.2566, "step": 40283 }, { "epoch": 2.993979933110368, "grad_norm": 3.0103056117705744, "learning_rate": 2.1119614457609793e-10, "loss": 0.2877, "step": 40284 }, { "epoch": 2.99405425492382, "grad_norm": 2.4304971493167296, "learning_rate": 2.060136323789319e-10, "loss": 0.2464, "step": 40285 }, { "epoch": 2.9941285767372725, "grad_norm": 2.7024419496353627, "learning_rate": 2.00895498336795e-10, "loss": 0.2667, "step": 40286 }, { "epoch": 2.9942028985507245, "grad_norm": 2.7527423926398358, "learning_rate": 1.958417424818837e-10, "loss": 0.3399, "step": 40287 }, { "epoch": 2.994277220364177, "grad_norm": 2.49973726220057, "learning_rate": 1.9085236484750469e-10, "loss": 0.2968, "step": 40288 }, { "epoch": 2.994351542177629, "grad_norm": 2.20077994301895, "learning_rate": 1.8592736546585444e-10, "loss": 0.3268, "step": 40289 }, { "epoch": 2.9944258639910815, "grad_norm": 2.550385593975549, "learning_rate": 1.8106674436801918e-10, "loss": 0.3119, "step": 40290 }, { "epoch": 2.9945001858045335, "grad_norm": 3.5028272668028926, "learning_rate": 1.762705015861954e-10, "loss": 0.3253, "step": 40291 }, { "epoch": 2.994574507617986, "grad_norm": 2.557350125626886, "learning_rate": 1.715386371492489e-10, "loss": 0.2486, "step": 40292 }, { "epoch": 2.994648829431438, "grad_norm": 2.082133836619473, "learning_rate": 1.6687115109048636e-10, "loss": 0.2272, "step": 40293 }, { "epoch": 2.9947231512448904, "grad_norm": 2.5524987658073854, "learning_rate": 1.6226804343766334e-10, "loss": 0.3415, "step": 40294 }, { "epoch": 2.994797473058343, "grad_norm": 3.01939899995227, "learning_rate": 1.577293142218661e-10, "loss": 0.3934, "step": 40295 }, { "epoch": 2.994871794871795, "grad_norm": 2.2253969032709726, "learning_rate": 1.5325496347196045e-10, "loss": 0.1901, "step": 40296 }, { "epoch": 2.994946116685247, "grad_norm": 1.9809730527158311, "learning_rate": 1.4884499121570194e-10, "loss": 0.23, "step": 40297 }, { "epoch": 2.9950204384986994, "grad_norm": 2.0800838699463986, "learning_rate": 1.4449939748306663e-10, "loss": 0.222, "step": 40298 }, { "epoch": 2.995094760312152, "grad_norm": 2.029456011591522, "learning_rate": 1.4021818230069983e-10, "loss": 0.2504, "step": 40299 }, { "epoch": 2.995169082125604, "grad_norm": 2.032705964727888, "learning_rate": 1.3600134569746736e-10, "loss": 0.2369, "step": 40300 }, { "epoch": 2.995243403939056, "grad_norm": 2.122742415950085, "learning_rate": 1.3184888769890436e-10, "loss": 0.2864, "step": 40301 }, { "epoch": 2.9953177257525083, "grad_norm": 2.0039383922405762, "learning_rate": 1.277608083338766e-10, "loss": 0.2145, "step": 40302 }, { "epoch": 2.995392047565961, "grad_norm": 2.760579357490325, "learning_rate": 1.2373710762680903e-10, "loss": 0.3386, "step": 40303 }, { "epoch": 2.995466369379413, "grad_norm": 1.9232691139272833, "learning_rate": 1.1977778560323672e-10, "loss": 0.2199, "step": 40304 }, { "epoch": 2.9955406911928653, "grad_norm": 2.0104637378859094, "learning_rate": 1.1588284229091529e-10, "loss": 0.2106, "step": 40305 }, { "epoch": 2.9956150130063173, "grad_norm": 2.2777228958547977, "learning_rate": 1.120522777131594e-10, "loss": 0.3164, "step": 40306 }, { "epoch": 2.9956893348197697, "grad_norm": 2.0382526069684332, "learning_rate": 1.0828609189550421e-10, "loss": 0.2404, "step": 40307 }, { "epoch": 2.9957636566332217, "grad_norm": 2.392390499777469, "learning_rate": 1.045842848623746e-10, "loss": 0.1812, "step": 40308 }, { "epoch": 2.995837978446674, "grad_norm": 2.536686584086079, "learning_rate": 1.0094685663597503e-10, "loss": 0.2629, "step": 40309 }, { "epoch": 2.9959123002601262, "grad_norm": 4.752491661437059, "learning_rate": 9.737380724184064e-11, "loss": 0.2842, "step": 40310 }, { "epoch": 2.9959866220735787, "grad_norm": 2.4605499957522743, "learning_rate": 9.386513670106568e-11, "loss": 0.2101, "step": 40311 }, { "epoch": 2.9960609438870307, "grad_norm": 2.5724386026081945, "learning_rate": 9.04208450369648e-11, "loss": 0.2463, "step": 40312 }, { "epoch": 2.996135265700483, "grad_norm": 3.3703526778086386, "learning_rate": 8.704093227285271e-11, "loss": 0.3082, "step": 40313 }, { "epoch": 2.996209587513935, "grad_norm": 2.0317780828500025, "learning_rate": 8.372539842871341e-11, "loss": 0.2946, "step": 40314 }, { "epoch": 2.9962839093273876, "grad_norm": 3.183845840829182, "learning_rate": 8.047424352675137e-11, "loss": 0.282, "step": 40315 }, { "epoch": 2.9963582311408397, "grad_norm": 2.603210510940012, "learning_rate": 7.728746758806082e-11, "loss": 0.284, "step": 40316 }, { "epoch": 2.996432552954292, "grad_norm": 2.327281852211034, "learning_rate": 7.416507063262579e-11, "loss": 0.2402, "step": 40317 }, { "epoch": 2.9965068747677446, "grad_norm": 1.9981999452458938, "learning_rate": 7.11070526815405e-11, "loss": 0.211, "step": 40318 }, { "epoch": 2.9965811965811966, "grad_norm": 2.884948325091126, "learning_rate": 6.811341375367875e-11, "loss": 0.4184, "step": 40319 }, { "epoch": 2.9966555183946486, "grad_norm": 2.9198342093430605, "learning_rate": 6.518415386791432e-11, "loss": 0.2275, "step": 40320 }, { "epoch": 2.996729840208101, "grad_norm": 2.1633312338762005, "learning_rate": 6.231927304423125e-11, "loss": 0.2668, "step": 40321 }, { "epoch": 2.9968041620215535, "grad_norm": 2.3457227973023578, "learning_rate": 5.951877130039307e-11, "loss": 0.2808, "step": 40322 }, { "epoch": 2.9968784838350055, "grad_norm": 2.742265680843966, "learning_rate": 5.6782648654163386e-11, "loss": 0.2505, "step": 40323 }, { "epoch": 2.9969528056484576, "grad_norm": 2.3801925856390436, "learning_rate": 5.4110905123305746e-11, "loss": 0.3027, "step": 40324 }, { "epoch": 2.99702712746191, "grad_norm": 2.433948559771289, "learning_rate": 5.150354072558372e-11, "loss": 0.2638, "step": 40325 }, { "epoch": 2.9971014492753625, "grad_norm": 2.192741899043909, "learning_rate": 4.8960555477650664e-11, "loss": 0.2559, "step": 40326 }, { "epoch": 2.9971757710888145, "grad_norm": 2.4931020822529972, "learning_rate": 4.6481949395049684e-11, "loss": 0.2007, "step": 40327 }, { "epoch": 2.997250092902267, "grad_norm": 2.2205967694345494, "learning_rate": 4.406772249443414e-11, "loss": 0.2484, "step": 40328 }, { "epoch": 2.997324414715719, "grad_norm": 2.6728905187150236, "learning_rate": 4.171787479134715e-11, "loss": 0.3023, "step": 40329 }, { "epoch": 2.9973987365291714, "grad_norm": 2.334805526395336, "learning_rate": 3.943240630133183e-11, "loss": 0.3536, "step": 40330 }, { "epoch": 2.9974730583426235, "grad_norm": 2.136766274804861, "learning_rate": 3.721131703771086e-11, "loss": 0.2369, "step": 40331 }, { "epoch": 2.997547380156076, "grad_norm": 2.4247182347698333, "learning_rate": 3.505460701602736e-11, "loss": 0.2584, "step": 40332 }, { "epoch": 2.997621701969528, "grad_norm": 2.350510570886473, "learning_rate": 3.296227625071424e-11, "loss": 0.2934, "step": 40333 }, { "epoch": 2.9976960237829804, "grad_norm": 3.5852298817582864, "learning_rate": 3.093432475287372e-11, "loss": 0.199, "step": 40334 }, { "epoch": 2.9977703455964324, "grad_norm": 2.3909905208873923, "learning_rate": 2.897075253804893e-11, "loss": 0.2722, "step": 40335 }, { "epoch": 2.997844667409885, "grad_norm": 2.1649562161819227, "learning_rate": 2.7071559617342092e-11, "loss": 0.2363, "step": 40336 }, { "epoch": 2.997918989223337, "grad_norm": 2.1260732138371434, "learning_rate": 2.5236746004075884e-11, "loss": 0.2068, "step": 40337 }, { "epoch": 2.9979933110367893, "grad_norm": 2.13065931576497, "learning_rate": 2.3466311708242316e-11, "loss": 0.2519, "step": 40338 }, { "epoch": 2.9980676328502414, "grad_norm": 2.623874644667594, "learning_rate": 2.1760256743164065e-11, "loss": 0.2676, "step": 40339 }, { "epoch": 2.998141954663694, "grad_norm": 2.18680551636473, "learning_rate": 2.011858111994336e-11, "loss": 0.2371, "step": 40340 }, { "epoch": 2.9982162764771463, "grad_norm": 1.7971929890576164, "learning_rate": 1.8541284847461982e-11, "loss": 0.2432, "step": 40341 }, { "epoch": 2.9982905982905983, "grad_norm": 2.4343502623108373, "learning_rate": 1.7028367936822164e-11, "loss": 0.2594, "step": 40342 }, { "epoch": 2.9983649201040503, "grad_norm": 2.2151009576217664, "learning_rate": 1.5579830398015915e-11, "loss": 0.1792, "step": 40343 }, { "epoch": 2.9984392419175028, "grad_norm": 4.963977786065685, "learning_rate": 1.4195672238814795e-11, "loss": 0.3084, "step": 40344 }, { "epoch": 2.9985135637309552, "grad_norm": 1.810075011681478, "learning_rate": 1.2875893470321032e-11, "loss": 0.1943, "step": 40345 }, { "epoch": 2.9985878855444072, "grad_norm": 2.864114028266298, "learning_rate": 1.162049410030619e-11, "loss": 0.3196, "step": 40346 }, { "epoch": 2.9986622073578593, "grad_norm": 2.201908895952839, "learning_rate": 1.0429474136541828e-11, "loss": 0.2159, "step": 40347 }, { "epoch": 2.9987365291713117, "grad_norm": 2.168562085485719, "learning_rate": 9.30283358679951e-12, "loss": 0.2532, "step": 40348 }, { "epoch": 2.998810850984764, "grad_norm": 2.321739346359361, "learning_rate": 8.24057245774057e-12, "loss": 0.276, "step": 40349 }, { "epoch": 2.998885172798216, "grad_norm": 2.622512612188606, "learning_rate": 7.242690757136572e-12, "loss": 0.2705, "step": 40350 }, { "epoch": 2.9989594946116687, "grad_norm": 2.6331663010775923, "learning_rate": 6.3091884916488545e-12, "loss": 0.3366, "step": 40351 }, { "epoch": 2.9990338164251207, "grad_norm": 2.061024352304541, "learning_rate": 5.440065665718308e-12, "loss": 0.2297, "step": 40352 }, { "epoch": 2.999108138238573, "grad_norm": 2.236900557010988, "learning_rate": 4.635322286006272e-12, "loss": 0.2708, "step": 40353 }, { "epoch": 2.999182460052025, "grad_norm": 1.8610911397406855, "learning_rate": 3.894958358063861e-12, "loss": 0.2076, "step": 40354 }, { "epoch": 2.9992567818654776, "grad_norm": 2.278115809603047, "learning_rate": 3.2189738863319662e-12, "loss": 0.2445, "step": 40355 }, { "epoch": 2.9993311036789296, "grad_norm": 3.0298359822189402, "learning_rate": 2.6073688741412583e-12, "loss": 0.3676, "step": 40356 }, { "epoch": 2.999405425492382, "grad_norm": 2.8882073950933305, "learning_rate": 2.060143327042852e-12, "loss": 0.2945, "step": 40357 }, { "epoch": 2.999479747305834, "grad_norm": 3.0612108879959745, "learning_rate": 1.5772972472571923e-12, "loss": 0.3518, "step": 40358 }, { "epoch": 2.9995540691192866, "grad_norm": 2.251123742194672, "learning_rate": 1.1588306392251724e-12, "loss": 0.27, "step": 40359 }, { "epoch": 2.999628390932739, "grad_norm": 2.135228916629286, "learning_rate": 8.047435040570151e-13, "loss": 0.2802, "step": 40360 }, { "epoch": 2.999702712746191, "grad_norm": 2.557315160810283, "learning_rate": 5.150358450833892e-13, "loss": 0.2373, "step": 40361 }, { "epoch": 2.999777034559643, "grad_norm": 2.055866533431557, "learning_rate": 2.8970766341451794e-13, "loss": 0.2046, "step": 40362 }, { "epoch": 2.9998513563730955, "grad_norm": 2.6167004431319256, "learning_rate": 1.2875896238107033e-13, "loss": 0.3132, "step": 40363 }, { "epoch": 2.999925678186548, "grad_norm": 2.4809934584293156, "learning_rate": 3.218974087282334e-14, "loss": 0.2706, "step": 40364 }, { "epoch": 3.0, "grad_norm": 1.8287183526819755, "learning_rate": 0.0, "loss": 0.2173, "step": 40365 }, { "epoch": 3.0, "step": 40365, "total_flos": 1.0766080975732736e+16, "train_loss": 0.5869575524024105, "train_runtime": 148650.666, "train_samples_per_second": 8.689, "train_steps_per_second": 0.272 } ], "logging_steps": 1.0, "max_steps": 40365, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0766080975732736e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }